scai 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/DaemonCmd.js +45 -39
- package/dist/commands/IndexCmd.js +35 -31
- package/dist/commands/ResetDbCmd.js +6 -7
- package/dist/constants.js +29 -4
- package/dist/db/fileIndex.js +54 -53
- package/dist/db/schema.js +13 -13
- package/dist/db/sqlTemplates.js +21 -26
- package/dist/index.js +4 -3
- package/package.json +1 -1
|
@@ -2,79 +2,85 @@ import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
|
2
2
|
import { db } from '../db/client.js';
|
|
3
3
|
import fs from 'fs/promises';
|
|
4
4
|
import fsSync from 'fs';
|
|
5
|
-
import os from 'os';
|
|
6
5
|
import path from 'path';
|
|
7
6
|
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
8
7
|
import { IGNORED_EXTENSIONS } from '../config/IgnoredExtensions.js';
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
import { PID_PATH, SCAI_HOME } from '../constants.js';
|
|
9
|
+
const LOG_PATH = path.join(SCAI_HOME, 'daemon.log');
|
|
10
|
+
const SLEEP_MS = 30 * 1000; // 💤 Pause between batches
|
|
11
|
+
const IDLE_SLEEP_MS = 4 * SLEEP_MS; // 💤 Longer pause if idle
|
|
12
|
+
const MAX_FILES_PER_BATCH = 5; // ποΈ Throttle indexing per cycle
|
|
13
|
+
// 💤 Utility
|
|
14
|
+
function sleep(ms) {
|
|
15
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
16
|
+
}
|
|
17
|
+
// 🪵 Append to log file
|
|
18
|
+
function log(message) {
|
|
19
|
+
const timestamp = new Date().toISOString();
|
|
20
|
+
fsSync.appendFileSync(LOG_PATH, `[${timestamp}] ${message}\n`);
|
|
21
|
+
}
|
|
22
|
+
// β Skip unwanted file types
|
|
23
|
+
function shouldIgnoreFile(filePath) {
|
|
15
24
|
const ext = path.extname(filePath).toLowerCase();
|
|
16
25
|
return IGNORED_EXTENSIONS.includes(ext);
|
|
17
|
-
}
|
|
26
|
+
}
|
|
27
|
+
// π§ One summarization batch
|
|
18
28
|
export async function runDaemonBatch() {
|
|
19
|
-
console.log('π₯ Daemon batch: scanning for files to summarize...');
|
|
20
29
|
const rows = db.prepare(`
|
|
21
30
|
SELECT path, type FROM files
|
|
22
31
|
WHERE summary IS NULL OR summary = ''
|
|
23
32
|
ORDER BY last_modified DESC
|
|
24
33
|
LIMIT ?
|
|
25
|
-
|
|
34
|
+
`).all(MAX_FILES_PER_BATCH);
|
|
26
35
|
if (rows.length === 0) {
|
|
27
|
-
|
|
28
|
-
return;
|
|
36
|
+
log('✅ No files left to summarize.');
|
|
37
|
+
return false; // π€ Idle
|
|
29
38
|
}
|
|
30
39
|
for (const row of rows) {
|
|
40
|
+
if (!fsSync.existsSync(row.path)) {
|
|
41
|
+
log(`β οΈ Skipped missing file: ${row.path}`);
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
31
44
|
if (shouldIgnoreFile(row.path)) {
|
|
32
|
-
|
|
45
|
+
log(`β οΈ Skipped (extension): ${row.path}`);
|
|
33
46
|
continue;
|
|
34
47
|
}
|
|
35
48
|
try {
|
|
36
49
|
const content = await fs.readFile(row.path, 'utf-8');
|
|
37
50
|
const result = await summaryModule.run({ content, filepath: row.path });
|
|
38
|
-
const summary = result?.summary?.trim()
|
|
51
|
+
const summary = result?.summary?.trim() || null;
|
|
39
52
|
let embedding = null;
|
|
40
53
|
if (summary) {
|
|
41
54
|
const vector = await generateEmbedding(summary);
|
|
42
55
|
if (vector)
|
|
43
56
|
embedding = JSON.stringify(vector);
|
|
44
57
|
}
|
|
45
|
-
// Using named parameters for better readability and flexibility
|
|
46
58
|
db.prepare(`
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
console.log(`π’ Embedded: ${row.path}`);
|
|
59
|
+
UPDATE files
|
|
60
|
+
SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
|
|
61
|
+
WHERE path = @path
|
|
62
|
+
`).run({ summary, embedding, path: row.path });
|
|
63
|
+
log(`π Summarized: ${row.path}`);
|
|
53
64
|
}
|
|
54
65
|
catch (err) {
|
|
55
|
-
|
|
66
|
+
log(`β Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}`);
|
|
56
67
|
}
|
|
68
|
+
await sleep(200); // π§ Micro delay between each file
|
|
57
69
|
}
|
|
70
|
+
return true; // ✅ Work was done
|
|
58
71
|
}
|
|
72
|
+
// π Daemon loop: runs until killed
|
|
59
73
|
export async function runDaemonScheduler() {
|
|
60
|
-
|
|
61
|
-
fsSync.mkdirSync(path.dirname(PID_PATH), { recursive: true });
|
|
74
|
+
fsSync.mkdirSync(SCAI_HOME, { recursive: true });
|
|
62
75
|
fsSync.writeFileSync(PID_PATH, process.pid.toString(), 'utf-8');
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
console.log('π§ Background summarizer started. Will run every 30 minutes for 10 minutes.');
|
|
66
|
-
const startDaemonCycle = async () => {
|
|
67
|
-
const startTime = Date.now();
|
|
68
|
-
const endTime = startTime + DAEMON_DURATION_MINUTES * 60 * 1000;
|
|
69
|
-
while (Date.now() < endTime) {
|
|
70
|
-
await runDaemonBatch();
|
|
71
|
-
await new Promise(res => setTimeout(res, 60 * 1000)); // 1 min pause between mini-batches
|
|
72
|
-
}
|
|
73
|
-
console.log(`β±οΈ Daemon completed 10-minute cycle. Next in ${DAEMON_INTERVAL_MINUTES} min.`);
|
|
74
|
-
};
|
|
75
|
-
// Repeat every 30 minutes
|
|
76
|
+
fsSync.appendFileSync(LOG_PATH, `\n\nπ§ Daemon started at ${new Date().toISOString()} β PID ${process.pid}\n`);
|
|
77
|
+
let cycles = 0;
|
|
76
78
|
while (true) {
|
|
77
|
-
await
|
|
78
|
-
|
|
79
|
+
const didWork = await runDaemonBatch();
|
|
80
|
+
cycles++;
|
|
81
|
+
if (cycles % 20 === 0) {
|
|
82
|
+
log(`π Still running. Cycles: ${cycles}`);
|
|
83
|
+
}
|
|
84
|
+
await sleep(didWork ? SLEEP_MS : IDLE_SLEEP_MS);
|
|
79
85
|
}
|
|
80
86
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import fg from 'fast-glob';
|
|
2
|
+
import fs from 'fs';
|
|
2
3
|
import path from 'path';
|
|
3
4
|
import { initSchema } from '../db/schema.js';
|
|
4
5
|
import { indexFile } from '../db/fileIndex.js';
|
|
@@ -6,35 +7,33 @@ import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
|
|
|
6
7
|
import { detectFileType } from '../utils/detectFileType.js';
|
|
7
8
|
import { runDaemonScheduler } from './DaemonCmd.js';
|
|
8
9
|
import { IGNORED_FOLDER_GLOBS } from '../config/IgnoredPaths.js';
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
initSchema();
|
|
18
|
-
// π§ Check if another directory has already been indexed
|
|
19
|
-
const indexedPaths = db.prepare(`
|
|
20
|
-
SELECT DISTINCT path FROM files LIMIT 100
|
|
21
|
-
`).all();
|
|
22
|
-
const knownRoot = indexedPaths.length > 0
|
|
23
|
-
? path.dirname(indexedPaths[0].path)
|
|
24
|
-
: null;
|
|
25
|
-
if (knownRoot && !resolvedDir.startsWith(knownRoot) && !options.force) {
|
|
26
|
-
console.warn(`β οΈ You're indexing a different folder than before:
|
|
27
|
-
- Previously: ${knownRoot}
|
|
28
|
-
- Now: ${resolvedDir}
|
|
29
|
-
|
|
30
|
-
This will add more files into the existing index and may reduce accuracy or performance.
|
|
31
|
-
|
|
32
|
-
Use --force to continue, or consider clearing the index:
|
|
33
|
-
scai reset-db
|
|
34
|
-
|
|
35
|
-
Aborting.`);
|
|
10
|
+
import { Config } from '../config.js';
|
|
11
|
+
import { PID_PATH } from '../constants.js';
|
|
12
|
+
export async function runIndexCommand(targetDir, options = {}) {
|
|
13
|
+
try {
|
|
14
|
+
initSchema();
|
|
15
|
+
}
|
|
16
|
+
catch (err) {
|
|
17
|
+
console.error('β Failed to initialize schema:', err);
|
|
36
18
|
process.exit(1);
|
|
37
19
|
}
|
|
20
|
+
let resolvedDir;
|
|
21
|
+
if (options.force) {
|
|
22
|
+
// Force: use passed dir or fallback to cwd, no config updates
|
|
23
|
+
resolvedDir = path.resolve(targetDir || process.cwd());
|
|
24
|
+
console.warn('β οΈ Running in --force mode. Config will not be updated.');
|
|
25
|
+
}
|
|
26
|
+
else if (targetDir) {
|
|
27
|
+
// User provided a directory: resolve and persist to config
|
|
28
|
+
resolvedDir = path.resolve(targetDir);
|
|
29
|
+
Config.setIndexDir(resolvedDir);
|
|
30
|
+
}
|
|
31
|
+
else {
|
|
32
|
+
// Use configured indexDir or fallback to cwd
|
|
33
|
+
resolvedDir = Config.getIndexDir() || process.cwd();
|
|
34
|
+
Config.setIndexDir(resolvedDir); // persist if not yet saved
|
|
35
|
+
}
|
|
36
|
+
console.log(`π Indexing files in: ${resolvedDir}`);
|
|
38
37
|
const files = await fg('**/*.*', {
|
|
39
38
|
cwd: resolvedDir,
|
|
40
39
|
ignore: IGNORED_FOLDER_GLOBS,
|
|
@@ -47,20 +46,25 @@ Aborting.`);
|
|
|
47
46
|
continue;
|
|
48
47
|
try {
|
|
49
48
|
const type = detectFileType(file);
|
|
50
|
-
indexFile(file, null, type); //
|
|
49
|
+
indexFile(file, null, type); // Index file without summary
|
|
51
50
|
const ext = path.extname(file);
|
|
52
51
|
countByExt[ext] = (countByExt[ext] || 0) + 1;
|
|
53
52
|
console.log(`π Indexed: ${path.relative(resolvedDir, file)}`);
|
|
54
53
|
count++;
|
|
55
54
|
}
|
|
56
55
|
catch (err) {
|
|
57
|
-
console.warn(`β οΈ Skipped ${file}:`, err instanceof Error ? err.message : err);
|
|
56
|
+
console.warn(`β οΈ Skipped in indexCmd ${file}:`, err instanceof Error ? err.message : err);
|
|
58
57
|
}
|
|
59
58
|
}
|
|
60
59
|
console.log('π Indexed files by extension:', countByExt);
|
|
61
60
|
console.log(`✅ Done. Indexed ${count} files.`);
|
|
62
61
|
if (options.detached) {
|
|
63
|
-
|
|
64
|
-
|
|
62
|
+
if (fs.existsSync(PID_PATH)) {
|
|
63
|
+
console.warn(`β οΈ Daemon already running (PID file found at ${PID_PATH}). Skipping launch.`);
|
|
64
|
+
}
|
|
65
|
+
else {
|
|
66
|
+
console.log('π Starting summarizer daemon in background mode...');
|
|
67
|
+
runDaemonScheduler();
|
|
68
|
+
}
|
|
65
69
|
}
|
|
66
70
|
}
|
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
2
|
import { db } from '../db/client.js';
|
|
3
|
+
import { DB_PATH } from '../constants.js';
|
|
4
4
|
export function resetDatabase() {
|
|
5
|
-
const dbPath = path.resolve(process.cwd(), '.scai/db.sqlite');
|
|
6
5
|
try {
|
|
7
|
-
db.close(); // π
|
|
6
|
+
db.close(); // π Ensure the DB connection is closed
|
|
8
7
|
console.log('π Closed SQLite database connection.');
|
|
9
8
|
}
|
|
10
9
|
catch (err) {
|
|
11
10
|
console.warn('β οΈ Could not close database:', err);
|
|
12
11
|
}
|
|
13
|
-
if (fs.existsSync(
|
|
12
|
+
if (fs.existsSync(DB_PATH)) {
|
|
14
13
|
try {
|
|
15
|
-
fs.unlinkSync(
|
|
16
|
-
console.log(
|
|
14
|
+
fs.unlinkSync(DB_PATH);
|
|
15
|
+
console.log(`π§Ή Deleted existing database at ${DB_PATH}`);
|
|
17
16
|
}
|
|
18
17
|
catch (err) {
|
|
19
18
|
console.error('β Failed to delete DB file:', err instanceof Error ? err.message : err);
|
|
@@ -21,7 +20,7 @@ export function resetDatabase() {
|
|
|
21
20
|
}
|
|
22
21
|
}
|
|
23
22
|
else {
|
|
24
|
-
console.log('βΉοΈ No existing database found
|
|
23
|
+
console.log('βΉοΈ No existing database found at:', DB_PATH);
|
|
25
24
|
}
|
|
26
25
|
console.log('✅ Database has been reset. You can now re-run: scai index');
|
|
27
26
|
}
|
package/dist/constants.js
CHANGED
|
@@ -1,19 +1,44 @@
|
|
|
1
1
|
import os from 'os';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import fs from 'fs';
|
|
4
|
+
/**
|
|
5
|
+
* The base directory where internal SCAI config/state is stored:
|
|
6
|
+
* ~/.scai
|
|
7
|
+
*/
|
|
4
8
|
export const SCAI_HOME = path.join(os.homedir(), '.scai');
|
|
9
|
+
/**
|
|
10
|
+
* Full path to the SQLite database used by SCAI:
|
|
11
|
+
* ~/.scai/db.sqlite
|
|
12
|
+
*/
|
|
5
13
|
export const DB_PATH = path.join(SCAI_HOME, 'db.sqlite');
|
|
14
|
+
/**
|
|
15
|
+
* Path to the daemon process ID file (if running in background mode):
|
|
16
|
+
* ~/.scai/daemon.pid
|
|
17
|
+
*/
|
|
6
18
|
export const PID_PATH = path.join(SCAI_HOME, 'daemon.pid');
|
|
19
|
+
/**
|
|
20
|
+
* Path to the config file that stores user settings like model, language, indexDir, etc.:
|
|
21
|
+
* ~/.scai/config.json
|
|
22
|
+
*/
|
|
7
23
|
export const CONFIG_PATH = path.join(SCAI_HOME, 'config.json');
|
|
8
|
-
|
|
24
|
+
/**
|
|
25
|
+
* Get the active index directory.
|
|
26
|
+
*
|
|
27
|
+
* - If the user has configured an `indexDir`, use it.
|
|
28
|
+
* - If not, default to the user’s home directory (`~`), not `.scai`.
|
|
29
|
+
*/
|
|
9
30
|
export function getIndexDir() {
|
|
10
31
|
try {
|
|
11
32
|
const config = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf-8'));
|
|
12
|
-
return config.indexDir ||
|
|
33
|
+
return config.indexDir || os.homedir(); // π Default: ~
|
|
13
34
|
}
|
|
14
35
|
catch (e) {
|
|
15
|
-
return
|
|
36
|
+
return os.homedir(); // π Fallback if config file is missing or invalid
|
|
16
37
|
}
|
|
17
38
|
}
|
|
18
|
-
|
|
39
|
+
/**
|
|
40
|
+
* On-demand index directory to scan for files.
|
|
41
|
+
*
|
|
42
|
+
* Used by indexing logic (`scai index`) to determine what folder to scan.
|
|
43
|
+
*/
|
|
19
44
|
export const INDEX_DIR = getIndexDir();
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -1,51 +1,57 @@
|
|
|
1
|
-
// File: src/db/fileIndex.ts
|
|
2
1
|
import { db } from './client.js';
|
|
3
2
|
import fs from 'fs';
|
|
4
3
|
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
5
|
-
import * as sqlTemplates from './sqlTemplates.js';
|
|
4
|
+
import * as sqlTemplates from './sqlTemplates.js';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
/**
|
|
7
|
+
* Index a file into the local SQLite database.
|
|
8
|
+
*
|
|
9
|
+
* - Normalizes the file path for cross-platform compatibility.
|
|
10
|
+
* - Extracts file metadata (last modified time).
|
|
11
|
+
* - Performs an UPSERT into the `files` table with the latest summary/type/timestamp.
|
|
12
|
+
*
|
|
13
|
+
* @param filePath - Absolute path to the file being indexed
|
|
14
|
+
* @param summary - Optional summary of the file content
|
|
15
|
+
* @param type - File type or extension (e.g., 'md', 'ts')
|
|
16
|
+
*/
|
|
6
17
|
export function indexFile(filePath, summary, type) {
|
|
7
18
|
const stats = fs.statSync(filePath);
|
|
8
19
|
const lastModified = stats.mtime.toISOString();
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
20
|
+
const indexedAt = new Date().toISOString();
|
|
21
|
+
const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
|
|
22
|
+
db.prepare(sqlTemplates.upsertFileTemplate).run({
|
|
23
|
+
path: normalizedPath,
|
|
24
|
+
summary,
|
|
25
|
+
type,
|
|
26
|
+
lastModified,
|
|
27
|
+
indexedAt,
|
|
28
|
+
});
|
|
29
|
+
console.log(`π Indexed: ${normalizedPath}`);
|
|
19
30
|
}
|
|
31
|
+
/**
|
|
32
|
+
* Perform a raw keyword-based full-text search using the FTS5 index.
|
|
33
|
+
*
|
|
34
|
+
* - Tokenizes and sanitizes the input query string.
|
|
35
|
+
* - Performs a ranked search using BM25 scoring via the virtual FTS table.
|
|
36
|
+
* - Returns basic file metadata along with rank for ordering.
|
|
37
|
+
*
|
|
38
|
+
* @param query - The search query string (e.g., "api router config")
|
|
39
|
+
* @param limit - Max number of results to return (default: 10)
|
|
40
|
+
*/
|
|
20
41
|
export function queryFiles(query, limit = 10) {
|
|
21
|
-
// Sanitize the query by removing or escaping special characters
|
|
22
42
|
const safeQuery = query
|
|
23
43
|
.trim()
|
|
24
44
|
.split(/\s+/)
|
|
25
45
|
.map(token => {
|
|
26
|
-
token = token
|
|
27
|
-
|
|
28
|
-
.replace(/'/g, "''"); // Escape single quotes for SQL safety
|
|
29
|
-
// For multi-word queries, wrap the token in quotes for exact phrase matching
|
|
30
|
-
if (token.includes(' ')) {
|
|
31
|
-
return `"${token}"`; // Exact phrase match for multi-word tokens
|
|
32
|
-
}
|
|
33
|
-
return `${token}*`; // Prefix match for single tokens
|
|
46
|
+
token = token.replace(/[?*\\"]/g, '').replace(/'/g, "''");
|
|
47
|
+
return token.includes(' ') ? `"${token}"` : `${token}*`;
|
|
34
48
|
})
|
|
35
49
|
.join(' OR ');
|
|
36
|
-
// Log the constructed query for debugging purposes
|
|
37
50
|
console.log(`Executing search query: ${safeQuery}`);
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
FROM files_fts
|
|
43
|
-
JOIN files f ON files_fts.rowid = f.id
|
|
44
|
-
WHERE files_fts MATCH :query
|
|
45
|
-
ORDER BY rank
|
|
46
|
-
LIMIT :limit
|
|
47
|
-
`;
|
|
48
|
-
const results = db.prepare(sql).all({ query: safeQuery, limit });
|
|
51
|
+
const results = db.prepare(sqlTemplates.rawQueryTemplate).all({
|
|
52
|
+
query: safeQuery,
|
|
53
|
+
limit
|
|
54
|
+
});
|
|
49
55
|
return results;
|
|
50
56
|
}
|
|
51
57
|
export function cosineSimilarity(a, b) {
|
|
@@ -54,52 +60,47 @@ export function cosineSimilarity(a, b) {
|
|
|
54
60
|
const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
|
|
55
61
|
return dot / (magA * magB);
|
|
56
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* Perform a hybrid semantic + keyword-based search.
|
|
65
|
+
*
|
|
66
|
+
* - Generates a vector embedding of the query.
|
|
67
|
+
* - Runs an FTS search using BM25 ranking.
|
|
68
|
+
* - Looks up file embeddings from the database and compares using cosine similarity.
|
|
69
|
+
* - Combines similarity score and BM25 rank into a weighted final score.
|
|
70
|
+
*
|
|
71
|
+
* @param query - Natural language search query
|
|
72
|
+
* @param topK - Max number of top-ranked results to return (default: 5)
|
|
73
|
+
*/
|
|
57
74
|
export async function searchFiles(query, topK = 5) {
|
|
58
|
-
// Generate the query embedding
|
|
59
75
|
const embedding = await generateEmbedding(query);
|
|
60
76
|
if (!embedding)
|
|
61
77
|
return [];
|
|
62
|
-
// Sanitize the query by removing or escaping special characters
|
|
63
78
|
const safeQuery = query
|
|
64
79
|
.trim()
|
|
65
80
|
.split(/\s+/)
|
|
66
81
|
.map(token => {
|
|
67
|
-
token = token
|
|
68
|
-
|
|
69
|
-
.replace(/'/g, "''"); // Escape single quotes for SQL safety
|
|
70
|
-
// For multi-word queries, wrap the token in quotes for exact phrase matching
|
|
71
|
-
if (token.includes(' ')) {
|
|
72
|
-
return `"${token}"`; // Exact phrase match for multi-word tokens
|
|
73
|
-
}
|
|
74
|
-
return `${token}*`; // Prefix match for single tokens
|
|
82
|
+
token = token.replace(/[?*\\"]/g, '').replace(/'/g, "''");
|
|
83
|
+
return token.includes(' ') ? `"${token}"` : `${token}*`;
|
|
75
84
|
})
|
|
76
85
|
.join(' OR ');
|
|
77
|
-
// Log the constructed query for debugging purposes
|
|
78
86
|
console.log(`Executing search query: ${safeQuery}`);
|
|
79
|
-
// Fetch BM25 scores from the FTS using the safeQuery string directly
|
|
80
87
|
const ftsResults = db.prepare(sqlTemplates.fetchBm25ScoresTemplate).all({ query: safeQuery });
|
|
81
88
|
const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
|
|
82
89
|
const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
|
|
83
|
-
// Calculate final score combining BM25 and cosine similarity
|
|
84
90
|
const scored = ftsResults.map(result => {
|
|
85
91
|
try {
|
|
86
|
-
// Fetch embedding for each file from the `files` table
|
|
87
92
|
const embResult = db.prepare(sqlTemplates.fetchEmbeddingTemplate).get({ path: result.path });
|
|
88
|
-
// Check if embedding exists and has the correct structure
|
|
89
93
|
if (!embResult || typeof embResult.embedding !== 'string')
|
|
90
94
|
return null;
|
|
91
|
-
// Parse the embedding
|
|
92
95
|
const vector = JSON.parse(embResult.embedding);
|
|
93
96
|
const sim = cosineSimilarity(embedding, vector);
|
|
94
|
-
// Normalize BM25 scores
|
|
95
97
|
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
96
|
-
const
|
|
97
|
-
const finalScore = 0.7 * normalizedSim + 0.3 * normalizedBm25;
|
|
98
|
+
const finalScore = 0.7 * sim + 0.3 * normalizedBm25;
|
|
98
99
|
return {
|
|
99
100
|
path: result.path,
|
|
100
101
|
summary: result.summary,
|
|
101
102
|
score: finalScore,
|
|
102
|
-
sim
|
|
103
|
+
sim,
|
|
103
104
|
bm25: normalizedBm25
|
|
104
105
|
};
|
|
105
106
|
}
|
package/dist/db/schema.js
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
import { db } from "./client.js";
|
|
2
2
|
export function initSchema() {
|
|
3
3
|
db.exec(`
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
4
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
5
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
6
|
+
path TEXT UNIQUE,
|
|
7
|
+
summary TEXT,
|
|
8
|
+
type TEXT,
|
|
9
|
+
indexed_at TEXT,
|
|
10
|
+
last_modified TEXT,
|
|
11
|
+
embedding TEXT
|
|
12
|
+
);
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
|
|
15
|
+
USING fts5(path, summary, content='files', content_rowid='id');
|
|
16
|
+
|
|
17
|
+
`);
|
|
18
18
|
console.log('✅ SQLite schema initialized');
|
|
19
19
|
}
|
package/dist/db/sqlTemplates.js
CHANGED
|
@@ -1,29 +1,14 @@
|
|
|
1
|
-
//
|
|
2
|
-
export const
|
|
3
|
-
INSERT
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
// Upsert file metadata into `files`
|
|
2
|
+
export const upsertFileTemplate = `
|
|
3
|
+
INSERT INTO files (path, summary, type, last_modified, indexed_at)
|
|
4
|
+
VALUES (:path, :summary, :type, :lastModified, :indexedAt)
|
|
5
|
+
ON CONFLICT(path) DO UPDATE SET
|
|
6
|
+
summary = excluded.summary,
|
|
7
|
+
type = excluded.type,
|
|
8
|
+
last_modified = excluded.last_modified,
|
|
9
|
+
indexed_at = excluded.indexed_at
|
|
6
10
|
`;
|
|
7
|
-
//
|
|
8
|
-
export const updateFileTemplate = `
|
|
9
|
-
UPDATE files
|
|
10
|
-
SET type = :type,
|
|
11
|
-
last_modified = :lastModified,
|
|
12
|
-
indexed_at = datetime('now')
|
|
13
|
-
WHERE path = :path
|
|
14
|
-
AND last_modified != :lastModified
|
|
15
|
-
`;
|
|
16
|
-
// Template for deleting a file from FTS
|
|
17
|
-
export const deleteFromFtsTemplate = `
|
|
18
|
-
DELETE FROM files_fts
|
|
19
|
-
WHERE rowid = (SELECT id FROM files WHERE path = :path)
|
|
20
|
-
`;
|
|
21
|
-
// Template for inserting a file into FTS with its ID
|
|
22
|
-
export const insertIntoFtsTemplate = `
|
|
23
|
-
INSERT INTO files_fts(rowid, path, summary)
|
|
24
|
-
VALUES((SELECT id FROM files WHERE path = :path), :path, :summary)
|
|
25
|
-
`;
|
|
26
|
-
// Template for fetching BM25 scores from FTS
|
|
11
|
+
// Fetch search results with BM25 ranking
|
|
27
12
|
export const fetchBm25ScoresTemplate = `
|
|
28
13
|
SELECT f.path, f.summary, f.type, bm25(files_fts) AS bm25Score
|
|
29
14
|
FROM files_fts
|
|
@@ -31,7 +16,17 @@ export const fetchBm25ScoresTemplate = `
|
|
|
31
16
|
WHERE files_fts MATCH :query
|
|
32
17
|
LIMIT 50
|
|
33
18
|
`;
|
|
34
|
-
//
|
|
19
|
+
// Fetch embedding vector for a file
|
|
35
20
|
export const fetchEmbeddingTemplate = `
|
|
36
21
|
SELECT embedding FROM files WHERE path = :path
|
|
37
22
|
`;
|
|
23
|
+
// Used for non-embedding query in `queryFiles()`
|
|
24
|
+
export const rawQueryTemplate = `
|
|
25
|
+
SELECT f.path, f.summary, f.type, f.last_modified, f.indexed_at,
|
|
26
|
+
bm25(files_fts) AS rank
|
|
27
|
+
FROM files_fts
|
|
28
|
+
JOIN files f ON files_fts.rowid = f.id
|
|
29
|
+
WHERE files_fts MATCH :query
|
|
30
|
+
ORDER BY rank
|
|
31
|
+
LIMIT :limit
|
|
32
|
+
`;
|
package/dist/index.js
CHANGED
|
@@ -73,8 +73,7 @@ cmd
|
|
|
73
73
|
.option('-d, --detached', 'Run summarizer daemon after indexing')
|
|
74
74
|
.option('--force', 'Force indexing even if another folder has already been indexed')
|
|
75
75
|
.action((targetDir, options) => {
|
|
76
|
-
|
|
77
|
-
runIndexCommand(resolvedDir, { detached: options.detached, force: options.force });
|
|
76
|
+
runIndexCommand(targetDir, { detached: options.detached, force: options.force });
|
|
78
77
|
});
|
|
79
78
|
// βοΈ Group: Configuration settings
|
|
80
79
|
const set = cmd.command('set').description('Set configuration values');
|
|
@@ -127,7 +126,9 @@ cmd
|
|
|
127
126
|
cmd
|
|
128
127
|
.command('daemon')
|
|
129
128
|
.description('Run background summarization of indexed files')
|
|
130
|
-
.action(
|
|
129
|
+
.action(async () => {
|
|
130
|
+
await runDaemonBatch(); // ignore the return value
|
|
131
|
+
});
|
|
131
132
|
cmd
|
|
132
133
|
.command('stop-daemon')
|
|
133
134
|
.description('Stop the background summarizer daemon')
|