scai 0.1.35 → 0.1.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/daemon/daemonBatch.js +27 -8
- package/dist/db/fileIndex.js +2 -5
- package/dist/db/functionExtractors/extractFromJava.js +4 -0
- package/dist/db/functionExtractors/extractFromJs.js +89 -0
- package/dist/db/functionExtractors/extractFromXML.js +4 -0
- package/dist/db/functionExtractors/index.js +29 -0
- package/dist/db/functionIndex.js +11 -0
- package/dist/db/schema.js +18 -0
- package/dist/db/sqlTemplates.js +10 -0
- package/dist/scripts/addFunctionsExtractedColumn.js +29 -0
- package/package.json +4 -2
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { indexFunctionsForFile } from '../db/functionIndex.js';
|
|
2
2
|
import { db } from '../db/client.js';
|
|
3
3
|
import fs from 'fs/promises';
|
|
4
4
|
import fsSync from 'fs';
|
|
@@ -7,6 +7,7 @@ import { DB_PATH } from '../constants.js';
|
|
|
7
7
|
import { log } from '../utils/log.js';
|
|
8
8
|
import lockfile from 'proper-lockfile';
|
|
9
9
|
import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
|
|
10
|
+
import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
10
11
|
const MAX_FILES_PER_BATCH = 5;
|
|
11
12
|
async function lockDb() {
|
|
12
13
|
try {
|
|
@@ -18,9 +19,11 @@ async function lockDb() {
|
|
|
18
19
|
}
|
|
19
20
|
}
|
|
20
21
|
export async function runDaemonBatch() {
|
|
22
|
+
log('đĄ Starting daemon batch...');
|
|
21
23
|
const rows = db.prepare(`
|
|
22
|
-
SELECT path, type FROM files
|
|
23
|
-
WHERE summary IS NULL OR summary = ''
|
|
24
|
+
SELECT path, type, functions_extracted FROM files
|
|
25
|
+
WHERE (summary IS NULL OR summary = '')
|
|
26
|
+
OR (functions_extracted IS NULL OR functions_extracted = 0)
|
|
24
27
|
ORDER BY last_modified DESC
|
|
25
28
|
LIMIT ?
|
|
26
29
|
`).all(MAX_FILES_PER_BATCH);
|
|
@@ -30,6 +33,7 @@ export async function runDaemonBatch() {
|
|
|
30
33
|
}
|
|
31
34
|
const release = await lockDb();
|
|
32
35
|
for (const row of rows) {
|
|
36
|
+
log(`đ Processing file: ${row.path}`);
|
|
33
37
|
if (!fsSync.existsSync(row.path)) {
|
|
34
38
|
log(`⚠️ Skipped missing file: ${row.path}`);
|
|
35
39
|
continue;
|
|
@@ -40,26 +44,41 @@ export async function runDaemonBatch() {
|
|
|
40
44
|
}
|
|
41
45
|
try {
|
|
42
46
|
const content = await fs.readFile(row.path, 'utf-8');
|
|
43
|
-
|
|
44
|
-
const
|
|
47
|
+
// 1. Extract functions and function calls
|
|
48
|
+
const fileResult = db
|
|
49
|
+
.prepare(`SELECT id FROM files WHERE path = ?`)
|
|
50
|
+
.get(row.path);
|
|
51
|
+
const fileId = fileResult?.id;
|
|
52
|
+
if (!fileId) {
|
|
53
|
+
log(`⚠️ Could not find fileId for ${row.path}`);
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
await indexFunctionsForFile(row.path, fileId);
|
|
57
|
+
}
|
|
58
|
+
// 2. Summarize the file
|
|
59
|
+
log(`đ Generating summary for ${row.path}...`);
|
|
60
|
+
const summaryResult = await summaryModule.run({ content, filepath: row.path });
|
|
61
|
+
const summary = summaryResult?.summary?.trim() || null;
|
|
45
62
|
let embedding = null;
|
|
46
63
|
if (summary) {
|
|
47
64
|
const vector = await generateEmbedding(summary);
|
|
48
|
-
if (vector)
|
|
65
|
+
if (vector) {
|
|
49
66
|
embedding = JSON.stringify(vector);
|
|
67
|
+
}
|
|
50
68
|
}
|
|
51
69
|
db.prepare(`
|
|
52
70
|
UPDATE files
|
|
53
71
|
SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
|
|
54
72
|
WHERE path = @path
|
|
55
73
|
`).run({ summary, embedding, path: row.path });
|
|
56
|
-
log(
|
|
74
|
+
log(`✅ Updated summary & embedding for ${row.path}\n`);
|
|
57
75
|
}
|
|
58
76
|
catch (err) {
|
|
59
|
-
log(`â Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}`);
|
|
77
|
+
log(`â Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
60
78
|
}
|
|
61
79
|
await new Promise(resolve => setTimeout(resolve, 200));
|
|
62
80
|
}
|
|
63
81
|
await release();
|
|
82
|
+
log('✅ Finished daemon batch.\n\n');
|
|
64
83
|
return true;
|
|
65
84
|
}
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -2,8 +2,8 @@ import { db } from './client.js';
|
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
4
|
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
5
|
-
import * as sqlTemplates from './sqlTemplates.js';
|
|
6
5
|
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
|
|
6
|
+
import * as sqlTemplates from './sqlTemplates.js';
|
|
7
7
|
/**
|
|
8
8
|
* Index a file into the local SQLite database.
|
|
9
9
|
*
|
|
@@ -16,6 +16,7 @@ export function indexFile(filePath, summary, type) {
|
|
|
16
16
|
const lastModified = stats.mtime.toISOString();
|
|
17
17
|
const indexedAt = new Date().toISOString();
|
|
18
18
|
const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
|
|
19
|
+
// Index the file metadata
|
|
19
20
|
db.prepare(sqlTemplates.upsertFileTemplate).run({
|
|
20
21
|
path: normalizedPath,
|
|
21
22
|
summary,
|
|
@@ -64,9 +65,6 @@ export async function searchFiles(query, topK = 5) {
|
|
|
64
65
|
if (ftsResults.length === 0) {
|
|
65
66
|
return [];
|
|
66
67
|
}
|
|
67
|
-
ftsResults.forEach(result => {
|
|
68
|
-
console.log(`đ FTS found: ${result.path}`);
|
|
69
|
-
});
|
|
70
68
|
const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
|
|
71
69
|
const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
|
|
72
70
|
const scored = ftsResults.map(result => {
|
|
@@ -85,7 +83,6 @@ export async function searchFiles(query, topK = 5) {
|
|
|
85
83
|
}
|
|
86
84
|
}
|
|
87
85
|
else {
|
|
88
|
-
// No embedding: fallback to inverse bm25-only
|
|
89
86
|
finalScore = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
90
87
|
}
|
|
91
88
|
return {
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { parse } from 'acorn';
|
|
2
|
+
import { simple as walkSimple } from 'acorn-walk';
|
|
3
|
+
import { generateEmbedding } from '../../lib/generateEmbedding.js';
|
|
4
|
+
import { db } from '../client.js';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
import { log } from '../../utils/log.js';
|
|
7
|
+
/**
|
|
8
|
+
* Parses a JavaScript/TypeScript file, extracts all top-level functions,
|
|
9
|
+
* generates embeddings, and indexes both the functions and any calls made
|
|
10
|
+
* within each function into the database.
|
|
11
|
+
*/
|
|
12
|
+
export async function extractFromJS(filePath, content, fileId) {
|
|
13
|
+
const ast = parse(content, { ecmaVersion: 'latest', locations: true });
|
|
14
|
+
const functions = [];
|
|
15
|
+
walkSimple(ast, {
|
|
16
|
+
FunctionDeclaration(node) {
|
|
17
|
+
const name = node.id?.name || `${path.basename(filePath)}:<anon>`;
|
|
18
|
+
const startLine = node?.loc?.start.line ?? -1;
|
|
19
|
+
const endLine = node?.loc?.end.line ?? -1;
|
|
20
|
+
const body = content.slice(node.start, node.end);
|
|
21
|
+
functions.push({ name, startLine, endLine, body });
|
|
22
|
+
},
|
|
23
|
+
FunctionExpression(node) {
|
|
24
|
+
const name = `${path.basename(filePath)}:<anon>`;
|
|
25
|
+
const startLine = node?.loc?.start.line ?? -1;
|
|
26
|
+
const endLine = node?.loc?.end.line ?? -1;
|
|
27
|
+
const body = content.slice(node.start, node.end);
|
|
28
|
+
functions.push({ name, startLine, endLine, body });
|
|
29
|
+
},
|
|
30
|
+
ArrowFunctionExpression(node) {
|
|
31
|
+
const name = `${path.basename(filePath)}:<anon>`;
|
|
32
|
+
const startLine = node?.loc?.start.line ?? -1;
|
|
33
|
+
const endLine = node?.loc?.end.line ?? -1;
|
|
34
|
+
const body = content.slice(node.start, node.end);
|
|
35
|
+
functions.push({ name, startLine, endLine, body });
|
|
36
|
+
},
|
|
37
|
+
});
|
|
38
|
+
if (functions.length === 0) {
|
|
39
|
+
log(`⚠️ No functions found in: ${filePath}`);
|
|
40
|
+
return;
|
|
41
|
+
}
|
|
42
|
+
log(`đ Found ${functions.length} functions in ${filePath}`);
|
|
43
|
+
for (const fn of functions) {
|
|
44
|
+
const embedding = await generateEmbedding(fn.body);
|
|
45
|
+
const result = db.prepare(`
|
|
46
|
+
INSERT INTO functions (
|
|
47
|
+
file_id, name, start_line, end_line, content, embedding, lang
|
|
48
|
+
) VALUES (
|
|
49
|
+
@file_id, @name, @start_line, @end_line, @content, @embedding, @lang
|
|
50
|
+
)
|
|
51
|
+
`).run({
|
|
52
|
+
file_id: fileId,
|
|
53
|
+
name: fn.name,
|
|
54
|
+
start_line: fn.startLine,
|
|
55
|
+
end_line: fn.endLine,
|
|
56
|
+
content: fn.body,
|
|
57
|
+
embedding: JSON.stringify(embedding),
|
|
58
|
+
lang: 'js'
|
|
59
|
+
});
|
|
60
|
+
const callerId = result.lastInsertRowid;
|
|
61
|
+
const fnAst = parse(fn.body, { ecmaVersion: 'latest' });
|
|
62
|
+
const calls = [];
|
|
63
|
+
walkSimple(fnAst, {
|
|
64
|
+
CallExpression(node) {
|
|
65
|
+
if (node.callee.type === 'Identifier' && node.callee.name) {
|
|
66
|
+
calls.push({ calleeName: node.callee.name });
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
});
|
|
70
|
+
for (const call of calls) {
|
|
71
|
+
db.prepare(`
|
|
72
|
+
INSERT INTO function_calls (caller_id, callee_name)
|
|
73
|
+
VALUES (@caller_id, @callee_name)
|
|
74
|
+
`).run({
|
|
75
|
+
caller_id: callerId,
|
|
76
|
+
callee_name: call.calleeName
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
log(`đ Indexed function: ${fn.name} with ${calls.length} calls`);
|
|
80
|
+
}
|
|
81
|
+
// Mark the file as successfully extracted
|
|
82
|
+
db.prepare(`
|
|
83
|
+
UPDATE files
|
|
84
|
+
SET functions_extracted = 1,
|
|
85
|
+
functions_extracted_at = datetime('now')
|
|
86
|
+
WHERE id = @fileId
|
|
87
|
+
`).run({ fileId });
|
|
88
|
+
log(`✅ Marked functions as extracted for ${filePath}`);
|
|
89
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { log } from '../../utils/log.js';
|
|
2
|
+
import { detectFileType } from '../../utils/detectFileType.js';
|
|
3
|
+
import { extractFromJava } from './extractFromJava.js';
|
|
4
|
+
import { extractFromJS } from './extractFromJs.js';
|
|
5
|
+
import { extractFromXML } from './extractFromXML.js';
|
|
6
|
+
/**
|
|
7
|
+
* Detects file type and delegates to the appropriate extractor.
|
|
8
|
+
*/
|
|
9
|
+
export async function extractFunctionsFromFile(filePath, content, fileId) {
|
|
10
|
+
const type = detectFileType(filePath).trim().toLowerCase();
|
|
11
|
+
;
|
|
12
|
+
if (type === 'js' || type === 'ts') {
|
|
13
|
+
log(`✅ Attempting to extract JS functions from ${filePath}\n`);
|
|
14
|
+
await extractFromJS(filePath, content, fileId);
|
|
15
|
+
return true;
|
|
16
|
+
}
|
|
17
|
+
if (type === 'java') {
|
|
18
|
+
log(`â Nothing extracted for ${filePath} due to missing implementation`);
|
|
19
|
+
await extractFromJava(filePath, content, fileId);
|
|
20
|
+
return false;
|
|
21
|
+
}
|
|
22
|
+
if (type === 'xml') {
|
|
23
|
+
log(`â Nothing extracted for ${filePath} due to missing implementation`);
|
|
24
|
+
await extractFromXML(filePath, content, fileId);
|
|
25
|
+
return false;
|
|
26
|
+
}
|
|
27
|
+
log(`⚠️ Unsupported file type: ${type} for function extraction. Skipping ${filePath}`);
|
|
28
|
+
return false;
|
|
29
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { extractFunctionsFromFile } from './functionExtractors/index.js';
|
|
4
|
+
/**
|
|
5
|
+
* Extracts functions from file if language is supported.
|
|
6
|
+
*/
|
|
7
|
+
export async function indexFunctionsForFile(filePath, fileId) {
|
|
8
|
+
const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
|
|
9
|
+
const content = fs.readFileSync(filePath, 'utf-8');
|
|
10
|
+
await extractFunctionsFromFile(normalizedPath, content, fileId);
|
|
11
|
+
}
|
package/dist/db/schema.js
CHANGED
|
@@ -32,4 +32,22 @@ export function initSchema() {
|
|
|
32
32
|
END;
|
|
33
33
|
`);
|
|
34
34
|
console.log('✅ SQLite schema initialized with FTS5 triggers');
|
|
35
|
+
db.exec(`
|
|
36
|
+
CREATE TABLE IF NOT EXISTS functions (
|
|
37
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
38
|
+
file_id INTEGER REFERENCES files(id),
|
|
39
|
+
name TEXT,
|
|
40
|
+
start_line INTEGER,
|
|
41
|
+
end_line INTEGER,
|
|
42
|
+
content TEXT,
|
|
43
|
+
embedding TEXT,
|
|
44
|
+
lang TEXT
|
|
45
|
+
);
|
|
46
|
+
|
|
47
|
+
CREATE TABLE IF NOT EXISTS function_calls (
|
|
48
|
+
caller_id INTEGER REFERENCES functions(id),
|
|
49
|
+
callee_name TEXT
|
|
50
|
+
);
|
|
51
|
+
`);
|
|
52
|
+
console.log('✅ Schema for functions and function_calls initialized');
|
|
35
53
|
}
|
package/dist/db/sqlTemplates.js
CHANGED
|
@@ -39,3 +39,13 @@ export const rawQueryTemplate = `
|
|
|
39
39
|
ORDER BY rank
|
|
40
40
|
LIMIT :limit
|
|
41
41
|
`;
|
|
42
|
+
// Insert function metadata
|
|
43
|
+
export const insertFunctionTemplate = `
|
|
44
|
+
INSERT INTO functions (file_id, name, start_line, end_line, content, embedding, lang)
|
|
45
|
+
VALUES (:file_id, :name, :start_line, :end_line, :content, :embedding, :lang)
|
|
46
|
+
`;
|
|
47
|
+
// Insert function call edge
|
|
48
|
+
export const insertFunctionCallTemplate = `
|
|
49
|
+
INSERT INTO function_calls (caller_id, callee_name)
|
|
50
|
+
VALUES (:caller_id, :callee_name)
|
|
51
|
+
`;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { db } from '../db/client.js';
|
|
2
|
+
try {
|
|
3
|
+
db.prepare(`
|
|
4
|
+
ALTER TABLE files ADD COLUMN functions_extracted BOOLEAN DEFAULT 0
|
|
5
|
+
`).run();
|
|
6
|
+
console.log('✅ Added functions_extracted column.');
|
|
7
|
+
}
|
|
8
|
+
catch (e) {
|
|
9
|
+
if (e instanceof Error ? e.message.includes('duplicate column name') : e) {
|
|
10
|
+
console.log('ℹ️ Column functions_extracted already exists. Skipping.');
|
|
11
|
+
}
|
|
12
|
+
else {
|
|
13
|
+
console.error('â Migration failed:', e instanceof Error ? e.message : e);
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
try {
|
|
17
|
+
db.prepare(`
|
|
18
|
+
ALTER TABLE files ADD COLUMN functions_extracted_at DATETIME
|
|
19
|
+
`).run();
|
|
20
|
+
console.log('✅ Added functions_extracted_at column.');
|
|
21
|
+
}
|
|
22
|
+
catch (e) {
|
|
23
|
+
if (e instanceof Error ? e.message.includes('duplicate column name') : e) {
|
|
24
|
+
console.log('ℹ️ Column functions_extracted_at already exists. Skipping.');
|
|
25
|
+
}
|
|
26
|
+
else {
|
|
27
|
+
console.error('â Migration failed:', e instanceof Error ? e.message : e);
|
|
28
|
+
}
|
|
29
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scai",
|
|
3
|
-
"version": "0.1.35",
|
|
3
|
+
"version": "0.1.36",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"scai": "./dist/index.js"
|
|
@@ -25,6 +25,8 @@
|
|
|
25
25
|
"start": "node dist/index.js"
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
|
+
"acorn": "^8.11.3",
|
|
29
|
+
"acorn-walk": "^8.3.2",
|
|
28
30
|
"better-sqlite3": "^12.1.1",
|
|
29
31
|
"commander": "^11.0.0",
|
|
30
32
|
"fast-glob": "^3.3.3",
|
|
@@ -33,7 +35,7 @@
|
|
|
33
35
|
"devDependencies": {
|
|
34
36
|
"@types/better-sqlite3": "^7.6.13",
|
|
35
37
|
"@types/jest": "^30.0.0",
|
|
36
|
-
"@types/node": "^24.0.
|
|
38
|
+
"@types/node": "^24.0.13",
|
|
37
39
|
"@types/proper-lockfile": "^4.1.4",
|
|
38
40
|
"jest": "^30.0.2",
|
|
39
41
|
"ts-jest": "^29.4.0",
|