scai 0.1.48 → 0.1.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/AskCmd.js +5 -4
- package/dist/commands/FindCmd.js +10 -3
- package/dist/daemon/daemonBatch.js +30 -32
- package/dist/db/functionExtractors/extractFromJava.js +6 -2
- package/dist/db/functionExtractors/extractFromJs.js +93 -83
- package/dist/db/functionExtractors/extractFromXML.js +6 -2
- package/dist/db/functionExtractors/index.js +5 -15
- package/dist/db/sqlTemplates.js +48 -0
- package/dist/fileRules/fileClassifier.js +42 -3
- package/dist/utils/fileTree.js +23 -22
- package/package.json +1 -1
package/dist/commands/AskCmd.js
CHANGED
@@ -5,12 +5,12 @@ import { searchFiles, queryFiles, getFunctionsForFiles } from '../db/fileIndex.js';
 import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
 import { generate } from '../lib/generate.js';
 import { buildContextualPrompt } from '../utils/buildContextualPrompt.js';
-import {
+import { generateFocusedFileTree } from '../utils/fileTree.js';
 import { log } from '../utils/log.js';
 import { PROMPT_LOG_PATH, SCAI_HOME, INDEX_DIR, RELATED_FILES_LIMIT, MAX_SUMMARY_LINES } from '../constants.js';
 export async function runAskCommand(query) {
     if (!query) {
-        query = await promptOnce('
+        query = await promptOnce('💬 Ask your question:\n');
     }
     query = query.trim();
     if (!query) {
@@ -103,7 +103,7 @@ export async function runAskCommand(query) {
     // 🟩 STEP 6: Generate file tree
     let fileTree = '';
     try {
-        fileTree =
+        fileTree = generateFocusedFileTree(INDEX_DIR, filepath, 2);
     }
     catch (e) {
         console.warn('⚠️ Could not generate file tree:', e);
@@ -144,11 +144,12 @@ export async function runAskCommand(query) {
 // 🟩 Helper: Prompt once
 function promptOnce(promptText) {
     return new Promise(resolve => {
+        console.log(promptText); // Instead of putting it *in* rl.question
         const rl = readline.createInterface({
             input: process.stdin,
             output: process.stdout,
         });
-        rl.question(
+        rl.question('> ', answer => {
            rl.close();
            resolve(answer.trim());
        });
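Editor's note on the promptOnce change above: the long banner is now printed once with console.log and rl.question gets only a short '> ' prompt, likely to avoid readline re-rendering a multi-line prompt string. A minimal self-contained sketch of that pattern (the helper name promptOnce is taken from the diff; everything else is illustrative):

    import readline from 'readline';

    // Sketch: print the long prompt once, then ask with a short '> ' prompt.
    function promptOnce(promptText) {
        return new Promise(resolve => {
            console.log(promptText); // banner printed once, outside readline's prompt handling
            const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
            rl.question('> ', answer => {
                rl.close();
                resolve(answer.trim());
            });
        });
    }

    // Usage: const query = await promptOnce('💬 Ask your question:');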
package/dist/commands/FindCmd.js
CHANGED
@@ -1,6 +1,7 @@
 import { queryFiles } from '../db/fileIndex.js';
 import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
 import path from 'path';
+import os from 'os';
 export async function runFindCommand(query) {
     if (!query) {
         console.error('❌ Please provide a search query.\n👉 Usage: scai find "keyword"');
@@ -13,9 +14,15 @@ export async function runFindCommand(query) {
         console.log('⚠️ No matching files found.');
         return;
     }
-    console.log(`✅ Found ${results.length} result(s)
-
+    console.log(`✅ Found ${results.length} result(s).\n`);
+    const homeDir = os.homedir();
     results.forEach((result, index) => {
-
+        let absPath = path.resolve(result.path); // ensure absolute path
+        if (absPath.startsWith(homeDir)) {
+            absPath = absPath.replace(homeDir, '~');
+        }
+        // Normalize to forward slashes (especially for Windows)
+        absPath = absPath.replace(/\\/g, '/');
+        console.log(`📄 [${index + 1}] ${absPath}`);
     });
 }
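The display logic added above can be read as one small helper: resolve to an absolute path, collapse the home directory to ~, and normalize separators. A sketch under those assumptions (the helper name displayPath is hypothetical, not part of the package):

    import os from 'os';
    import path from 'path';

    // Hypothetical helper mirroring the display logic added to runFindCommand.
    function displayPath(p) {
        let absPath = path.resolve(p);               // ensure absolute path
        const homeDir = os.homedir();
        if (absPath.startsWith(homeDir)) {
            absPath = absPath.replace(homeDir, '~'); // shorten $HOME to ~
        }
        return absPath.replace(/\\/g, '/');          // forward slashes, even on Windows
    }

    // e.g. on a machine where os.homedir() is '/home/alice':
    // displayPath('/home/alice/project/src/index.js') -> '~/project/src/index.js'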
package/dist/daemon/daemonBatch.js
CHANGED

@@ -8,7 +8,12 @@ import { log } from '../utils/log.js';
 import lockfile from 'proper-lockfile';
 import { summaryModule } from '../pipeline/modules/summaryModule.js';
 import { classifyFile } from '../fileRules/classifyFile.js';
+import { markFileAsSkippedByPath, selectUnprocessedFiles, updateFileWithSummaryAndEmbedding, } from '../db/sqlTemplates.js';
 const MAX_FILES_PER_BATCH = 5;
+/**
+ * Acquires a lock on the database to ensure that only one daemon batch
+ * can modify it at a time.
+ */
 async function lockDb() {
     try {
         return await lockfile.lock(DB_PATH);
@@ -18,15 +23,18 @@ async function lockDb() {
         throw err;
     }
 }
+/**
+ * Runs a daemon batch to process up to MAX_FILES_PER_BATCH unprocessed files.
+ * This includes:
+ * - Verifying file existence and validity
+ * - Generating summaries and embeddings if needed
+ * - Extracting functions from source files
+ * - Marking skipped files as necessary
+ */
 export async function runDaemonBatch() {
     log('📡 Starting daemon batch...');
-
-
-        FROM files
-        WHERE processing_status = 'unprocessed' OR summary IS NULL OR summary = ''
-        ORDER BY last_modified DESC
-        LIMIT ?
-    `).all(MAX_FILES_PER_BATCH);
+    // Selects up to MAX_FILES_PER_BATCH files that haven't been processed yet
+    const rows = db.prepare(selectUnprocessedFiles).all(MAX_FILES_PER_BATCH);
     if (rows.length === 0) {
         log('✅ No files left to process.');
         return false;
@@ -34,72 +42,62 @@ export async function runDaemonBatch() {
     const release = await lockDb();
     for (const row of rows) {
         log(`🔄 Processing file: ${row.path}`);
+        // Skip if file is missing from the file system
         if (!fsSync.existsSync(row.path)) {
             log(`⚠️ Skipped missing file: ${row.path}`);
-            db.prepare(
+            db.prepare(markFileAsSkippedByPath).run({ path: row.path });
             continue;
         }
+        // Skip if file is classified as something we don't process
        const classification = classifyFile(row.path);
        if (classification !== 'valid') {
            log(`✂️ Skipping (${classification}): ${row.path}`);
-            db.prepare(
+            db.prepare(markFileAsSkippedByPath).run({ path: row.path });
            continue;
        }
        try {
            const content = await fs.readFile(row.path, 'utf-8');
-            //
+            // Determine whether the file needs to be re-summarized
            const needsResummary = !row.summary ||
                !row.indexed_at ||
                (row.last_modified && new Date(row.last_modified) > new Date(row.indexed_at));
            if (needsResummary) {
                log(`📝 Generating summary for ${row.path}...`);
+                // Generate a summary using the summary pipeline
                const summaryResult = await summaryModule.run({ content, filepath: row.path });
                const summary = summaryResult?.summary?.trim() || null;
                let embedding = null;
+                // Generate an embedding from the summary (if present)
                if (summary) {
                    const vector = await generateEmbedding(summary);
                    if (vector) {
                        embedding = JSON.stringify(vector);
                    }
                }
-
-
-
-
-
-
-                `).run({ summary, embedding, path: row.path });
+                // Update the file record with the new summary and embedding
+                db.prepare(updateFileWithSummaryAndEmbedding).run({
+                    summary,
+                    embedding,
+                    path: row.path,
+                });
                log(`✅ Updated summary & embedding for ${row.path}`);
            }
            else {
                log(`⚡ Skipped summary (up-to-date) for ${row.path}`);
            }
-            //
+            // Extract top-level functions from the file and update the DB
            const extracted = await indexFunctionsForFile(row.path, row.id);
            if (extracted) {
-                db.prepare(`
-                    UPDATE files
-                    SET processing_status = 'extracted',
-                        functions_extracted_at = datetime('now')
-                    WHERE id = @id
-                `).run({ id: row.id });
                log(`✅ Function extraction complete for ${row.path}\n`);
            }
            else {
-                // If no functions were found, set processing status to 'skipped' or 'failed'
-                db.prepare(`
-                    UPDATE files
-                    SET processing_status = 'failed',
-                        functions_extracted_at = datetime('now')
-                    WHERE id = @id
-                `).run({ id: row.id });
                log(`ℹ️ No functions extracted for ${row.path}\n`);
            }
        }
        catch (err) {
            log(`❌ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
-            db.prepare(`UPDATE files SET processing_status = 'failed' WHERE path = @path`).run({ path: row.path });
        }
+        // Add a small delay to throttle processing
        await new Promise(resolve => setTimeout(resolve, 200));
    }
    await release();
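For context on the lockDb helper above: proper-lockfile's lock() resolves to a release function, which is exactly how runDaemonBatch uses it (const release = await lockDb(); ... await release();). A minimal sketch of that acquire/work/release shape, assuming a DB_PATH that already exists on disk (proper-lockfile requires the target file to exist by default):

    import lockfile from 'proper-lockfile';

    const DB_PATH = '/tmp/example.db'; // assumption: the locked file must already exist

    async function withDbLock(work) {
        // lock() resolves to a release() function; retries are optional.
        const release = await lockfile.lock(DB_PATH, { retries: 3 });
        try {
            return await work();
        }
        finally {
            await release(); // always release, even if work() throws
        }
    }

    // Usage: await withDbLock(async () => { /* exclusive DB work */ });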
package/dist/db/functionExtractors/extractFromJava.js
CHANGED

@@ -1,4 +1,8 @@
-
-
+import { db } from '../client.js';
+import { markFileAsSkippedTemplate } from '../sqlTemplates.js';
+export async function extractFromJava(filePath, _content, fileId) {
     console.warn(`✂️ Java extraction not implemented: ${filePath}`);
+    // Mark the file as skipped with the relevant status update
+    db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
+    return false;
 }
package/dist/db/functionExtractors/extractFromJs.js
CHANGED

@@ -4,6 +4,8 @@ import { generateEmbedding } from '../../lib/generateEmbedding.js';
 import { db } from '../client.js';
 import path from 'path';
 import { log } from '../../utils/log.js';
+import fs from 'fs';
+import { markFileAsSkippedTemplate, markFileAsExtractedTemplate, markFileAsFailedTemplate } from '../sqlTemplates.js';
 function getFunctionName(node, parent, fileName) {
     if (node.id?.name)
         return node.id.name;
@@ -18,96 +20,104 @@ function getFunctionName(node, parent, fileName) {
     return `${fileName}:<anon>`;
 }
 export async function extractFromJS(filePath, content, fileId) {
-
-
-
-
-
-    const functions = [];
-    walkAncestor(ast, {
-        FunctionDeclaration(node, ancestors) {
-            const parent = ancestors[ancestors.length - 2];
-            const name = getFunctionName(node, parent, path.basename(filePath));
-            functions.push({
-                name,
-                start_line: node.loc?.start.line ?? -1,
-                end_line: node.loc?.end.line ?? -1,
-                content: content.slice(node.start, node.end),
-            });
-        },
-        FunctionExpression(node, ancestors) {
-            const parent = ancestors[ancestors.length - 2];
-            const name = getFunctionName(node, parent, path.basename(filePath));
-            functions.push({
-                name,
-                start_line: node.loc?.start.line ?? -1,
-                end_line: node.loc?.end.line ?? -1,
-                content: content.slice(node.start, node.end),
-            });
-        },
-        ArrowFunctionExpression(node, ancestors) {
-            const parent = ancestors[ancestors.length - 2];
-            const name = getFunctionName(node, parent, path.basename(filePath));
-            functions.push({
-                name,
-                start_line: node.loc?.start.line ?? -1,
-                end_line: node.loc?.end.line ?? -1,
-                content: content.slice(node.start, node.end),
-            });
-        },
-    });
-    if (functions.length === 0) {
-        log(`⚠️ No functions found in: ${filePath}`);
-        return false;
-    }
-    log(`🔍 Found ${functions.length} functions in ${filePath}`);
-    for (const fn of functions) {
-        const embedding = await generateEmbedding(fn.content);
-        const result = db.prepare(`
-            INSERT INTO functions (
-                file_id, name, start_line, end_line, content, embedding, lang
-            ) VALUES (
-                @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
-            )
-        `).run({
-            file_id: fileId,
-            name: fn.name,
-            start_line: fn.start_line,
-            end_line: fn.end_line,
-            content: fn.content,
-            embedding: JSON.stringify(embedding),
-            lang: 'js'
-        });
-        const callerId = result.lastInsertRowid;
-        const fnAst = parse(fn.content, {
+    try {
+        const code = fs.readFileSync(filePath, 'utf-8');
+        console.log(`[Debug] Attempting to parse: ${filePath}`);
+        console.log(`[Debug] First 3 lines:\n${code.split('\n').slice(0, 3).join('\n')}`);
+        const ast = parse(content, {
            ecmaVersion: 'latest',
            sourceType: 'module',
            locations: true,
        });
-        const
-        walkAncestor(
-
-
-
-
-
+        const functions = [];
+        walkAncestor(ast, {
+            FunctionDeclaration(node, ancestors) {
+                const parent = ancestors[ancestors.length - 2];
+                const name = getFunctionName(node, parent, path.basename(filePath));
+                functions.push({
+                    name,
+                    start_line: node.loc?.start.line ?? -1,
+                    end_line: node.loc?.end.line ?? -1,
+                    content: content.slice(node.start, node.end),
+                });
+            },
+            FunctionExpression(node, ancestors) {
+                const parent = ancestors[ancestors.length - 2];
+                const name = getFunctionName(node, parent, path.basename(filePath));
+                functions.push({
+                    name,
+                    start_line: node.loc?.start.line ?? -1,
+                    end_line: node.loc?.end.line ?? -1,
+                    content: content.slice(node.start, node.end),
+                });
+            },
+            ArrowFunctionExpression(node, ancestors) {
+                const parent = ancestors[ancestors.length - 2];
+                const name = getFunctionName(node, parent, path.basename(filePath));
+                functions.push({
+                    name,
+                    start_line: node.loc?.start.line ?? -1,
+                    end_line: node.loc?.end.line ?? -1,
+                    content: content.slice(node.start, node.end),
+                });
+            },
        });
-
-
-
-
+        if (functions.length === 0) {
+            log(`⚠️ No functions found in: ${filePath}`);
+            db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
+            return false;
+        }
+        log(`🔍 Found ${functions.length} functions in ${filePath}`);
+        for (const fn of functions) {
+            const embedding = await generateEmbedding(fn.content);
+            const result = db.prepare(`
+                INSERT INTO functions (
+                    file_id, name, start_line, end_line, content, embedding, lang
+                ) VALUES (
+                    @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
+                )
            `).run({
-
-
+                file_id: fileId,
+                name: fn.name,
+                start_line: fn.start_line,
+                end_line: fn.end_line,
+                content: fn.content,
+                embedding: JSON.stringify(embedding),
+                lang: 'js'
+            });
+            const callerId = result.lastInsertRowid;
+            const fnAst = parse(fn.content, {
+                ecmaVersion: 'latest',
+                sourceType: 'module',
+                locations: true,
+            });
+            const calls = [];
+            walkAncestor(fnAst, {
+                CallExpression(node) {
+                    if (node.callee?.type === 'Identifier' && node.callee.name) {
+                        calls.push({ calleeName: node.callee.name });
+                    }
+                }
            });
+            for (const call of calls) {
+                db.prepare(`
+                    INSERT INTO function_calls (caller_id, callee_name)
+                    VALUES (@caller_id, @callee_name)
+                `).run({
+                    caller_id: callerId,
+                    callee_name: call.calleeName
+                });
+            }
+            log(`🔗 Indexed function: ${fn.name} with ${calls.length} calls`);
        }
-
+        db.prepare(markFileAsExtractedTemplate).run({ id: fileId });
+        log(`✅ Marked functions as extracted for ${filePath}`);
+        return true;
+    }
+    catch (err) {
+        log(`❌ Failed to extract from: ${filePath}`);
+        log(`   ↳ ${String(err.message)}`);
+        db.prepare(markFileAsFailedTemplate).run({ id: fileId });
+        return false;
    }
-    db.prepare(`
-        UPDATE files
-        SET processing_status = 'extracted'
-        WHERE id = @fileId
-    `).run({ fileId });
-    log(`✅ Marked functions as extracted for ${filePath}`);
-    return true;
 }
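The extractor above relies on acorn's parse and acorn-walk's ancestor walk. One detail worth noting: when no explicit state is passed, acorn-walk hands the ancestors array to the visitor as its second argument, which is why FunctionDeclaration(node, ancestors) works in the diff. A self-contained sketch of that pattern (example source and names are illustrative):

    import { parse } from 'acorn';
    import { ancestor as walkAncestor } from 'acorn-walk';

    const source = `
    const add = (a, b) => a + b;
    function greet(name) { return 'hi ' + name; }
    `;

    const ast = parse(source, { ecmaVersion: 'latest', sourceType: 'module', locations: true });

    const found = [];
    walkAncestor(ast, {
        // With no state argument, acorn-walk passes the ancestors array here.
        ArrowFunctionExpression(node, ancestors) {
            const parent = ancestors[ancestors.length - 2];
            // Arrow functions are usually named via their VariableDeclarator parent.
            const name = parent?.type === 'VariableDeclarator' ? parent.id.name : '<anon>';
            found.push({ name, line: node.loc.start.line });
        },
        FunctionDeclaration(node) {
            found.push({ name: node.id.name, line: node.loc.start.line });
        },
    });

    console.log(found); // [ { name: 'add', line: 2 }, { name: 'greet', line: 3 } ]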
package/dist/db/functionExtractors/extractFromXML.js
CHANGED

@@ -1,4 +1,8 @@
-
-
+import { db } from '../client.js';
+import { markFileAsSkippedTemplate } from '../sqlTemplates.js';
+export async function extractFromXML(filePath, _content, fileId) {
     console.warn(`✂️ XML extraction not implemented: ${filePath}`);
+    // Mark the file as skipped with the relevant status update
+    db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
+    return false;
 }
package/dist/db/functionExtractors/index.js
CHANGED

@@ -4,6 +4,7 @@ import { extractFromJava } from './extractFromJava.js';
 import { extractFromJS } from './extractFromJs.js';
 import { extractFromXML } from './extractFromXML.js';
 import { db } from '../client.js';
+import { markFileAsFailedTemplate, markFileAsSkippedByPath } from '../sqlTemplates.js';
 /**
  * Detects file type and delegates to the appropriate extractor.
  */
@@ -11,38 +12,27 @@ export async function extractFunctionsFromFile(filePath, content, fileId) {
     const type = detectFileType(filePath).trim().toLowerCase();
     try {
         if (type === 'js' || type === 'ts' || type === 'javascript' || type === 'typescript') {
-            log(`✅ Attempting to extract JS functions from ${filePath}
+            log(`✅ Attempting to extract JS functions from ${filePath}`);
             return await extractFromJS(filePath, content, fileId);
         }
         if (type === 'java') {
             log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
             await extractFromJava(filePath, content, fileId);
-            // move into extract file
-            db.prepare(`
-                UPDATE files SET processing_status = 'skipped' WHERE id = @id
-            `).run({ id: fileId });
            return false;
        }
        if (type === 'xml') {
            log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
            await extractFromXML(filePath, content, fileId);
-            // move into extract file
-            db.prepare(`
-                UPDATE files SET processing_status = 'skipped' WHERE id = @id
-            `).run({ id: fileId });
            return false;
        }
        log(`⚠️ Unsupported file type: ${type} for function extraction. Skipping ${filePath}`);
-        db.prepare(
-            UPDATE files SET processing_status = 'skipped' WHERE id = @id
-        `).run({ id: fileId });
+        db.prepare(markFileAsSkippedByPath).run({ path: filePath });
        return false;
    }
    catch (error) {
        log(`❌ Failed to extract functions from ${filePath}: ${error instanceof Error ? error.message : error}`);
-
-
-        `).run({ id: fileId });
+        // Use the sqlTemplate to mark the file as 'failed'
+        db.prepare(markFileAsFailedTemplate).run({ id: fileId });
        return false;
    }
 }
package/dist/db/sqlTemplates.js
CHANGED
@@ -51,3 +51,51 @@ export const insertFunctionCallTemplate = `
     INSERT INTO function_calls (caller_id, callee_name)
     VALUES (:caller_id, :callee_name)
 `;
+// Mark a file as unprocessed
+export const markFileAsUnprocessedTemplate = `
+    UPDATE files
+    SET processing_status = 'unprocessed',
+        functions_extracted_at = NULL
+    WHERE id = :id
+`;
+// Mark a file as extracted
+export const markFileAsExtractedTemplate = `
+    UPDATE files
+    SET processing_status = 'extracted',
+        functions_extracted_at = CURRENT_TIMESTAMP
+    WHERE id = :id
+`;
+// Mark a file as skipped (not extractable)
+export const markFileAsSkippedTemplate = `
+    UPDATE files
+    SET processing_status = 'skipped',
+        functions_extracted_at = NULL
+    WHERE id = :id
+`;
+// Mark a file as failed
+export const markFileAsFailedTemplate = `
+    UPDATE files
+    SET processing_status = 'failed',
+        functions_extracted_at = NULL
+    WHERE id = :id
+`;
+export const selectUnprocessedFiles = `
+    SELECT id, path, type, summary, indexed_at, last_modified, processing_status
+    FROM files
+    WHERE processing_status = 'unprocessed' OR summary IS NULL OR summary = ''
+    ORDER BY last_modified DESC
+    LIMIT ?
+`;
+export const markFileAsSkippedByPath = `
+    UPDATE files
+    SET processing_status = 'skipped',
+        functions_extracted_at = NULL
+    WHERE path = @path
+`;
+export const updateFileWithSummaryAndEmbedding = `
+    UPDATE files
+    SET summary = @summary,
+        embedding = @embedding,
+        indexed_at = datetime('now')
+    WHERE path = @path
+`;
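Note the two named-parameter styles in these templates (:id and @path). Assuming the db client wraps better-sqlite3, which the prepare/run/lastInsertRowid usage elsewhere in this diff suggests, both prefixes are accepted and both bind from plain object keys without the prefix, so db.prepare(markFileAsSkippedByPath).run({ path }) works as shown. A minimal sketch against an in-memory DB with an assumed files schema:

    import Database from 'better-sqlite3';
    import { markFileAsSkippedByPath } from './sqlTemplates.js'; // import path assumed

    const db = new Database(':memory:');
    db.exec(`CREATE TABLE files (
        id INTEGER PRIMARY KEY,
        path TEXT,
        processing_status TEXT,
        functions_extracted_at TEXT
    )`);
    db.prepare(`INSERT INTO files (path, processing_status) VALUES (?, 'unprocessed')`)
      .run('/src/a.js');

    // @path in the template binds from the object key 'path' (no prefix needed).
    db.prepare(markFileAsSkippedByPath).run({ path: '/src/a.js' });

    console.log(db.prepare(`SELECT processing_status FROM files WHERE path = ?`).get('/src/a.js'));
    // -> { processing_status: 'skipped' }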
package/dist/fileRules/fileClassifier.js
CHANGED

@@ -1,9 +1,48 @@
 // utils/fileClassifier.ts
 import path from 'path';
+/**
+ * Determines whether a file is likely to be a *generated* or *bundled* file,
+ * rather than handwritten source code.
+ *
+ * This helps filter out files that shouldn't be analyzed for user-authored logic,
+ * like minified JS bundles, Webpack chunks, TypeScript output, etc.
+ */
 export function isGeneratedOrBundledFile(filePath) {
     const base = path.basename(filePath);
-
+    /**
+     * Minified file detection:
+     * Matches file names like `something.min.js` or `app.min.ts`.
+     * These are typically compiled output intended for production and are not original source code.
+     */
+    const isMinified = /\.min\.(js|ts)$/.test(base);
+    /**
+     * Hash-named or chunk file detection:
+     * Matches file names like `bundle.839abc.js`, `chunk.123abc.ts`, or `main-worker.js`.
+     * These are often created by bundlers like Webpack, Vite, or Rollup.
+     */
     const isHashNamed = /[-_.](worker|bundle|chunk|[a-f0-9]{6,})\.(js|ts)$/.test(base);
-
-
+    /**
+     * Output folder detection:
+     * These folders are commonly used to store compiled or bundled output.
+     * Examples: `dist/`, `build/`, `assets/`, `node_modules/`, `plugins/`
+     *
+     * If a file is inside any of these folders, we consider it generated/bundled.
+     */
+    const outputDirs = ['dist', 'build', 'assets', 'node_modules', 'plugins'];
+    const isInKnownOutputFolder = outputDirs.some(dir => new RegExp(`[\\\\/]${dir}[\\\\/]`, 'i').test(filePath));
+    /**
+     * Special case: `lib/` folder
+     * The `lib` folder may contain either handwritten code or compiled output, depending on the project.
+     * To avoid over-filtering, we only treat files in `lib/` as generated if they also look minified or hashed.
+     */
+    const isInLib = /[\\/]lib[\\/]/i.test(filePath);
+    const isLikelyBundledLib = isInLib && (isMinified || isHashNamed);
+    /**
+     * Return true if *any* of the following conditions are met:
+     * - The file looks minified (e.g., `.min.js`)
+     * - The file has a hash or is a known bundle/chunk/worker
+     * - The file is located in a known output directory
+     * - The file is in `lib/` and has signs of being generated (minified or hashed)
+     */
+    return isMinified || isHashNamed || isInKnownOutputFolder || isLikelyBundledLib;
 }
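A few spot checks of isGeneratedOrBundledFile's rules (inputs illustrative). One caveat worth seeing in action: the folder regexes require a separator on both sides of the folder name, so a bare relative path like 'dist/index.js' would not match the output-folder rule — the paths below are shown with a leading directory:

    import { isGeneratedOrBundledFile } from './fileClassifier.js'; // import path assumed

    console.log(isGeneratedOrBundledFile('src/app.min.js'));         // true  (minified)
    console.log(isGeneratedOrBundledFile('out/bundle.839abc.js'));   // true  (hash-named)
    console.log(isGeneratedOrBundledFile('/proj/dist/index.js'));    // true  (known output folder)
    console.log(isGeneratedOrBundledFile('/proj/lib/utils.js'));     // false (lib/ alone is not enough)
    console.log(isGeneratedOrBundledFile('/proj/lib/utils.min.js')); // true  (lib/ + minified)
    console.log(isGeneratedOrBundledFile('/proj/src/index.js'));     // false (plain source)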
package/dist/utils/fileTree.js
CHANGED
@@ -1,30 +1,31 @@
 import fs from 'fs';
 import path from 'path';
-
-
-
-
-
+export function generateFocusedFileTree(rootDir, focusPath, maxDepth = 2) {
+    const absoluteFocus = path.resolve(focusPath);
+    const parentDir = path.dirname(absoluteFocus);
+    const relativeTitle = path.relative(rootDir, parentDir).replace(/\\/g, '/');
+    const tree = generateFileTree(parentDir, maxDepth, absoluteFocus);
+    return `📁 ${relativeTitle || '.'}\n${tree}`;
+}
+function generateFileTree(dir, depth, highlightPath, prefix = '') {
+    if (depth < 0)
         return '';
     let output = '';
-    const
-
-
-
-    if (a.isDirectory() && !b.isDirectory())
-        return -1;
-    if (!a.isDirectory() && b.isDirectory())
-        return 1;
-    return a.name.localeCompare(b.name);
-    });
-    for (const [i, item] of items.entries()) {
-        const isLast = i === items.length - 1;
+    const entries = fs.readdirSync(dir, { withFileTypes: true });
+    const sorted = entries.sort((a, b) => Number(b.isDirectory()) - Number(a.isDirectory()));
+    sorted.forEach((entry, index) => {
+        const isLast = index === sorted.length - 1;
         const connector = isLast ? '└── ' : '├── ';
-        const
-
-        if (
-        output +=
+        const fullPath = path.join(dir, entry.name);
+        const isHighlighted = highlightPath && path.resolve(fullPath) === path.resolve(highlightPath);
+        if (entry.isDirectory()) {
+            output += `${prefix}${connector}${entry.name}/\n`;
+            output += generateFileTree(fullPath, depth - 1, highlightPath, prefix + (isLast ? '    ' : '│   '));
        }
-
+        else {
+            const name = isHighlighted ? `➡️ ${entry.name}` : entry.name;
+            output += `${prefix}${connector}${name}\n`;
+        }
+    });
     return output;
 }
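Given the rewrite above, generateFocusedFileTree renders the directory containing the focus file (two levels deep by default) and arrow-marks the focus file itself. An illustrative call, with hypothetical paths and output shape (actual entry order depends on readdir and the dirs-first sort):

    import { generateFocusedFileTree } from './fileTree.js'; // import path assumed

    // Render the folder around one file, highlighting it in the listing.
    const tree = generateFocusedFileTree('/proj', '/proj/src/commands/AskCmd.js', 2);
    console.log(tree);
    // 📁 src/commands
    // ├── modules/
    // │   └── helpers.js
    // ├── ➡️ AskCmd.js
    // └── FindCmd.js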