scai 0.1.36 → 0.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -8,6 +8,7 @@ import { log } from '../utils/log.js';
|
|
|
8
8
|
import lockfile from 'proper-lockfile';
|
|
9
9
|
import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
|
|
10
10
|
import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
11
|
+
import { isGeneratedOrBundledFile } from '../utils/fileClassifier.js';
|
|
11
12
|
const MAX_FILES_PER_BATCH = 5;
|
|
12
13
|
async function lockDb() {
|
|
13
14
|
try {
|
|
@@ -42,30 +43,39 @@ export async function runDaemonBatch() {
|
|
|
42
43
|
log(`⚠️ Skipped (extension): ${row.path}`);
|
|
43
44
|
continue;
|
|
44
45
|
}
|
|
46
|
+
if (isGeneratedOrBundledFile(row.path)) {
|
|
47
|
+
log(`🚫 Skipping likely generated file: ${row.path}`);
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
45
50
|
try {
|
|
46
51
|
const content = await fs.readFile(row.path, 'utf-8');
|
|
47
|
-
// 1.
|
|
52
|
+
// 1. Lookup file ID in the database using its path
|
|
48
53
|
const fileResult = db
|
|
49
|
-
.prepare(`SELECT id FROM files WHERE path =
|
|
50
|
-
.get(row.path);
|
|
54
|
+
.prepare(`SELECT id FROM files WHERE path = @path`) // Replaced `?` with `@path`
|
|
55
|
+
.get({ path: row.path });
|
|
51
56
|
const fileId = fileResult?.id;
|
|
52
|
-
if (
|
|
53
|
-
|
|
57
|
+
if (fileId) {
|
|
58
|
+
// 2. Extract and index functions for this file
|
|
59
|
+
await indexFunctionsForFile(row.path, fileId);
|
|
60
|
+
// 3. Mark the file as having functions extracted in the database
|
|
61
|
+
db.prepare(`UPDATE files SET functions_extracted = 1 WHERE id = @id`).run({ id: fileId });
|
|
54
62
|
}
|
|
55
63
|
else {
|
|
56
|
-
|
|
64
|
+
log(`⚠️ Could not find fileId for ${row.path}`);
|
|
57
65
|
}
|
|
58
|
-
//
|
|
66
|
+
// 4. Summarize the file content
|
|
59
67
|
log(`📝 Generating summary for ${row.path}...`);
|
|
60
68
|
const summaryResult = await summaryModule.run({ content, filepath: row.path });
|
|
61
69
|
const summary = summaryResult?.summary?.trim() || null;
|
|
62
70
|
let embedding = null;
|
|
63
71
|
if (summary) {
|
|
72
|
+
// 5. Generate embedding for the summary
|
|
64
73
|
const vector = await generateEmbedding(summary);
|
|
65
74
|
if (vector) {
|
|
66
75
|
embedding = JSON.stringify(vector);
|
|
67
76
|
}
|
|
68
77
|
}
|
|
78
|
+
// 6. Save the summary and embedding in the database
|
|
69
79
|
db.prepare(`
|
|
70
80
|
UPDATE files
|
|
71
81
|
SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
|
|
@@ -74,8 +84,10 @@ export async function runDaemonBatch() {
|
|
|
74
84
|
log(`✅ Updated summary & embedding for ${row.path}\n`);
|
|
75
85
|
}
|
|
76
86
|
catch (err) {
|
|
87
|
+
// 7. Error handling: Log the error message if the process fails
|
|
77
88
|
log(`❌ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
78
89
|
}
|
|
90
|
+
// 8. Optional delay between file processing to reduce load on the system
|
|
79
91
|
await new Promise(resolve => setTimeout(resolve, 200));
|
|
80
92
|
}
|
|
81
93
|
await release();
|
|
@@ -3,27 +3,53 @@ import { detectFileType } from '../../utils/detectFileType.js';
|
|
|
3
3
|
import { extractFromJava } from './extractFromJava.js';
|
|
4
4
|
import { extractFromJS } from './extractFromJs.js';
|
|
5
5
|
import { extractFromXML } from './extractFromXML.js';
|
|
6
|
+
import { db } from '../client.js'; // Assuming db is imported from your DB utility
|
|
6
7
|
/**
 * Detects the file type and delegates to the matching function extractor.
 *
 * Whatever the outcome (success, unsupported type, or extractor failure),
 * the file row is flagged with `functions_extracted = 1` exactly once so the
 * same file is not retried on every run.
 *
 * @param {string} filePath - Path of the file being processed.
 * @param {string} content - Text content of the file.
 * @param {number|string} fileId - `files.id` of the row to flag.
 * @returns {Promise<boolean>} `true` only when the JS/TS extractor ran
 *   without throwing; `false` for Java, XML, unsupported types and failures.
 */
export async function extractFunctionsFromFile(filePath, content, fileId) {
    const type = detectFileType(filePath).trim().toLowerCase();
    try {
        switch (type) {
            case 'js':
            case 'ts':
            case 'javascript':
            case 'typescript':
                log(`✅ Attempting to extract JS functions from ${filePath}\n`);
                await extractFromJS(filePath, content, fileId);
                return true;
            case 'java':
                log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
                await extractFromJava(filePath, content, fileId);
                return false;
            case 'xml':
                log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
                await extractFromXML(filePath, content, fileId);
                return false;
            default:
                log(`⚠️ Unsupported file type: ${type} for function extraction. Skipping ${filePath}`);
                return false;
        }
    }
    catch (error) {
        // Only extractor failures land here; the bookkeeping UPDATE lives in
        // `finally`, so a database error is no longer reported as an
        // extraction failure.
        log(`❌ Failed to extract functions from ${filePath}: ${error instanceof Error ? error.message : String(error)}`);
        return false;
    }
    finally {
        // Runs after every branch above, so the row is flagged once per call.
        markFunctionsExtracted(fileId);
    }
}

/**
 * Flags a `files` row as having gone through function extraction.
 *
 * @param {number|string} fileId - `files.id` of the row to update.
 */
function markFunctionsExtracted(fileId) {
    db.prepare(`
      UPDATE files SET functions_extracted = 1 WHERE id = @id
    `).run({ id: fileId });
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
// utils/fileClassifier.ts
|
|
3
|
+
/**
 * Heuristically decides whether a path points at generated or bundled output
 * that should not be indexed.
 *
 * A file is treated as generated when either:
 *  - its basename ends in `worker`, `bundle`, `chunk` or a hex-looking hash
 *    (6+ hex characters, at least one of them a digit) right before a
 *    `.js`/`.ts` extension, preceded by `-`, `_` or `.`; or
 *  - any directory segment of the path is `dist`, `build`, `assets` or
 *    `node_modules` (case-insensitive, `/` or `\` separators).
 *
 * @param {string} filePath - Absolute or relative path of the file.
 * @returns {boolean} `true` when the file looks generated or bundled.
 */
export function isGeneratedOrBundledFile(filePath) {
    const base = path.basename(filePath);
    // The lookahead demands a digit inside the hex run so that ordinary words
    // spelled only with a–f letters (e.g. `user.facade.ts`) are not mistaken
    // for content hashes.
    const isHashNamed = /[-_.](worker|bundle|chunk|(?=[a-f0-9]*\d)[a-f0-9]{6,})\.(js|ts)$/.test(base);
    // `(^|[\\/])` also catches relative paths that start with the folder name,
    // such as `dist/index.js`.
    const isInOutputFolder = /(^|[\\/])(dist|build|assets|node_modules)[\\/]/i.test(filePath);
    return isHashNamed || isInOutputFolder;
}
|