scai 0.1.117 → 0.1.119

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/README.md +88 -503
  2. package/dist/agents/MainAgent.js +255 -0
  3. package/dist/agents/contextReviewStep.js +104 -0
  4. package/dist/agents/finalPlanGenStep.js +123 -0
  5. package/dist/agents/infoPlanGenStep.js +126 -0
  6. package/dist/agents/planGeneratorStep.js +118 -0
  7. package/dist/agents/planResolverStep.js +95 -0
  8. package/dist/agents/planTargetFilesStep.js +48 -0
  9. package/dist/agents/preFileSearchCheckStep.js +95 -0
  10. package/dist/agents/selectRelevantSourcesStep.js +100 -0
  11. package/dist/agents/semanticAnalysisStep.js +144 -0
  12. package/dist/agents/structuralAnalysisStep.js +46 -0
  13. package/dist/agents/transformPlanGenStep.js +107 -0
  14. package/dist/agents/understandIntentStep.js +72 -0
  15. package/dist/agents/validationAnalysisStep.js +87 -0
  16. package/dist/commands/AskCmd.js +47 -116
  17. package/dist/commands/ChangeLogUpdateCmd.js +11 -5
  18. package/dist/commands/CommitSuggesterCmd.js +50 -75
  19. package/dist/commands/DaemonCmd.js +119 -29
  20. package/dist/commands/IndexCmd.js +41 -24
  21. package/dist/commands/InspectCmd.js +0 -1
  22. package/dist/commands/ReadlineSingleton.js +18 -0
  23. package/dist/commands/ResetDbCmd.js +20 -21
  24. package/dist/commands/ReviewCmd.js +89 -54
  25. package/dist/commands/SummaryCmd.js +12 -18
  26. package/dist/commands/WorkflowCmd.js +41 -0
  27. package/dist/commands/factory.js +254 -0
  28. package/dist/config.js +67 -15
  29. package/dist/constants.js +20 -4
  30. package/dist/context.js +10 -11
  31. package/dist/daemon/daemonQueues.js +63 -0
  32. package/dist/daemon/daemonWorker.js +40 -63
  33. package/dist/daemon/generateSummaries.js +58 -0
  34. package/dist/daemon/runFolderCapsuleBatch.js +247 -0
  35. package/dist/daemon/runIndexingBatch.js +147 -0
  36. package/dist/daemon/runKgBatch.js +104 -0
  37. package/dist/db/fileIndex.js +168 -63
  38. package/dist/db/functionExtractors/extractFromJava.js +210 -6
  39. package/dist/db/functionExtractors/extractFromJs.js +173 -214
  40. package/dist/db/functionExtractors/extractFromTs.js +159 -160
  41. package/dist/db/functionExtractors/index.js +7 -5
  42. package/dist/db/schema.js +55 -20
  43. package/dist/db/sqlTemplates.js +50 -19
  44. package/dist/fileRules/builtins.js +31 -14
  45. package/dist/fileRules/codeAllowedExtensions.js +4 -0
  46. package/dist/fileRules/fileExceptions.js +0 -13
  47. package/dist/fileRules/ignoredExtensions.js +10 -0
  48. package/dist/index.js +128 -325
  49. package/dist/lib/generate.js +37 -14
  50. package/dist/lib/generateFolderCapsules.js +109 -0
  51. package/dist/lib/spinner.js +12 -5
  52. package/dist/modelSetup.js +1 -11
  53. package/dist/pipeline/modules/changeLogModule.js +16 -19
  54. package/dist/pipeline/modules/chunkManagerModule.js +24 -0
  55. package/dist/pipeline/modules/cleanupModule.js +95 -91
  56. package/dist/pipeline/modules/codeTransformModule.js +208 -0
  57. package/dist/pipeline/modules/commentModule.js +20 -11
  58. package/dist/pipeline/modules/commitSuggesterModule.js +36 -14
  59. package/dist/pipeline/modules/contextReviewModule.js +52 -0
  60. package/dist/pipeline/modules/fileReaderModule.js +72 -0
  61. package/dist/pipeline/modules/fileSearchModule.js +136 -0
  62. package/dist/pipeline/modules/finalAnswerModule.js +53 -0
  63. package/dist/pipeline/modules/gatherInfoModule.js +176 -0
  64. package/dist/pipeline/modules/generateTestsModule.js +63 -54
  65. package/dist/pipeline/modules/kgModule.js +26 -11
  66. package/dist/pipeline/modules/preserveCodeModule.js +91 -49
  67. package/dist/pipeline/modules/refactorModule.js +19 -7
  68. package/dist/pipeline/modules/repairTestsModule.js +44 -36
  69. package/dist/pipeline/modules/reviewModule.js +23 -13
  70. package/dist/pipeline/modules/summaryModule.js +27 -35
  71. package/dist/pipeline/modules/writeFileModule.js +86 -0
  72. package/dist/pipeline/registry/moduleRegistry.js +38 -93
  73. package/dist/pipeline/runModulePipeline.js +22 -19
  74. package/dist/scripts/dbcheck.js +143 -228
  75. package/dist/utils/buildContextualPrompt.js +245 -172
  76. package/dist/utils/debugContext.js +24 -0
  77. package/dist/utils/fileTree.js +16 -6
  78. package/dist/utils/loadRelevantFolderCapsules.js +64 -0
  79. package/dist/utils/log.js +2 -0
  80. package/dist/utils/normalizeData.js +23 -0
  81. package/dist/utils/planActions.js +60 -0
  82. package/dist/utils/promptBuilderHelper.js +67 -0
  83. package/dist/utils/promptLogHelper.js +52 -0
  84. package/dist/utils/sanitizeQuery.js +20 -8
  85. package/dist/utils/sleep.js +3 -0
  86. package/dist/utils/splitCodeIntoChunk.js +65 -32
  87. package/dist/utils/vscode.js +49 -0
  88. package/dist/workflow/workflowResolver.js +14 -0
  89. package/dist/workflow/workflowRunner.js +103 -0
  90. package/package.json +6 -5
  91. package/dist/agent/agentManager.js +0 -39
  92. package/dist/agent/workflowManager.js +0 -95
  93. package/dist/commands/ModulePipelineCmd.js +0 -31
  94. package/dist/daemon/daemonBatch.js +0 -186
  95. package/dist/fileRules/scoreFiles.js +0 -71
  96. package/dist/lib/generateEmbedding.js +0 -22
@@ -0,0 +1,247 @@
1
+ import fg from 'fast-glob';
2
+ import path from 'path';
3
+ import { getDbForRepo } from '../db/client.js';
4
+ import { log } from '../utils/log.js';
5
+ import { generate } from '../lib/generate.js';
6
+ import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
7
+ import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
8
/**
 * Build "folder capsules" (LLM-generated summaries plus heuristic stats)
 * for the highest-scoring folders that do not have a capsule yet.
 *
 * @param {number} [maxFolders=20] - Hard cap on total capsules; also the
 *   maximum number of folders processed in this batch.
 * @returns {Promise<boolean>} true if the batch ran (even partially),
 *   false if there was nothing to do or the cap was already reached.
 */
export async function runFolderCapsuleBatch(maxFolders = 20) {
    const db = getDbForRepo();
    log('📦 Starting folder capsule batch...');
    // --------------------------------------------------
    // Stop if we already have enough capsules
    // --------------------------------------------------
    const existing = db.prepare(`SELECT COUNT(*) as c FROM folder_capsules`).get();
    log(`📦 Existing folder capsules: ${existing.c}`);
    if (existing.c >= maxFolders) {
        log(`📦 Folder capsule batch: cap reached (${existing.c})`);
        return false;
    }
    // --------------------------------------------------
    // Scan folders with fast-glob
    // --------------------------------------------------
    log('🔍 Scanning folders...');
    const folderPathsRaw = await fg('**/', {
        cwd: process.cwd(),
        onlyDirectories: true,
        ignore: IGNORED_FOLDER_GLOBS,
        dot: false,
        absolute: true,
    });
    // Normalize to forward slashes so DB paths and glob paths compare equal.
    const folderPaths = folderPathsRaw.map(p => path.normalize(p).replace(/\\/g, '/'));
    log(`📂 Found ${folderPaths.length} folders`);
    if (!folderPaths.length) {
        log('📦 No folders found after filtering');
        return false;
    }
    // --------------------------------------------------
    // Collect uncapsuled folders from DB
    // --------------------------------------------------
    log('📦 Collecting uncapsuled folders from DB...');
    const uncapsuledFoldersRaw = db.prepare(`
        SELECT folder, COUNT(*) AS fileCount
        FROM (
            SELECT substr(path, 1, length(path) - length(filename) - 1) AS folder,
                   filename, type
            FROM files
            WHERE processing_status != 'skipped'
        )
        LEFT JOIN folder_capsules fc ON fc.path = folder
        WHERE fc.path IS NULL
        GROUP BY folder
    `).all();
    // Set gives O(1) membership checks instead of Array.includes inside filter.
    const folderPathSet = new Set(folderPaths);
    const uncapsuledFolders = uncapsuledFoldersRaw
        .map(f => ({ ...f, folder: path.normalize(path.resolve(f.folder)).replace(/\\/g, '/') }))
        .filter(f => folderPathSet.has(f.folder));
    log(`📦 ${uncapsuledFolders.length} uncapsuled folders match actual paths`);
    if (!uncapsuledFolders.length) {
        log('📦 No uncapsuled folders found');
        return false;
    }
    // --------------------------------------------------
    // Heuristic folder scoring: favor populated, shallow, "source-like"
    // folders; penalize asset/markup folders.
    // --------------------------------------------------
    function scoreFolder(folder, fileCount) {
        let score = 0;
        const depth = folder.split('/').length;
        const p = folder.toLowerCase();
        if (fileCount > 5)
            score += 2;
        if (fileCount > 15)
            score += 2;
        if (depth <= 6)
            score += 2;
        if (/src|modules|pipeline|agents|services|commands|lib/.test(p))
            score += 3;
        if (/css|sql|html|assets/.test(p))
            score -= 3;
        return score;
    }
    const ranked = uncapsuledFolders
        .map(f => ({ ...f, score: scoreFolder(f.folder, f.fileCount) }))
        .sort((a, b) => b.score - a.score)
        .slice(0, maxFolders);
    log(`📦 Top ${ranked.length} folders selected for processing`);
    // --------------------------------------------------
    // Process each top folder; per-folder failures are logged and skipped.
    // --------------------------------------------------
    for (const target of ranked) {
        const folderPath = target.folder;
        if (!folderPath) {
            log(`⚠️ Skipping folder because folderPath is undefined`);
            continue;
        }
        try {
            log(`📦 Building folder capsule: ${folderPath} (score=${target.score})`);
            // --------------------------------------------------
            // Load files in folder from DB.
            // FIX: use bound parameters instead of string-interpolated SQL
            // (the hand-rolled quote escaping was injection-prone).
            // --------------------------------------------------
            const folderLike = `${folderPath}/%`;
            const files = db.prepare(`
                SELECT path, filename, type
                FROM files
                WHERE path LIKE ? AND processing_status != 'skipped'
            `).all(folderLike);
            log(`📄 ${files.length} files found in folder`);
            if (!files.length) {
                log('⚠️ Folder empty, skipping');
                continue;
            }
            // --------------------------------------------------
            // Compute file-level scores: entry-point-ish name, how many
            // sibling files mention this file, import fan-out, and size.
            // --------------------------------------------------
            const fileScores = files.map(f => {
                const heuristicName = /index|main|cli|app|server|config/i.test(f.filename) ? 5 : 0;
                const basename = path.basename(f.path);
                const incoming = db.prepare(`
                    SELECT COUNT(*) AS c
                    FROM files
                    WHERE path LIKE ? AND processing_status != 'skipped' AND content_text LIKE ?
                `).get(folderLike, `%${basename}%`);
                const incomingScore = (incoming?.c || 0) * 2;
                // FIX: the selected column is content_text (snake_case);
                // reading row?.contentText always yielded '' and zeroed
                // the outgoing/size scores.
                const row = db.prepare(`SELECT content_text FROM files_fts WHERE path = ?`).get(f.path);
                const code = row?.content_text || '';
                const outgoingCount = (code.match(/from\s+['"].+['"]/g)?.length || 0) + (code.match(/require\(['"].+['"]\)/g)?.length || 0);
                const sizeScore = Math.min(code.length / 2000, 3);
                const totalScore = heuristicName + incomingScore + outgoingCount + sizeScore;
                log(`📄 File score: ${f.filename} -> ${totalScore}`);
                return { ...f, score: totalScore };
            });
            const representativeFiles = fileScores.sort((a, b) => b.score - a.score).slice(0, 2);
            log(`📌 Representative files: ${representativeFiles.map(f => f.filename).join(', ')}`);
            const candidateFiles = representativeFiles.map(f => {
                // Same snake_case fix as above; cap snippets at 2000 chars.
                const row = db.prepare(`SELECT content_text FROM files_fts WHERE path = ?`).get(f.path);
                return { path: f.path, code: row?.content_text?.slice(0, 2000) || '' };
            });
            // --------------------------------------------------
            // Ask LLM to generate folder summary
            // --------------------------------------------------
            const prompt = `
You are analyzing a source code folder.

Folder path:
${folderPath}

Candidate files with code snippets:
${JSON.stringify(candidateFiles, null, 2)}

Task:
- Identify up to TWO files that best represent the purpose of this folder.
- Explain the folder's responsibility in ONE concise sentence.
- Return ONLY valid JSON.

Expected JSON shape:
{
  "summary": "one sentence description",
  "files": [
    { "path": "absolute/file/path", "summary": "optional short note" }
  ]
}
`.trim();
            let folderSummary = '';
            let keyFiles = [];
            try {
                log('🤖 Asking LLM for folder summary...');
                const response = await generate({ content: prompt, query: '' });
                const cleaned = await cleanupModule.run({ query: '', content: response.data });
                const data = typeof cleaned.data === 'string' ? JSON.parse(cleaned.data) : cleaned.data;
                if (data && typeof data === 'object') {
                    if (typeof data.summary === 'string')
                        folderSummary = data.summary;
                    if (Array.isArray(data.files)) {
                        keyFiles = data.files
                            .filter((f) => typeof f?.path === 'string')
                            .slice(0, 2)
                            .map((f) => ({ path: f.path, reason: f.summary || f.reason || 'representative file' }));
                    }
                }
                log(`📌 Folder summary: ${folderSummary}`);
            }
            catch (err) {
                // LLM/parse failure is non-fatal: the capsule is still
                // written with heuristic stats and an empty summary.
                if (err instanceof Error) {
                    log(`🔥 Failed processing folder ${folderPath}: ${err.message}\n${err.stack}`);
                }
                else {
                    log(`🔥 Failed processing folder ${folderPath}:`, err);
                }
            }
            // --------------------------------------------------
            // Compute stats by file type
            // --------------------------------------------------
            const byType = {};
            for (const f of files) {
                const ext = f.type || path.extname(f.filename || '').replace('.', '') || 'unknown';
                byType[ext] = (byType[ext] || 0) + 1;
            }
            // --------------------------------------------------
            // Folder-level dependencies: directories of contained files
            // that are not the folder itself.
            // --------------------------------------------------
            const importsFrom = new Set();
            for (const f of files) {
                const dir = path.dirname(f.path).replace(/\\/g, '/');
                if (dir !== folderPath)
                    importsFrom.add(dir);
            }
            // --------------------------------------------------
            // Build capsule
            // --------------------------------------------------
            const capsule = {
                path: folderPath,
                depth: folderPath.split('/').length,
                stats: { fileCount: files.length, byType },
                roles: [],
                concerns: [],
                keyFiles,
                dependencies: { importsFrom: Array.from(importsFrom), usedBy: [] },
                summary: folderSummary,
                confidence: Math.min(0.9, 0.4 + target.score * 0.1),
            };
            // --------------------------------------------------
            // Persist capsule.
            // FIX: JSON.stringify(capsule) was previously interpolated into
            // the SQL *unescaped* — any single quote in the LLM summary broke
            // the statement. Bound parameters make this safe.
            // --------------------------------------------------
            const now = new Date().toISOString();
            db.prepare(`
                INSERT INTO folder_capsules (
                    path, depth, capsule_json, confidence, last_generated, source_file_count
                )
                VALUES (?, ?, ?, ?, ?, ?)
            `).run(capsule.path, capsule.depth, JSON.stringify(capsule), capsule.confidence, now, files.length);
            db.prepare(`
                UPDATE files
                SET processing_status = 'capsuled'
                WHERE path LIKE ?
            `).run(folderLike);
            log(`✅ Folder capsule written: ${folderPath}`);
        }
        catch (err) {
            if (err instanceof Error) {
                log(`🔥 Failed processing folder ${folderPath}: ${err.message}\n${err.stack}`);
            }
            else {
                log(`🔥 Failed processing folder ${folderPath}:`, err);
            }
        }
    }
    log('📦 Folder capsule batch complete.');
    return true;
}
@@ -0,0 +1,147 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import lockfile from 'proper-lockfile';
4
+ import { log } from '../utils/log.js';
5
+ import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
6
+ import { sleep } from '../utils/sleep.js';
7
+ import { selectUnprocessedFiles, markFileAsSkippedByPath, countUnprocessedFiles, upsertFileTemplate, upsertFileFtsTemplate, markFileAsIndexed } from '../db/sqlTemplates.js';
8
+ // --------------------------------------------------
9
+ // DB LOCK
10
+ // --------------------------------------------------
11
/**
 * Acquire the repository DB lockfile, retrying on contention.
 * Resolves with the lockfile release function; throws the final
 * error once all attempts are exhausted.
 */
async function lockDbWithRetry(retries = 3, delayMs = 100) {
    let attempt = 0;
    while (attempt < retries) {
        try {
            return await lockfile.lock(getDbPathForRepo());
        }
        catch (err) {
            attempt += 1;
            if (attempt >= retries) {
                log('❌ Failed to acquire DB lock after retries:', err);
                throw err;
            }
            log(`⏳ DB lock busy, retrying... (${attempt})`);
            await sleep(delayMs);
        }
    }
}
28
+ // --------------------------------------------------
29
+ // SINGLE FILE INDEXER (daemon-side)
30
+ // --------------------------------------------------
31
/**
 * Index a single file into the metadata table and the FTS table,
 * then mark it as indexed.
 *
 * @param {string} filePath - Path to the file on disk.
 * @param {string|null} summary - Optional precomputed summary.
 * @param {string} type - File type tag stored alongside the row.
 * @throws Re-throws fs.statSync errors (missing file, permissions) and
 *   FTS upsert errors; metadata-upsert and content-read errors are
 *   logged and swallowed.
 */
export function indexFile(filePath, summary, type) {
    const stats = fs.statSync(filePath);
    const lastModified = stats.mtime.toISOString();
    const indexedAt = new Date().toISOString();
    const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
    const filename = path.basename(normalizedPath);
    // ----------------------------------------------
    // Extract text content (guarded): skip huge files and binary-looking
    // content; failures leave contentText empty rather than aborting.
    // ----------------------------------------------
    let contentText = '';
    try {
        if (stats.size <= 2000000) {
            const buffer = fs.readFileSync(filePath);
            const sampleLength = Math.min(buffer.length, 1000);
            // FIX: guard the empty-file case — dividing by 0 produced NaN,
            // and NaN <= 0.3 is false, so empty files were misclassified
            // as binary and their (empty) content skipped with a warning.
            const nonTextRatio = sampleLength === 0
                ? 0
                : buffer
                    .slice(0, 1000)
                    .filter(b => b < 9 || (b > 13 && b < 32))
                    .length / sampleLength;
            if (nonTextRatio <= 0.3) {
                contentText = buffer.toString('utf-8');
            }
            else {
                log(`⚠️ Binary-like content skipped: ${normalizedPath}`);
            }
        }
        else {
            log(`⚠️ Large file content skipped: ${normalizedPath}`);
        }
    }
    catch (err) {
        log(`⚠️ Failed reading content: ${normalizedPath}`, err);
    }
    const db = getDbForRepo();
    // ----------------------------------------------
    // Metadata upsert (best-effort; FTS upsert below still runs on failure)
    // ----------------------------------------------
    try {
        db.prepare(upsertFileTemplate).run({
            path: normalizedPath,
            filename,
            summary,
            type,
            lastModified,
            indexedAt,
        });
    }
    catch (err) {
        log(`⚠️ Failed metadata upsert for ${normalizedPath}:`, err);
    }
    // ----------------------------------------------
    // FTS upsert
    // ----------------------------------------------
    db.prepare(upsertFileFtsTemplate).run({
        path: normalizedPath,
        filename,
        summary,
        contentText,
    });
    // ----------------------------------------------
    // Mark as indexed
    // ----------------------------------------------
    db.prepare(markFileAsIndexed).run({ path: normalizedPath });
}
93
+ // --------------------------------------------------
94
+ // FTS REBUILD
95
+ // --------------------------------------------------
96
/** Trigger a full rebuild of the files_fts full-text index. */
function rebuildFts() {
    const database = getDbForRepo();
    log('🔍 Rebuilding FTS index...');
    database.exec(`INSERT INTO files_fts(files_fts) VALUES('rebuild');`);
}
101
+ // --------------------------------------------------
102
+ // INDEXING BATCH
103
+ // --------------------------------------------------
104
/**
 * Index up to BATCH_SIZE unprocessed files under the DB lock, rebuild
 * the FTS index, and report whether any file was actually indexed.
 *
 * @returns {Promise<boolean>} true if at least one file was indexed.
 */
export async function runIndexingBatch() {
    log('⚡ Starting indexing batch...');
    const db = getDbForRepo();
    const BATCH_SIZE = 25; // adjust as needed
    // Cheap pre-check so we don't take the lock when there is nothing to do.
    const pending = db.prepare(selectUnprocessedFiles).all(BATCH_SIZE);
    if (pending.length === 0) {
        log('✅ No files left to index.');
        return false;
    }
    const release = await lockDbWithRetry();
    let didIndexWork = false;
    try {
        // FIX: re-select the batch *under* the lock — the pre-check above
        // races with other workers, which may have indexed or skipped some
        // of those rows between the check and lock acquisition.
        const rows = db.prepare(selectUnprocessedFiles).all(BATCH_SIZE);
        for (const row of rows) {
            log(`📄 Indexing: ${row.path}`);
            try {
                indexFile(row.path, null, 'auto');
                didIndexWork = true;
            }
            catch (err) {
                // A file that can't be indexed is marked skipped so it
                // doesn't wedge subsequent batches.
                log(`⚠️ Failed indexing ${row.path}`, err);
                db.prepare(markFileAsSkippedByPath).run({ path: row.path });
            }
        }
        try {
            rebuildFts();
        }
        catch (err) {
            log('⚠️ Failed FTS rebuild:', err);
        }
        const remaining = db.prepare(countUnprocessedFiles).get();
        log(`📦 Remaining unindexed files: ${remaining.count}`);
        return didIndexWork;
    }
    finally {
        if (release) {
            await release();
            log('🔓 DB lock released');
        }
        else {
            log('⚠️ DB lock was not acquired, nothing to release');
        }
    }
}
@@ -0,0 +1,104 @@
1
+ // File: src/daemon/runKgBatch.ts
2
+ import fs from 'fs/promises';
3
+ import fsSync from 'fs';
4
+ import path from 'path';
5
+ import { insertGraphTagTemplate, selectGraphTagIdTemplate, insertGraphEntityTagTemplate, } from '../db/sqlTemplates.js';
6
+ import { getDbForRepo } from '../db/client.js';
7
+ import { log } from '../utils/log.js';
8
+ import { kgModule } from '../pipeline/modules/kgModule.js';
9
+ import { KG_IGNORED_EXTENSIONS } from '../fileRules/ignoredExtensions.js';
10
+ import * as sqlTemplates from '../db/sqlTemplates.js'; // import the template
11
+ import { indexCodeForFile } from '../db/functionIndex.js';
12
+ const MAX_KG_FILES_PER_BATCH = 3;
13
/**
 * Run one knowledge-graph batch: for up to MAX_KG_FILES_PER_BATCH pending
 * files, extract code elements, build KG entities/edges, persist entity
 * tags, and mark each file as KG-done (including on failure, to avoid
 * infinite retries).
 *
 * @returns {Promise<boolean>} true if any files were processed, false if
 *   the queue was empty — consistent with the other batch runners
 *   (runIndexingBatch / runFolderCapsuleBatch). Backward compatible:
 *   the previous implementation always returned undefined (falsy).
 */
export async function runKgBatch() {
    const db = getDbForRepo();
    const rows = db.prepare(`
        SELECT id, path, summary
        FROM files
        WHERE functions_extracted_at IS NULL
          AND processing_status NOT IN ('skipped', 'failed', 'kg_done')
        LIMIT ?
    `).all(MAX_KG_FILES_PER_BATCH);
    if (rows.length === 0) {
        log('🧠 KG batch: no pending files');
        return false;
    }
    for (const row of rows) {
        log(`\n🔗 KG: Processing ${row.path}`);
        if (!fsSync.existsSync(row.path)) {
            // File vanished since indexing: mark done so it isn't retried.
            log('⚠️ KG skipped: file missing');
            db.prepare(sqlTemplates.markFileAsKgDone).run({ path: row.path });
            continue;
        }
        // --- CODE EXTRACTION SECTION ---
        log('\n');
        log('📄 Code Extraction');
        log('--------------------------------------------------------------------');
        try {
            const success = await indexCodeForFile(row.path, row.id);
            if (success) {
                log('✅ Indexed code');
            }
            else {
                log('⚡ No code elements extracted or unsupported file type');
            }
        }
        catch (err) {
            // Extraction failure does not prevent the KG pass below.
            log(`❌ Code extraction failed for ${row.path}:`, err);
        }
        // --- KNOWLEDGE GRAPH SECTION ---
        log('\n');
        log('🔗 Knowledge Graph');
        log('--------------------------------------------------------------------');
        const ext = path.extname(row.path).toLowerCase();
        if (KG_IGNORED_EXTENSIONS.includes(ext)) {
            log(`⚠️ KG skipped (ignored extension): ${ext}`);
            db.prepare(sqlTemplates.markFileAsKgDone).run({ path: row.path });
            continue;
        }
        try {
            const content = await fs.readFile(row.path, 'utf-8');
            const kgInput = {
                fileId: row.id,
                filepath: row.path,
                summary: row.summary || undefined,
            };
            const kgResult = await kgModule.run(kgInput, content);
            log(`✅ KG built: ${kgResult.entities.length} entities, ${kgResult.edges.length} edges`);
            if (kgResult.entities.length > 0) {
                const insertTag = db.prepare(insertGraphTagTemplate);
                const getTagId = db.prepare(selectGraphTagIdTemplate);
                const insertEntityTag = db.prepare(insertGraphEntityTagTemplate);
                for (const entity of kgResult.entities) {
                    if (!entity.type || !Array.isArray(entity.tags))
                        continue;
                    for (const tag of entity.tags) {
                        if (!tag)
                            continue;
                        try {
                            insertTag.run({ name: tag });
                            const tagRow = getTagId.get({ name: tag });
                            if (!tagRow)
                                continue;
                            insertEntityTag.run({
                                entity_type: entity.type,
                                entity_unique_id: `${entity.name}@${row.path}`,
                                tag_id: tagRow.id,
                            });
                        }
                        catch {
                            // ignore per-entity failures
                        }
                    }
                }
            }
            // ✅ Mark KG as done for this file
            db.prepare(sqlTemplates.markFileAsKgDone).run({ path: row.path });
        }
        catch (err) {
            log(`❌ KG failed for ${row.path}:`, err);
            // Still mark as done to avoid infinite retries
            db.prepare(sqlTemplates.markFileAsKgDone).run({ path: row.path });
        }
    }
    return true;
}