scai 0.1.116 → 0.1.118
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/MainAgent.js +255 -0
- package/dist/agents/contextReviewStep.js +104 -0
- package/dist/agents/finalPlanGenStep.js +123 -0
- package/dist/agents/infoPlanGenStep.js +126 -0
- package/dist/agents/planGeneratorStep.js +118 -0
- package/dist/agents/planResolverStep.js +95 -0
- package/dist/agents/planTargetFilesStep.js +48 -0
- package/dist/agents/preFileSearchCheckStep.js +95 -0
- package/dist/agents/selectRelevantSourcesStep.js +100 -0
- package/dist/agents/semanticAnalysisStep.js +144 -0
- package/dist/agents/structuralAnalysisStep.js +46 -0
- package/dist/agents/transformPlanGenStep.js +107 -0
- package/dist/agents/understandIntentStep.js +72 -0
- package/dist/agents/validationAnalysisStep.js +87 -0
- package/dist/commands/AskCmd.js +47 -116
- package/dist/commands/ChangeLogUpdateCmd.js +11 -5
- package/dist/commands/CommitSuggesterCmd.js +50 -75
- package/dist/commands/DaemonCmd.js +119 -29
- package/dist/commands/IndexCmd.js +41 -24
- package/dist/commands/InspectCmd.js +0 -1
- package/dist/commands/ReadlineSingleton.js +18 -0
- package/dist/commands/ResetDbCmd.js +20 -21
- package/dist/commands/ReviewCmd.js +89 -54
- package/dist/commands/SummaryCmd.js +12 -18
- package/dist/commands/WorkflowCmd.js +41 -0
- package/dist/commands/factory.js +254 -0
- package/dist/config.js +67 -15
- package/dist/constants.js +20 -4
- package/dist/context.js +10 -11
- package/dist/daemon/daemonQueues.js +63 -0
- package/dist/daemon/daemonWorker.js +40 -63
- package/dist/daemon/generateSummaries.js +58 -0
- package/dist/daemon/runFolderCapsuleBatch.js +247 -0
- package/dist/daemon/runIndexingBatch.js +147 -0
- package/dist/daemon/runKgBatch.js +104 -0
- package/dist/db/fileIndex.js +168 -63
- package/dist/db/functionExtractors/extractFromJava.js +210 -6
- package/dist/db/functionExtractors/extractFromJs.js +186 -198
- package/dist/db/functionExtractors/extractFromTs.js +181 -192
- package/dist/db/functionExtractors/index.js +7 -5
- package/dist/db/schema.js +55 -20
- package/dist/db/sqlTemplates.js +50 -19
- package/dist/fileRules/builtins.js +31 -0
- package/dist/fileRules/codeAllowedExtensions.js +4 -0
- package/dist/fileRules/fileExceptions.js +0 -13
- package/dist/fileRules/ignoredExtensions.js +10 -0
- package/dist/index.js +128 -325
- package/dist/lib/generate.js +37 -14
- package/dist/lib/generateFolderCapsules.js +109 -0
- package/dist/lib/spinner.js +12 -5
- package/dist/modelSetup.js +35 -6
- package/dist/pipeline/modules/changeLogModule.js +16 -19
- package/dist/pipeline/modules/chunkManagerModule.js +24 -0
- package/dist/pipeline/modules/cleanupModule.js +96 -91
- package/dist/pipeline/modules/codeTransformModule.js +208 -0
- package/dist/pipeline/modules/commentModule.js +20 -11
- package/dist/pipeline/modules/commitSuggesterModule.js +36 -14
- package/dist/pipeline/modules/contextReviewModule.js +52 -0
- package/dist/pipeline/modules/fileReaderModule.js +72 -0
- package/dist/pipeline/modules/fileSearchModule.js +136 -0
- package/dist/pipeline/modules/finalAnswerModule.js +53 -0
- package/dist/pipeline/modules/gatherInfoModule.js +176 -0
- package/dist/pipeline/modules/generateTestsModule.js +63 -54
- package/dist/pipeline/modules/kgModule.js +26 -11
- package/dist/pipeline/modules/preserveCodeModule.js +91 -49
- package/dist/pipeline/modules/refactorModule.js +19 -7
- package/dist/pipeline/modules/repairTestsModule.js +44 -36
- package/dist/pipeline/modules/reviewModule.js +23 -13
- package/dist/pipeline/modules/summaryModule.js +27 -35
- package/dist/pipeline/modules/writeFileModule.js +86 -0
- package/dist/pipeline/registry/moduleRegistry.js +38 -93
- package/dist/pipeline/runModulePipeline.js +22 -19
- package/dist/scripts/dbcheck.js +156 -91
- package/dist/utils/buildContextualPrompt.js +245 -164
- package/dist/utils/debugContext.js +24 -0
- package/dist/utils/fileTree.js +16 -6
- package/dist/utils/loadRelevantFolderCapsules.js +64 -0
- package/dist/utils/log.js +2 -0
- package/dist/utils/normalizeData.js +23 -0
- package/dist/utils/planActions.js +60 -0
- package/dist/utils/promptBuilderHelper.js +67 -0
- package/dist/utils/promptLogHelper.js +52 -0
- package/dist/utils/sanitizeQuery.js +20 -8
- package/dist/utils/sharedUtils.js +8 -0
- package/dist/utils/sleep.js +3 -0
- package/dist/utils/splitCodeIntoChunk.js +65 -32
- package/dist/utils/vscode.js +49 -0
- package/dist/workflow/workflowResolver.js +14 -0
- package/dist/workflow/workflowRunner.js +103 -0
- package/package.json +6 -5
- package/dist/agent/agentManager.js +0 -39
- package/dist/agent/workflowManager.js +0 -95
- package/dist/commands/ModulePipelineCmd.js +0 -31
- package/dist/daemon/daemonBatch.js +0 -186
- package/dist/fileRules/scoreFiles.js +0 -71
- package/dist/lib/generateEmbedding.js +0 -22
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import fg from 'fast-glob';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { getDbForRepo } from '../db/client.js';
|
|
4
|
+
import { log } from '../utils/log.js';
|
|
5
|
+
import { generate } from '../lib/generate.js';
|
|
6
|
+
import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
|
|
7
|
+
import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
|
|
8
|
+
/**
 * Build "folder capsules" — LLM-generated folder summaries persisted to the
 * `folder_capsules` table — for the highest-scoring folders that do not have
 * one yet, up to `maxFolders` capsules total.
 *
 * @param {number} [maxFolders=20] - Hard cap on total capsules; also the max
 *   number of folders processed in this batch.
 * @returns {Promise<boolean>} true if the batch ran to completion, false if
 *   there was nothing to do (cap reached, no folders, or nothing uncapsuled).
 */
export async function runFolderCapsuleBatch(maxFolders = 20) {
  const db = getDbForRepo();
  log('📦 Starting folder capsule batch...');

  // --------------------------------------------------
  // Stop if we already have enough capsules
  // --------------------------------------------------
  const existing = db.prepare(`SELECT COUNT(*) as c FROM folder_capsules`).get();
  log(`📦 Existing folder capsules: ${existing.c}`);
  if (existing.c >= maxFolders) {
    log(`📦 Folder capsule batch: cap reached (${existing.c})`);
    return false;
  }

  // --------------------------------------------------
  // Scan folders with fast-glob
  // --------------------------------------------------
  log('🔍 Scanning folders...');
  const folderPathsRaw = await fg('**/', {
    cwd: process.cwd(),
    onlyDirectories: true,
    ignore: IGNORED_FOLDER_GLOBS,
    dot: false,
    absolute: true,
  });
  // Normalize to forward slashes so DB paths and disk paths compare equal.
  const folderPaths = folderPathsRaw.map(p => path.normalize(p).replace(/\\/g, '/'));
  log(`📂 Found ${folderPaths.length} folders`);
  if (!folderPaths.length) {
    log('📦 No folders found after filtering');
    return false;
  }

  // --------------------------------------------------
  // Collect uncapsuled folders from DB
  // --------------------------------------------------
  log('📦 Collecting uncapsuled folders from DB...');
  const uncapsuledFoldersRaw = db.prepare(`
    SELECT folder, COUNT(*) AS fileCount
    FROM (
      SELECT substr(path, 1, length(path) - length(filename) - 1) AS folder,
             filename, type
      FROM files
      WHERE processing_status != 'skipped'
    )
    LEFT JOIN folder_capsules fc ON fc.path = folder
    WHERE fc.path IS NULL
    GROUP BY folder
  `).all();
  // Set membership is O(1); Array.includes inside the filter was O(n^2).
  const folderPathSet = new Set(folderPaths);
  const uncapsuledFolders = uncapsuledFoldersRaw
    .map(f => ({ ...f, folder: path.normalize(path.resolve(f.folder)).replace(/\\/g, '/') }))
    .filter(f => folderPathSet.has(f.folder));
  log(`📦 ${uncapsuledFolders.length} uncapsuled folders match actual paths`);
  if (!uncapsuledFolders.length) {
    log('📦 No uncapsuled folders found');
    return false;
  }

  // --------------------------------------------------
  // Heuristic folder scoring: prefer shallow, code-heavy folders and
  // penalize asset/markup folders.
  // --------------------------------------------------
  function scoreFolder(folder, fileCount) {
    let score = 0;
    const depth = folder.split('/').length;
    const p = folder.toLowerCase();
    if (fileCount > 5)
      score += 2;
    if (fileCount > 15)
      score += 2;
    if (depth <= 6)
      score += 2;
    if (/src|modules|pipeline|agents|services|commands|lib/.test(p))
      score += 3;
    if (/css|sql|html|assets/.test(p))
      score -= 3;
    return score;
  }

  const ranked = uncapsuledFolders
    .map(f => ({ ...f, score: scoreFolder(f.folder, f.fileCount) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, maxFolders);
  log(`📦 Top ${ranked.length} folders selected for processing`);

  // Reusable parameterized statements (bound values — no string-built SQL,
  // which previously broke on quotes in paths and in the capsule JSON).
  const selectFolderFiles = db.prepare(`
    SELECT path, filename, type
    FROM files
    WHERE path LIKE ? AND processing_status != 'skipped'
  `);
  const countIncomingRefs = db.prepare(`
    SELECT COUNT(*) AS c
    FROM files
    WHERE path LIKE ? AND processing_status != 'skipped' AND content_text LIKE ?
  `);
  // Alias content_text -> contentText: rows expose raw column names, so the
  // previous `row?.contentText` on a `SELECT content_text` was always undefined.
  const selectFtsContent = db.prepare(`
    SELECT content_text AS contentText FROM files_fts WHERE path = ?
  `);
  const insertCapsule = db.prepare(`
    INSERT INTO folder_capsules (
      path, depth, capsule_json, confidence, last_generated, source_file_count
    )
    VALUES (?, ?, ?, ?, ?, ?)
  `);
  const markFolderCapsuled = db.prepare(`
    UPDATE files
    SET processing_status = 'capsuled'
    WHERE path LIKE ?
  `);

  // --------------------------------------------------
  // Process each top folder with try/catch
  // --------------------------------------------------
  for (const target of ranked) {
    const folderPath = target.folder;
    if (!folderPath) {
      log(`⚠️ Skipping folder because folderPath is undefined`);
      continue;
    }
    const folderPrefix = `${folderPath}/%`;
    try {
      log(`📦 Building folder capsule: ${folderPath} (score=${target.score})`);

      // --------------------------------------------------
      // Load files in folder from DB
      // --------------------------------------------------
      const files = selectFolderFiles.all(folderPrefix);
      log(`📄 ${files.length} files found in folder`);
      if (!files.length) {
        log('⚠️ Folder empty, skipping');
        continue;
      }

      // --------------------------------------------------
      // Compute file-level scores: name heuristic + incoming references
      // within the folder + outgoing imports + size.
      // --------------------------------------------------
      const fileScores = files.map(f => {
        const heuristicName = /index|main|cli|app|server|config/i.test(f.filename) ? 5 : 0;
        const basename = path.basename(f.path);
        const incoming = countIncomingRefs.get(folderPrefix, `%${basename}%`);
        const incomingScore = (incoming?.c || 0) * 2;
        const row = selectFtsContent.get(f.path);
        const code = row?.contentText || '';
        const outgoingCount = (code.match(/from\s+['"].+['"]/g)?.length || 0) + (code.match(/require\(['"].+['"]\)/g)?.length || 0);
        const sizeScore = Math.min(code.length / 2000, 3); // cap size influence
        const totalScore = heuristicName + incomingScore + outgoingCount + sizeScore;
        log(`📄 File score: ${f.filename} -> ${totalScore}`);
        return { ...f, score: totalScore };
      });

      const representativeFiles = fileScores.sort((a, b) => b.score - a.score).slice(0, 2);
      log(`📌 Representative files: ${representativeFiles.map(f => f.filename).join(', ')}`);
      // First 2000 chars of each representative file go to the LLM.
      const candidateFiles = representativeFiles.map(f => {
        const row = selectFtsContent.get(f.path);
        return { path: f.path, code: row?.contentText?.slice(0, 2000) || '' };
      });

      // --------------------------------------------------
      // Ask LLM to generate folder summary
      // --------------------------------------------------
      const prompt = `
You are analyzing a source code folder.

Folder path:
${folderPath}

Candidate files with code snippets:
${JSON.stringify(candidateFiles, null, 2)}

Task:
- Identify up to TWO files that best represent the purpose of this folder.
- Explain the folder's responsibility in ONE concise sentence.
- Return ONLY valid JSON.

Expected JSON shape:
{
  "summary": "one sentence description",
  "files": [
    { "path": "absolute/file/path", "summary": "optional short note" }
  ]
}
`.trim();

      let folderSummary = '';
      let keyFiles = [];
      try {
        log('🤖 Asking LLM for folder summary...');
        const response = await generate({ content: prompt, query: '' });
        const cleaned = await cleanupModule.run({ query: '', content: response.data });
        const data = typeof cleaned.data === 'string' ? JSON.parse(cleaned.data) : cleaned.data;
        if (data && typeof data === 'object') {
          if (typeof data.summary === 'string')
            folderSummary = data.summary;
          if (Array.isArray(data.files)) {
            keyFiles = data.files
              .filter((f) => typeof f?.path === 'string')
              .slice(0, 2)
              .map((f) => ({ path: f.path, reason: f.summary || f.reason || 'representative file' }));
          }
        }
        log(`📌 Folder summary: ${folderSummary}`);
      }
      catch (err) {
        // LLM failure is non-fatal: the capsule is still written with stats,
        // just without a summary / key files.
        if (err instanceof Error) {
          log(`🔥 Failed processing folder ${folderPath}: ${err.message}\n${err.stack}`);
        }
        else {
          log(`🔥 Failed processing folder ${folderPath}:`, err);
        }
      }

      // --------------------------------------------------
      // Compute stats by file type
      // --------------------------------------------------
      const byType = {};
      for (const f of files) {
        const ext = f.type || path.extname(f.filename || '').replace('.', '') || 'unknown';
        byType[ext] = (byType[ext] || 0) + 1;
      }

      // --------------------------------------------------
      // Folder-level dependencies: every distinct parent dir of the files
      // other than the folder itself.
      // --------------------------------------------------
      const importsFrom = new Set();
      for (const f of files) {
        const dir = path.dirname(f.path).replace(/\\/g, '/');
        if (dir !== folderPath)
          importsFrom.add(dir);
      }

      // --------------------------------------------------
      // Build capsule
      // --------------------------------------------------
      const capsule = {
        path: folderPath,
        depth: folderPath.split('/').length,
        stats: { fileCount: files.length, byType },
        roles: [],
        concerns: [],
        keyFiles,
        dependencies: { importsFrom: Array.from(importsFrom), usedBy: [] },
        summary: folderSummary,
        confidence: Math.min(0.9, 0.4 + target.score * 0.1),
      };

      // --------------------------------------------------
      // Persist capsule (bound parameters: the capsule JSON may contain
      // quotes — the old inline INSERT broke on them)
      // --------------------------------------------------
      const now = new Date().toISOString();
      insertCapsule.run(
        capsule.path,
        capsule.depth,
        JSON.stringify(capsule),
        capsule.confidence,
        now,
        files.length,
      );
      markFolderCapsuled.run(folderPrefix);
      log(`✅ Folder capsule written: ${folderPath}`);
    }
    catch (err) {
      if (err instanceof Error) {
        log(`🔥 Failed processing folder ${folderPath}: ${err.message}\n${err.stack}`);
      }
      else {
        log(`🔥 Failed processing folder ${folderPath}:`, err);
      }
    }
  }
  log('📦 Folder capsule batch complete.');
  return true;
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import lockfile from 'proper-lockfile';
|
|
4
|
+
import { log } from '../utils/log.js';
|
|
5
|
+
import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
|
|
6
|
+
import { sleep } from '../utils/sleep.js';
|
|
7
|
+
import { selectUnprocessedFiles, markFileAsSkippedByPath, countUnprocessedFiles, upsertFileTemplate, upsertFileFtsTemplate, markFileAsIndexed } from '../db/sqlTemplates.js';
|
|
8
|
+
// --------------------------------------------------
|
|
9
|
+
// DB LOCK
|
|
10
|
+
// --------------------------------------------------
|
|
11
|
+
/**
 * Acquire the repository DB lockfile, retrying a few times before giving up.
 *
 * @param {number} [retries=3] - Total lock attempts before the error is rethrown.
 * @param {number} [delayMs=100] - Pause between attempts.
 * @returns {Promise<Function|undefined>} the proper-lockfile release function.
 * @throws the last lock error once all attempts are exhausted.
 */
async function lockDbWithRetry(retries = 3, delayMs = 100) {
  for (let attempt = 1; attempt <= retries; attempt += 1) {
    try {
      return await lockfile.lock(getDbPathForRepo());
    }
    catch (err) {
      if (attempt === retries) {
        // Out of attempts — surface the failure to the caller.
        log('❌ Failed to acquire DB lock after retries:', err);
        throw err;
      }
      log(`⏳ DB lock busy, retrying... (${attempt})`);
      await sleep(delayMs);
    }
  }
}
|
|
28
|
+
// --------------------------------------------------
|
|
29
|
+
// SINGLE FILE INDEXER (daemon-side)
|
|
30
|
+
// --------------------------------------------------
|
|
31
|
+
/**
 * Index a single file into the repo DB: upsert its metadata row, upsert its
 * full-text-search content, and mark it as indexed.
 *
 * @param {string} filePath - Path to the file on disk.
 * @param {string|null} summary - Optional precomputed summary.
 * @param {string} type - File type label stored in the DB.
 * @throws if the file cannot be stat'ed, or if the FTS upsert / indexed-mark
 *   fails — the caller treats a throw as "skip this file".
 */
export function indexFile(filePath, summary, type) {
  const stats = fs.statSync(filePath);
  const lastModified = stats.mtime.toISOString();
  const indexedAt = new Date().toISOString();
  const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
  const filename = path.basename(normalizedPath);

  // ----------------------------------------------
  // Extract text content (guarded): oversized and binary-looking files keep
  // an empty contentText so the FTS table only holds searchable text.
  // ----------------------------------------------
  let contentText = '';
  try {
    if (stats.size <= 2000000) {
      const buffer = fs.readFileSync(filePath);
      if (buffer.length === 0) {
        // Empty file is trivially text. This also guards the 0/0 = NaN ratio
        // below, which previously misclassified empty files as binary.
        contentText = '';
      }
      else {
        // Ratio of control bytes (outside tab/LF/CR) in the first 1 KB.
        const nonTextRatio = buffer
          .slice(0, 1000)
          .filter(b => b < 9 || (b > 13 && b < 32))
          .length / Math.min(buffer.length, 1000);
        if (nonTextRatio <= 0.3) {
          contentText = buffer.toString('utf-8');
        }
        else {
          log(`⚠️ Binary-like content skipped: ${normalizedPath}`);
        }
      }
    }
    else {
      log(`⚠️ Large file content skipped: ${normalizedPath}`);
    }
  }
  catch (err) {
    log(`⚠️ Failed reading content: ${normalizedPath}`, err);
  }

  const db = getDbForRepo();

  // ----------------------------------------------
  // Metadata upsert (best-effort: a failure here does not block FTS indexing)
  // ----------------------------------------------
  try {
    db.prepare(upsertFileTemplate).run({
      path: normalizedPath,
      filename,
      summary,
      type,
      lastModified,
      indexedAt,
    });
  }
  catch (err) {
    log(`⚠️ Failed metadata upsert for ${normalizedPath}:`, err);
  }

  // ----------------------------------------------
  // FTS upsert — deliberately unguarded so a failure propagates to the
  // caller, which marks the file as skipped.
  // ----------------------------------------------
  db.prepare(upsertFileFtsTemplate).run({
    path: normalizedPath,
    filename,
    summary,
    contentText,
  });

  // ----------------------------------------------
  // Mark as indexed
  // ----------------------------------------------
  db.prepare(markFileAsIndexed).run({ path: normalizedPath });
}
|
|
93
|
+
// --------------------------------------------------
|
|
94
|
+
// FTS REBUILD
|
|
95
|
+
// --------------------------------------------------
|
|
96
|
+
/**
 * Rebuild the SQLite FTS5 index over files_fts from its content table.
 */
function rebuildFts() {
  log('🔍 Rebuilding FTS index...');
  const db = getDbForRepo();
  db.exec(`INSERT INTO files_fts(files_fts) VALUES('rebuild');`);
}
|
|
101
|
+
// --------------------------------------------------
|
|
102
|
+
// INDEXING BATCH
|
|
103
|
+
// --------------------------------------------------
|
|
104
|
+
/**
 * Index one batch of unprocessed files under an exclusive DB lock, then
 * rebuild the FTS index. Files that fail to index are marked skipped so they
 * are not retried forever.
 *
 * @returns {Promise<boolean>} true if at least one file was indexed.
 */
export async function runIndexingBatch() {
  log('⚡ Starting indexing batch...');
  const db = getDbForRepo();
  const BATCH_SIZE = 25; // adjust as needed
  const rows = db.prepare(selectUnprocessedFiles).all(BATCH_SIZE);
  if (rows.length === 0) {
    log('✅ No files left to index.');
    return false;
  }
  const release = await lockDbWithRetry();
  let didIndexWork = false;
  try {
    for (const row of rows) {
      log(`📄 Indexing: ${row.path}`);
      try {
        indexFile(row.path, null, 'auto');
        didIndexWork = true;
      }
      catch (err) {
        // Mark the file skipped so subsequent batches don't pick it up again.
        log(`⚠️ Failed indexing ${row.path}`, err);
        db.prepare(markFileAsSkippedByPath).run({ path: row.path });
      }
    }
    // FTS rebuild is best-effort: a stale FTS index is recoverable next batch.
    try {
      rebuildFts();
    }
    catch (err) {
      log('⚠️ Failed FTS rebuild:', err);
    }
    const remaining = db.prepare(countUnprocessedFiles).get();
    log(`📦 Remaining unindexed files: ${remaining.count}`);
    return didIndexWork;
  }
  finally {
    // Always release the lock, even when indexing throws.
    if (release) {
      await release();
      log('🔓 DB lock released');
    }
    else {
      log('⚠️ DB lock was not acquired, nothing to release');
    }
  }
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
// File: src/daemon/runKgBatch.ts
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import fsSync from 'fs';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { insertGraphTagTemplate, selectGraphTagIdTemplate, insertGraphEntityTagTemplate, } from '../db/sqlTemplates.js';
|
|
6
|
+
import { getDbForRepo } from '../db/client.js';
|
|
7
|
+
import { log } from '../utils/log.js';
|
|
8
|
+
import { kgModule } from '../pipeline/modules/kgModule.js';
|
|
9
|
+
import { KG_IGNORED_EXTENSIONS } from '../fileRules/ignoredExtensions.js';
|
|
10
|
+
import * as sqlTemplates from '../db/sqlTemplates.js'; // import the template
|
|
11
|
+
import { indexCodeForFile } from '../db/functionIndex.js';
|
|
12
|
+
const MAX_KG_FILES_PER_BATCH = 3;

/**
 * Run one knowledge-graph batch: pick up to MAX_KG_FILES_PER_BATCH pending
 * files, extract their code elements, build KG entities/edges via kgModule,
 * persist entity tags, and mark each file as kg_done — even on failure, so
 * the daemon never retries the same file forever.
 */
export async function runKgBatch() {
  const db = getDbForRepo();
  // Single place to flag a file as finished with KG processing.
  const markDone = (filePath) =>
    db.prepare(sqlTemplates.markFileAsKgDone).run({ path: filePath });

  const pendingRows = db.prepare(`
    SELECT id, path, summary
    FROM files
    WHERE functions_extracted_at IS NULL
    AND processing_status NOT IN ('skipped', 'failed', 'kg_done')
    LIMIT ?
  `).all(MAX_KG_FILES_PER_BATCH);

  if (pendingRows.length === 0) {
    log('🧠 KG batch: no pending files');
    return;
  }

  for (const row of pendingRows) {
    log(`\n🔗 KG: Processing ${row.path}`);
    if (!fsSync.existsSync(row.path)) {
      log('⚠️ KG skipped: file missing');
      markDone(row.path);
      continue;
    }

    // --- CODE EXTRACTION SECTION ---
    log('\n');
    log('📄 Code Extraction');
    log('--------------------------------------------------------------------');
    try {
      const extracted = await indexCodeForFile(row.path, row.id);
      log(extracted
        ? '✅ Indexed code'
        : '⚡ No code elements extracted or unsupported file type');
    }
    catch (err) {
      log(`❌ Code extraction failed for ${row.path}:`, err);
    }

    // --- KNOWLEDGE GRAPH SECTION ---
    log('\n');
    log('🔗 Knowledge Graph');
    log('--------------------------------------------------------------------');
    const ext = path.extname(row.path).toLowerCase();
    if (KG_IGNORED_EXTENSIONS.includes(ext)) {
      log(`⚠️ KG skipped (ignored extension): ${ext}`);
      markDone(row.path);
      continue;
    }

    try {
      const content = await fs.readFile(row.path, 'utf-8');
      const kgResult = await kgModule.run({
        fileId: row.id,
        filepath: row.path,
        summary: row.summary || undefined,
      }, content);
      log(`✅ KG built: ${kgResult.entities.length} entities, ${kgResult.edges.length} edges`);

      if (kgResult.entities.length > 0) {
        const insertTag = db.prepare(insertGraphTagTemplate);
        const getTagId = db.prepare(selectGraphTagIdTemplate);
        const insertEntityTag = db.prepare(insertGraphEntityTagTemplate);

        for (const entity of kgResult.entities) {
          if (!entity.type || !Array.isArray(entity.tags))
            continue;
          for (const tag of entity.tags) {
            if (!tag)
              continue;
            try {
              insertTag.run({ name: tag });
              const tagRow = getTagId.get({ name: tag });
              if (!tagRow)
                continue;
              insertEntityTag.run({
                entity_type: entity.type,
                entity_unique_id: `${entity.name}@${row.path}`,
                tag_id: tagRow.id,
              });
            }
            catch {
              // ignore per-entity failures
            }
          }
        }
      }

      // ✅ Mark KG as done for this file
      markDone(row.path);
    }
    catch (err) {
      log(`❌ KG failed for ${row.path}:`, err);
      // Still mark as done to avoid infinite retries
      markDone(row.path);
    }
  }
}
|