@code-rag/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +27 -0
- package/dist/cli.test.d.ts +1 -0
- package/dist/cli.test.js +369 -0
- package/dist/cli.test.js.map +1 -0
- package/dist/commands/hooks-cmd.d.ts +53 -0
- package/dist/commands/hooks-cmd.js +279 -0
- package/dist/commands/index-cmd.d.ts +4 -0
- package/dist/commands/index-cmd.js +1037 -0
- package/dist/commands/index-cmd.js.map +1 -0
- package/dist/commands/index-cmd.test.d.ts +1 -0
- package/dist/commands/index-cmd.test.js +74 -0
- package/dist/commands/index-cmd.test.js.map +1 -0
- package/dist/commands/init-wizard.d.ts +95 -0
- package/dist/commands/init-wizard.js +526 -0
- package/dist/commands/init.d.ts +7 -0
- package/dist/commands/init.js +125 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/search.d.ts +7 -0
- package/dist/commands/search.js +124 -0
- package/dist/commands/search.js.map +1 -0
- package/dist/commands/serve.d.ts +2 -0
- package/dist/commands/serve.js +56 -0
- package/dist/commands/serve.js.map +1 -0
- package/dist/commands/status.d.ts +21 -0
- package/dist/commands/status.js +117 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/commands/viewer.d.ts +20 -0
- package/dist/commands/viewer.js +197 -0
- package/dist/commands/viewer.js.map +1 -0
- package/dist/commands/viewer.test.d.ts +1 -0
- package/dist/commands/viewer.test.js +69 -0
- package/dist/commands/viewer.test.js.map +1 -0
- package/dist/commands/watch-cmd.d.ts +8 -0
- package/dist/commands/watch-cmd.js +152 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/package.json +66 -0
|
@@ -0,0 +1,1037 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import ora from 'ora';
|
|
3
|
+
import { writeFile, readFile, mkdir, appendFile, unlink } from 'node:fs/promises';
|
|
4
|
+
import { join, resolve, sep } from 'node:path';
|
|
5
|
+
import { createHash } from 'node:crypto';
|
|
6
|
+
import { existsSync } from 'node:fs';
|
|
7
|
+
import { loadConfig, createIgnoreFilter, FileScanner, TreeSitterParser, MarkdownParser, ASTChunker, OllamaClient, NLEnricher, OllamaEmbeddingProvider, OpenAICompatibleEmbeddingProvider, ModelLifecycleManager, LanceDBStore, BM25Index, GraphBuilder, DependencyGraph, scanForABReferences, IndexState, MultiRepoIndexer, AzureDevOpsProvider, JiraProvider, ClickUpProvider, } from '@code-rag/core';
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Simple embedding provider factory — dispatches based on provider name
|
|
10
|
+
// (Used for non-lifecycle providers like openai-compatible and direct ollama)
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
/**
 * Build an embedding provider directly from the `embedding` config section.
 * Handles the non-lifecycle providers: 'openai-compatible' gets an
 * OpenAI-compatible HTTP client; 'ollama' — and any unrecognized provider
 * name — falls back to a direct Ollama provider.
 */
export function createSimpleEmbeddingProvider(embeddingConfig) {
    const { provider, model, dimensions } = embeddingConfig;
    if (provider === 'openai-compatible') {
        const compat = embeddingConfig.openaiCompatible;
        return new OpenAICompatibleEmbeddingProvider({
            baseUrl: compat?.baseUrl ?? 'http://localhost:1234/v1',
            apiKey: compat?.apiKey,
            model,
            dimensions,
            maxBatchSize: compat?.maxBatchSize ?? 100,
        });
    }
    // 'ollama' and unknown provider names both resolve to direct Ollama.
    return new OllamaEmbeddingProvider({ model, dimensions });
}
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
// IndexLogger — dual output: ora spinner (interactive) + file log
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
/**
 * Dual-sink logger for the indexing pipeline:
 *  - interactive: an ora spinner (suppressed entirely when `quiet` is true)
 *  - persistent: an append-only `index.log` plus an `index-progress.json`
 *    snapshot (current phase + counters) under the storage directory.
 * File-write failures are deliberately swallowed so logging can never
 * break an indexing run.
 */
class IndexLogger {
    spinner;
    logPath;
    progressPath;
    phase = 'init';
    counts = {};
    quiet;
    /**
     * @param storagePath directory receiving index.log / index-progress.json
     * @param quiet when true, suppress spinner output (file logging stays on)
     */
    constructor(storagePath, quiet = false) {
        this.quiet = quiet;
        this.spinner = ora();
        this.logPath = join(storagePath, 'index.log');
        this.progressPath = join(storagePath, 'index-progress.json');
    }
    /** Ensure the storage directory exists and write a session banner. */
    async init() {
        const parentDir = resolve(this.logPath, '..');
        if (!existsSync(parentDir)) {
            await mkdir(parentDir, { recursive: true });
        }
        const banner = '='.repeat(60);
        await this.log(banner);
        await this.log(`Indexing started at ${new Date().toISOString()}`);
        await this.log(banner);
    }
    /** Start the spinner with `text`; the file write is fire-and-forget. */
    start(text) {
        if (!this.quiet) {
            this.spinner.start(text);
        }
        void this.log(text);
    }
    /** Update the spinner text and append the message to the log file. */
    async info(text) {
        if (!this.quiet) {
            this.spinner.text = text;
        }
        await this.log(text);
    }
    /** Mark the spinner as succeeded; log with an [OK] tag. */
    async succeed(text) {
        if (!this.quiet) {
            this.spinner.succeed(text);
        }
        await this.log(`[OK] ${text}`);
    }
    /** Mark the spinner with a warning; log with a [WARN] tag. */
    async warn(text) {
        if (!this.quiet) {
            this.spinner.warn(text);
        }
        await this.log(`[WARN] ${text}`);
    }
    /** Mark the spinner as failed; log with a [FAIL] tag. */
    async fail(text) {
        if (!this.quiet) {
            this.spinner.fail(text);
        }
        await this.log(`[FAIL] ${text}`);
    }
    /** Record the current pipeline phase (merging optional counters), then persist. */
    async setPhase(phase, counts) {
        this.phase = phase;
        if (counts) {
            this.counts = { ...this.counts, ...counts };
        }
        await this.writeProgress();
    }
    /** Set one progress counter and persist the snapshot. */
    async updateCount(key, value) {
        this.counts[key] = value;
        await this.writeProgress();
    }
    /** Append a timestamped line to index.log; failures are ignored. */
    async log(message) {
        const line = `[${new Date().toISOString()}] ${message}\n`;
        try {
            await appendFile(this.logPath, line, 'utf-8');
        }
        catch {
            // Best-effort: a failed log write must never break indexing.
        }
    }
    /** Persist { phase, updatedAt, ...counts } to index-progress.json. */
    async writeProgress() {
        const snapshot = {
            phase: this.phase,
            updatedAt: new Date().toISOString(),
            ...this.counts,
        };
        try {
            await writeFile(this.progressPath, JSON.stringify(snapshot, null, 2), 'utf-8');
        }
        catch {
            // Best-effort: a failed progress write must never break indexing.
        }
    }
}
|
|
117
|
+
/**
 * Read the enrichment checkpoint saved under `storagePath`, if any.
 * @returns the parsed checkpoint object, or null when the file is
 *          missing or unreadable.
 */
async function loadEnrichmentCheckpoint(storagePath) {
    try {
        const raw = await readFile(join(storagePath, 'enrichment-checkpoint.json'), 'utf-8');
        return JSON.parse(raw);
    }
    catch {
        // Missing or corrupt checkpoint — treated as "no checkpoint".
        return null;
    }
}
|
|
127
|
+
/**
 * Persist the enrichment checkpoint under `storagePath` so an interrupted
 * run can resume without redoing already-completed summaries.
 */
async function saveEnrichmentCheckpoint(storagePath, checkpoint) {
    const target = join(storagePath, 'enrichment-checkpoint.json');
    const payload = JSON.stringify(checkpoint);
    await writeFile(target, payload, 'utf-8');
}
|
|
131
|
+
/**
 * Delete the enrichment checkpoint once the enrichment phase completes.
 * Silently succeeds when no checkpoint file exists.
 */
async function clearEnrichmentCheckpoint(storagePath) {
    const target = join(storagePath, 'enrichment-checkpoint.json');
    await unlink(target).catch(() => {
        // Already gone — nothing to clear.
    });
}
|
|
140
|
+
// Number of chunks sent to the NL enricher per batch; a checkpoint is saved
// after every batch so progress survives crashes and restarts.
const ENRICHMENT_BATCH_SIZE = 100;
|
|
141
|
+
/**
 * Create the embedding provider for an indexing run, optionally wrapped in a
 * ModelLifecycleManager. For provider 'auto', a backend is detected/started
 * and the model pulled before the provider is returned; all other providers
 * need no lifecycle management (lifecycleManager is null).
 *
 * @returns {{ provider, lifecycleManager }}
 * @throws when the 'auto' backend cannot be started or the model pull fails
 */
async function createManagedEmbeddingProvider(config, logger) {
    const embeddingConfig = config.embedding;
    const { provider: providerName, model, dimensions } = embeddingConfig;
    // Shared shape for the direct-Ollama cases ('ollama' and the
    // backward-compat fallback for unknown provider names).
    const directOllama = () => ({
        provider: new OllamaEmbeddingProvider({ model, dimensions }),
        lifecycleManager: null,
    });
    if (providerName === 'openai-compatible') {
        // Remote OpenAI-compatible server — nothing to start or pull.
        return {
            provider: createSimpleEmbeddingProvider(embeddingConfig),
            lifecycleManager: null,
        };
    }
    if (providerName === 'ollama') {
        // Caller pointed us at an existing Ollama instance — use it directly.
        return directOllama();
    }
    if (providerName !== 'auto') {
        // Unknown provider names fall back to direct Ollama (backward compat).
        return directOllama();
    }
    // 'auto': detect/start a backend, make sure the model is pulled, then
    // connect an Ollama provider to whatever backend came up.
    const manager = new ModelLifecycleManager({
        model,
        autoStart: embeddingConfig.autoStart,
        autoStop: embeddingConfig.autoStop,
        docker: embeddingConfig.docker,
    });
    await logger.info('Auto-detecting embedding backend...');
    const backendResult = await manager.ensureRunning();
    if (backendResult.isErr()) {
        await logger.fail(backendResult.error.message);
        throw backendResult.error;
    }
    const backend = backendResult.value;
    await logger.info(`Embedding backend: ${backend.type}${backend.managedByUs ? ' (auto-started)' : ' (already running)'}`);
    // Ensure the model is available, streaming pull progress to the logger.
    await logger.info(`Checking model "${model}"...`);
    const modelResult = await manager.ensureModel(model, (status, completed, total) => {
        const pctSuffix = total > 0 ? ` ${Math.round((completed / total) * 100)}%` : '';
        void logger.info(`Pulling model: ${status}${pctSuffix}`);
    });
    if (modelResult.isErr()) {
        await logger.fail(modelResult.error.message);
        throw modelResult.error;
    }
    await logger.info(`Model "${model}" is ready`);
    return {
        provider: new OllamaEmbeddingProvider({
            baseUrl: backend.baseUrl,
            model,
            dimensions,
        }),
        lifecycleManager: manager,
    };
}
|
|
211
|
+
/**
 * Rebuild the BM25 keyword index from every document currently in LanceDB.
 * Used as a fallback when an incremental BM25 update fails (e.g. a corrupted
 * on-disk index). Returns an empty index when the store cannot be read.
 */
async function rebuildBm25FromStore(store, logger, prefix) {
    const rebuilt = new BM25Index();
    try {
        // Reach into the store's underlying LanceDB table (internal API).
        const table = store.table;
        if (!table) {
            return rebuilt;
        }
        const rows = await table.query().toArray();
        const chunks = rows.map((row) => {
            let parsedMeta = {};
            try {
                parsedMeta = JSON.parse(row.metadata);
            }
            catch { /* malformed metadata — keep defaults */ }
            // Line numbers are not needed for BM25 scoring, so they are zeroed.
            return {
                id: row.id,
                content: row.content,
                nlSummary: row.nl_summary,
                filePath: row.file_path,
                startLine: 0,
                endLine: 0,
                language: row.language,
                metadata: {
                    chunkType: (row.chunk_type ?? 'function'),
                    name: parsedMeta['name'] ?? '',
                    declarations: [],
                    imports: [],
                    exports: [],
                },
            };
        });
        rebuilt.addChunks(chunks);
        await logger.info(`${prefix}Rebuilt BM25 from LanceDB: ${chunks.length} documents`);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        await logger.warn(`${prefix}BM25 rebuild from LanceDB failed: ${msg}`);
    }
    return rebuilt;
}
|
|
255
|
+
/**
 * Index a single repo directory using the full pipeline:
 * scan, parse, chunk, enrich, embed, store.
 *
 * Shared between single-repo and multi-repo paths.
 *
 * @param rootDir           repo root to scan
 * @param storagePath       directory for all index artifacts (LanceDB, BM25,
 *                          graph.json, index-state.json, checkpoints)
 * @param config            loaded app config (ingestion/llm/embedding sections)
 * @param options           { full } — full reindex vs incremental
 * @param logger            IndexLogger for spinner + file/progress output
 * @param repoLabel         optional repo name; stamped into chunk metadata and
 *                          used as a log prefix in multi-repo mode
 * @param embeddingProvider optional pre-built provider; falls back to
 *                          createSimpleEmbeddingProvider(config.embedding)
 * @returns { filesProcessed, chunksCreated, parseErrors, skippedFiles, parseErrorDetails }
 * @throws on scan/parser-init/embedding/store failures, unreachable Ollama,
 *         or repeated enrichment batch failures
 */
async function indexSingleRepo(rootDir, storagePath, config, options, logger, repoLabel, embeddingProvider) {
    const prefix = repoLabel ? `[${repoLabel}] ` : '';
    // Load or create index state (per-file content hashes + chunk IDs).
    // A full reindex ignores any saved state.
    let indexState = new IndexState();
    const indexStatePath = join(storagePath, 'index-state.json');
    if (!options.full) {
        try {
            const stateData = await readFile(indexStatePath, 'utf-8');
            indexState = IndexState.fromJSON(JSON.parse(stateData));
        }
        catch {
            // No saved state, start fresh
        }
    }
    // Scan files
    await logger.setPhase('scan');
    await logger.info(`${prefix}Scanning files...`);
    const ignoreFilter = createIgnoreFilter(rootDir);
    const scanner = new FileScanner(rootDir, ignoreFilter);
    const scanResult = await scanner.scanFiles();
    if (scanResult.isErr()) {
        throw new Error(`Scan failed: ${scanResult.error.message}`);
    }
    const scannedFiles = scanResult.value;
    await logger.info(`${prefix}Scanned ${scannedFiles.length} files`);
    // Filter to changed files (incremental): only files whose content hash
    // differs from the saved state are reprocessed.
    let filesToProcess = scannedFiles;
    if (!options.full) {
        filesToProcess = scannedFiles.filter((f) => indexState.isDirty(f.filePath, f.contentHash));
        if (filesToProcess.length === 0) {
            return { filesProcessed: 0, chunksCreated: 0, parseErrors: 0, skippedFiles: 0, parseErrorDetails: [] };
        }
        await logger.info(`${prefix}${filesToProcess.length} file(s) changed, processing...`);
    }
    // Initialize parsers
    await logger.setPhase('parse');
    await logger.info(`${prefix}Initializing parser...`);
    const parser = new TreeSitterParser();
    const initResult = await parser.initialize();
    if (initResult.isErr()) {
        throw new Error(`Parser init failed: ${initResult.error.message}`);
    }
    const mdParser = new MarkdownParser({ maxTokensPerChunk: config.ingestion.maxTokensPerChunk });
    // Parse and chunk. Parse failures are recorded and skipped, never fatal.
    await logger.info(`${prefix}Parsing ${filesToProcess.length} files...`);
    const chunker = new ASTChunker({ maxTokensPerChunk: config.ingestion.maxTokensPerChunk });
    const allChunks = [];
    const allParsedFiles = [];
    let parseErrors = 0;
    let skippedFiles = 0;
    const parseErrorDetails = [];
    for (const file of filesToProcess) {
        // Route .md/.mdx files to MarkdownParser (produces chunks directly)
        if (MarkdownParser.isMarkdownFile(file.filePath)) {
            const mdResult = mdParser.parse(file.filePath, file.content);
            if (mdResult.isErr()) {
                parseErrors++;
                parseErrorDetails.push({ file: file.filePath, reason: mdResult.error.message });
                continue;
            }
            allChunks.push(...mdResult.value.chunks);
            continue;
        }
        const parseResult = await parser.parse(file.filePath, file.content);
        if (parseResult.isErr()) {
            // Distinguish true parse errors from unsupported file types
            if (parseResult.error.message.startsWith('Unsupported file type:')) {
                skippedFiles++;
                continue;
            }
            parseErrors++;
            parseErrorDetails.push({ file: file.filePath, reason: parseResult.error.message });
            continue;
        }
        const parsed = parseResult.value;
        allParsedFiles.push(parsed);
        const chunkResult = await chunker.chunk(parsed);
        if (chunkResult.isErr()) {
            parseErrors++;
            parseErrorDetails.push({ file: file.filePath, reason: chunkResult.error.message });
            continue;
        }
        allChunks.push(...chunkResult.value);
    }
    const parsedCount = filesToProcess.length - parseErrors - skippedFiles;
    await logger.info(`${prefix}Parsed ${parsedCount} files, created ${allChunks.length} chunks${skippedFiles > 0 ? ` (${skippedFiles} unsupported skipped)` : ''}`);
    await logger.updateCount('totalChunks', allChunks.length);
    // Stamp repoName in chunk metadata if multi-repo
    if (repoLabel) {
        for (const chunk of allChunks) {
            chunk.metadata.repoName = repoLabel;
        }
    }
    if (allChunks.length === 0) {
        // Still update index state for processed files (even if no chunks)
        // so they are not re-scanned as dirty on the next incremental run.
        for (const file of filesToProcess) {
            indexState.setFileState(file.filePath, {
                filePath: file.filePath,
                contentHash: file.contentHash,
                lastIndexedAt: new Date(),
                chunkIds: [],
            });
        }
        await writeFile(indexStatePath, JSON.stringify(indexState.toJSON(), null, 2), 'utf-8');
        return { filesProcessed: filesToProcess.length, chunksCreated: 0, parseErrors, skippedFiles, parseErrorDetails };
    }
    // Enrich with NL summaries — batched with checkpointing
    await logger.setPhase('enrich', { totalChunks: allChunks.length, enrichedChunks: 0 });
    const ollamaClient = new OllamaClient({ model: config.llm.model });
    const enricher = new NLEnricher(ollamaClient);
    // Load checkpoint to resume after crash/restart
    const checkpoint = await loadEnrichmentCheckpoint(storagePath);
    const savedSummaries = checkpoint?.summaries ?? {};
    await logger.info(`${prefix}Checkpoint: ${checkpoint ? `loaded (${Object.keys(savedSummaries).length} summaries)` : 'none found'}`);
    const chunksToEnrich = allChunks.filter((c) => !(c.id in savedSummaries));
    if (Object.keys(savedSummaries).length > 0) {
        await logger.info(`${prefix}Resuming enrichment: ${Object.keys(savedSummaries).length} already done, ${chunksToEnrich.length} remaining`);
    }
    else {
        await logger.info(`${prefix}Enriching ${allChunks.length} chunks with NL summaries...`);
    }
    // Pre-flight: verify Ollama is reachable before starting enrichment
    const ollamaAvailable = await ollamaClient.isAvailable();
    if (!ollamaAvailable) {
        await logger.fail(`${prefix}Ollama is not reachable at ${ollamaClient.currentConfig.baseUrl}. Start Ollama first, then re-run.`);
        throw new Error(`Ollama is not reachable at ${ollamaClient.currentConfig.baseUrl}`);
    }
    // Abort only after several *complete* batch failures in a row; partial
    // failures are tolerated and reset the consecutive-failure counter.
    let enrichErrors = 0;
    let consecutiveFailures = 0;
    const MAX_CONSECUTIVE_FAILURES = 3;
    const totalBatches = Math.ceil(chunksToEnrich.length / ENRICHMENT_BATCH_SIZE);
    for (let i = 0; i < chunksToEnrich.length; i += ENRICHMENT_BATCH_SIZE) {
        const batchNum = Math.floor(i / ENRICHMENT_BATCH_SIZE) + 1;
        const batch = chunksToEnrich.slice(i, i + ENRICHMENT_BATCH_SIZE);
        await logger.info(`${prefix}Enrichment batch ${batchNum}/${totalBatches} (${batch.length} chunks, ${Object.keys(savedSummaries).length}/${allChunks.length} total)...`);
        const enrichResult = await enricher.enrichBatch(batch);
        if (enrichResult.isOk()) {
            const { enriched, failedCount } = enrichResult.value;
            for (const chunk of enriched) {
                if (chunk.nlSummary) {
                    savedSummaries[chunk.id] = chunk.nlSummary;
                }
            }
            if (failedCount === 0) {
                consecutiveFailures = 0;
            }
            else if (enriched.length > 0) {
                // Partial success — reset consecutive failures but log the partial failure
                consecutiveFailures = 0;
                enrichErrors++;
                await logger.warn(`${prefix}Batch ${batchNum}: ${enriched.length} OK, ${failedCount} failed`);
            }
            else {
                // Complete batch failure — all chunks failed
                consecutiveFailures++;
                enrichErrors++;
                await logger.warn(`${prefix}Batch ${batchNum}: all ${failedCount} chunks failed`);
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    await logger.fail(`${prefix}Enrichment aborted: ${MAX_CONSECUTIVE_FAILURES} consecutive batch failures. ` +
                        `Is Ollama running? Check: curl ${ollamaClient.currentConfig.baseUrl}/api/tags`);
                    // Persist progress before aborting so a re-run can resume.
                    await saveEnrichmentCheckpoint(storagePath, {
                        summaries: savedSummaries,
                        totalProcessed: Object.keys(savedSummaries).length,
                    });
                    throw new Error(`Enrichment aborted after ${MAX_CONSECUTIVE_FAILURES} consecutive failures`);
                }
            }
        }
        else {
            // Should not happen with new enrichBatch, but handle gracefully
            enrichErrors++;
            consecutiveFailures++;
            await logger.warn(`${prefix}Batch ${batchNum} enrichment error: ${enrichResult.error.message}`);
        }
        // Save checkpoint after every batch
        await saveEnrichmentCheckpoint(storagePath, {
            summaries: savedSummaries,
            totalProcessed: Object.keys(savedSummaries).length,
        });
        await logger.updateCount('enrichedChunks', Object.keys(savedSummaries).length);
    }
    // Apply saved summaries to all chunks (chunks without a summary pass
    // through unchanged and will be embedded from raw content only).
    const enrichedChunks = allChunks.map((c) => {
        const summary = savedSummaries[c.id];
        return summary ? { ...c, nlSummary: summary } : c;
    });
    if (enrichErrors > 0) {
        await logger.warn(`${prefix}${enrichErrors} enrichment batch(es) failed, some chunks have no NL summary`);
    }
    // Clear checkpoint — enrichment phase complete
    await clearEnrichmentCheckpoint(storagePath);
    // Embed chunks: summary (when present) is prepended to the code content
    // so the embedding captures both.
    await logger.setPhase('embed');
    await logger.info(`${prefix}Embedding ${enrichedChunks.length} chunks...`);
    const resolvedEmbeddingProvider = embeddingProvider ?? createSimpleEmbeddingProvider(config.embedding);
    const textsToEmbed = enrichedChunks.map((c) => c.nlSummary ? `${c.nlSummary}\n\n${c.content}` : c.content);
    const embedResult = await resolvedEmbeddingProvider.embed(textsToEmbed);
    if (embedResult.isErr()) {
        throw new Error(`Embedding failed: ${embedResult.error.message}`);
    }
    const embeddings = embedResult.value;
    // Store in LanceDB
    // NOTE(review): the store is only closed on the upsert-failure path and at
    // the end of the function; an exception thrown later (e.g. a BM25/graph
    // write failure) leaves it open — presumably process exit cleans up; verify.
    await logger.setPhase('store');
    await logger.info(`${prefix}Storing embeddings in LanceDB...`);
    const store = new LanceDBStore(storagePath, config.embedding.dimensions);
    await store.connect();
    const ids = enrichedChunks.map((c) => c.id);
    const metadata = enrichedChunks.map((c) => ({
        content: c.content,
        nl_summary: c.nlSummary,
        chunk_type: c.metadata.chunkType,
        file_path: c.filePath,
        language: c.language,
        start_line: c.startLine,
        end_line: c.endLine,
        name: c.metadata.name,
        ...(c.metadata.repoName ? { repo_name: c.metadata.repoName } : {}),
    }));
    const upsertResult = await store.upsert(ids, embeddings, metadata);
    if (upsertResult.isErr()) {
        store.close();
        throw new Error(`Store failed: ${upsertResult.error.message}`);
    }
    // Build / update BM25 index
    const bm25Path = join(storagePath, 'bm25-index.json');
    let bm25;
    if (options.full) {
        // Full reindex: start fresh
        await logger.info(`${prefix}Building BM25 index from scratch...`);
        bm25 = new BM25Index();
    }
    else {
        // Incremental: load existing, remove stale chunks for re-indexed files
        await logger.info(`${prefix}Updating BM25 index incrementally...`);
        try {
            const existingBm25 = await readFile(bm25Path, 'utf-8');
            bm25 = BM25Index.deserialize(existingBm25);
            // Remove old chunks that belong to the files being re-indexed
            const staleChunkIds = [];
            for (const file of filesToProcess) {
                const fileState = indexState.getFileState(file.filePath);
                if (fileState) {
                    staleChunkIds.push(...fileState.chunkIds);
                }
            }
            if (staleChunkIds.length > 0) {
                try {
                    bm25.removeChunks(staleChunkIds);
                }
                catch {
                    // Some IDs may not exist (e.g., after a corrupted incremental run);
                    // fall back to rebuilding from scratch via LanceDB
                    await logger.warn(`${prefix}BM25 stale chunk removal failed, rebuilding from LanceDB...`);
                    bm25 = await rebuildBm25FromStore(store, logger, prefix);
                }
            }
        }
        catch {
            // No existing BM25 index, start fresh
            bm25 = new BM25Index();
        }
    }
    bm25.addChunks(enrichedChunks);
    await writeFile(bm25Path, bm25.serialize(), 'utf-8');
    // Build / update dependency graph (graph build failure is non-fatal:
    // the isOk() guard simply skips writing graph.json).
    await logger.info(`${prefix}Building dependency graph...`);
    const graphBuilder = new GraphBuilder(rootDir);
    const graphResult = graphBuilder.buildFromFiles(allParsedFiles);
    if (graphResult.isOk()) {
        const graphPath = join(storagePath, 'graph.json');
        const newGraph = graphResult.value;
        if (options.full) {
            // Full reindex: write the new graph directly
            await writeFile(graphPath, JSON.stringify(newGraph.toJSON()), 'utf-8');
        }
        else {
            // Incremental: merge new graph into existing
            try {
                const existingData = await readFile(graphPath, 'utf-8');
                const existingGraph = DependencyGraph.fromJSON(JSON.parse(existingData));
                // Collect file paths being re-indexed to identify stale nodes
                const reindexedFiles = new Set(filesToProcess.map((f) => f.filePath));
                const existingNodes = existingGraph.getAllNodes();
                const existingEdges = existingGraph.getAllEdges();
                // Keep nodes NOT from re-indexed files, then add all new nodes
                const keptNodes = existingNodes.filter((n) => !reindexedFiles.has(n.filePath));
                const keptNodeIds = new Set(keptNodes.map((n) => n.id));
                // Drop edges touching any removed node so the merged graph
                // never references a node that no longer exists.
                const keptEdges = existingEdges.filter((e) => keptNodeIds.has(e.source) && keptNodeIds.has(e.target));
                // Rebuild merged graph
                const merged = new DependencyGraph();
                for (const node of keptNodes)
                    merged.addNode(node);
                for (const edge of keptEdges)
                    merged.addEdge(edge);
                for (const node of newGraph.getAllNodes())
                    merged.addNode(node);
                for (const edge of newGraph.getAllEdges())
                    merged.addEdge(edge);
                await writeFile(graphPath, JSON.stringify(merged.toJSON()), 'utf-8');
            }
            catch {
                // No existing graph, write new one
                await writeFile(graphPath, JSON.stringify(newGraph.toJSON()), 'utf-8');
            }
        }
    }
    // Update index state: record content hash + resulting chunk IDs per file
    // so the next incremental run can detect dirty files and stale chunks.
    await logger.setPhase('finalize');
    await logger.info(`${prefix}Saving index state...`);
    for (const file of filesToProcess) {
        const fileChunkIds = enrichedChunks
            .filter((c) => c.filePath === file.filePath)
            .map((c) => c.id);
        indexState.setFileState(file.filePath, {
            filePath: file.filePath,
            contentHash: file.contentHash,
            lastIndexedAt: new Date(),
            chunkIds: fileChunkIds,
        });
    }
    await writeFile(indexStatePath, JSON.stringify(indexState.toJSON(), null, 2), 'utf-8');
    store.close();
    return { filesProcessed: filesToProcess.length, chunksCreated: enrichedChunks.length, parseErrors, skippedFiles, parseErrorDetails };
}
|
|
585
|
+
// ---------------------------------------------------------------------------
|
|
586
|
+
// Backlog indexing
|
|
587
|
+
// ---------------------------------------------------------------------------
|
|
588
|
+
/**
 * Instantiate the work-item provider named in the backlog config.
 * Returns null for unrecognized provider names.
 */
function createBacklogProvider(backlogConfig) {
    // Map (not a plain object) so lookups cannot hit prototype keys.
    const factories = new Map([
        ['ado', () => new AzureDevOpsProvider()],
        ['azure-devops', () => new AzureDevOpsProvider()],
        ['jira', () => new JiraProvider()],
        ['clickup', () => new ClickUpProvider()],
    ]);
    const make = factories.get(backlogConfig.provider);
    return make ? make() : null;
}
|
|
601
|
+
/**
 * Convert a backlog work item into a markdown "doc" chunk so work items can
 * be embedded and BM25-indexed alongside code chunks.
 * The chunk id derives from the external id with its first '#' replaced
 * (so e.g. "AB#12" becomes "backlog:AB-12").
 */
function backlogItemToChunk(item) {
    // Header: title line, type/state, and optional assignee/tags/url fields.
    const header = [
        `# ${item.externalId}: ${item.title}`,
        '',
        `**Type:** ${item.type} | **State:** ${item.state}`,
    ];
    if (item.assignedTo) {
        header.push(`**Assigned to:** ${item.assignedTo}`);
    }
    if (item.tags.length > 0) {
        header.push(`**Tags:** ${item.tags.join(', ')}`);
    }
    if (item.url) {
        header.push(`**URL:** ${item.url}`);
    }
    header.push('');
    const body = [];
    if (item.description) {
        // Strip HTML tags for cleaner embedding
        const plainDesc = item.description.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
        if (plainDesc) {
            body.push('## Description', plainDesc, '');
        }
    }
    if (item.linkedCodePaths.length > 0) {
        body.push('## Linked Code');
        for (const codePath of item.linkedCodePaths) {
            body.push(`- ${codePath}`);
        }
    }
    const content = [...header, ...body].join('\n');
    const assignedSuffix = item.assignedTo ? ` assigned to ${item.assignedTo}` : '';
    return {
        id: `backlog:${item.externalId.replace('#', '-')}`,
        content,
        nlSummary: `${item.type} work item "${item.title}" (${item.state})${assignedSuffix}`,
        filePath: `backlog/${item.externalId}`,
        startLine: 1,
        endLine: content.split('\n').length,
        language: 'markdown',
        metadata: {
            chunkType: 'doc',
            name: `${item.externalId}: ${item.title}`,
            declarations: [],
            imports: [],
            exports: [],
            tags: item.tags,
            docTitle: item.title,
        },
    };
}
|
|
649
|
+
/**
 * Stable sha256 fingerprint over the fields of a backlog item that affect
 * indexing; used by the incremental path to skip unchanged items.
 * Key order in the stringified object is fixed, so equal items always
 * produce equal hashes.
 */
function hashBacklogItem(item) {
    const { title, description, state, type, assignedTo, tags, linkedCodePaths } = item;
    const fingerprint = JSON.stringify({
        title,
        description,
        state,
        type,
        assignedTo,
        tags,
        linkedCodePaths,
    });
    return createHash('sha256').update(fingerprint).digest('hex');
}
|
|
661
|
+
/**
 * Fetch work items from the configured backlog provider and index the
 * changed ones: convert to chunks, embed, store in LanceDB, update the
 * BM25 index, and link items into the dependency graph.
 *
 * Incremental by default: a per-item content hash (see hashBacklogItem)
 * is persisted in backlog-state.json and unchanged items are skipped
 * unless options.full is set.
 *
 * Provider, embedding, and storage failures are returned via the `error`
 * field rather than thrown, so the caller can include them in its summary.
 *
 * @returns {Promise<{itemsFetched: number, itemsIndexed: number, skipped: number, error?: string}>}
 */
async function indexBacklog(backlogConfig, storagePath, config, options, logger, embeddingProvider) {
    // Create provider
    const provider = createBacklogProvider(backlogConfig);
    if (!provider) {
        return { itemsFetched: 0, itemsIndexed: 0, skipped: 0, error: `Unknown backlog provider: ${backlogConfig.provider}` };
    }
    // Initialize provider
    await logger.info('Backlog: connecting to provider...');
    const initResult = await provider.initialize(backlogConfig.config ?? {});
    if (initResult.isErr()) {
        return { itemsFetched: 0, itemsIndexed: 0, skipped: 0, error: `Backlog init failed: ${initResult.error.message}` };
    }
    // Fetch all items
    await logger.info('Backlog: fetching work items...');
    const itemsResult = await provider.getItems({ limit: 500 });
    if (itemsResult.isErr()) {
        return { itemsFetched: 0, itemsIndexed: 0, skipped: 0, error: `Backlog fetch failed: ${itemsResult.error.message}` };
    }
    const items = itemsResult.value;
    if (items.length === 0) {
        return { itemsFetched: 0, itemsIndexed: 0, skipped: 0 };
    }
    // Load backlog index state for incremental indexing
    const backlogStatePath = join(storagePath, 'backlog-state.json');
    let backlogState = {};
    if (!options.full) {
        try {
            const stateData = await readFile(backlogStatePath, 'utf-8');
            backlogState = JSON.parse(stateData);
        }
        catch {
            // No saved state, index all
        }
    }
    // Filter to changed items (incremental)
    const changedItems = [];
    let skipped = 0;
    for (const item of items) {
        const currentHash = hashBacklogItem(item);
        if (!options.full && backlogState[item.externalId] === currentHash) {
            skipped++;
            continue;
        }
        backlogState[item.externalId] = currentHash;
        changedItems.push(item);
    }
    if (changedItems.length === 0) {
        return { itemsFetched: items.length, itemsIndexed: 0, skipped };
    }
    await logger.info(`Backlog: converting ${changedItems.length} items to chunks...`);
    const chunks = changedItems.map(backlogItemToChunk);
    // Embed chunks — prepend the NL summary (when present) so it
    // contributes to the embedding vector.
    await logger.info(`Backlog: embedding ${chunks.length} items...`);
    const resolvedEmbeddingProvider = embeddingProvider ?? createSimpleEmbeddingProvider(config.embedding);
    const textsToEmbed = chunks.map((c) => c.nlSummary ? `${c.nlSummary}\n\n${c.content}` : c.content);
    const embedResult = await resolvedEmbeddingProvider.embed(textsToEmbed);
    if (embedResult.isErr()) {
        return { itemsFetched: items.length, itemsIndexed: 0, skipped, error: `Backlog embedding failed: ${embedResult.error.message}` };
    }
    const embeddings = embedResult.value;
    // Store in LanceDB
    await logger.info(`Backlog: storing ${chunks.length} items in vector database...`);
    const store = new LanceDBStore(storagePath, config.embedding.dimensions);
    await store.connect();
    let upsertResult;
    try {
        const ids = chunks.map((c) => c.id);
        const metadata = chunks.map((c) => ({
            content: c.content,
            nl_summary: c.nlSummary,
            chunk_type: c.metadata.chunkType,
            file_path: c.filePath,
            language: c.language,
            start_line: c.startLine,
            end_line: c.endLine,
            name: c.metadata.name,
        }));
        upsertResult = await store.upsert(ids, embeddings, metadata);
    }
    finally {
        // Always release the store connection, even if upsert throws,
        // so a failed upsert does not leak the LanceDB handle.
        store.close();
    }
    if (upsertResult.isErr()) {
        return { itemsFetched: items.length, itemsIndexed: 0, skipped, error: `Backlog store failed: ${upsertResult.error.message}` };
    }
    // Add to BM25 index (append to existing)
    await logger.info('Backlog: updating BM25 index...');
    const bm25Path = join(storagePath, 'bm25-index.json');
    let bm25;
    try {
        const existingBm25 = await readFile(bm25Path, 'utf-8');
        bm25 = BM25Index.deserialize(existingBm25);
    }
    catch {
        // No existing BM25 index — start a fresh one.
        bm25 = new BM25Index();
    }
    bm25.addChunks(chunks);
    await writeFile(bm25Path, bm25.serialize(), 'utf-8');
    // Save backlog state so the next run can skip unchanged items
    await writeFile(backlogStatePath, JSON.stringify(backlogState, null, 2), 'utf-8');
    // Link backlog items into the dependency graph
    await logger.info('Backlog: linking items to dependency graph...');
    await linkBacklogToGraph(items, storagePath, bm25Path, logger);
    return { itemsFetched: items.length, itemsIndexed: changedItems.length, skipped };
}
|
|
761
|
+
/**
 * Augment the dependency graph with backlog nodes and edges.
 *
 * Two linking directions:
 * 1. Backlog → Code: each item's linkedCodePaths creates a 'references' edge
 * 2. Code → Backlog: scan code chunks for AB#XXXX references, create reverse edges
 *
 * Best-effort: if graph.json is missing the whole step is skipped with a
 * warning, and if the BM25 index is unavailable only the code→backlog
 * direction is skipped.
 */
async function linkBacklogToGraph(items, storagePath, bm25Path, logger) {
    // Load existing graph
    const graphPath = join(storagePath, 'graph.json');
    let graph;
    try {
        const graphData = await readFile(graphPath, 'utf-8');
        graph = DependencyGraph.fromJSON(JSON.parse(graphData));
    }
    catch {
        await logger.warn('Backlog: no graph.json found, skipping graph linking');
        return;
    }
    const existingNodeIds = new Set(graph.getAllNodes().map((n) => n.id));
    let nodesAdded = 0;
    let edgesAdded = 0;
    // 1. Add backlog items as nodes + edges from linkedCodePaths
    for (const item of items) {
        const nodeId = `backlog:${item.externalId.replace('#', '-')}`;
        if (!existingNodeIds.has(nodeId)) {
            graph.addNode({
                id: nodeId,
                filePath: `backlog/${item.externalId}`,
                symbols: [item.title],
                type: 'backlog',
            });
            existingNodeIds.add(nodeId);
            nodesAdded++;
        }
        // Edges: backlog item → linked code files (only when the code file
        // is already a node in the graph; backslashes normalized for Windows)
        for (const codePath of item.linkedCodePaths) {
            const normalizedPath = codePath.replace(/\\/g, '/');
            if (existingNodeIds.has(normalizedPath)) {
                graph.addEdge({ source: nodeId, target: normalizedPath, type: 'references' });
                edgesAdded++;
            }
        }
    }
    // 2. Scan code chunks for AB# references → create code → backlog edges
    // Search BM25 for "AB" to find chunks likely containing AB#XXXX references
    const backlogIdSet = new Set(items.map((i) => i.externalId));
    try {
        const bm25Data = await readFile(bm25Path, 'utf-8');
        const bm25 = BM25Index.deserialize(bm25Data);
        const candidateResults = bm25.search('AB', 500);
        for (const result of candidateResults) {
            // Skip backlog chunks themselves (would self-link) and results
            // without chunk metadata.
            if (result.chunkId.startsWith('backlog:') || !result.chunk)
                continue;
            const refs = scanForABReferences(result.content);
            for (const refId of refs) {
                const externalId = `AB#${refId}`;
                const backlogNodeId = `backlog:${externalId.replace('#', '-')}`;
                if (backlogIdSet.has(externalId) && existingNodeIds.has(backlogNodeId)) {
                    const codeNodeId = result.chunk.filePath.replace(/\\/g, '/');
                    if (existingNodeIds.has(codeNodeId)) {
                        graph.addEdge({ source: codeNodeId, target: backlogNodeId, type: 'references' });
                        edgesAdded++;
                    }
                }
            }
        }
    }
    catch {
        // BM25 not available yet — skip code→backlog linking
    }
    // Save augmented graph
    await writeFile(graphPath, JSON.stringify(graph.toJSON()), 'utf-8');
    // Report the number of nodes actually added (not the total item count),
    // so re-runs on an already-linked graph don't over-report.
    await logger.info(`Backlog: added ${nodesAdded} backlog nodes, ${edgesAdded} reference edges to graph`);
}
|
|
834
|
+
/**
 * Register the `coderag index` command on the commander program.
 *
 * The action loads configuration, then indexes either multiple repos
 * (when `config.repos` is set) or the current single repo, optionally
 * followed by backlog indexing, and prints a summary. Fatal failures
 * exit the process with code 1.
 *
 * @param {import('commander').Command} program - Commander program to extend.
 */
export function registerIndexCommand(program) {
    program
        .command('index')
        .description('Index the codebase: scan, parse, chunk, enrich, embed, and store')
        .option('--full', 'Force a complete re-index (ignore incremental state)')
        .option('--quiet', 'Suppress progress output (used by git hooks and background processes)')
        .action(async (options) => {
        const quiet = options.quiet === true;
        const startTime = Date.now();
        // Use a temporary spinner for config loading (logger needs storagePath from config)
        const bootSpinner = quiet ? null : ora('Loading configuration...').start();
        try {
            const rootDir = process.cwd();
            // Step 1: Load config
            const configResult = await loadConfig(rootDir);
            if (configResult.isErr()) {
                // Missing/invalid config is fatal: point the user at `init`.
                bootSpinner?.fail(configResult.error.message);
                if (!quiet) {
                    // eslint-disable-next-line no-console
                    console.error(chalk.red('Run "coderag init" first to create a configuration file.'));
                }
                process.exit(1);
            }
            const config = configResult.value;
            const storagePath = resolve(rootDir, config.storage.path);
            // Prevent path traversal outside project root
            if (!storagePath.startsWith(resolve(rootDir) + sep) && storagePath !== resolve(rootDir)) {
                bootSpinner?.fail('Storage path escapes project root');
                process.exit(1);
            }
            bootSpinner?.succeed('Configuration loaded');
            // Create IndexLogger — writes to .coderag/index.log + progress JSON
            const logger = new IndexLogger(storagePath, quiet);
            await logger.init();
            // Create embedding provider (with auto-start lifecycle if provider is 'auto')
            const managed = await createManagedEmbeddingProvider(config, logger);
            try {
                // Multi-repo path: if repos are configured, index each independently
                if (config.repos && config.repos.length > 0) {
                    await indexMultiRepo(config, storagePath, options, logger, startTime);
                    return;
                }
                // Single-repo path
                logger.start('Starting indexing...');
                const result = await indexSingleRepo(rootDir, storagePath, config, options, logger, undefined, managed.provider);
                // Nothing changed at all → short-circuit with an up-to-date message.
                if (result.filesProcessed === 0 && result.chunksCreated === 0 && result.parseErrors === 0) {
                    await logger.succeed('No changes detected, index is up to date.');
                    return;
                }
                // Everything that changed failed to parse → warn and show up to
                // 5 failure details, then bail before the summary.
                if (result.chunksCreated === 0 && result.parseErrors > 0) {
                    await logger.warn('No chunks produced. Nothing to index.');
                    // eslint-disable-next-line no-console
                    console.log(chalk.yellow(` ${result.parseErrors} file(s) failed to parse:`));
                    for (const detail of result.parseErrorDetails.slice(0, 5)) {
                        // eslint-disable-next-line no-console
                        console.log(` ${chalk.gray('→')} ${detail.file}: ${chalk.yellow(detail.reason)}`);
                    }
                    if (result.parseErrorDetails.length > 5) {
                        // eslint-disable-next-line no-console
                        console.log(` ${chalk.gray(`… and ${result.parseErrorDetails.length - 5} more`)}`);
                    }
                    return;
                }
                // Backlog indexing (if configured)
                let backlogResult = null;
                if (config.backlog) {
                    try {
                        backlogResult = await indexBacklog(config.backlog, storagePath, config, options, logger, managed.provider);
                    }
                    catch (backlogError) {
                        // Backlog failures are non-fatal: capture the message and
                        // surface it in the summary instead of aborting the run.
                        const msg = backlogError instanceof Error ? backlogError.message : String(backlogError);
                        backlogResult = { itemsFetched: 0, itemsIndexed: 0, skipped: 0, error: msg };
                    }
                }
                // Summary
                const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
                await logger.succeed('Indexing complete!');
                // eslint-disable-next-line no-console
                console.log('');
                // eslint-disable-next-line no-console
                console.log(chalk.bold('Summary:'));
                // eslint-disable-next-line no-console
                console.log(` Files processed: ${chalk.cyan(String(result.filesProcessed))}`);
                // eslint-disable-next-line no-console
                console.log(` Chunks created: ${chalk.cyan(String(result.chunksCreated))}`);
                if (result.skippedFiles > 0) {
                    // eslint-disable-next-line no-console
                    console.log(` Skipped: ${chalk.gray(String(result.skippedFiles))} (unsupported file types)`);
                }
                if (result.parseErrors > 0) {
                    // eslint-disable-next-line no-console
                    console.log(` Parse errors: ${chalk.yellow(String(result.parseErrors))}`);
                    // Show at most 10 parse-failure details in the summary.
                    for (const detail of result.parseErrorDetails.slice(0, 10)) {
                        // eslint-disable-next-line no-console
                        console.log(` ${chalk.gray('→')} ${detail.file}: ${chalk.yellow(detail.reason)}`);
                    }
                    if (result.parseErrorDetails.length > 10) {
                        // eslint-disable-next-line no-console
                        console.log(` ${chalk.gray(`… and ${result.parseErrorDetails.length - 10} more`)}`);
                    }
                }
                if (backlogResult) {
                    if (backlogResult.error) {
                        // eslint-disable-next-line no-console
                        console.log(` Backlog: ${chalk.yellow(backlogResult.error)}`);
                    }
                    else if (backlogResult.itemsIndexed > 0) {
                        // eslint-disable-next-line no-console
                        console.log(` Backlog indexed: ${chalk.cyan(String(backlogResult.itemsIndexed))} items (${backlogResult.skipped} unchanged)`);
                    }
                    else if (backlogResult.itemsFetched > 0) {
                        // eslint-disable-next-line no-console
                        console.log(` Backlog: ${chalk.green('up to date')} (${backlogResult.itemsFetched} items)`);
                    }
                }
                // eslint-disable-next-line no-console
                console.log(` Time elapsed: ${chalk.cyan(elapsed + 's')}`);
                // eslint-disable-next-line no-console
                console.log(` Log file: ${chalk.gray(join(storagePath, 'index.log'))}`);
            }
            finally {
                // Auto-stop backend if configured
                if (managed.lifecycleManager && config.embedding.autoStop) {
                    await logger.info('Stopping embedding backend (auto_stop enabled)...');
                    await managed.lifecycleManager.stop();
                    await logger.info('Embedding backend stopped.');
                }
            }
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            // eslint-disable-next-line no-console
            console.error(chalk.red('\nIndexing failed:'), message);
            process.exit(1);
        }
    });
}
|
|
971
|
+
/**
 * Multi-repo indexing: iterate configured repos, index each with separate
 * progress reporting and per-repo storage directories.
 *
 * Exits the process with code 1 when the multi-repo indexer itself fails;
 * per-repo errors are reported and counted but do not abort the run.
 */
async function indexMultiRepo(config, storagePath, options, logger, startTime) {
    const repos = config.repos;
    // Single funnel for user-facing output.
    // eslint-disable-next-line no-console
    const print = (line) => console.log(line);
    print('');
    print(chalk.bold(`Indexing ${repos.length} repo(s)...`));
    print('');
    const indexer = new MultiRepoIndexer(repos, storagePath);
    logger.start('Starting multi-repo indexing...');
    const outcome = await indexer.indexAll({
        full: options.full,
        onProgress: (repoName, status) => {
            // Fire-and-forget: progress logging must not block indexing.
            void logger.info(`[${repoName}] ${status}`);
        },
    });
    if (outcome.isErr()) {
        await logger.fail(`Multi-repo indexing failed: ${outcome.error.message}`);
        process.exit(1);
    }
    // Per-repo summary + running totals.
    let totalFiles = 0;
    let totalChunks = 0;
    let totalErrors = 0;
    for (const res of outcome.value.repoResults) {
        totalFiles += res.filesProcessed;
        totalChunks += res.chunksCreated;
        if (res.errors.length > 0) {
            totalErrors += res.errors.length;
            await logger.fail(`[${res.repoName}] Failed`);
            for (const error of res.errors) {
                print(` ${chalk.gray('→')} ${chalk.red(error)}`);
            }
            continue;
        }
        const detail = res.filesProcessed === 0 ? 'Up to date' : `${res.filesProcessed} file(s) processed`;
        await logger.succeed(`[${res.repoName}] ${detail}`);
    }
    // Total summary
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    print('');
    print(chalk.bold('Total Summary:'));
    print(` Repos indexed: ${chalk.cyan(String(repos.length))}`);
    print(` Files processed: ${chalk.cyan(String(totalFiles))}`);
    print(` Chunks created: ${chalk.cyan(String(totalChunks))}`);
    if (totalErrors > 0) {
        print(` Errors: ${chalk.yellow(String(totalErrors))}`);
    }
    print(` Time elapsed: ${chalk.cyan(elapsed + 's')}`);
    print(` Log file: ${chalk.gray(join(storagePath, 'index.log'))}`);
}
|