@code-rag/cli 0.1.3 → 0.1.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -980,40 +980,347 @@ async function indexMultiRepo(config, storagePath, options, logger, startTime, e
980
980
  console.log(chalk.bold(`Indexing ${repos.length} repo(s)...`));
981
981
  // eslint-disable-next-line no-console
982
982
  console.log('');
983
+ logger.start('Starting multi-repo indexing...');
984
+ const parser = new TreeSitterParser();
985
+ const initResult = await parser.initialize();
986
+ if (initResult.isErr()) {
987
+ throw new Error(`Parser init failed: ${initResult.error.message}`);
988
+ }
989
+ const mdParser = new MarkdownParser({ maxTokensPerChunk: config.ingestion.maxTokensPerChunk });
990
+ const chunker = new ASTChunker({ maxTokensPerChunk: config.ingestion.maxTokensPerChunk });
991
+ const repoResults = [];
983
992
  let totalFiles = 0;
984
993
  let totalChunks = 0;
985
994
  let totalErrors = 0;
986
- logger.start('Starting multi-repo indexing...');
987
995
  for (const repo of repos) {
988
996
  const repoName = repo.name ?? basename(repo.path);
989
997
  const repoPath = resolve(repo.path);
990
998
  const repoStoragePath = join(storagePath, repoName);
991
999
  await mkdir(repoStoragePath, { recursive: true });
992
- try {
993
- const result = await indexSingleRepo(repoPath, repoStoragePath, config, options, logger, repoName, embeddingProvider);
994
- totalFiles += result.filesProcessed;
995
- totalChunks += result.chunksCreated;
996
- if (result.filesProcessed === 0 && result.chunksCreated === 0 && result.parseErrors === 0) {
1000
+ // Load or create index state
1001
+ let indexState = new IndexState();
1002
+ const indexStatePath = join(repoStoragePath, 'index-state.json');
1003
+ if (!options.full) {
1004
+ try {
1005
+ const stateData = await readFile(indexStatePath, 'utf-8');
1006
+ indexState = IndexState.fromJSON(JSON.parse(stateData));
1007
+ }
1008
+ catch {
1009
+ // No saved state, start fresh
1010
+ }
1011
+ }
1012
+ // Scan files
1013
+ await logger.info(`[${repoName}] Scanning files...`);
1014
+ const ignoreFilter = createIgnoreFilter(repoPath);
1015
+ const scanner = new FileScanner(repoPath, ignoreFilter);
1016
+ const scanResult = await scanner.scanFiles();
1017
+ if (scanResult.isErr()) {
1018
+ totalErrors++;
1019
+ await logger.fail(`[${repoName}] Scan failed: ${scanResult.error.message}`);
1020
+ continue;
1021
+ }
1022
+ const scannedFiles = scanResult.value;
1023
+ let filesToProcess = scannedFiles;
1024
+ if (!options.full) {
1025
+ filesToProcess = scannedFiles.filter((f) => indexState.isDirty(f.filePath, f.contentHash));
1026
+ if (filesToProcess.length === 0) {
1027
+ repoResults.push({
1028
+ repoName, repoPath, repoStoragePath, filesToProcess: [], chunks: [],
1029
+ parsedFiles: [], indexState, indexStatePath, parseErrors: 0, skippedFiles: 0, parseErrorDetails: [],
1030
+ });
997
1031
  await logger.succeed(`[${repoName}] Up to date`);
1032
+ continue;
998
1033
  }
999
- else {
1000
- await logger.succeed(`[${repoName}] ${result.filesProcessed} file(s), ${result.chunksCreated} chunks`);
1034
+ }
1035
+ // Parse & chunk
1036
+ const repoChunks = [];
1037
+ const repoParsedFiles = [];
1038
+ let parseErrors = 0;
1039
+ let skippedFiles = 0;
1040
+ const parseErrorDetails = [];
1041
+ for (const file of filesToProcess) {
1042
+ if (MarkdownParser.isMarkdownFile(file.filePath)) {
1043
+ const mdResult = mdParser.parse(file.filePath, file.content);
1044
+ if (mdResult.isErr()) {
1045
+ parseErrors++;
1046
+ parseErrorDetails.push({ file: file.filePath, reason: mdResult.error.message });
1047
+ continue;
1048
+ }
1049
+ repoChunks.push(...mdResult.value.chunks);
1050
+ continue;
1001
1051
  }
1002
- if (result.parseErrors > 0) {
1003
- totalErrors += result.parseErrors;
1004
- for (const detail of result.parseErrorDetails.slice(0, 3)) {
1005
- // eslint-disable-next-line no-console
1006
- console.log(` ${chalk.gray('→')} ${detail.file}: ${chalk.yellow(detail.reason)}`);
1052
+ const parseResult = await parser.parse(file.filePath, file.content);
1053
+ if (parseResult.isErr()) {
1054
+ if (parseResult.error.message.startsWith('Unsupported file type:')) {
1055
+ skippedFiles++;
1056
+ continue;
1007
1057
  }
1058
+ parseErrors++;
1059
+ parseErrorDetails.push({ file: file.filePath, reason: parseResult.error.message });
1060
+ continue;
1008
1061
  }
1062
+ const parsed = parseResult.value;
1063
+ repoParsedFiles.push(parsed);
1064
+ const chunkResult = await chunker.chunk(parsed);
1065
+ if (chunkResult.isErr()) {
1066
+ parseErrors++;
1067
+ parseErrorDetails.push({ file: file.filePath, reason: chunkResult.error.message });
1068
+ continue;
1069
+ }
1070
+ repoChunks.push(...chunkResult.value);
1009
1071
  }
1010
- catch (error) {
1011
- const message = error instanceof Error ? error.message : String(error);
1072
+ // Stamp repoName in chunk metadata
1073
+ for (const chunk of repoChunks) {
1074
+ chunk.metadata.repoName = repoName;
1075
+ }
1076
+ totalFiles += filesToProcess.length;
1077
+ totalChunks += repoChunks.length;
1078
+ totalErrors += parseErrors;
1079
+ const parsedCount = filesToProcess.length - parseErrors - skippedFiles;
1080
+ await logger.succeed(`[${repoName}] ${filesToProcess.length} files, ${parsedCount} parsed, ${repoChunks.length} chunks` +
1081
+ (skippedFiles > 0 ? ` (${skippedFiles} unsupported skipped)` : ''));
1082
+ if (parseErrors > 0) {
1083
+ for (const detail of parseErrorDetails.slice(0, 3)) {
1084
+ // eslint-disable-next-line no-console
1085
+ console.log(` ${chalk.gray('→')} ${detail.file}: ${chalk.yellow(detail.reason)}`);
1086
+ }
1087
+ }
1088
+ repoResults.push({
1089
+ repoName, repoPath, repoStoragePath, filesToProcess, chunks: repoChunks,
1090
+ parsedFiles: repoParsedFiles, indexState, indexStatePath, parseErrors, skippedFiles, parseErrorDetails,
1091
+ });
1092
+ }
1093
+ // Show aggregate totals after scan & parse
1094
+ // eslint-disable-next-line no-console
1095
+ console.log('');
1096
+ // eslint-disable-next-line no-console
1097
+ console.log(chalk.bold('Scan complete: ') +
1098
+ `${chalk.cyan(String(totalFiles))} files, ${chalk.cyan(String(totalChunks))} chunks across ${chalk.cyan(String(repos.length))} repos`);
1099
+ // eslint-disable-next-line no-console
1100
+ console.log('');
1101
+ // All chunks across all repos (for unified enrichment)
1102
+ const allChunks = repoResults.flatMap((r) => r.chunks);
1103
+ if (allChunks.length === 0) {
1104
+ // Still update index state for files with no chunks
1105
+ for (const rr of repoResults) {
1106
+ for (const file of rr.filesToProcess) {
1107
+ rr.indexState.setFileState(file.filePath, {
1108
+ filePath: file.filePath,
1109
+ contentHash: file.contentHash,
1110
+ lastIndexedAt: new Date(),
1111
+ chunkIds: [],
1112
+ });
1113
+ }
1114
+ await writeFile(rr.indexStatePath, JSON.stringify(rr.indexState.toJSON(), null, 2), 'utf-8');
1115
+ }
1116
+ const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
1117
+ // eslint-disable-next-line no-console
1118
+ console.log(chalk.yellow('No chunks produced. Nothing to embed.'));
1119
+ // eslint-disable-next-line no-console
1120
+ console.log(` Time elapsed: ${chalk.cyan(elapsed + 's')}`);
1121
+ return;
1122
+ }
1123
+ // ── Phase 2: Enrich all chunks together (slow) ─────────────────────
1124
+ await logger.setPhase('enrich', { totalChunks: allChunks.length, enrichedChunks: 0 });
1125
+ const ollamaClient = new OllamaClient({ model: config.llm.model });
1126
+ const enricher = new NLEnricher(ollamaClient);
1127
+ // Load checkpoint (shared across repos)
1128
+ const checkpoint = await loadEnrichmentCheckpoint(storagePath);
1129
+ const savedSummaries = checkpoint?.summaries ?? {};
1130
+ await logger.info(`Checkpoint: ${checkpoint ? `loaded (${Object.keys(savedSummaries).length} summaries)` : 'none found'}`);
1131
+ const chunksToEnrich = allChunks.filter((c) => !(c.id in savedSummaries));
1132
+ if (Object.keys(savedSummaries).length > 0) {
1133
+ await logger.info(`Resuming enrichment: ${Object.keys(savedSummaries).length} already done, ${chunksToEnrich.length} remaining`);
1134
+ }
1135
+ else {
1136
+ await logger.info(`Enriching ${allChunks.length} chunks with NL summaries...`);
1137
+ }
1138
+ // Pre-flight: verify Ollama
1139
+ const ollamaAvailable = await ollamaClient.isAvailable();
1140
+ if (!ollamaAvailable) {
1141
+ await logger.fail(`Ollama is not reachable at ${ollamaClient.currentConfig.baseUrl}. Start Ollama first, then re-run.`);
1142
+ throw new Error(`Ollama is not reachable at ${ollamaClient.currentConfig.baseUrl}`);
1143
+ }
1144
+ let enrichErrors = 0;
1145
+ let consecutiveFailures = 0;
1146
+ const MAX_CONSECUTIVE_FAILURES = 3;
1147
+ const totalBatches = Math.ceil(chunksToEnrich.length / ENRICHMENT_BATCH_SIZE);
1148
+ for (let i = 0; i < chunksToEnrich.length; i += ENRICHMENT_BATCH_SIZE) {
1149
+ const batchNum = Math.floor(i / ENRICHMENT_BATCH_SIZE) + 1;
1150
+ const batch = chunksToEnrich.slice(i, i + ENRICHMENT_BATCH_SIZE);
1151
+ await logger.info(`Enrichment batch ${batchNum}/${totalBatches} (${batch.length} chunks, ${Object.keys(savedSummaries).length}/${allChunks.length} total)...`);
1152
+ const enrichResult = await enricher.enrichBatch(batch);
1153
+ if (enrichResult.isOk()) {
1154
+ const { enriched, failedCount } = enrichResult.value;
1155
+ for (const chunk of enriched) {
1156
+ if (chunk.nlSummary) {
1157
+ savedSummaries[chunk.id] = chunk.nlSummary;
1158
+ }
1159
+ }
1160
+ if (failedCount === 0) {
1161
+ consecutiveFailures = 0;
1162
+ }
1163
+ else if (enriched.length > 0) {
1164
+ consecutiveFailures = 0;
1165
+ enrichErrors++;
1166
+ await logger.warn(`Batch ${batchNum}: ${enriched.length} OK, ${failedCount} failed`);
1167
+ }
1168
+ else {
1169
+ consecutiveFailures++;
1170
+ enrichErrors++;
1171
+ await logger.warn(`Batch ${batchNum}: all ${failedCount} chunks failed`);
1172
+ if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1173
+ await logger.fail(`Enrichment aborted: ${MAX_CONSECUTIVE_FAILURES} consecutive batch failures. ` +
1174
+ `Is Ollama running? Check: curl ${ollamaClient.currentConfig.baseUrl}/api/tags`);
1175
+ await saveEnrichmentCheckpoint(storagePath, {
1176
+ summaries: savedSummaries,
1177
+ totalProcessed: Object.keys(savedSummaries).length,
1178
+ });
1179
+ throw new Error(`Enrichment aborted after ${MAX_CONSECUTIVE_FAILURES} consecutive failures`);
1180
+ }
1181
+ }
1182
+ }
1183
+ else {
1184
+ enrichErrors++;
1185
+ consecutiveFailures++;
1186
+ await logger.warn(`Batch ${batchNum} enrichment error: ${enrichResult.error.message}`);
1187
+ }
1188
+ await saveEnrichmentCheckpoint(storagePath, {
1189
+ summaries: savedSummaries,
1190
+ totalProcessed: Object.keys(savedSummaries).length,
1191
+ });
1192
+ await logger.updateCount('enrichedChunks', Object.keys(savedSummaries).length);
1193
+ }
1194
+ if (enrichErrors > 0) {
1195
+ await logger.warn(`${enrichErrors} enrichment batch(es) failed, some chunks have no NL summary`);
1196
+ }
1197
+ await clearEnrichmentCheckpoint(storagePath);
1198
+ // ── Phase 3: Embed & Store per repo ─────────────────────────────────
1199
+ await logger.setPhase('embed');
1200
+ const resolvedEmbeddingProvider = embeddingProvider ?? createSimpleEmbeddingProvider(config.embedding);
1201
+ for (const rr of repoResults) {
1202
+ if (rr.chunks.length === 0)
1203
+ continue;
1204
+ // Apply saved summaries to this repo's chunks
1205
+ const enrichedChunks = rr.chunks.map((c) => {
1206
+ const summary = savedSummaries[c.id];
1207
+ return summary ? { ...c, nlSummary: summary } : c;
1208
+ });
1209
+ await logger.info(`[${rr.repoName}] Embedding ${enrichedChunks.length} chunks...`);
1210
+ const textsToEmbed = enrichedChunks.map((c) => c.nlSummary ? `${c.nlSummary}\n\n${c.content}` : c.content);
1211
+ const embedResult = await resolvedEmbeddingProvider.embed(textsToEmbed);
1212
+ if (embedResult.isErr()) {
1213
+ totalErrors++;
1214
+ await logger.fail(`[${rr.repoName}] Embedding failed: ${embedResult.error.message}`);
1215
+ continue;
1216
+ }
1217
+ const embeddings = embedResult.value;
1218
+ // Store in LanceDB
1219
+ await logger.info(`[${rr.repoName}] Storing in LanceDB...`);
1220
+ const store = new LanceDBStore(rr.repoStoragePath, config.embedding.dimensions);
1221
+ await store.connect();
1222
+ const ids = enrichedChunks.map((c) => c.id);
1223
+ const metadata = enrichedChunks.map((c) => ({
1224
+ content: c.content,
1225
+ nl_summary: c.nlSummary,
1226
+ chunk_type: c.metadata.chunkType,
1227
+ file_path: c.filePath,
1228
+ language: c.language,
1229
+ start_line: c.startLine,
1230
+ end_line: c.endLine,
1231
+ name: c.metadata.name,
1232
+ ...(c.metadata.repoName ? { repo_name: c.metadata.repoName } : {}),
1233
+ }));
1234
+ const upsertResult = await store.upsert(ids, embeddings, metadata);
1235
+ if (upsertResult.isErr()) {
1236
+ store.close();
1012
1237
  totalErrors++;
1013
- await logger.fail(`[${repoName}] ${message}`);
1238
+ await logger.fail(`[${rr.repoName}] Store failed: ${upsertResult.error.message}`);
1239
+ continue;
1014
1240
  }
1241
+ // BM25 index
1242
+ const bm25Path = join(rr.repoStoragePath, 'bm25-index.json');
1243
+ let bm25;
1244
+ if (options.full) {
1245
+ bm25 = new BM25Index();
1246
+ }
1247
+ else {
1248
+ try {
1249
+ const existingBm25 = await readFile(bm25Path, 'utf-8');
1250
+ bm25 = BM25Index.deserialize(existingBm25);
1251
+ const staleChunkIds = [];
1252
+ for (const file of rr.filesToProcess) {
1253
+ const fileState = rr.indexState.getFileState(file.filePath);
1254
+ if (fileState)
1255
+ staleChunkIds.push(...fileState.chunkIds);
1256
+ }
1257
+ if (staleChunkIds.length > 0) {
1258
+ try {
1259
+ bm25.removeChunks(staleChunkIds);
1260
+ }
1261
+ catch {
1262
+ bm25 = await rebuildBm25FromStore(store, logger, `[${rr.repoName}] `);
1263
+ }
1264
+ }
1265
+ }
1266
+ catch {
1267
+ bm25 = new BM25Index();
1268
+ }
1269
+ }
1270
+ bm25.addChunks(enrichedChunks);
1271
+ await writeFile(bm25Path, bm25.serialize(), 'utf-8');
1272
+ // Dependency graph
1273
+ const graphBuilder = new GraphBuilder(rr.repoPath);
1274
+ const graphResult = graphBuilder.buildFromFiles(rr.parsedFiles);
1275
+ if (graphResult.isOk()) {
1276
+ const graphPath = join(rr.repoStoragePath, 'graph.json');
1277
+ const newGraph = graphResult.value;
1278
+ if (options.full) {
1279
+ await writeFile(graphPath, JSON.stringify(newGraph.toJSON()), 'utf-8');
1280
+ }
1281
+ else {
1282
+ try {
1283
+ const existingData = await readFile(graphPath, 'utf-8');
1284
+ const existingGraph = DependencyGraph.fromJSON(JSON.parse(existingData));
1285
+ const reindexedFiles = new Set(rr.filesToProcess.map((f) => f.filePath));
1286
+ const existingNodes = existingGraph.getAllNodes();
1287
+ const existingEdges = existingGraph.getAllEdges();
1288
+ const keptNodes = existingNodes.filter((n) => !reindexedFiles.has(n.filePath));
1289
+ const keptNodeIds = new Set(keptNodes.map((n) => n.id));
1290
+ const keptEdges = existingEdges.filter((e) => keptNodeIds.has(e.source) && keptNodeIds.has(e.target));
1291
+ const merged = new DependencyGraph();
1292
+ for (const node of keptNodes)
1293
+ merged.addNode(node);
1294
+ for (const edge of keptEdges)
1295
+ merged.addEdge(edge);
1296
+ for (const node of newGraph.getAllNodes())
1297
+ merged.addNode(node);
1298
+ for (const edge of newGraph.getAllEdges())
1299
+ merged.addEdge(edge);
1300
+ await writeFile(graphPath, JSON.stringify(merged.toJSON()), 'utf-8');
1301
+ }
1302
+ catch {
1303
+ await writeFile(graphPath, JSON.stringify(newGraph.toJSON()), 'utf-8');
1304
+ }
1305
+ }
1306
+ }
1307
+ // Update index state
1308
+ for (const file of rr.filesToProcess) {
1309
+ const fileChunkIds = enrichedChunks
1310
+ .filter((c) => c.filePath === file.filePath)
1311
+ .map((c) => c.id);
1312
+ rr.indexState.setFileState(file.filePath, {
1313
+ filePath: file.filePath,
1314
+ contentHash: file.contentHash,
1315
+ lastIndexedAt: new Date(),
1316
+ chunkIds: fileChunkIds,
1317
+ });
1318
+ }
1319
+ await writeFile(rr.indexStatePath, JSON.stringify(rr.indexState.toJSON(), null, 2), 'utf-8');
1320
+ store.close();
1321
+ await logger.succeed(`[${rr.repoName}] ${enrichedChunks.length} chunks indexed`);
1015
1322
  }
1016
- // Total summary
1323
+ // ── Final summary ───────────────────────────────────────────────────
1017
1324
  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
1018
1325
  // eslint-disable-next-line no-console
1019
1326
  console.log('');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@code-rag/cli",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "CLI tool for CodeRAG — init, index, search, serve, and status commands for codebase context engine",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -48,9 +48,9 @@
48
48
  "commander": "^13.1.0",
49
49
  "ora": "^8.2.0",
50
50
  "yaml": "^2.7.0",
51
- "@code-rag/api-server": "0.1.3",
52
- "@code-rag/mcp-server": "0.1.3",
53
- "@code-rag/core": "0.1.3"
51
+ "@code-rag/api-server": "0.1.5",
52
+ "@code-rag/core": "0.1.5",
53
+ "@code-rag/mcp-server": "0.1.5"
54
54
  },
55
55
  "devDependencies": {
56
56
  "@types/node": "^22.13.4",