smart-coding-mcp 1.2.4 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -168
- package/config.json +4 -3
- package/example.png +0 -0
- package/features/clear-cache.js +30 -7
- package/features/index-codebase.js +507 -37
- package/how-its-works.png +0 -0
- package/index.js +2 -2
- package/lib/cache.js +5 -0
- package/lib/config.js +29 -4
- package/lib/embedding-worker.js +67 -0
- package/lib/tokenizer.js +142 -0
- package/lib/utils.js +113 -25
- package/package.json +9 -3
- package/test/clear-cache.test.js +288 -0
- package/test/embedding-model.test.js +230 -0
- package/test/helpers.js +128 -0
- package/test/hybrid-search.test.js +243 -0
- package/test/index-codebase.test.js +246 -0
- package/test/integration.test.js +223 -0
- package/test/tokenizer.test.js +225 -0
- package/vitest.config.js +29 -0
|
@@ -1,15 +1,244 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { fdir } from "fdir";
|
|
2
2
|
import fs from "fs/promises";
|
|
3
3
|
import chokidar from "chokidar";
|
|
4
4
|
import path from "path";
|
|
5
|
+
import os from "os";
|
|
6
|
+
import { Worker } from "worker_threads";
|
|
7
|
+
import { fileURLToPath } from "url";
|
|
5
8
|
import { smartChunk, hashContent } from "../lib/utils.js";
|
|
6
9
|
|
|
10
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
11
|
+
|
|
7
12
|
export class CodebaseIndexer {
|
|
8
|
-
constructor(embedder, cache, config) {
|
|
13
|
+
constructor(embedder, cache, config, server = null) {
|
|
9
14
|
this.embedder = embedder;
|
|
10
15
|
this.cache = cache;
|
|
11
16
|
this.config = config;
|
|
17
|
+
this.server = server;
|
|
12
18
|
this.watcher = null;
|
|
19
|
+
this.workers = [];
|
|
20
|
+
this.workerReady = [];
|
|
21
|
+
this.isIndexing = false;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Initialize worker thread pool for parallel embedding
|
|
26
|
+
*/
|
|
27
|
+
async initializeWorkers() {
|
|
28
|
+
const numWorkers = this.config.workerThreads === "auto"
|
|
29
|
+
? Math.max(1, os.cpus().length - 1)
|
|
30
|
+
: (this.config.workerThreads || 1);
|
|
31
|
+
|
|
32
|
+
// Only use workers if we have more than 1 CPU
|
|
33
|
+
if (numWorkers <= 1) {
|
|
34
|
+
console.error("[Indexer] Single-threaded mode (1 CPU detected)");
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
if (this.config.verbose) {
|
|
39
|
+
console.error(`[Indexer] Worker config: workerThreads=${this.config.workerThreads}, resolved to ${numWorkers}`);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
console.error(`[Indexer] Initializing ${numWorkers} worker threads...`);
|
|
43
|
+
|
|
44
|
+
const workerPath = path.join(__dirname, "../lib/embedding-worker.js");
|
|
45
|
+
|
|
46
|
+
for (let i = 0; i < numWorkers; i++) {
|
|
47
|
+
try {
|
|
48
|
+
const worker = new Worker(workerPath, {
|
|
49
|
+
workerData: {
|
|
50
|
+
embeddingModel: this.config.embeddingModel,
|
|
51
|
+
verbose: this.config.verbose
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
const readyPromise = new Promise((resolve, reject) => {
|
|
56
|
+
const timeout = setTimeout(() => reject(new Error("Worker init timeout")), 120000);
|
|
57
|
+
|
|
58
|
+
worker.once("message", (msg) => {
|
|
59
|
+
clearTimeout(timeout);
|
|
60
|
+
if (msg.type === "ready") {
|
|
61
|
+
resolve(worker);
|
|
62
|
+
} else if (msg.type === "error") {
|
|
63
|
+
reject(new Error(msg.error));
|
|
64
|
+
}
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
worker.once("error", (err) => {
|
|
68
|
+
clearTimeout(timeout);
|
|
69
|
+
reject(err);
|
|
70
|
+
});
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
this.workers.push(worker);
|
|
74
|
+
this.workerReady.push(readyPromise);
|
|
75
|
+
} catch (err) {
|
|
76
|
+
console.error(`[Indexer] Failed to create worker ${i}: ${err.message}`);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Wait for all workers to be ready
|
|
81
|
+
try {
|
|
82
|
+
await Promise.all(this.workerReady);
|
|
83
|
+
console.error(`[Indexer] ${this.workers.length} workers ready`);
|
|
84
|
+
if (this.config.verbose) {
|
|
85
|
+
console.error(`[Indexer] Each worker loaded model: ${this.config.embeddingModel}`);
|
|
86
|
+
}
|
|
87
|
+
} catch (err) {
|
|
88
|
+
console.error(`[Indexer] Worker initialization failed: ${err.message}, falling back to single-threaded`);
|
|
89
|
+
this.terminateWorkers();
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Terminate all worker threads
|
|
95
|
+
*/
|
|
96
|
+
terminateWorkers() {
|
|
97
|
+
for (const worker of this.workers) {
|
|
98
|
+
worker.postMessage({ type: "shutdown" });
|
|
99
|
+
}
|
|
100
|
+
this.workers = [];
|
|
101
|
+
this.workerReady = [];
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Send MCP progress notification to connected clients
|
|
106
|
+
*/
|
|
107
|
+
sendProgress(progress, total, message) {
|
|
108
|
+
if (this.server) {
|
|
109
|
+
try {
|
|
110
|
+
this.server.sendNotification("notifications/progress", {
|
|
111
|
+
progressToken: "indexing",
|
|
112
|
+
progress,
|
|
113
|
+
total,
|
|
114
|
+
message
|
|
115
|
+
});
|
|
116
|
+
} catch (err) {
|
|
117
|
+
// Silently ignore if client doesn't support progress notifications
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Process chunks using worker thread pool with timeout and error recovery
|
|
124
|
+
*/
|
|
125
|
+
async processChunksWithWorkers(allChunks) {
|
|
126
|
+
if (this.workers.length === 0) {
|
|
127
|
+
// Fallback to single-threaded processing
|
|
128
|
+
return this.processChunksSingleThreaded(allChunks);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
const results = [];
|
|
132
|
+
const chunkSize = Math.ceil(allChunks.length / this.workers.length);
|
|
133
|
+
const workerPromises = [];
|
|
134
|
+
const WORKER_TIMEOUT = 300000; // 5 minutes per batch
|
|
135
|
+
|
|
136
|
+
if (this.config.verbose) {
|
|
137
|
+
console.error(`[Indexer] Distributing ${allChunks.length} chunks across ${this.workers.length} workers (~${chunkSize} chunks each)`);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
for (let i = 0; i < this.workers.length; i++) {
|
|
141
|
+
const workerChunks = allChunks.slice(i * chunkSize, (i + 1) * chunkSize);
|
|
142
|
+
if (workerChunks.length === 0) continue;
|
|
143
|
+
|
|
144
|
+
if (this.config.verbose) {
|
|
145
|
+
console.error(`[Indexer] Worker ${i}: processing ${workerChunks.length} chunks`);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const promise = new Promise((resolve, reject) => {
|
|
149
|
+
const worker = this.workers[i];
|
|
150
|
+
const batchId = `batch-${i}-${Date.now()}`;
|
|
151
|
+
|
|
152
|
+
// Timeout handler
|
|
153
|
+
const timeout = setTimeout(() => {
|
|
154
|
+
worker.off("message", handler);
|
|
155
|
+
console.error(`[Indexer] Worker ${i} timed out, falling back to single-threaded for this batch`);
|
|
156
|
+
// Return empty and let fallback handle it
|
|
157
|
+
resolve([]);
|
|
158
|
+
}, WORKER_TIMEOUT);
|
|
159
|
+
|
|
160
|
+
const handler = (msg) => {
|
|
161
|
+
if (msg.batchId === batchId) {
|
|
162
|
+
clearTimeout(timeout);
|
|
163
|
+
worker.off("message", handler);
|
|
164
|
+
if (msg.type === "results") {
|
|
165
|
+
resolve(msg.results);
|
|
166
|
+
} else if (msg.type === "error") {
|
|
167
|
+
console.error(`[Indexer] Worker ${i} error: ${msg.error}`);
|
|
168
|
+
resolve([]); // Return empty, don't reject - let fallback handle
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
// Handle worker crash
|
|
174
|
+
const errorHandler = (err) => {
|
|
175
|
+
clearTimeout(timeout);
|
|
176
|
+
worker.off("message", handler);
|
|
177
|
+
console.error(`[Indexer] Worker ${i} crashed: ${err.message}`);
|
|
178
|
+
resolve([]); // Return empty, don't reject
|
|
179
|
+
};
|
|
180
|
+
worker.once("error", errorHandler);
|
|
181
|
+
|
|
182
|
+
worker.on("message", handler);
|
|
183
|
+
worker.postMessage({ type: "process", chunks: workerChunks, batchId });
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
workerPromises.push({ promise, chunks: workerChunks });
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Wait for all workers with error recovery
|
|
190
|
+
const workerResults = await Promise.all(workerPromises.map(p => p.promise));
|
|
191
|
+
|
|
192
|
+
// Collect results and identify failed chunks that need retry
|
|
193
|
+
const failedChunks = [];
|
|
194
|
+
for (let i = 0; i < workerResults.length; i++) {
|
|
195
|
+
if (workerResults[i].length > 0) {
|
|
196
|
+
results.push(...workerResults[i]);
|
|
197
|
+
} else if (workerPromises[i].chunks.length > 0) {
|
|
198
|
+
// Worker failed or timed out, need to retry these chunks
|
|
199
|
+
failedChunks.push(...workerPromises[i].chunks);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// Retry failed chunks with single-threaded fallback
|
|
204
|
+
if (failedChunks.length > 0) {
|
|
205
|
+
console.error(`[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`);
|
|
206
|
+
const retryResults = await this.processChunksSingleThreaded(failedChunks);
|
|
207
|
+
results.push(...retryResults);
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
return results;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* Single-threaded chunk processing (fallback)
|
|
215
|
+
*/
|
|
216
|
+
async processChunksSingleThreaded(chunks) {
|
|
217
|
+
const results = [];
|
|
218
|
+
|
|
219
|
+
for (const chunk of chunks) {
|
|
220
|
+
try {
|
|
221
|
+
const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });
|
|
222
|
+
results.push({
|
|
223
|
+
file: chunk.file,
|
|
224
|
+
startLine: chunk.startLine,
|
|
225
|
+
endLine: chunk.endLine,
|
|
226
|
+
content: chunk.text,
|
|
227
|
+
vector: Array.from(output.data),
|
|
228
|
+
success: true
|
|
229
|
+
});
|
|
230
|
+
} catch (error) {
|
|
231
|
+
results.push({
|
|
232
|
+
file: chunk.file,
|
|
233
|
+
startLine: chunk.startLine,
|
|
234
|
+
endLine: chunk.endLine,
|
|
235
|
+
error: error.message,
|
|
236
|
+
success: false
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
return results;
|
|
13
242
|
}
|
|
14
243
|
|
|
15
244
|
async indexFile(file) {
|
|
@@ -83,47 +312,272 @@ export class CodebaseIndexer {
|
|
|
83
312
|
}
|
|
84
313
|
}
|
|
85
314
|
|
|
86
|
-
|
|
87
|
-
|
|
315
|
+
/**
|
|
316
|
+
* Discover files using fdir (3-5x faster than glob)
|
|
317
|
+
* Uses config.excludePatterns which includes smart patterns from ignore-patterns.js
|
|
318
|
+
*/
|
|
319
|
+
async discoverFiles() {
|
|
320
|
+
const startTime = Date.now();
|
|
88
321
|
|
|
89
|
-
|
|
90
|
-
const
|
|
91
|
-
ignore: this.config.excludePatterns,
|
|
92
|
-
absolute: true
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
console.error(`[Indexer] Found ${files.length} files to process`);
|
|
322
|
+
// Build extension filter from config
|
|
323
|
+
const extensions = new Set(this.config.fileExtensions.map(ext => `.${ext}`));
|
|
96
324
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
325
|
+
// Extract directory names from glob patterns in config.excludePatterns
|
|
326
|
+
// Patterns like "**/node_modules/**" -> "node_modules"
|
|
327
|
+
const excludeDirs = new Set();
|
|
328
|
+
for (const pattern of this.config.excludePatterns) {
|
|
329
|
+
// Extract directory names from glob patterns
|
|
330
|
+
const match = pattern.match(/\*\*\/([^/*]+)\/?\*?\*?$/);
|
|
331
|
+
if (match) {
|
|
332
|
+
excludeDirs.add(match[1]);
|
|
333
|
+
}
|
|
334
|
+
// Also handle patterns like "**/dirname/**"
|
|
335
|
+
const match2 = pattern.match(/\*\*\/([^/*]+)\/\*\*$/);
|
|
336
|
+
if (match2) {
|
|
337
|
+
excludeDirs.add(match2[1]);
|
|
338
|
+
}
|
|
339
|
+
}
|
|
100
340
|
|
|
101
|
-
//
|
|
102
|
-
|
|
341
|
+
// Always exclude cache directory
|
|
342
|
+
excludeDirs.add(".smart-coding-cache");
|
|
343
|
+
|
|
344
|
+
if (this.config.verbose) {
|
|
345
|
+
console.error(`[Indexer] Using ${excludeDirs.size} exclude directories from config`);
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
const api = new fdir()
|
|
349
|
+
.withFullPaths()
|
|
350
|
+
.exclude((dirName) => excludeDirs.has(dirName))
|
|
351
|
+
.filter((filePath) => extensions.has(path.extname(filePath)))
|
|
352
|
+
.crawl(this.config.searchDirectory);
|
|
353
|
+
|
|
354
|
+
const files = await api.withPromise();
|
|
355
|
+
|
|
356
|
+
console.error(`[Indexer] File discovery: ${files.length} files in ${Date.now() - startTime}ms`);
|
|
357
|
+
return files;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
/**
|
|
361
|
+
* Pre-filter files by hash (skip unchanged files before processing)
|
|
362
|
+
*/
|
|
363
|
+
async preFilterFiles(files) {
|
|
364
|
+
const startTime = Date.now();
|
|
365
|
+
const filesToProcess = [];
|
|
366
|
+
const skippedCount = { unchanged: 0, tooLarge: 0, error: 0 };
|
|
367
|
+
|
|
368
|
+
// Process in parallel batches for speed
|
|
369
|
+
const BATCH_SIZE = 500;
|
|
103
370
|
|
|
104
371
|
for (let i = 0; i < files.length; i += BATCH_SIZE) {
|
|
105
372
|
const batch = files.slice(i, i + BATCH_SIZE);
|
|
106
373
|
|
|
107
|
-
// Process batch in parallel
|
|
108
374
|
const results = await Promise.all(
|
|
109
|
-
batch.map(file =>
|
|
375
|
+
batch.map(async (file) => {
|
|
376
|
+
try {
|
|
377
|
+
const stats = await fs.stat(file);
|
|
378
|
+
|
|
379
|
+
if (stats.isDirectory()) {
|
|
380
|
+
return null;
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
if (stats.size > this.config.maxFileSize) {
|
|
384
|
+
skippedCount.tooLarge++;
|
|
385
|
+
return null;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
const content = await fs.readFile(file, "utf-8");
|
|
389
|
+
const hash = hashContent(content);
|
|
390
|
+
|
|
391
|
+
if (this.cache.getFileHash(file) === hash) {
|
|
392
|
+
skippedCount.unchanged++;
|
|
393
|
+
return null;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
return { file, content, hash };
|
|
397
|
+
} catch (error) {
|
|
398
|
+
skippedCount.error++;
|
|
399
|
+
return null;
|
|
400
|
+
}
|
|
401
|
+
})
|
|
110
402
|
);
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
403
|
+
|
|
404
|
+
for (const result of results) {
|
|
405
|
+
if (result) filesToProcess.push(result);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
console.error(`[Indexer] Pre-filter: ${filesToProcess.length} changed, ${skippedCount.unchanged} unchanged, ${skippedCount.tooLarge} too large, ${skippedCount.error} errors (${Date.now() - startTime}ms)`);
|
|
410
|
+
return filesToProcess;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
async indexAll(force = false) {
|
|
414
|
+
if (this.isIndexing) {
|
|
415
|
+
console.error("[Indexer] Indexing already in progress, skipping concurrent request");
|
|
416
|
+
return { skipped: true, reason: "Indexing already in progress" };
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
this.isIndexing = true;
|
|
420
|
+
|
|
421
|
+
try {
|
|
422
|
+
if (force) {
|
|
423
|
+
console.error("[Indexer] Force reindex requested: clearing cache");
|
|
424
|
+
this.cache.setVectorStore([]);
|
|
425
|
+
this.cache.fileHashes = new Map();
|
|
117
426
|
}
|
|
427
|
+
|
|
428
|
+
const totalStartTime = Date.now();
|
|
429
|
+
console.error(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);
|
|
430
|
+
|
|
431
|
+
// Step 1: Fast file discovery with fdir
|
|
432
|
+
const files = await this.discoverFiles();
|
|
433
|
+
|
|
434
|
+
if (files.length === 0) {
|
|
435
|
+
console.error("[Indexer] No files found to index");
|
|
436
|
+
this.sendProgress(100, 100, "No files found to index");
|
|
437
|
+
return { skipped: false, filesProcessed: 0, chunksCreated: 0, message: "No files found to index" };
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// Send progress: discovery complete
|
|
441
|
+
this.sendProgress(5, 100, `Discovered ${files.length} files`);
|
|
442
|
+
|
|
443
|
+
// Step 2: Pre-filter unchanged files (early hash check)
|
|
444
|
+
const filesToProcess = await this.preFilterFiles(files);
|
|
445
|
+
|
|
446
|
+
if (filesToProcess.length === 0) {
|
|
447
|
+
console.error("[Indexer] All files unchanged, nothing to index");
|
|
448
|
+
this.sendProgress(100, 100, "All files up to date");
|
|
449
|
+
await this.cache.save();
|
|
450
|
+
const vectorStore = this.cache.getVectorStore();
|
|
451
|
+
return {
|
|
452
|
+
skipped: false,
|
|
453
|
+
filesProcessed: 0,
|
|
454
|
+
chunksCreated: 0,
|
|
455
|
+
totalFiles: new Set(vectorStore.map(v => v.file)).size,
|
|
456
|
+
totalChunks: vectorStore.length,
|
|
457
|
+
message: "All files up to date"
|
|
458
|
+
};
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
// Send progress: filtering complete
|
|
462
|
+
this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
|
|
463
|
+
|
|
464
|
+
// Step 3: Determine batch size based on project size
|
|
465
|
+
const adaptiveBatchSize = files.length > 10000 ? 500 :
|
|
466
|
+
files.length > 1000 ? 200 :
|
|
467
|
+
this.config.batchSize || 100;
|
|
468
|
+
|
|
469
|
+
console.error(`[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`);
|
|
470
|
+
|
|
471
|
+
// Step 4: Initialize worker threads (always use when multi-core available)
|
|
472
|
+
const useWorkers = os.cpus().length > 1;
|
|
473
|
+
|
|
474
|
+
if (useWorkers) {
|
|
475
|
+
await this.initializeWorkers();
|
|
476
|
+
console.error(`[Indexer] Multi-threaded mode: ${this.workers.length} workers active`);
|
|
477
|
+
} else {
|
|
478
|
+
console.error(`[Indexer] Single-threaded mode (single-core system)`);
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
let totalChunks = 0;
|
|
482
|
+
let processedFiles = 0;
|
|
483
|
+
|
|
484
|
+
// Step 5: Process files in adaptive batches
|
|
485
|
+
for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
|
|
486
|
+
const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
|
|
487
|
+
|
|
488
|
+
// Generate all chunks for this batch
|
|
489
|
+
const allChunks = [];
|
|
118
490
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
491
|
+
for (const { file, content, hash } of batch) {
|
|
492
|
+
// Remove old chunks for this file
|
|
493
|
+
this.cache.removeFileFromStore(file);
|
|
494
|
+
|
|
495
|
+
const chunks = smartChunk(content, file, this.config);
|
|
496
|
+
|
|
497
|
+
for (const chunk of chunks) {
|
|
498
|
+
allChunks.push({
|
|
499
|
+
file,
|
|
500
|
+
text: chunk.text,
|
|
501
|
+
startLine: chunk.startLine,
|
|
502
|
+
endLine: chunk.endLine,
|
|
503
|
+
hash
|
|
504
|
+
});
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
// Process chunks (with workers if available, otherwise single-threaded)
|
|
509
|
+
let results;
|
|
510
|
+
if (useWorkers && this.workers.length > 0) {
|
|
511
|
+
results = await this.processChunksWithWorkers(allChunks);
|
|
512
|
+
} else {
|
|
513
|
+
results = await this.processChunksSingleThreaded(allChunks);
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
// Store successful results
|
|
517
|
+
const fileHashes = new Map();
|
|
518
|
+
for (const result of results) {
|
|
519
|
+
if (result.success) {
|
|
520
|
+
this.cache.addToStore({
|
|
521
|
+
file: result.file,
|
|
522
|
+
startLine: result.startLine,
|
|
523
|
+
endLine: result.endLine,
|
|
524
|
+
content: result.content,
|
|
525
|
+
vector: result.vector
|
|
526
|
+
});
|
|
527
|
+
totalChunks++;
|
|
528
|
+
}
|
|
529
|
+
// Track hash for each file
|
|
530
|
+
const chunkInfo = allChunks.find(c => c.file === result.file);
|
|
531
|
+
if (chunkInfo) {
|
|
532
|
+
fileHashes.set(result.file, chunkInfo.hash);
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
// Update file hashes
|
|
537
|
+
for (const [file, hash] of fileHashes) {
|
|
538
|
+
this.cache.setFileHash(file, hash);
|
|
122
539
|
}
|
|
540
|
+
|
|
541
|
+
processedFiles += batch.length;
|
|
542
|
+
|
|
543
|
+
// Progress indicator every batch
|
|
544
|
+
if (processedFiles % (adaptiveBatchSize * 2) === 0 || processedFiles === filesToProcess.length) {
|
|
545
|
+
const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
|
|
546
|
+
const rate = (processedFiles / parseFloat(elapsed)).toFixed(0);
|
|
547
|
+
console.error(`[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec)`);
|
|
548
|
+
|
|
549
|
+
// Send MCP progress notification (10-95% range for batch processing)
|
|
550
|
+
const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
|
|
551
|
+
this.sendProgress(progressPercent, 100, `Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`);
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
// Cleanup workers
|
|
556
|
+
if (useWorkers) {
|
|
557
|
+
this.terminateWorkers();
|
|
123
558
|
}
|
|
124
559
|
|
|
125
|
-
|
|
560
|
+
const totalTime = ((Date.now() - totalStartTime) / 1000).toFixed(1);
|
|
561
|
+
console.error(`[Indexer] Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
|
|
562
|
+
|
|
563
|
+
// Send completion progress
|
|
564
|
+
this.sendProgress(100, 100, `Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
|
|
565
|
+
|
|
126
566
|
await this.cache.save();
|
|
567
|
+
|
|
568
|
+
const vectorStore = this.cache.getVectorStore();
|
|
569
|
+
return {
|
|
570
|
+
skipped: false,
|
|
571
|
+
filesProcessed: filesToProcess.length,
|
|
572
|
+
chunksCreated: totalChunks,
|
|
573
|
+
totalFiles: new Set(vectorStore.map(v => v.file)).size,
|
|
574
|
+
totalChunks: vectorStore.length,
|
|
575
|
+
duration: totalTime,
|
|
576
|
+
message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`
|
|
577
|
+
};
|
|
578
|
+
} finally {
|
|
579
|
+
this.isIndexing = false;
|
|
580
|
+
}
|
|
127
581
|
}
|
|
128
582
|
|
|
129
583
|
setupFileWatcher() {
|
|
@@ -191,25 +645,41 @@ export function getToolDefinition() {
|
|
|
191
645
|
/**
 * Tool handler: run a (re)index and describe the outcome as a text reply.
 *
 * Reads the optional `force` argument from the request, calls
 * `indexer.indexAll(force)` and builds the reply. If the run was skipped
 * because another one is in progress, the reply says so. Otherwise it lists
 * index statistics, preferring the totals returned by `indexAll` and falling
 * back to the cache's vector store when they are absent.
 *
 * @param {object} request - Tool call request; `params.arguments.force` is
 *   optional.
 * @param {object} indexer - Object exposing `indexAll(force)` and
 *   `cache.getVectorStore()`.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 */
export async function handleToolCall(request, indexer) {
  const force = request.params.arguments?.force || false;
  const result = await indexer.indexAll(force);

  // Every reply has the same single-text-item shape.
  const reply = (text) => ({
    content: [{
      type: "text",
      text
    }]
  });

  // Handle case when indexing was skipped due to concurrent request
  if (result?.skipped) {
    return reply(`Indexing skipped: ${result.reason}\n\nPlease wait for the current indexing operation to complete before requesting another reindex.`);
  }

  // Get current stats from cache
  const vectorStore = indexer.cache.getVectorStore();
  const totalChunks = result?.totalChunks ?? vectorStore.length;
  const totalFiles = result?.totalFiles ?? new Set(vectorStore.map(v => v.file)).size;
  const filesProcessed = result?.filesProcessed ?? 0;
  const chunksCreated = result?.chunksCreated ?? 0;

  const parts = [];
  if (result?.message) {
    parts.push(`Codebase reindexed successfully.\n\n${result.message}`);
  } else {
    parts.push(`Codebase reindexed successfully.`);
  }

  parts.push(`\n\nStatistics:\n- Total files in index: ${totalFiles}\n- Total code chunks: ${totalChunks}`);

  if (filesProcessed > 0) {
    parts.push(`\n- Files processed this run: ${filesProcessed}\n- Chunks created this run: ${chunksCreated}`);
  }

  return reply(parts.join(""));
}
|
|
Binary file
|
package/index.js
CHANGED
|
@@ -95,9 +95,9 @@ async function initialize() {
|
|
|
95
95
|
await cache.load();
|
|
96
96
|
|
|
97
97
|
// Initialize features
|
|
98
|
-
indexer = new CodebaseIndexer(embedder, cache, config);
|
|
98
|
+
indexer = new CodebaseIndexer(embedder, cache, config, server);
|
|
99
99
|
hybridSearch = new HybridSearch(embedder, cache, config);
|
|
100
|
-
const cacheClearer = new ClearCacheFeature.CacheClearer(embedder, cache, config);
|
|
100
|
+
const cacheClearer = new ClearCacheFeature.CacheClearer(embedder, cache, config, indexer);
|
|
101
101
|
|
|
102
102
|
// Store feature instances (matches features array order)
|
|
103
103
|
features[0].instance = hybridSearch;
|
package/lib/cache.js
CHANGED
|
@@ -6,6 +6,7 @@ export class EmbeddingsCache {
|
|
|
6
6
|
this.config = config;
|
|
7
7
|
this.vectorStore = [];
|
|
8
8
|
this.fileHashes = new Map();
|
|
9
|
+
this.isSaving = false;
|
|
9
10
|
}
|
|
10
11
|
|
|
11
12
|
async load() {
|
|
@@ -55,6 +56,8 @@ export class EmbeddingsCache {
|
|
|
55
56
|
async save() {
|
|
56
57
|
if (!this.config.enableCache) return;
|
|
57
58
|
|
|
59
|
+
this.isSaving = true;
|
|
60
|
+
|
|
58
61
|
try {
|
|
59
62
|
await fs.mkdir(this.config.cacheDirectory, { recursive: true });
|
|
60
63
|
const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
|
|
@@ -66,6 +69,8 @@ export class EmbeddingsCache {
|
|
|
66
69
|
]);
|
|
67
70
|
} catch (error) {
|
|
68
71
|
console.error("[Cache] Failed to save cache:", error.message);
|
|
72
|
+
} finally {
|
|
73
|
+
this.isSaving = false;
|
|
69
74
|
}
|
|
70
75
|
}
|
|
71
76
|
|