@softerist/heuristic-mcp 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,811 @@
1
+ import { fdir } from "fdir";
2
+ import fs from "fs/promises";
3
+ import chokidar from "chokidar";
4
+ import path from "path";
5
+ import os from "os";
6
+ import { Worker } from "worker_threads";
7
+ import { fileURLToPath } from "url";
8
+ import { smartChunk, hashContent } from "../lib/utils.js";
9
+
10
/**
 * Escape every regular-expression metacharacter in a string so it can be
 * embedded in a RegExp as a literal.
 * @param {string} value - Raw text to escape.
 * @returns {string} The input with each special character backslash-prefixed.
 */
function escapeRegExp(value) {
  const specials = new Set([".", "*", "+", "?", "^", "$", "{", "}", "(", ")", "|", "[", "]", "\\"]);
  let escaped = "";
  for (const ch of value) {
    escaped += specials.has(ch) ? `\\${ch}` : ch;
  }
  return escaped;
}
13
+
14
+ function globToRegExp(pattern) {
15
+ let regex = "^";
16
+ for (let i = 0; i < pattern.length; ) {
17
+ const char = pattern[i];
18
+ if (char === "*") {
19
+ if (pattern[i + 1] === "*") {
20
+ if (pattern[i + 2] === "/") {
21
+ regex += "(?:.*/)?";
22
+ i += 3;
23
+ } else {
24
+ regex += ".*";
25
+ i += 2;
26
+ }
27
+ } else {
28
+ regex += "[^/]*";
29
+ i += 1;
30
+ }
31
+ continue;
32
+ }
33
+ if (char === "?") {
34
+ regex += "[^/]";
35
+ i += 1;
36
+ continue;
37
+ }
38
+ regex += escapeRegExp(char);
39
+ i += 1;
40
+ }
41
+ regex += "$";
42
+ return new RegExp(regex);
43
+ }
44
+
45
/**
 * Convert a host-OS path into forward-slash ("/") form.
 * @param {string} filePath - Path using the platform separator.
 * @returns {string} The path with every separator replaced by "/".
 */
function normalizePath(filePath) {
  // On POSIX the separator already is "/", so there is nothing to rewrite.
  if (path.sep === "/") return filePath;
  return filePath.replaceAll(path.sep, "/");
}
48
+
49
/**
 * Compile glob exclude patterns into matcher objects.
 *
 * Patterns that contain no "/" are treated as basename matchers
 * ("matchBase"), mirroring .gitignore-style semantics; all other patterns
 * match against the full normalized path. Duplicates and falsy entries are
 * dropped while preserving first-seen order.
 *
 * @param {string[]} patterns - Raw glob patterns from configuration.
 * @returns {{matchBase: boolean, regex: RegExp}[]} Compiled matchers.
 */
function buildExcludeMatchers(patterns) {
  const seen = new Set();
  const matchers = [];
  for (const pattern of patterns) {
    if (!pattern || seen.has(pattern)) continue;
    seen.add(pattern);
    matchers.push({
      matchBase: !pattern.includes("/"),
      regex: globToRegExp(pattern)
    });
  }
  return matchers;
}
57
+
58
/**
 * Test whether a file path is rejected by any compiled exclude matcher.
 * @param {string} filePath - Path in host-OS form.
 * @param {{matchBase: boolean, regex: RegExp}[]} matchers - Compiled matchers.
 * @returns {boolean} True when at least one matcher matches.
 */
function matchesExcludePatterns(filePath, matchers) {
  if (matchers.length === 0) return false;

  // Normalize to "/" separators so regexes built from globs apply cross-platform.
  const normalized = filePath.split(path.sep).join("/");
  const basename = path.posix.basename(normalized);

  // matchBase patterns (no "/") are tested against the basename only.
  return matchers.some(({ matchBase, regex }) =>
    regex.test(matchBase ? basename : normalized)
  );
}
71
+
72
// ES modules have no built-in __dirname; derive it from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
73
+
74
+ export class CodebaseIndexer {
75
+ constructor(embedder, cache, config, server = null) {
76
+ this.embedder = embedder;
77
+ this.cache = cache;
78
+ this.config = config;
79
+ this.server = server;
80
+ this.watcher = null;
81
+ this.workers = [];
82
+ this.workerReady = [];
83
+ this.isIndexing = false;
84
+ this.excludeMatchers = buildExcludeMatchers(this.config.excludePatterns || []);
85
+ }
86
+
87
  /**
   * Initialize the worker-thread pool used for parallel embedding.
   *
   * Resolves the pool size from config.workerThreads ("auto" caps at 4,
   * leaving one CPU free) and spawns one embedding worker per slot. Each
   * worker must post a "ready" message within 120s; if any worker fails to
   * come up, the whole pool is torn down so the indexer falls back to
   * single-threaded embedding.
   */
  async initializeWorkers() {
    const numWorkers = this.config.workerThreads === "auto"
      ? Math.min(4, Math.max(1, os.cpus().length - 1)) // Cap 'auto' at 4 workers
      : (this.config.workerThreads || 1);

    // Only use workers if we have more than 1 CPU
    if (numWorkers <= 1) {
      console.error("[Indexer] Single-threaded mode (1 CPU detected)");
      return;
    }

    if (this.config.verbose) {
      console.error(`[Indexer] Worker config: workerThreads=${this.config.workerThreads}, resolved to ${numWorkers}`);
    }

    console.error(`[Indexer] Initializing ${numWorkers} worker threads...`);

    // Worker script shipped alongside this module.
    const workerPath = path.join(__dirname, "../lib/embedding-worker.js");

    for (let i = 0; i < numWorkers; i++) {
      try {
        const worker = new Worker(workerPath, {
          workerData: {
            embeddingModel: this.config.embeddingModel,
            verbose: this.config.verbose
          }
        });

        // Resolves once the worker reports "ready"; rejects on an "error"
        // message, a worker crash, or the 120s init timeout.
        const readyPromise = new Promise((resolve, reject) => {
          const timeout = setTimeout(() => reject(new Error("Worker init timeout")), 120000);

          worker.once("message", (msg) => {
            clearTimeout(timeout);
            if (msg.type === "ready") {
              resolve(worker);
            } else if (msg.type === "error") {
              reject(new Error(msg.error));
            }
          });

          worker.once("error", (err) => {
            clearTimeout(timeout);
            reject(err);
          });
        });

        this.workers.push(worker);
        this.workerReady.push(readyPromise);
      } catch (err) {
        console.error(`[Indexer] Failed to create worker ${i}: ${err.message}`);
      }
    }

    // Wait for all workers to be ready
    try {
      await Promise.all(this.workerReady);
      console.error(`[Indexer] ${this.workers.length} workers ready`);
      if (this.config.verbose) {
        console.error(`[Indexer] Each worker loaded model: ${this.config.embeddingModel}`);
      }
    } catch (err) {
      // All-or-nothing: any failed worker drops the pool so callers see
      // workers.length === 0 and take the single-threaded path.
      console.error(`[Indexer] Worker initialization failed: ${err.message}, falling back to single-threaded`);
      this.terminateWorkers();
    }
  }
155
+
156
+ /**
157
+ * Terminate all worker threads
158
+ */
159
+ terminateWorkers() {
160
+ for (const worker of this.workers) {
161
+ worker.postMessage({ type: "shutdown" });
162
+ }
163
+ this.workers = [];
164
+ this.workerReady = [];
165
+ }
166
+
167
+ isExcluded(filePath) {
168
+ return matchesExcludePatterns(filePath, this.excludeMatchers);
169
+ }
170
+
171
+ /**
172
+ * Send MCP progress notification to connected clients
173
+ */
174
+ sendProgress(progress, total, message) {
175
+ if (this.server) {
176
+ try {
177
+ this.server.sendNotification("notifications/progress", {
178
+ progressToken: "indexing",
179
+ progress,
180
+ total,
181
+ message
182
+ });
183
+ } catch (err) {
184
+ // Silently ignore if client doesn't support progress notifications
185
+ }
186
+ }
187
+ }
188
+
189
+ /**
190
+ * Process chunks using worker thread pool with timeout and error recovery
191
+ */
192
+ async processChunksWithWorkers(allChunks) {
193
+ if (this.workers.length === 0) {
194
+ // Fallback to single-threaded processing
195
+ return this.processChunksSingleThreaded(allChunks);
196
+ }
197
+
198
+ const results = [];
199
+ const chunkSize = Math.ceil(allChunks.length / this.workers.length);
200
+ const workerPromises = [];
201
+ const WORKER_TIMEOUT = 300000; // 5 minutes per batch
202
+
203
+ if (this.config.verbose) {
204
+ console.error(`[Indexer] Distributing ${allChunks.length} chunks across ${this.workers.length} workers (~${chunkSize} chunks each)`);
205
+ }
206
+
207
+ for (let i = 0; i < this.workers.length; i++) {
208
+ const workerChunks = allChunks.slice(i * chunkSize, (i + 1) * chunkSize);
209
+ if (workerChunks.length === 0) continue;
210
+
211
+ if (this.config.verbose) {
212
+ console.error(`[Indexer] Worker ${i}: processing ${workerChunks.length} chunks`);
213
+ }
214
+
215
+ const promise = new Promise((resolve, reject) => {
216
+ const worker = this.workers[i];
217
+ const batchId = `batch-${i}-${Date.now()}`;
218
+
219
+ // Timeout handler
220
+ const timeout = setTimeout(() => {
221
+ worker.off("message", handler);
222
+ console.error(`[Indexer] Worker ${i} timed out, falling back to single-threaded for this batch`);
223
+ // Return empty and let fallback handle it
224
+ resolve([]);
225
+ }, WORKER_TIMEOUT);
226
+
227
+ const handler = (msg) => {
228
+ if (msg.batchId === batchId) {
229
+ clearTimeout(timeout);
230
+ worker.off("message", handler);
231
+ if (msg.type === "results") {
232
+ resolve(msg.results);
233
+ } else if (msg.type === "error") {
234
+ console.error(`[Indexer] Worker ${i} error: ${msg.error}`);
235
+ resolve([]); // Return empty, don't reject - let fallback handle
236
+ }
237
+ }
238
+ };
239
+
240
+ // Handle worker crash
241
+ const errorHandler = (err) => {
242
+ clearTimeout(timeout);
243
+ worker.off("message", handler);
244
+ console.error(`[Indexer] Worker ${i} crashed: ${err.message}`);
245
+ resolve([]); // Return empty, don't reject
246
+ };
247
+ worker.once("error", errorHandler);
248
+
249
+ worker.on("message", handler);
250
+ worker.postMessage({ type: "process", chunks: workerChunks, batchId });
251
+ });
252
+
253
+ workerPromises.push({ promise, chunks: workerChunks });
254
+ }
255
+
256
+ // Wait for all workers with error recovery
257
+ const workerResults = await Promise.all(workerPromises.map(p => p.promise));
258
+
259
+ // Collect results and identify failed chunks that need retry
260
+ const failedChunks = [];
261
+ for (let i = 0; i < workerResults.length; i++) {
262
+ if (workerResults[i].length > 0) {
263
+ results.push(...workerResults[i]);
264
+ } else if (workerPromises[i].chunks.length > 0) {
265
+ // Worker failed or timed out, need to retry these chunks
266
+ failedChunks.push(...workerPromises[i].chunks);
267
+ }
268
+ }
269
+
270
+ // Retry failed chunks with single-threaded fallback
271
+ if (failedChunks.length > 0) {
272
+ console.error(`[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`);
273
+ const retryResults = await this.processChunksSingleThreaded(failedChunks);
274
+ results.push(...retryResults);
275
+ }
276
+
277
+ return results;
278
+ }
279
+
280
+ /**
281
+ * Single-threaded chunk processing (fallback)
282
+ */
283
+ async processChunksSingleThreaded(chunks) {
284
+ const results = [];
285
+
286
+ for (const chunk of chunks) {
287
+ try {
288
+ const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });
289
+ results.push({
290
+ file: chunk.file,
291
+ startLine: chunk.startLine,
292
+ endLine: chunk.endLine,
293
+ content: chunk.text,
294
+ vector: Array.from(output.data),
295
+ success: true
296
+ });
297
+ } catch (error) {
298
+ results.push({
299
+ file: chunk.file,
300
+ startLine: chunk.startLine,
301
+ endLine: chunk.endLine,
302
+ error: error.message,
303
+ success: false
304
+ });
305
+ }
306
+ }
307
+
308
+ return results;
309
+ }
310
+
311
  /**
   * Index one file end-to-end: exclusion/size/hash checks, chunking,
   * per-chunk embedding, and cache update.
   *
   * The file hash is only recorded when every chunk embedded successfully
   * (or the file produced no chunks), so a partially-embedded file will be
   * retried on the next indexing pass.
   *
   * @param {string} file - Path of the file to index.
   * @returns {Promise<number>} Count of chunks successfully embedded;
   *   0 when the file was skipped or an error occurred.
   */
  async indexFile(file) {
    const fileName = path.basename(file);
    if (this.isExcluded(file)) {
      if (this.config.verbose) {
        console.error(`[Indexer] Skipped ${fileName} (excluded by pattern)`);
      }
      return 0;
    }
    if (this.config.verbose) {
      console.error(`[Indexer] Processing: ${fileName}...`);
    }

    try {
      // Check file size first
      const stats = await fs.stat(file);

      // Skip directories
      if (stats.isDirectory()) {
        return 0;
      }

      if (stats.size > this.config.maxFileSize) {
        if (this.config.verbose) {
          console.error(`[Indexer] Skipped ${fileName} (too large: ${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
        }
        return 0;
      }

      const content = await fs.readFile(file, "utf-8");
      const hash = hashContent(content);

      // Skip if file hasn't changed
      if (this.cache.getFileHash(file) === hash) {
        if (this.config.verbose) {
          console.error(`[Indexer] Skipped ${fileName} (unchanged)`);
        }
        return 0;
      }

      if (this.config.verbose) {
        console.error(`[Indexer] Indexing ${fileName}...`);
      }

      // Remove old chunks for this file
      // (done before embedding; a mid-embed failure leaves the file absent
      // from the store, but the stale hash check below forces a retry later)
      this.cache.removeFileFromStore(file);

      const chunks = smartChunk(content, file, this.config);
      let addedChunks = 0;
      let failedChunks = 0;

      // Embed each chunk sequentially; individual failures are tolerated.
      for (const chunk of chunks) {
        try {
          const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });

          this.cache.addToStore({
            file,
            startLine: chunk.startLine,
            endLine: chunk.endLine,
            content: chunk.text,
            vector: Array.from(output.data)
          });
          addedChunks++;
        } catch (embeddingError) {
          failedChunks++;
          console.error(`[Indexer] Failed to embed chunk in ${fileName}:`, embeddingError.message);
        }
      }

      // Only persist the hash when nothing failed, so partial files re-index.
      if (chunks.length === 0 || failedChunks === 0) {
        this.cache.setFileHash(file, hash);
      } else if (this.config.verbose) {
        console.error(`[Indexer] Skipped hash update for ${fileName} (${addedChunks}/${chunks.length} chunks embedded)`);
      }
      if (this.config.verbose) {
        console.error(`[Indexer] Completed ${fileName} (${addedChunks} chunks)`);
      }
      return addedChunks;
    } catch (error) {
      // stat/read failures and other unexpected errors are logged, not thrown.
      console.error(`[Indexer] Error indexing ${fileName}:`, error.message);
      return 0;
    }
  }
393
+
394
+ /**
395
+ * Discover files using fdir (3-5x faster than glob)
396
+ * Uses config.excludePatterns which includes smart patterns from ignore-patterns.js
397
+ */
398
+ async discoverFiles() {
399
+ const startTime = Date.now();
400
+
401
+ // Build extension filter from config
402
+ const extensions = new Set(this.config.fileExtensions.map(ext => `.${ext}`));
403
+
404
+ // Extract directory names from glob patterns in config.excludePatterns
405
+ // Patterns like "**/node_modules/**" -> "node_modules"
406
+ const excludeDirs = new Set();
407
+ for (const pattern of this.config.excludePatterns) {
408
+ // Extract directory names from glob patterns
409
+ const match = pattern.match(/\*\*\/([^/*]+)\/?\*?\*?$/);
410
+ if (match) {
411
+ excludeDirs.add(match[1]);
412
+ }
413
+ // Also handle patterns like "**/dirname/**"
414
+ const match2 = pattern.match(/\*\*\/([^/*]+)\/\*\*$/);
415
+ if (match2) {
416
+ excludeDirs.add(match2[1]);
417
+ }
418
+ }
419
+
420
+ // Always exclude cache directory
421
+ excludeDirs.add(".smart-coding-cache");
422
+
423
+ if (this.config.verbose) {
424
+ console.error(`[Indexer] Using ${excludeDirs.size} exclude directories from config`);
425
+ }
426
+
427
+ const api = new fdir()
428
+ .withFullPaths()
429
+ .exclude((dirName) => excludeDirs.has(dirName))
430
+ .filter((filePath) => extensions.has(path.extname(filePath)) && !this.isExcluded(filePath))
431
+ .crawl(this.config.searchDirectory);
432
+
433
+ const files = await api.withPromise();
434
+
435
+ console.error(`[Indexer] File discovery: ${files.length} files in ${Date.now() - startTime}ms`);
436
+ return files;
437
+ }
438
+
439
+ /**
440
+ * Pre-filter files by hash (skip unchanged files before processing)
441
+ */
442
+ async preFilterFiles(files) {
443
+ const startTime = Date.now();
444
+ const filesToProcess = [];
445
+ const skippedCount = { unchanged: 0, tooLarge: 0, error: 0 };
446
+
447
+ // Process in parallel batches for speed
448
+ const BATCH_SIZE = 500;
449
+
450
+ for (let i = 0; i < files.length; i += BATCH_SIZE) {
451
+ const batch = files.slice(i, i + BATCH_SIZE);
452
+
453
+ const results = await Promise.all(
454
+ batch.map(async (file) => {
455
+ try {
456
+ const stats = await fs.stat(file);
457
+
458
+ if (stats.isDirectory()) {
459
+ return null;
460
+ }
461
+
462
+ if (stats.size > this.config.maxFileSize) {
463
+ skippedCount.tooLarge++;
464
+ return null;
465
+ }
466
+
467
+ const content = await fs.readFile(file, "utf-8");
468
+ const hash = hashContent(content);
469
+
470
+ if (this.cache.getFileHash(file) === hash) {
471
+ skippedCount.unchanged++;
472
+ return null;
473
+ }
474
+
475
+ return { file, content, hash };
476
+ } catch (error) {
477
+ skippedCount.error++;
478
+ return null;
479
+ }
480
+ })
481
+ );
482
+
483
+ for (const result of results) {
484
+ if (result) filesToProcess.push(result);
485
+ }
486
+ }
487
+
488
+ console.error(`[Indexer] Pre-filter: ${filesToProcess.length} changed, ${skippedCount.unchanged} unchanged, ${skippedCount.tooLarge} too large, ${skippedCount.error} errors (${Date.now() - startTime}ms)`);
489
+ return filesToProcess;
490
+ }
491
+
492
  /**
   * Run a full (optionally forced) indexing pass over the search directory.
   *
   * Pipeline: discover files -> prune deleted/excluded files from the cache
   * -> pre-filter unchanged files by content hash -> chunk and embed changed
   * files in adaptive batches (worker pool when multi-core) -> persist cache.
   * Guarded by this.isIndexing so concurrent requests are skipped.
   *
   * @param {boolean} [force=false] - Clear the cache and reindex everything.
   * @returns {Promise<object>} Summary: { skipped, reason } when a run is
   *   already in progress, otherwise counts, totals and a status message.
   */
  async indexAll(force = false) {
    if (this.isIndexing) {
      console.error("[Indexer] Indexing already in progress, skipping concurrent request");
      return { skipped: true, reason: "Indexing already in progress" };
    }

    this.isIndexing = true;

    try {
      if (force) {
        // Drop both the vector store and all recorded file hashes.
        console.error("[Indexer] Force reindex requested: clearing cache");
        this.cache.setVectorStore([]);
        this.cache.fileHashes = new Map();
      }

      const totalStartTime = Date.now();
      console.error(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);

      // Step 1: Fast file discovery with fdir
      const files = await this.discoverFiles();

      if (files.length === 0) {
        console.error("[Indexer] No files found to index");
        this.sendProgress(100, 100, "No files found to index");
        return { skipped: false, filesProcessed: 0, chunksCreated: 0, message: "No files found to index" };
      }

      // Send progress: discovery complete
      this.sendProgress(5, 100, `Discovered ${files.length} files`);

      // Step 1.5: Prune deleted or excluded files from cache
      // (skipped on force: the cache was already cleared above)
      if (!force) {
        const currentFilesSet = new Set(files);
        const cachedFiles = Array.from(this.cache.fileHashes.keys());
        let prunedCount = 0;

        for (const cachedFile of cachedFiles) {
          if (!currentFilesSet.has(cachedFile)) {
            this.cache.removeFileFromStore(cachedFile);
            this.cache.deleteFileHash(cachedFile);
            prunedCount++;
          }
        }

        if (prunedCount > 0) {
          if (this.config.verbose) {
            console.error(`[Indexer] Pruned ${prunedCount} deleted/excluded files from index`);
          }
          // If we pruned files, we should save these changes even if no other files changed
        }
      }

      // Step 2: Pre-filter unchanged files (early hash check)
      const filesToProcess = await this.preFilterFiles(files);

      if (filesToProcess.length === 0) {
        // Nothing changed; still save so any pruning above is persisted.
        console.error("[Indexer] All files unchanged, nothing to index");
        this.sendProgress(100, 100, "All files up to date");
        await this.cache.save();
        const vectorStore = this.cache.getVectorStore();
        return {
          skipped: false,
          filesProcessed: 0,
          chunksCreated: 0,
          totalFiles: new Set(vectorStore.map(v => v.file)).size,
          totalChunks: vectorStore.length,
          message: "All files up to date"
        };
      }

      // Send progress: filtering complete
      this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);

      // Step 3: Determine batch size based on project size
      const adaptiveBatchSize = files.length > 10000 ? 500 :
                                files.length > 1000 ? 200 :
                                this.config.batchSize || 100;

      console.error(`[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`);

      // Step 4: Initialize worker threads (always use when multi-core available)
      const useWorkers = os.cpus().length > 1;

      if (useWorkers) {
        await this.initializeWorkers();
        console.error(`[Indexer] Multi-threaded mode: ${this.workers.length} workers active`);
      } else {
        console.error(`[Indexer] Single-threaded mode (single-core system)`);
      }

      let totalChunks = 0;
      let processedFiles = 0;

      // Step 5: Process files in adaptive batches
      for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
        const batch = filesToProcess.slice(i, i + adaptiveBatchSize);

        // Generate all chunks for this batch
        const allChunks = [];
        // Per-file bookkeeping so the hash is only saved on full success.
        const fileStats = new Map();

        for (const { file, content, hash } of batch) {
          // Remove old chunks for this file
          this.cache.removeFileFromStore(file);

          const chunks = smartChunk(content, file, this.config);
          fileStats.set(file, { hash, totalChunks: 0, successChunks: 0 });

          for (const chunk of chunks) {
            allChunks.push({
              file,
              text: chunk.text,
              startLine: chunk.startLine,
              endLine: chunk.endLine
            });
            const stats = fileStats.get(file);
            if (stats) {
              stats.totalChunks++;
            }
          }
        }

        // Process chunks (with workers if available, otherwise single-threaded)
        let results;
        if (useWorkers && this.workers.length > 0) {
          results = await this.processChunksWithWorkers(allChunks);
        } else {
          results = await this.processChunksSingleThreaded(allChunks);
        }

        // Store successful results
        for (const result of results) {
          const stats = fileStats.get(result.file);
          if (result.success) {
            this.cache.addToStore({
              file: result.file,
              startLine: result.startLine,
              endLine: result.endLine,
              content: result.content,
              vector: result.vector
            });
            totalChunks++;
            if (stats) {
              stats.successChunks++;
            }
          }
        }

        // Update file hashes
        // (only when every chunk for the file succeeded, so partial files retry)
        for (const [file, stats] of fileStats) {
          if (stats.totalChunks === 0 || stats.successChunks === stats.totalChunks) {
            this.cache.setFileHash(file, stats.hash);
          } else if (this.config.verbose) {
            console.error(`[Indexer] Skipped hash update for ${path.basename(file)} (${stats.successChunks}/${stats.totalChunks} chunks embedded)`);
          }
        }

        processedFiles += batch.length;

        // Progress indicator every batch
        if (processedFiles % (adaptiveBatchSize * 2) === 0 || processedFiles === filesToProcess.length) {
          const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
          const rate = (processedFiles / parseFloat(elapsed)).toFixed(0);
          console.error(`[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec)`);

          // Send MCP progress notification (10-95% range for batch processing)
          const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
          this.sendProgress(progressPercent, 100, `Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`);
        }
      }

      // Cleanup workers
      if (useWorkers) {
        this.terminateWorkers();
      }

      const totalTime = ((Date.now() - totalStartTime) / 1000).toFixed(1);
      console.error(`[Indexer] Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);

      // Send completion progress
      this.sendProgress(100, 100, `Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);

      await this.cache.save();

      const vectorStore = this.cache.getVectorStore();
      return {
        skipped: false,
        filesProcessed: filesToProcess.length,
        chunksCreated: totalChunks,
        totalFiles: new Set(vectorStore.map(v => v.file)).size,
        totalChunks: vectorStore.length,
        duration: totalTime,
        message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`
      };
    } finally {
      // Always release the re-entrancy guard, even on errors.
      this.isIndexing = false;
    }
  }
690
+
691
+ setupFileWatcher() {
692
+ if (!this.config.watchFiles) return;
693
+
694
+ const pattern = this.config.fileExtensions.map(ext => `**/*.${ext}`);
695
+
696
+ this.watcher = chokidar.watch(pattern, {
697
+ cwd: this.config.searchDirectory,
698
+ ignored: this.config.excludePatterns,
699
+ persistent: true,
700
+ ignoreInitial: true
701
+ });
702
+
703
+ this.watcher
704
+ .on("add", async (filePath) => {
705
+ const fullPath = path.join(this.config.searchDirectory, filePath);
706
+ console.error(`[Indexer] New file detected: ${filePath}`);
707
+
708
+ // Invalidate recency cache
709
+ if (this.server && this.server.hybridSearch) {
710
+ this.server.hybridSearch.clearFileModTime(fullPath);
711
+ }
712
+
713
+ await this.indexFile(fullPath);
714
+ await this.cache.save();
715
+ })
716
+ .on("change", async (filePath) => {
717
+ const fullPath = path.join(this.config.searchDirectory, filePath);
718
+ console.error(`[Indexer] File changed: ${filePath}`);
719
+
720
+ // Invalidate recency cache
721
+ if (this.server && this.server.hybridSearch) {
722
+ this.server.hybridSearch.clearFileModTime(fullPath);
723
+ }
724
+
725
+ await this.indexFile(fullPath);
726
+ await this.cache.save();
727
+ })
728
+ .on("unlink", (filePath) => {
729
+ const fullPath = path.join(this.config.searchDirectory, filePath);
730
+ console.error(`[Indexer] File deleted: ${filePath}`);
731
+
732
+ // Invalidate recency cache
733
+ if (this.server && this.server.hybridSearch) {
734
+ this.server.hybridSearch.clearFileModTime(fullPath);
735
+ }
736
+
737
+ this.cache.removeFileFromStore(fullPath);
738
+ this.cache.deleteFileHash(fullPath);
739
+ this.cache.save();
740
+ });
741
+
742
+ console.error("[Indexer] File watcher enabled for incremental indexing");
743
+ }
744
+ }
745
+
746
// MCP Tool definition for this feature
/**
 * Describe the "b_index_codebase" MCP tool (input schema plus behavior
 * annotations consumed by the MCP server).
 * @returns {object} Tool definition.
 */
export function getToolDefinition() {
  const inputSchema = {
    type: "object",
    properties: {
      force: {
        type: "boolean",
        description: "Force reindex even if files haven't changed",
        default: false
      }
    }
  };

  const annotations = {
    title: "Reindex Codebase",
    readOnlyHint: false,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false
  };

  return {
    name: "b_index_codebase",
    description: "Manually trigger a full reindex of the codebase. This will scan all files and update the embeddings cache. Useful after large code changes or if the index seems out of date.",
    inputSchema,
    annotations
  };
}
770
+
771
// Tool handler
/**
 * Handle a "b_index_codebase" tool invocation: run indexer.indexAll() and
 * format the outcome as MCP text content.
 * @param {object} request - MCP tool-call request ({ params.arguments.force }).
 * @param {object} indexer - CodebaseIndexer instance to drive.
 * @returns {Promise<{content: {type: string, text: string}[]}>} MCP response.
 */
export async function handleToolCall(request, indexer) {
  const force = request.params.arguments?.force || false;
  const result = await indexer.indexAll(force);

  const asText = (text) => ({ content: [{ type: "text", text }] });

  // A concurrent indexing run means this request was skipped entirely.
  if (result?.skipped) {
    return asText(`Indexing skipped: ${result.reason}\n\nPlease wait for the current indexing operation to complete before requesting another reindex.`);
  }

  // Fall back to live cache stats when the result omits totals.
  const vectorStore = indexer.cache.getVectorStore();
  const totalChunks = result?.totalChunks ?? vectorStore.length;
  const totalFiles = result?.totalFiles ?? new Set(vectorStore.map((v) => v.file)).size;
  const filesProcessed = result?.filesProcessed ?? 0;
  const chunksCreated = result?.chunksCreated ?? 0;

  const lines = [
    result?.message
      ? `Codebase reindexed successfully.\n\n${result.message}`
      : `Codebase reindexed successfully.`,
    `\n\nStatistics:\n- Total files in index: ${totalFiles}\n- Total code chunks: ${totalChunks}`
  ];

  if (filesProcessed > 0) {
    lines.push(`\n- Files processed this run: ${filesProcessed}\n- Chunks created this run: ${chunksCreated}`);
  }

  return asText(lines.join(""));
}