semantic-code-mcp 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +259 -0
- package/config.json +85 -0
- package/features/check-last-version.js +504 -0
- package/features/clear-cache.js +75 -0
- package/features/get-status.js +210 -0
- package/features/hybrid-search.js +189 -0
- package/features/index-codebase.js +999 -0
- package/features/set-workspace.js +183 -0
- package/index.js +297 -0
- package/lib/ast-chunker.js +273 -0
- package/lib/cache-factory.js +13 -0
- package/lib/cache.js +157 -0
- package/lib/config.js +1296 -0
- package/lib/embedding-worker.js +155 -0
- package/lib/gemini-embedder.js +351 -0
- package/lib/ignore-patterns.js +896 -0
- package/lib/milvus-cache.js +478 -0
- package/lib/mrl-embedder.js +235 -0
- package/lib/project-detector.js +75 -0
- package/lib/resource-throttle.js +85 -0
- package/lib/sqlite-cache.js +468 -0
- package/lib/tokenizer.js +149 -0
- package/lib/utils.js +214 -0
- package/package.json +70 -0
- package/reindex.js +109 -0
|
@@ -0,0 +1,999 @@
|
|
|
1
|
+
import { fdir } from "fdir";
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import chokidar from "chokidar";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import os from "os";
|
|
6
|
+
import { Worker } from "worker_threads";
|
|
7
|
+
import { fileURLToPath } from "url";
|
|
8
|
+
import { smartChunk, hashContent } from "../lib/utils.js";
|
|
9
|
+
import { ResourceThrottle } from "../lib/resource-throttle.js";
|
|
10
|
+
|
|
11
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
12
|
+
|
|
13
|
+
/**
 * Resolve chunk/file counts from a cache instance.
 *
 * Prefers the modern async `getStats()` contract; falls back to counting
 * entries from the legacy `getVectorStore()` array when `getStats` is
 * absent or throws.
 *
 * @param {object|null} cache - cache implementation (may be null/undefined)
 * @returns {Promise<{totalChunks: number, totalFiles: number}>}
 */
async function resolveCacheStats(cache) {
  const hasStatsApi = typeof cache?.getStats === "function";

  if (hasStatsApi) {
    try {
      const stats = await cache.getStats();
      const totalChunks = Number(stats?.totalChunks || 0);
      const totalFiles = Number(stats?.totalFiles || 0);
      return { totalChunks, totalFiles };
    } catch {
      // Stats call failed — fall through to the legacy vectorStore contract.
    }
  }

  const entries = cache?.getVectorStore?.() || [];
  const uniqueFiles = new Set(entries.map((entry) => entry.file));
  return {
    totalChunks: entries.length,
    totalFiles: uniqueFiles.size
  };
}
|
|
32
|
+
|
|
33
|
+
export class CodebaseIndexer {
|
|
34
|
+
/**
 * @param {Function} embedder - embedding function: (text, opts) -> { data } (see processChunksSingleThreaded)
 * @param {object} cache - persistence layer for vectors, file hashes, and mtimes
 * @param {object} config - indexer configuration (searchDirectory, workerThreads, verbose, ...)
 * @param {object|null} server - optional MCP server handle used for progress notifications
 */
constructor(embedder, cache, config, server = null) {
  this.embedder = embedder;
  this.cache = cache;
  this.config = config;
  this.server = server;
  // chokidar watcher handle; not created here (null until set up elsewhere).
  this.watcher = null;
  // Worker-thread pool and the per-worker readiness promises (see initializeWorkers).
  this.workers = [];
  this.workerReady = [];
  // Guards against concurrent indexAll() runs.
  this.isIndexing = false;

  // Initialize resource throttling
  this.throttle = new ResourceThrottle(config);

  // Track indexing status for progressive search
  this.indexingStatus = {
    inProgress: false,
    totalFiles: 0,
    processedFiles: 0,
    percentage: 0
  };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Initialize the worker thread pool for parallel embedding.
 *
 * Returns early (leaving `this.workers` empty, i.e. single-threaded mode) when:
 *  - the configured model name contains "nomic" (ONNX runtime thread-safety issues),
 *  - workers are disabled via config (`workerThreads === 0` or `disableWorkers`),
 *  - the resolved worker count is <= 1.
 *
 * Each worker loads `lib/embedding-worker.js` with the embedding/provider config
 * passed via workerData, and must reply with a "ready" message within 120s.
 * If any worker fails to become ready, the whole pool is torn down and the
 * indexer falls back to single-threaded embedding.
 */
async initializeWorkers() {
  // Workers don't work with nomic/transformers.js due to ONNX WASM thread-safety issues
  const isNomicModel = this.config.embeddingModel?.includes('nomic');
  if (isNomicModel) {
    console.error("[Indexer] Single-threaded mode (nomic model - ONNX workers incompatible)");
    return;
  }

  // API providers: allow workers for parallel embedding
  // Each worker makes independent API calls concurrently
  const provider = (this.config.embeddingProvider || 'local').toLowerCase();
  if (['gemini', 'openai', 'openai-compatible', 'vertex'].includes(provider)) {
    console.error("[Indexer] API provider detected - using parallel workers for faster embedding");
  }

  // Check if workers are explicitly disabled
  if (this.config.workerThreads === 0 || this.config.disableWorkers) {
    console.error("[Indexer] Single-threaded mode (workers disabled by config)");
    return;
  }

  // "auto" defers to the throttle's computed maximum; an explicit number is
  // still clamped by the throttle.
  const numWorkers = this.config.workerThreads === "auto"
    ? this.throttle.maxWorkers // Use throttled worker count
    : this.throttle.getWorkerCount(this.config.workerThreads);

  // Only use workers if we have more than 1 CPU
  // NOTE(review): numWorkers <= 1 may also come from throttle limits, not
  // strictly a 1-CPU system — the log message assumes the latter.
  if (numWorkers <= 1) {
    console.error("[Indexer] Single-threaded mode (1 CPU detected)");
    return;
  }

  if (this.config.verbose) {
    console.error(`[Indexer] Worker config: workerThreads=${this.config.workerThreads}, resolved to ${numWorkers}`);
  }

  console.error(`[Indexer] Initializing ${numWorkers} worker threads...`);

  const workerPath = path.join(__dirname, "../lib/embedding-worker.js");

  for (let i = 0; i < numWorkers; i++) {
    try {
      const worker = new Worker(workerPath, {
        workerData: {
          embeddingProvider: this.config.embeddingProvider,
          embeddingModel: this.config.embeddingModel,
          embeddingDimension: this.config.embeddingDimension,
          geminiApiKey: this.config.geminiApiKey,
          geminiModel: this.config.geminiModel,
          geminiBaseURL: this.config.geminiBaseURL,
          embeddingApiKey: this.config.embeddingApiKey,
          embeddingBaseURL: this.config.embeddingBaseURL,
          openaiApiKey: this.config.openaiApiKey || process.env.OPENAI_API_KEY,
          vertexProject: this.config.vertexProject,
          vertexLocation: this.config.vertexLocation,
          googleApplicationCredentials: process.env.GOOGLE_APPLICATION_CREDENTIALS,
          geminiDimensions: this.config.geminiDimensions,
          geminiBatchSize: this.config.geminiBatchSize,
          geminiBatchFlushMs: this.config.geminiBatchFlushMs,
          geminiMaxRetries: this.config.geminiMaxRetries,
          verbose: this.config.verbose
        }
      });

      // Readiness handshake: the worker's first message must be "ready"
      // (model loaded) or "error"; 120s timeout covers slow model downloads.
      const readyPromise = new Promise((resolve, reject) => {
        const timeout = setTimeout(() => reject(new Error("Worker init timeout")), 120000);

        worker.once("message", (msg) => {
          clearTimeout(timeout);
          if (msg.type === "ready") {
            resolve(worker);
          } else if (msg.type === "error") {
            reject(new Error(msg.error));
          }
        });

        worker.once("error", (err) => {
          clearTimeout(timeout);
          reject(err);
        });
      });

      this.workers.push(worker);
      this.workerReady.push(readyPromise);
    } catch (err) {
      console.error(`[Indexer] Failed to create worker ${i}: ${err.message}`);
    }
  }

  // Wait for all workers to be ready
  try {
    await Promise.all(this.workerReady);
    console.error(`[Indexer] ${this.workers.length} workers ready`);
    if (this.config.verbose) {
      console.error(`[Indexer] Each worker loaded model: ${this.config.embeddingModel}`);
    }
  } catch (err) {
    // Any single failure aborts the whole pool (fail-fast Promise.all).
    console.error(`[Indexer] Worker initialization failed: ${err.message}, falling back to single-threaded`);
    this.terminateWorkers();
  }
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Terminate all worker threads
|
|
163
|
+
*/
|
|
164
|
+
terminateWorkers() {
|
|
165
|
+
for (const worker of this.workers) {
|
|
166
|
+
worker.postMessage({ type: "shutdown" });
|
|
167
|
+
}
|
|
168
|
+
this.workers = [];
|
|
169
|
+
this.workerReady = [];
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Send MCP progress notification to connected clients
|
|
174
|
+
*/
|
|
175
|
+
sendProgress(progress, total, message) {
|
|
176
|
+
if (this.server) {
|
|
177
|
+
try {
|
|
178
|
+
this.server.sendNotification("notifications/progress", {
|
|
179
|
+
progressToken: "indexing",
|
|
180
|
+
progress,
|
|
181
|
+
total,
|
|
182
|
+
message
|
|
183
|
+
});
|
|
184
|
+
} catch (err) {
|
|
185
|
+
// Silently ignore if client doesn't support progress notifications
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Process chunks using worker thread pool with timeout and error recovery
|
|
192
|
+
*/
|
|
193
|
+
async processChunksWithWorkers(allChunks) {
|
|
194
|
+
if (this.workers.length === 0) {
|
|
195
|
+
// Fallback to single-threaded processing
|
|
196
|
+
return this.processChunksSingleThreaded(allChunks);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
const results = [];
|
|
200
|
+
const chunkSize = Math.ceil(allChunks.length / this.workers.length);
|
|
201
|
+
const workerPromises = [];
|
|
202
|
+
const WORKER_TIMEOUT = 300000; // 5 minutes per batch
|
|
203
|
+
|
|
204
|
+
if (this.config.verbose) {
|
|
205
|
+
console.error(`[Indexer] Distributing ${allChunks.length} chunks across ${this.workers.length} workers (~${chunkSize} chunks each)`);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
for (let i = 0; i < this.workers.length; i++) {
|
|
209
|
+
const workerChunks = allChunks.slice(i * chunkSize, (i + 1) * chunkSize);
|
|
210
|
+
if (workerChunks.length === 0) continue;
|
|
211
|
+
|
|
212
|
+
if (this.config.verbose) {
|
|
213
|
+
console.error(`[Indexer] Worker ${i}: processing ${workerChunks.length} chunks`);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
const promise = new Promise((resolve, reject) => {
|
|
217
|
+
const worker = this.workers[i];
|
|
218
|
+
const batchId = `batch-${i}-${Date.now()}`;
|
|
219
|
+
|
|
220
|
+
// Timeout handler
|
|
221
|
+
const timeout = setTimeout(() => {
|
|
222
|
+
worker.off("message", handler);
|
|
223
|
+
console.error(`[Indexer] Worker ${i} timed out, falling back to single-threaded for this batch`);
|
|
224
|
+
// Return empty and let fallback handle it
|
|
225
|
+
resolve([]);
|
|
226
|
+
}, WORKER_TIMEOUT);
|
|
227
|
+
|
|
228
|
+
const handler = (msg) => {
|
|
229
|
+
if (msg.batchId === batchId) {
|
|
230
|
+
clearTimeout(timeout);
|
|
231
|
+
worker.off("message", handler);
|
|
232
|
+
if (msg.type === "results") {
|
|
233
|
+
resolve(msg.results);
|
|
234
|
+
} else if (msg.type === "error") {
|
|
235
|
+
console.error(`[Indexer] Worker ${i} error: ${msg.error}`);
|
|
236
|
+
resolve([]); // Return empty, don't reject - let fallback handle
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
};
|
|
240
|
+
|
|
241
|
+
// Handle worker crash
|
|
242
|
+
const errorHandler = (err) => {
|
|
243
|
+
clearTimeout(timeout);
|
|
244
|
+
worker.off("message", handler);
|
|
245
|
+
console.error(`[Indexer] Worker ${i} crashed: ${err.message}`);
|
|
246
|
+
resolve([]); // Return empty, don't reject
|
|
247
|
+
};
|
|
248
|
+
worker.once("error", errorHandler);
|
|
249
|
+
|
|
250
|
+
worker.on("message", handler);
|
|
251
|
+
worker.postMessage({ type: "process", chunks: workerChunks, batchId });
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
workerPromises.push({ promise, chunks: workerChunks });
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// Wait for all workers with error recovery
|
|
258
|
+
const workerResults = await Promise.all(workerPromises.map(p => p.promise));
|
|
259
|
+
|
|
260
|
+
// Collect results and identify failed chunks that need retry
|
|
261
|
+
const failedChunks = [];
|
|
262
|
+
for (let i = 0; i < workerResults.length; i++) {
|
|
263
|
+
if (workerResults[i].length > 0) {
|
|
264
|
+
results.push(...workerResults[i]);
|
|
265
|
+
} else if (workerPromises[i].chunks.length > 0) {
|
|
266
|
+
// Worker failed or timed out, need to retry these chunks
|
|
267
|
+
failedChunks.push(...workerPromises[i].chunks);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// Retry failed chunks with single-threaded fallback
|
|
272
|
+
if (failedChunks.length > 0) {
|
|
273
|
+
console.error(`[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`);
|
|
274
|
+
const retryResults = await this.processChunksSingleThreaded(failedChunks);
|
|
275
|
+
results.push(...retryResults);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
return results;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
/**
|
|
282
|
+
* Single-threaded chunk processing (fallback)
|
|
283
|
+
*/
|
|
284
|
+
async processChunksSingleThreaded(chunks) {
|
|
285
|
+
const results = [];
|
|
286
|
+
|
|
287
|
+
for (const chunk of chunks) {
|
|
288
|
+
try {
|
|
289
|
+
const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });
|
|
290
|
+
results.push({
|
|
291
|
+
file: chunk.file,
|
|
292
|
+
startLine: chunk.startLine,
|
|
293
|
+
endLine: chunk.endLine,
|
|
294
|
+
content: chunk.text,
|
|
295
|
+
vector: Array.from(output.data),
|
|
296
|
+
success: true
|
|
297
|
+
});
|
|
298
|
+
} catch (error) {
|
|
299
|
+
results.push({
|
|
300
|
+
file: chunk.file,
|
|
301
|
+
startLine: chunk.startLine,
|
|
302
|
+
endLine: chunk.endLine,
|
|
303
|
+
error: error.message,
|
|
304
|
+
success: false
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
return results;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
/**
 * Index a single file into the vector store.
 *
 * Skip order (cheapest check first):
 *  1. directories and files over config.maxFileSize,
 *  2. unchanged mtime (no file read needed),
 *  3. unchanged content hash (mtime changed but content identical — the
 *     cached mtime is refreshed so the next pass skips at step 2).
 *
 * Otherwise removes the file's old chunks, re-chunks the content, embeds
 * each chunk sequentially, stores it, and records the new hash/mtime.
 *
 * @param {string} file - path to the file
 * @returns {Promise<number>} number of chunks added (0 when skipped or on error)
 */
async indexFile(file) {
  const fileName = path.basename(file);
  if (this.config.verbose) {
    console.error(`[Indexer] Processing: ${fileName}...`);
  }

  try {
    // Check file size first
    const stats = await fs.stat(file);

    // Skip directories
    if (stats.isDirectory()) {
      return 0;
    }

    if (stats.size > this.config.maxFileSize) {
      if (this.config.verbose) {
        console.error(`[Indexer] Skipped ${fileName} (too large: ${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
      }
      return 0;
    }

    // OPTIMIZATION: Check mtime first (fast) before reading file content
    const currentMtime = stats.mtimeMs;
    const cachedMtime = this.cache.getFileMtime(file);

    // If mtime unchanged, file definitely unchanged - skip without reading
    if (cachedMtime && currentMtime === cachedMtime) {
      if (this.config.verbose) {
        console.error(`[Indexer] Skipped ${fileName} (unchanged - mtime)`);
      }
      return 0;
    }

    const content = await fs.readFile(file, "utf-8");
    const hash = hashContent(content);

    // Skip if file hasn't changed (content check after mtime indicated change)
    if (this.cache.getFileHash(file) === hash) {
      // Content same but mtime different - update cached mtime
      this.cache.setFileHash(file, hash, currentMtime);
      if (this.config.verbose) {
        console.error(`[Indexer] Skipped ${fileName} (unchanged - hash)`);
      }
      return 0;
    }

    if (this.config.verbose) {
      console.error(`[Indexer] Indexing ${fileName}...`);
    }

    // Remove old chunks for this file
    this.cache.removeFileFromStore(file);

    const chunks = smartChunk(content, file, this.config);
    let addedChunks = 0;

    // Sequential embedding; a failed chunk is logged and skipped, the rest
    // of the file is still indexed.
    for (const chunk of chunks) {
      try {
        const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });

        this.cache.addToStore({
          file,
          startLine: chunk.startLine,
          endLine: chunk.endLine,
          content: chunk.text,
          vector: Array.from(output.data)
        });
        addedChunks++;
      } catch (embeddingError) {
        console.error(`[Indexer] Failed to embed chunk in ${fileName}:`, embeddingError.message);
      }
    }

    // Record hash/mtime only after chunks are stored, so a crash mid-file
    // re-indexes it on the next run.
    this.cache.setFileHash(file, hash, currentMtime);
    if (this.config.verbose) {
      console.error(`[Indexer] Completed ${fileName} (${addedChunks} chunks)`);
    }
    return addedChunks;
  } catch (error) {
    console.error(`[Indexer] Error indexing ${fileName}:`, error.message);
    return 0;
  }
}
|
|
396
|
+
|
|
397
|
+
/**
|
|
398
|
+
* Discover files using fdir (3-5x faster than glob)
|
|
399
|
+
* Uses config.excludePatterns which includes smart patterns from ignore-patterns.js
|
|
400
|
+
*/
|
|
401
|
+
async discoverFiles() {
|
|
402
|
+
const startTime = Date.now();
|
|
403
|
+
|
|
404
|
+
// Build extension filter from config
|
|
405
|
+
const extensions = new Set(this.config.fileExtensions.map(ext => `.${ext}`));
|
|
406
|
+
|
|
407
|
+
// Extract directory names from glob patterns in config.excludePatterns
|
|
408
|
+
// Patterns like "**/node_modules/**" -> "node_modules"
|
|
409
|
+
const excludeDirs = new Set();
|
|
410
|
+
const excludeFilePatterns = [];
|
|
411
|
+
for (const pattern of this.config.excludePatterns) {
|
|
412
|
+
// Extract directory names from glob patterns
|
|
413
|
+
const match = pattern.match(/\*\*\/([^/*]+)\/?\*?\*?$/);
|
|
414
|
+
if (match) {
|
|
415
|
+
excludeDirs.add(match[1]);
|
|
416
|
+
}
|
|
417
|
+
// Also handle patterns like "**/dirname/**"
|
|
418
|
+
const match2 = pattern.match(/\*\*\/([^/*]+)\/\*\*$/);
|
|
419
|
+
if (match2) {
|
|
420
|
+
excludeDirs.add(match2[1]);
|
|
421
|
+
}
|
|
422
|
+
// Extract file-level glob patterns like **/*.test.js, **/test_*.py
|
|
423
|
+
const fileMatch = pattern.match(/\*\*\/(\*[^/]+|[^/*]+\*[^/]*)$/);
|
|
424
|
+
if (fileMatch) {
|
|
425
|
+
const glob = fileMatch[1];
|
|
426
|
+
// Convert glob to regex: *.test.js -> /\.test\.js$/, test_*.py -> /^test_.*\.py$/
|
|
427
|
+
const escaped = glob
|
|
428
|
+
.replace(/[.+^${}()|[\]\\]/g, '\\$&')
|
|
429
|
+
.replace(/\*/g, '.*');
|
|
430
|
+
try {
|
|
431
|
+
excludeFilePatterns.push(new RegExp(`^${escaped}$`));
|
|
432
|
+
} catch {
|
|
433
|
+
// skip invalid patterns
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
// Always exclude cache directory
|
|
439
|
+
excludeDirs.add(".smart-coding-cache");
|
|
440
|
+
|
|
441
|
+
const isExcludedDirectory = (dirName) => {
|
|
442
|
+
if (!dirName) {
|
|
443
|
+
return false;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
const normalized = dirName.replace(/[\\/]+$/g, "");
|
|
447
|
+
if (excludeDirs.has(normalized)) {
|
|
448
|
+
return true;
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
const normalizedSegments = normalized.split(/[\\/]+/);
|
|
452
|
+
if (normalizedSegments.some((segment) => excludeDirs.has(segment))) {
|
|
453
|
+
return true;
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
const basename = path.basename(normalized);
|
|
457
|
+
if (excludeDirs.has(basename)) {
|
|
458
|
+
return true;
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
return false;
|
|
462
|
+
};
|
|
463
|
+
|
|
464
|
+
if (this.config.verbose) {
|
|
465
|
+
console.error(`[Indexer] Using ${excludeDirs.size} exclude directories, ${excludeFilePatterns.length} file patterns from config`);
|
|
466
|
+
}
|
|
467
|
+
// Debug: always log for diagnosing test exclusion
|
|
468
|
+
console.error(`[Indexer] excludeDirs: ${[...excludeDirs].join(', ')}`);
|
|
469
|
+
console.error(`[Indexer] excludeFilePatterns: ${excludeFilePatterns.map(r => r.source).join(', ')}`);
|
|
470
|
+
|
|
471
|
+
const api = new fdir()
|
|
472
|
+
.withFullPaths()
|
|
473
|
+
.exclude(isExcludedDirectory)
|
|
474
|
+
.filter((filePath) => {
|
|
475
|
+
if (!extensions.has(path.extname(filePath))) return false;
|
|
476
|
+
// Apply file-level exclusion patterns
|
|
477
|
+
if (excludeFilePatterns.length > 0) {
|
|
478
|
+
const basename = path.basename(filePath);
|
|
479
|
+
for (const re of excludeFilePatterns) {
|
|
480
|
+
if (re.test(basename)) return false;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
return true;
|
|
484
|
+
})
|
|
485
|
+
.crawl(this.config.searchDirectory);
|
|
486
|
+
|
|
487
|
+
const files = await api.withPromise();
|
|
488
|
+
|
|
489
|
+
console.error(`[Indexer] File discovery: ${files.length} files in ${Date.now() - startTime}ms`);
|
|
490
|
+
return files;
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
/**
|
|
494
|
+
* Sort files by priority for progressive indexing
|
|
495
|
+
* Priority: recently modified files first (users likely searching for recent work)
|
|
496
|
+
*/
|
|
497
|
+
async sortFilesByPriority(files) {
|
|
498
|
+
const startTime = Date.now();
|
|
499
|
+
|
|
500
|
+
// Get mtime for all files in parallel
|
|
501
|
+
const filesWithMtime = await Promise.all(
|
|
502
|
+
files.map(async (file) => {
|
|
503
|
+
try {
|
|
504
|
+
const stats = await fs.stat(file);
|
|
505
|
+
return { file, mtime: stats.mtimeMs };
|
|
506
|
+
} catch {
|
|
507
|
+
return { file, mtime: 0 };
|
|
508
|
+
}
|
|
509
|
+
})
|
|
510
|
+
);
|
|
511
|
+
|
|
512
|
+
// Sort by mtime descending (most recently modified first)
|
|
513
|
+
filesWithMtime.sort((a, b) => b.mtime - a.mtime);
|
|
514
|
+
|
|
515
|
+
if (this.config.verbose) {
|
|
516
|
+
console.error(`[Indexer] Priority sort: ${files.length} files in ${Date.now() - startTime}ms`);
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
return filesWithMtime.map(f => f.file);
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
/**
|
|
523
|
+
* Start background indexing (non-blocking)
|
|
524
|
+
* Allows search to work immediately with partial results
|
|
525
|
+
*/
|
|
526
|
+
startBackgroundIndexing(force = false) {
|
|
527
|
+
if (this.isIndexing) {
|
|
528
|
+
console.error("[Indexer] Background indexing already in progress");
|
|
529
|
+
return;
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
console.error("[Indexer] Starting background indexing...");
|
|
533
|
+
|
|
534
|
+
// Run indexAll in background (don't await)
|
|
535
|
+
this.indexAll(force).then(result => {
|
|
536
|
+
console.error(`[Indexer] Background indexing complete: ${result.message || 'done'}`);
|
|
537
|
+
}).catch(err => {
|
|
538
|
+
console.error(`[Indexer] Background indexing error: ${err.message}`);
|
|
539
|
+
});
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
/**
|
|
543
|
+
* Get current indexing status for progressive search
|
|
544
|
+
*/
|
|
545
|
+
getIndexingStatus() {
|
|
546
|
+
return {
|
|
547
|
+
...this.indexingStatus,
|
|
548
|
+
isReady: !this.indexingStatus.inProgress || this.indexingStatus.processedFiles > 0
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
/**
|
|
553
|
+
* Pre-filter files by hash (skip unchanged files before processing)
|
|
554
|
+
*/
|
|
555
|
+
async preFilterFiles(files) {
|
|
556
|
+
const startTime = Date.now();
|
|
557
|
+
const filesToProcess = [];
|
|
558
|
+
const skippedCount = { unchanged: 0, tooLarge: 0, error: 0 };
|
|
559
|
+
|
|
560
|
+
// Process in parallel batches for speed
|
|
561
|
+
const BATCH_SIZE = 500;
|
|
562
|
+
|
|
563
|
+
for (let i = 0; i < files.length; i += BATCH_SIZE) {
|
|
564
|
+
const batch = files.slice(i, i + BATCH_SIZE);
|
|
565
|
+
|
|
566
|
+
const results = await Promise.all(
|
|
567
|
+
batch.map(async (file) => {
|
|
568
|
+
try {
|
|
569
|
+
const stats = await fs.stat(file);
|
|
570
|
+
|
|
571
|
+
if (stats.isDirectory()) {
|
|
572
|
+
return null;
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
if (stats.size > this.config.maxFileSize) {
|
|
576
|
+
skippedCount.tooLarge++;
|
|
577
|
+
return null;
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
const content = await fs.readFile(file, "utf-8");
|
|
581
|
+
const hash = hashContent(content);
|
|
582
|
+
|
|
583
|
+
if (this.cache.getFileHash(file) === hash) {
|
|
584
|
+
skippedCount.unchanged++;
|
|
585
|
+
return null;
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
return { file, content, hash };
|
|
589
|
+
} catch (error) {
|
|
590
|
+
skippedCount.error++;
|
|
591
|
+
return null;
|
|
592
|
+
}
|
|
593
|
+
})
|
|
594
|
+
);
|
|
595
|
+
|
|
596
|
+
for (const result of results) {
|
|
597
|
+
if (result) filesToProcess.push(result);
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
console.error(`[Indexer] Pre-filter: ${filesToProcess.length} changed, ${skippedCount.unchanged} unchanged, ${skippedCount.tooLarge} too large, ${skippedCount.error} errors (${Date.now() - startTime}ms)`);
|
|
602
|
+
return filesToProcess;
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
async indexAll(force = false) {
|
|
606
|
+
if (this.isIndexing) {
|
|
607
|
+
console.error("[Indexer] Indexing already in progress, skipping concurrent request");
|
|
608
|
+
return { skipped: true, reason: "Indexing already in progress" };
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
this.isIndexing = true;
|
|
612
|
+
|
|
613
|
+
// Initialize indexing status for progressive search
|
|
614
|
+
this.indexingStatus = {
|
|
615
|
+
inProgress: true,
|
|
616
|
+
totalFiles: 0,
|
|
617
|
+
processedFiles: 0,
|
|
618
|
+
percentage: 0
|
|
619
|
+
};
|
|
620
|
+
|
|
621
|
+
// Declare counters outside try block so they're accessible in finally
|
|
622
|
+
let processedFiles = 0;
|
|
623
|
+
let skippedFiles = 0;
|
|
624
|
+
|
|
625
|
+
try {
|
|
626
|
+
if (force) {
|
|
627
|
+
console.error("[Indexer] Force reindex requested: clearing cache");
|
|
628
|
+
if (typeof this.cache.resetForFullReindex === "function") {
|
|
629
|
+
await this.cache.resetForFullReindex();
|
|
630
|
+
} else {
|
|
631
|
+
this.cache.setVectorStore([]);
|
|
632
|
+
this.cache.clearAllFileHashes();
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
const totalStartTime = Date.now();
|
|
637
|
+
console.error(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);
|
|
638
|
+
|
|
639
|
+
// Step 1: Fast file discovery with fdir
|
|
640
|
+
let files = await this.discoverFiles();
|
|
641
|
+
|
|
642
|
+
if (files.length === 0) {
|
|
643
|
+
console.error("[Indexer] No files found to index");
|
|
644
|
+
this.sendProgress(100, 100, "No files found to index");
|
|
645
|
+
return { skipped: false, filesProcessed: 0, chunksCreated: 0, message: "No files found to index" };
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
// Step 1.1: Sort files by priority (recently modified first) for progressive indexing
|
|
649
|
+
// This ensures search results are useful even while indexing is in progress
|
|
650
|
+
files = await this.sortFilesByPriority(files);
|
|
651
|
+
console.error(`[Indexer] Progressive mode: recently modified files will be indexed first`);
|
|
652
|
+
|
|
653
|
+
// Send progress: discovery complete
|
|
654
|
+
this.sendProgress(5, 100, `Discovered ${files.length} files (sorted by priority)`);
|
|
655
|
+
|
|
656
|
+
// Step 1.5: Prune deleted or excluded files from cache
|
|
657
|
+
if (!force) {
|
|
658
|
+
const currentFilesSet = new Set(files);
|
|
659
|
+
const cachedFiles = Array.from(this.cache.getAllFileHashes().keys());
|
|
660
|
+
let prunedCount = 0;
|
|
661
|
+
|
|
662
|
+
for (const cachedFile of cachedFiles) {
|
|
663
|
+
if (!currentFilesSet.has(cachedFile)) {
|
|
664
|
+
this.cache.removeFileFromStore(cachedFile);
|
|
665
|
+
this.cache.deleteFileHash(cachedFile);
|
|
666
|
+
prunedCount++;
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
if (prunedCount > 0) {
|
|
671
|
+
if (this.config.verbose) {
|
|
672
|
+
console.error(`[Indexer] Pruned ${prunedCount} deleted/excluded files from index`);
|
|
673
|
+
}
|
|
674
|
+
// If we pruned files, we should save these changes even if no other files changed
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
// Step 2: Process files with progressive indexing
|
|
679
|
+
// Use batch size of 1 for immediate search availability (progressive indexing)
|
|
680
|
+
// Each file is processed, embedded, and saved immediately so search can find it
|
|
681
|
+
const adaptiveBatchSize = this.config.progressiveIndexing !== false ? 1 :
|
|
682
|
+
files.length > 10000 ? 500 :
|
|
683
|
+
files.length > 1000 ? 200 :
|
|
684
|
+
this.config.batchSize || 100;
|
|
685
|
+
|
|
686
|
+
console.error(`[Indexer] Processing ${files.length} files (progressive mode: batch size ${adaptiveBatchSize})`);
|
|
687
|
+
|
|
688
|
+
// Step 3: Initialize worker threads (always use when multi-core available)
|
|
689
|
+
const useWorkers = os.cpus().length > 1;
|
|
690
|
+
|
|
691
|
+
if (useWorkers) {
|
|
692
|
+
await this.initializeWorkers();
|
|
693
|
+
console.error(`[Indexer] Multi-threaded mode: ${this.workers.length} workers active`);
|
|
694
|
+
} else {
|
|
695
|
+
console.error(`[Indexer] Single-threaded mode (single-core system)`);
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
let totalChunks = 0;
|
|
699
|
+
let batchCounter = 0; // Track batches for incremental saves
|
|
700
|
+
|
|
701
|
+
// Update total file count for status tracking (estimated, will adjust as we filter)
|
|
702
|
+
this.indexingStatus.totalFiles = files.length;
|
|
703
|
+
|
|
704
|
+
// Step 4: Process files in adaptive batches with inline lazy filtering
|
|
705
|
+
for (let i = 0; i < files.length; i += adaptiveBatchSize) {
|
|
706
|
+
const batch = files.slice(i, i + adaptiveBatchSize);
|
|
707
|
+
|
|
708
|
+
// Lazy filter and generate chunks for this batch
|
|
709
|
+
const allChunks = [];
|
|
710
|
+
const fileHashes = new Map();
|
|
711
|
+
|
|
712
|
+
for (const file of batch) {
|
|
713
|
+
try {
|
|
714
|
+
const stats = await fs.stat(file);
|
|
715
|
+
|
|
716
|
+
// Skip directories and oversized files
|
|
717
|
+
if (stats.isDirectory()) continue;
|
|
718
|
+
if (stats.size > this.config.maxFileSize) {
|
|
719
|
+
skippedFiles++;
|
|
720
|
+
continue;
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
// OPTIMIZATION: Check mtime first (fast) before reading file content
|
|
724
|
+
const currentMtime = stats.mtimeMs;
|
|
725
|
+
const cachedMtime = this.cache.getFileMtime(file);
|
|
726
|
+
|
|
727
|
+
// If mtime unchanged, file definitely unchanged - skip without reading
|
|
728
|
+
if (cachedMtime && currentMtime === cachedMtime) {
|
|
729
|
+
skippedFiles++;
|
|
730
|
+
continue;
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
// mtime changed (or new file) - read content and verify with hash
|
|
734
|
+
const content = await fs.readFile(file, "utf-8");
|
|
735
|
+
const hash = hashContent(content);
|
|
736
|
+
|
|
737
|
+
// Check if content actually changed (mtime can change without content change)
|
|
738
|
+
if (this.cache.getFileHash(file) === hash) {
|
|
739
|
+
// Content same but mtime different - update cached mtime
|
|
740
|
+
this.cache.setFileHash(file, hash, currentMtime);
|
|
741
|
+
skippedFiles++;
|
|
742
|
+
continue;
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
// File changed - remove old chunks and prepare new ones
|
|
746
|
+
this.cache.removeFileFromStore(file);
|
|
747
|
+
const chunks = smartChunk(content, file, this.config);
|
|
748
|
+
|
|
749
|
+
for (const chunk of chunks) {
|
|
750
|
+
allChunks.push({
|
|
751
|
+
file,
|
|
752
|
+
text: chunk.text,
|
|
753
|
+
startLine: chunk.startLine,
|
|
754
|
+
endLine: chunk.endLine,
|
|
755
|
+
hash,
|
|
756
|
+
mtime: currentMtime
|
|
757
|
+
});
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
fileHashes.set(file, { hash, mtime: currentMtime });
|
|
761
|
+
} catch (error) {
|
|
762
|
+
// Skip files with read errors
|
|
763
|
+
skippedFiles++;
|
|
764
|
+
if (this.config.verbose) {
|
|
765
|
+
console.error(`[Indexer] Error reading ${path.basename(file)}: ${error.message}`);
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
// Skip this batch if no chunks to process
|
|
771
|
+
if (allChunks.length === 0) {
|
|
772
|
+
continue;
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
// Process chunks (with workers if available, otherwise single-threaded)
|
|
776
|
+
let results;
|
|
777
|
+
if (useWorkers && this.workers.length > 0) {
|
|
778
|
+
results = await this.processChunksWithWorkers(allChunks);
|
|
779
|
+
} else {
|
|
780
|
+
results = await this.processChunksSingleThreaded(allChunks);
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
// Collect successful results for batch insert
|
|
784
|
+
const chunksToInsert = [];
|
|
785
|
+
const filesProcessedInBatch = new Set();
|
|
786
|
+
|
|
787
|
+
for (const result of results) {
|
|
788
|
+
if (result.success) {
|
|
789
|
+
chunksToInsert.push({
|
|
790
|
+
file: result.file,
|
|
791
|
+
startLine: result.startLine,
|
|
792
|
+
endLine: result.endLine,
|
|
793
|
+
content: result.content,
|
|
794
|
+
vector: result.vector
|
|
795
|
+
});
|
|
796
|
+
totalChunks++;
|
|
797
|
+
filesProcessedInBatch.add(result.file);
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
// Batch insert to SQLite (much faster than individual inserts)
|
|
802
|
+
if (chunksToInsert.length > 0 && typeof this.cache.addBatchToStore === 'function') {
|
|
803
|
+
this.cache.addBatchToStore(chunksToInsert);
|
|
804
|
+
} else {
|
|
805
|
+
// Fallback for old cache implementation
|
|
806
|
+
for (const chunk of chunksToInsert) {
|
|
807
|
+
this.cache.addToStore(chunk);
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
|
|
811
|
+
// Update file hashes with mtime
|
|
812
|
+
for (const [file, { hash, mtime }] of fileHashes) {
|
|
813
|
+
this.cache.setFileHash(file, hash, mtime);
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
processedFiles += filesProcessedInBatch.size;
|
|
817
|
+
batchCounter++;
|
|
818
|
+
|
|
819
|
+
// Update indexing status for progressive search
|
|
820
|
+
const estimatedTotal = files.length - skippedFiles;
|
|
821
|
+
this.indexingStatus.processedFiles = processedFiles;
|
|
822
|
+
this.indexingStatus.totalFiles = Math.max(estimatedTotal, processedFiles);
|
|
823
|
+
this.indexingStatus.percentage = estimatedTotal > 0 ? Math.floor((processedFiles / estimatedTotal) * 100) : 100;
|
|
824
|
+
|
|
825
|
+
// Progressive indexing: save after EVERY batch so search can find new results immediately
|
|
826
|
+
// This is critical for background indexing - users can search while indexing continues
|
|
827
|
+
if (chunksToInsert.length > 0) {
|
|
828
|
+
if (typeof this.cache.saveIncremental === 'function') {
|
|
829
|
+
await this.cache.saveIncremental();
|
|
830
|
+
} else {
|
|
831
|
+
// Fallback: full save (slower but ensures data is persisted)
|
|
832
|
+
await this.cache.save();
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
// Apply CPU throttling (delay between batches)
|
|
837
|
+
await this.throttle.throttledBatch(null);
|
|
838
|
+
|
|
839
|
+
// Progress indicator - show progress after each file in progressive mode
|
|
840
|
+
const progressInterval = adaptiveBatchSize === 1 ? 1 : adaptiveBatchSize * 2;
|
|
841
|
+
if (processedFiles > 0 && ((processedFiles + skippedFiles) % progressInterval === 0 || i + adaptiveBatchSize >= files.length)) {
|
|
842
|
+
const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
|
|
843
|
+
const totalProcessed = processedFiles + skippedFiles;
|
|
844
|
+
const rate = totalProcessed > 0 ? (totalProcessed / parseFloat(elapsed)).toFixed(1) : '0';
|
|
845
|
+
console.error(`[Indexer] Progress: ${processedFiles} indexed, ${skippedFiles} skipped of ${files.length} (${rate} files/sec)`);
|
|
846
|
+
|
|
847
|
+
// Send MCP progress notification (10-95% range for batch processing)
|
|
848
|
+
const progressPercent = Math.min(95, Math.floor(10 + (totalProcessed / files.length) * 85));
|
|
849
|
+
this.sendProgress(progressPercent, 100, `Indexed ${processedFiles} files, ${skippedFiles} skipped (${rate}/sec)`);
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
// Cleanup workers
|
|
854
|
+
if (useWorkers) {
|
|
855
|
+
this.terminateWorkers();
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
const totalTime = ((Date.now() - totalStartTime) / 1000).toFixed(1);
|
|
859
|
+
const changedFiles = processedFiles;
|
|
860
|
+
console.error(`[Indexer] Complete: ${totalChunks} chunks from ${changedFiles} changed files (${skippedFiles} unchanged) in ${totalTime}s`);
|
|
861
|
+
|
|
862
|
+
// Mark indexing as complete
|
|
863
|
+
this.indexingStatus.inProgress = false;
|
|
864
|
+
this.indexingStatus.percentage = 100;
|
|
865
|
+
|
|
866
|
+
// Send completion progress
|
|
867
|
+
const summaryMsg = changedFiles > 0
|
|
868
|
+
? `Complete: ${totalChunks} chunks from ${changedFiles} changed files (${skippedFiles} unchanged) in ${totalTime}s`
|
|
869
|
+
: `Complete: No files changed (${skippedFiles} files up to date)`;
|
|
870
|
+
this.sendProgress(100, 100, summaryMsg);
|
|
871
|
+
|
|
872
|
+
await this.cache.save();
|
|
873
|
+
|
|
874
|
+
const stats = await resolveCacheStats(this.cache);
|
|
875
|
+
const resolvedTotalChunks =
|
|
876
|
+
stats.totalChunks === 0 && totalChunks > 0 ? totalChunks : stats.totalChunks;
|
|
877
|
+
const resolvedTotalFiles =
|
|
878
|
+
stats.totalFiles === 0 && changedFiles > 0 ? changedFiles : stats.totalFiles;
|
|
879
|
+
return {
|
|
880
|
+
skipped: false,
|
|
881
|
+
filesProcessed: changedFiles,
|
|
882
|
+
chunksCreated: totalChunks,
|
|
883
|
+
totalFiles: resolvedTotalFiles,
|
|
884
|
+
totalChunks: resolvedTotalChunks,
|
|
885
|
+
duration: totalTime,
|
|
886
|
+
message: changedFiles > 0
|
|
887
|
+
? `Indexed ${changedFiles} files (${totalChunks} chunks, ${skippedFiles} unchanged) in ${totalTime}s`
|
|
888
|
+
: `All ${skippedFiles} files up to date`
|
|
889
|
+
};
|
|
890
|
+
} finally {
|
|
891
|
+
this.isIndexing = false;
|
|
892
|
+
// Adjust estimated total after completion
|
|
893
|
+
this.indexingStatus.totalFiles = processedFiles + skippedFiles;
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
setupFileWatcher() {
|
|
898
|
+
if (!this.config.watchFiles) return;
|
|
899
|
+
|
|
900
|
+
const pattern = this.config.fileExtensions.map(ext => `**/*.${ext}`);
|
|
901
|
+
|
|
902
|
+
this.watcher = chokidar.watch(pattern, {
|
|
903
|
+
cwd: this.config.searchDirectory,
|
|
904
|
+
ignored: this.config.excludePatterns,
|
|
905
|
+
persistent: true,
|
|
906
|
+
ignoreInitial: true
|
|
907
|
+
});
|
|
908
|
+
|
|
909
|
+
this.watcher
|
|
910
|
+
.on("add", async (filePath) => {
|
|
911
|
+
const fullPath = path.join(this.config.searchDirectory, filePath);
|
|
912
|
+
console.error(`[Indexer] New file detected: ${filePath}`);
|
|
913
|
+
await this.indexFile(fullPath);
|
|
914
|
+
await this.cache.save();
|
|
915
|
+
})
|
|
916
|
+
.on("change", async (filePath) => {
|
|
917
|
+
const fullPath = path.join(this.config.searchDirectory, filePath);
|
|
918
|
+
console.error(`[Indexer] File changed: ${filePath}`);
|
|
919
|
+
await this.indexFile(fullPath);
|
|
920
|
+
await this.cache.save();
|
|
921
|
+
})
|
|
922
|
+
.on("unlink", (filePath) => {
|
|
923
|
+
const fullPath = path.join(this.config.searchDirectory, filePath);
|
|
924
|
+
console.error(`[Indexer] File deleted: ${filePath}`);
|
|
925
|
+
this.cache.removeFileFromStore(fullPath);
|
|
926
|
+
this.cache.deleteFileHash(fullPath);
|
|
927
|
+
this.cache.save();
|
|
928
|
+
});
|
|
929
|
+
|
|
930
|
+
console.error("[Indexer] File watcher enabled for incremental indexing");
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
// MCP Tool definition for this feature
|
|
935
|
+
// MCP Tool definition for this feature.
// Returns the static metadata describing the "b_index_codebase" tool:
// its name, human-readable description, argument schema, and MCP
// behavior annotations. Takes no parameters and has no side effects.
export function getToolDefinition() {
  // JSON Schema for the tool's single optional argument.
  const inputSchema = {
    type: "object",
    properties: {
      force: {
        type: "boolean",
        description: "Force reindex even if files haven't changed",
        default: false
      }
    }
  };

  // Behavioral hints consumed by MCP clients (not destructive, safe to retry).
  const annotations = {
    title: "Reindex Codebase",
    readOnlyHint: false,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false
  };

  return {
    name: "b_index_codebase",
    description: "Manually trigger a full reindex of the codebase. This will scan all files and update the embeddings cache. Useful after large code changes or if the index seems out of date.",
    inputSchema,
    annotations
  };
}
|
|
958
|
+
|
|
959
|
+
// Tool handler
|
|
960
|
+
// Tool handler for "b_index_codebase".
// Runs a (possibly forced) reindex via the indexer, then formats a
// human-readable summary of the result for the MCP client.
// @param {object} request - MCP tool-call request; reads params.arguments.force.
// @param {object} indexer - Indexer instance exposing indexAll() and .cache.
// @returns {Promise<object>} MCP response with a single text content item.
export async function handleToolCall(request, indexer) {
  const force = request.params.arguments?.force || false;
  const result = await indexer.indexAll(force);

  // A concurrent indexing run was already active — report and bail out.
  if (result?.skipped) {
    return {
      content: [{
        type: "text",
        text: `Indexing skipped: ${result.reason}\n\nPlease wait for the current indexing operation to complete before requesting another reindex.`
      }]
    };
  }

  // Prefer figures reported by this run; fall back to the cache's own stats.
  const cacheStats = await resolveCacheStats(indexer.cache);
  const totalChunks = result?.totalChunks ?? cacheStats.totalChunks;
  const totalFiles = result?.totalFiles ?? cacheStats.totalFiles;
  const filesProcessed = result?.filesProcessed ?? 0;
  const chunksCreated = result?.chunksCreated ?? 0;

  // Assemble the summary text from its pieces.
  const parts = [];
  parts.push(result?.message
    ? `Codebase reindexed successfully.\n\n${result.message}`
    : `Codebase reindexed successfully.`);
  parts.push(`\n\nStatistics:\n- Total files in index: ${totalFiles}\n- Total code chunks: ${totalChunks}`);
  if (filesProcessed > 0) {
    parts.push(`\n- Files processed this run: ${filesProcessed}\n- Chunks created this run: ${chunksCreated}`);
  }

  return {
    content: [{
      type: "text",
      text: parts.join("")
    }]
  };
}
|