@softerist/heuristic-mcp 2.1.47 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/.agent/workflows/code-review.md +60 -0
  2. package/.prettierrc +7 -0
  3. package/ARCHITECTURE.md +105 -170
  4. package/CONTRIBUTING.md +32 -113
  5. package/GEMINI.md +73 -0
  6. package/LICENSE +21 -21
  7. package/README.md +161 -54
  8. package/config.json +876 -75
  9. package/debug-pids.js +27 -0
  10. package/eslint.config.js +36 -0
  11. package/features/ann-config.js +37 -26
  12. package/features/clear-cache.js +28 -19
  13. package/features/find-similar-code.js +142 -66
  14. package/features/hybrid-search.js +253 -93
  15. package/features/index-codebase.js +1455 -394
  16. package/features/lifecycle.js +813 -180
  17. package/features/register.js +58 -52
  18. package/index.js +450 -306
  19. package/lib/cache-ops.js +22 -0
  20. package/lib/cache-utils.js +68 -0
  21. package/lib/cache.js +1392 -587
  22. package/lib/call-graph.js +165 -50
  23. package/lib/cli.js +154 -0
  24. package/lib/config.js +462 -121
  25. package/lib/embedding-process.js +77 -0
  26. package/lib/embedding-worker.js +545 -30
  27. package/lib/ignore-patterns.js +61 -59
  28. package/lib/json-worker.js +14 -0
  29. package/lib/json-writer.js +344 -0
  30. package/lib/logging.js +88 -0
  31. package/lib/memory-logger.js +13 -0
  32. package/lib/project-detector.js +13 -17
  33. package/lib/server-lifecycle.js +38 -0
  34. package/lib/settings-editor.js +645 -0
  35. package/lib/tokenizer.js +207 -104
  36. package/lib/utils.js +273 -198
  37. package/lib/vector-store-binary.js +592 -0
  38. package/mcp_config.example.json +13 -0
  39. package/package.json +13 -2
  40. package/scripts/clear-cache.js +6 -17
  41. package/scripts/download-model.js +14 -9
  42. package/scripts/postinstall.js +5 -5
  43. package/search-configs.js +36 -0
  44. package/test/ann-config.test.js +179 -0
  45. package/test/ann-fallback.test.js +6 -6
  46. package/test/binary-store.test.js +69 -0
  47. package/test/cache-branches.test.js +120 -0
  48. package/test/cache-errors.test.js +264 -0
  49. package/test/cache-extra.test.js +300 -0
  50. package/test/cache-helpers.test.js +205 -0
  51. package/test/cache-hnsw-failure.test.js +40 -0
  52. package/test/cache-json-worker.test.js +190 -0
  53. package/test/cache-worker.test.js +102 -0
  54. package/test/cache.test.js +443 -0
  55. package/test/call-graph.test.js +103 -4
  56. package/test/clear-cache.test.js +69 -68
  57. package/test/code-review-workflow.test.js +50 -0
  58. package/test/config.test.js +418 -0
  59. package/test/coverage-gap.test.js +497 -0
  60. package/test/coverage-maximizer.test.js +236 -0
  61. package/test/debug-analysis.js +107 -0
  62. package/test/embedding-model.test.js +173 -103
  63. package/test/embedding-worker-extra.test.js +272 -0
  64. package/test/embedding-worker.test.js +158 -0
  65. package/test/features.test.js +139 -0
  66. package/test/final-boost.test.js +271 -0
  67. package/test/final-polish.test.js +183 -0
  68. package/test/final.test.js +95 -0
  69. package/test/find-similar-code.test.js +191 -0
  70. package/test/helpers.js +92 -11
  71. package/test/helpers.test.js +46 -0
  72. package/test/hybrid-search-basic.test.js +62 -0
  73. package/test/hybrid-search-branch.test.js +202 -0
  74. package/test/hybrid-search-callgraph.test.js +229 -0
  75. package/test/hybrid-search-extra.test.js +81 -0
  76. package/test/hybrid-search.test.js +484 -71
  77. package/test/index-cli.test.js +520 -0
  78. package/test/index-codebase-batch.test.js +119 -0
  79. package/test/index-codebase-branches.test.js +585 -0
  80. package/test/index-codebase-core.test.js +1032 -0
  81. package/test/index-codebase-edge-cases.test.js +254 -0
  82. package/test/index-codebase-errors.test.js +132 -0
  83. package/test/index-codebase-gap.test.js +239 -0
  84. package/test/index-codebase-lines.test.js +151 -0
  85. package/test/index-codebase-watcher.test.js +259 -0
  86. package/test/index-codebase-zone.test.js +259 -0
  87. package/test/index-codebase.test.js +371 -69
  88. package/test/index-memory.test.js +220 -0
  89. package/test/indexer-detailed.test.js +176 -0
  90. package/test/integration.test.js +148 -92
  91. package/test/json-worker.test.js +50 -0
  92. package/test/lifecycle.test.js +541 -0
  93. package/test/master.test.js +198 -0
  94. package/test/perfection.test.js +349 -0
  95. package/test/project-detector.test.js +65 -0
  96. package/test/register.test.js +262 -0
  97. package/test/tokenizer.test.js +55 -93
  98. package/test/ultra-maximizer.test.js +116 -0
  99. package/test/utils-branches.test.js +161 -0
  100. package/test/utils-extra.test.js +116 -0
  101. package/test/utils.test.js +131 -0
  102. package/test/verify_fixes.js +76 -0
  103. package/test/worker-errors.test.js +96 -0
  104. package/test/worker-init.test.js +102 -0
  105. package/test/worker_throttling.test.js +93 -0
  106. package/tools/scripts/benchmark-search.js +95 -0
  107. package/tools/scripts/cache-stats.js +71 -0
  108. package/tools/scripts/manual-search.js +34 -0
  109. package/vitest.config.js +19 -9
@@ -1,77 +1,86 @@
1
- import { fdir } from "fdir";
2
- import fs from "fs/promises";
3
- import chokidar from "chokidar";
4
- import path from "path";
5
- import os from "os";
6
- import { Worker } from "worker_threads";
7
- import { fileURLToPath } from "url";
8
- import { smartChunk, hashContent } from "../lib/utils.js";
9
- import { extractCallData } from "../lib/call-graph.js";
10
-
11
- function escapeRegExp(value) {
12
- return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1
+ import { fdir } from 'fdir';
2
+ import fs from 'fs/promises';
3
+ import chokidar from 'chokidar';
4
+ import path from 'path';
5
+ import os from 'os';
6
+ import { Worker } from 'worker_threads';
7
+ import { spawn } from 'child_process';
8
+ import { setTimeout as delay } from 'timers/promises';
9
+ import { fileURLToPath } from 'url';
10
+ import { smartChunk, hashContent } from '../lib/utils.js';
11
+ import { extractCallData } from '../lib/call-graph.js';
12
+
13
+ import ignore from 'ignore';
14
+
15
+ function toFloat32Array(vector) {
16
+ // Always create a copy to ensure we have a unique buffer
17
+ // and avoid issues with reusable WASM memory views
18
+ return new Float32Array(vector);
19
+ }
20
+
21
+ function isTestEnv() {
22
+ return process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
23
+ }
24
+
25
+ function normalizePath(value) {
26
+ if (typeof value !== 'string') return '';
27
+ return value.split(path.sep).join('/');
13
28
  }
14
29
 
15
30
  function globToRegExp(pattern) {
16
- let regex = "^";
17
- for (let i = 0; i < pattern.length; ) {
31
+ let regex = '^';
32
+ for (let i = 0; i < pattern.length; i += 1) {
18
33
  const char = pattern[i];
19
- if (char === "*") {
20
- if (pattern[i + 1] === "*") {
21
- if (pattern[i + 2] === "/") {
22
- regex += "(?:.*/)?";
23
- i += 3;
24
- } else {
25
- regex += ".*";
34
+ if (char === '*') {
35
+ if (pattern[i + 1] === '*') {
36
+ if (pattern[i + 2] === '/') {
37
+ regex += '(?:.*/)?';
26
38
  i += 2;
39
+ } else {
40
+ regex += '.*';
41
+ i += 1;
27
42
  }
28
43
  } else {
29
- regex += "[^/]*";
30
- i += 1;
44
+ regex += '[^/]*';
31
45
  }
32
- continue;
33
- }
34
- if (char === "?") {
35
- regex += "[^/]";
36
- i += 1;
37
- continue;
46
+ } else if (char === '?') {
47
+ regex += '[^/]';
48
+ } else if ('\\.[]{}()+-^$|'.includes(char)) {
49
+ regex += `\\${char}`;
50
+ } else {
51
+ regex += char;
38
52
  }
39
- regex += escapeRegExp(char);
40
- i += 1;
41
53
  }
42
- regex += "$";
54
+ regex += '$';
43
55
  return new RegExp(regex);
44
56
  }
45
57
 
46
- function normalizePath(filePath) {
47
- return filePath.split(path.sep).join("/");
48
- }
49
-
50
58
  function buildExcludeMatchers(patterns) {
51
- return [...new Set(patterns)]
52
- .filter(Boolean)
53
- .map(pattern => ({
54
- matchBase: !pattern.includes("/"),
55
- regex: globToRegExp(pattern)
56
- }));
59
+ if (!Array.isArray(patterns)) return [];
60
+ return patterns
61
+ .filter((pattern) => typeof pattern === 'string' && pattern.length > 0)
62
+ .map((pattern) => {
63
+ const normalized = pattern.replace(/\\/g, '/');
64
+ const matchBase = !normalized.includes('/');
65
+ return {
66
+ pattern: normalized,
67
+ matchBase,
68
+ regex: globToRegExp(normalized),
69
+ };
70
+ });
57
71
  }
58
72
 
59
73
  function matchesExcludePatterns(filePath, matchers) {
60
- if (matchers.length === 0) return false;
74
+ if (!filePath || matchers.length === 0) return false;
61
75
  const normalized = normalizePath(filePath);
62
- const basename = path.posix.basename(normalized);
63
-
76
+ const base = path.posix.basename(normalized);
64
77
  for (const matcher of matchers) {
65
- const target = matcher.matchBase ? basename : normalized;
66
- if (matcher.regex.test(target)) {
67
- return true;
68
- }
78
+ const target = matcher.matchBase ? base : normalized;
79
+ if (matcher.regex.test(target)) return true;
69
80
  }
70
81
  return false;
71
82
  }
72
83
 
73
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
74
-
75
84
  export class CodebaseIndexer {
76
85
  constructor(embedder, cache, config, server = null) {
77
86
  this.embedder = embedder;
@@ -82,54 +91,175 @@ export class CodebaseIndexer {
82
91
  this.workers = [];
83
92
  this.workerReady = [];
84
93
  this.isIndexing = false;
85
- this.excludeMatchers = buildExcludeMatchers(this.config.excludePatterns || []);
94
+ this.processingWatchEvents = false;
95
+ this.pendingWatchEvents = new Map();
96
+ const cacheRelative = this.getCacheRelativePath();
97
+ const autoExclude = ['.smart-coding-cache'];
98
+ if (cacheRelative) {
99
+ autoExclude.push(cacheRelative, `${cacheRelative}/**`);
100
+ }
101
+ this.excludeMatchers = buildExcludeMatchers([
102
+ ...autoExclude,
103
+ ...(this.config.excludePatterns || []),
104
+ ]);
105
+ this.gitignore = ignore();
106
+ this.workerFailureCount = 0;
107
+ this.workersDisabledUntil = 0;
108
+ this.workerCircuitOpen = false;
109
+ this._retryTimer = null;
110
+ this._lastProgress = null;
111
+ this.currentIndexMode = null;
112
+ }
113
+
114
+ maybeResetWorkerCircuit() {
115
+ if (
116
+ this.workerCircuitOpen &&
117
+ this.workersDisabledUntil &&
118
+ Date.now() >= this.workersDisabledUntil
119
+ ) {
120
+ this.workerCircuitOpen = false;
121
+ this.workersDisabledUntil = 0;
122
+ this.workerFailureCount = 0;
123
+ if (this.config.verbose) {
124
+ console.info('[Indexer] Worker circuit closed; resuming worker use');
125
+ }
126
+ }
127
+ }
128
+
129
+ shouldUseWorkers() {
130
+ this.maybeResetWorkerCircuit();
131
+ if (this.workersDisabledUntil && Date.now() < this.workersDisabledUntil) {
132
+ return false;
133
+ }
134
+ if (isTestEnv()) return false;
135
+ return os.cpus().length > 1 && this.config.workerThreads !== 0 && !this.config.embeddingProcessPerBatch;
136
+ }
137
+
138
+ scheduleRetry() {
139
+ if (this._retryTimer || isTestEnv()) return;
140
+ const delayMs = Math.max(1000, this.workersDisabledUntil - Date.now());
141
+ if (!Number.isFinite(delayMs) || delayMs <= 0) return;
142
+ this._retryTimer = setTimeout(() => {
143
+ this._retryTimer = null;
144
+ if (!this.isIndexing && !this.processingWatchEvents) {
145
+ this.indexAll().catch(() => null);
146
+ }
147
+ }, delayMs);
148
+ }
149
+
150
+ recordWorkerFailure(reason) {
151
+ const threshold = Number.isInteger(this.config.workerFailureThreshold)
152
+ ? this.config.workerFailureThreshold
153
+ : 1;
154
+ const cooldownMs = Number.isInteger(this.config.workerFailureCooldownMs)
155
+ ? this.config.workerFailureCooldownMs
156
+ : 10 * 60 * 1000;
157
+
158
+ this.workerFailureCount += 1;
159
+ console.warn(`[Indexer] Worker failure: ${reason} (${this.workerFailureCount}/${threshold})`);
160
+
161
+ if (this.workerFailureCount >= threshold) {
162
+ this.workersDisabledUntil = Date.now() + cooldownMs;
163
+ this.workerCircuitOpen = true;
164
+ console.warn(
165
+ `[Indexer] Worker circuit open; pausing worker use for ${Math.round(cooldownMs / 1000)}s`
166
+ );
167
+ this.scheduleRetry();
168
+ }
86
169
  }
87
170
 
88
171
  /**
89
172
  * Initialize worker thread pool for parallel embedding
90
173
  */
91
174
  async initializeWorkers() {
92
- const numWorkers = this.config.workerThreads === "auto"
93
- ? Math.min(4, Math.max(1, os.cpus().length - 1)) // Cap 'auto' at 4 workers
94
- : (this.config.workerThreads || 1);
175
+ // Check if we have any active workers
176
+ const activeWorkers = this.workers.filter(w => w !== null);
177
+ if (activeWorkers.length > 0) return;
178
+
179
+ // If we have workers array but they are all null, reset it
180
+ if (this.workers.length > 0) {
181
+ this.workers = [];
182
+ this.workerReady = [];
183
+ }
184
+
185
+ if (this.initWorkerPromise) return this.initWorkerPromise;
186
+
187
+ this.initWorkerPromise = (async () => {
188
+ try {
189
+ let numWorkers =
190
+ this.config.workerThreads === 'auto'
191
+ ? Math.min(2, Math.max(1, os.cpus().length - 1)) // Cap 'auto' at 2 workers
192
+ : typeof this.config.workerThreads === 'number'
193
+ ? this.config.workerThreads
194
+ : 1;
195
+
196
+ // Resource-aware scaling: check available RAM (skip in test env to avoid mocking issues)
197
+ // We apply this if we have > 1 worker, regardless of whether it was 'auto' or explicit
198
+ if (numWorkers > 1 && !isTestEnv() && typeof os.freemem === 'function') {
199
+ // Jina model typically requires ~1.5GB - 2GB per worker
200
+ const freeMemGb = os.freemem() / 1024 / 1024 / 1024;
201
+ const isHeavyModel = this.config.embeddingModel.includes('jina');
202
+ const memPerWorker = isHeavyModel ? 2.0 : 0.8;
203
+
204
+ const memCappedWorkers = Math.max(1, Math.floor(freeMemGb / memPerWorker));
205
+ if (memCappedWorkers < numWorkers) {
206
+ if (this.config.verbose) {
207
+ console.info(
208
+ `[Indexer] Throttling workers from ${numWorkers} to ${memCappedWorkers} due to available RAM (${freeMemGb.toFixed(1)}GB)`
209
+ );
210
+ }
211
+ numWorkers = memCappedWorkers;
212
+ }
213
+ }
95
214
 
96
- // Only use workers if we have more than 1 CPU
97
- if (numWorkers <= 1) {
98
- console.error("[Indexer] Single-threaded mode (1 CPU detected)");
215
+ // Use workers even for single worker to benefit from --expose-gc and separate heap
216
+ if (numWorkers < 1) {
217
+ console.info('[Indexer] No workers configured, using main thread (warning: higher RAM usage)');
99
218
  return;
100
219
  }
101
220
 
102
221
  if (this.config.verbose) {
103
- console.error(`[Indexer] Worker config: workerThreads=${this.config.workerThreads}, resolved to ${numWorkers}`);
222
+ console.info(
223
+ `[Indexer] Worker config: workerThreads=${this.config.workerThreads}, resolved to ${numWorkers}`
224
+ );
104
225
  }
105
226
 
106
- console.error(`[Indexer] Initializing ${numWorkers} worker threads...`);
227
+ // Force 1 thread per worker to prevent CPU saturation (ONNX is very aggressive)
228
+ const threadsPerWorker = 1;
107
229
 
108
- const workerPath = path.join(__dirname, "../lib/embedding-worker.js");
230
+ console.info(`[Indexer] Initializing ${numWorkers} worker threads (${threadsPerWorker} threads per worker)...`);
109
231
 
110
232
  for (let i = 0; i < numWorkers; i++) {
111
233
  try {
112
- const worker = new Worker(workerPath, {
234
+ const worker = new Worker(new URL('../lib/embedding-worker.js', import.meta.url), {
113
235
  workerData: {
236
+ workerId: i,
114
237
  embeddingModel: this.config.embeddingModel,
115
- verbose: this.config.verbose
116
- }
238
+ verbose: this.config.verbose,
239
+ numThreads: threadsPerWorker,
240
+ },
117
241
  });
118
242
 
119
243
  const readyPromise = new Promise((resolve, reject) => {
120
- const timeout = setTimeout(() => reject(new Error("Worker init timeout")), 120000);
244
+ const readyTimeoutMs = isTestEnv() ? 1000 : 120000;
245
+ const timeout = setTimeout(
246
+ () => reject(new Error('Worker init timeout')),
247
+ readyTimeoutMs
248
+ );
121
249
 
122
- worker.once("message", (msg) => {
250
+ worker.once('message', (msg) => {
123
251
  clearTimeout(timeout);
124
- if (msg.type === "ready") {
252
+ if (msg.type === 'ready') {
125
253
  resolve(worker);
126
- } else if (msg.type === "error") {
254
+ } else if (msg.type === 'error') {
255
+ console.warn(`[Indexer] Worker initialization failed: ${msg.error}`);
127
256
  reject(new Error(msg.error));
128
257
  }
129
258
  });
130
259
 
131
- worker.once("error", (err) => {
260
+ worker.once('error', (err) => {
132
261
  clearTimeout(timeout);
262
+ console.warn(`[Indexer] Worker initialization failed: ${err.message}`);
133
263
  reject(err);
134
264
  });
135
265
  });
@@ -137,40 +267,143 @@ export class CodebaseIndexer {
137
267
  this.workers.push(worker);
138
268
  this.workerReady.push(readyPromise);
139
269
  } catch (err) {
140
- console.error(`[Indexer] Failed to create worker ${i}: ${err.message}`);
270
+ console.warn(`[Indexer] Failed to create worker ${i}: ${err.message}`);
141
271
  }
142
272
  }
143
273
 
144
274
  // Wait for all workers to be ready
145
275
  try {
146
276
  await Promise.all(this.workerReady);
147
- console.error(`[Indexer] ${this.workers.length} workers ready`);
277
+ console.info(`[Indexer] ${this.workers.length} workers ready`);
148
278
  if (this.config.verbose) {
149
- console.error(`[Indexer] Each worker loaded model: ${this.config.embeddingModel}`);
279
+ console.info(`[Indexer] Each worker loaded model: ${this.config.embeddingModel}`);
150
280
  }
151
281
  } catch (err) {
152
- console.error(`[Indexer] Worker initialization failed: ${err.message}, falling back to single-threaded`);
282
+ console.warn(
283
+ `[Indexer] Worker initialization failed: ${err.message}, falling back to single-threaded`
284
+ );
153
285
  await this.terminateWorkers();
154
286
  }
287
+ } finally {
288
+ this.initWorkerPromise = null;
289
+ }
290
+ })();
291
+ return this.initWorkerPromise;
155
292
  }
156
293
 
157
294
  /**
158
295
  * Terminate all worker threads
159
296
  */
160
297
  async terminateWorkers() {
161
- const terminations = this.workers.map((worker) => {
298
+ const WORKER_SHUTDOWN_TIMEOUT = isTestEnv() ? 50 : 5000;
299
+ const terminations = this.workers
300
+ .filter(Boolean)
301
+ .map((worker) => {
162
302
  try {
163
- worker.postMessage({ type: "shutdown" });
164
- } catch {}
165
- return worker.terminate().catch(() => null);
166
- });
303
+ worker.postMessage({ type: 'shutdown' });
304
+ } catch { /* ignore */ }
305
+
306
+ let exited = false;
307
+ const exitPromise = new Promise((resolve) => {
308
+ worker.once('exit', () => {
309
+ exited = true;
310
+ resolve();
311
+ });
312
+ });
313
+ const timeoutPromise = delay(WORKER_SHUTDOWN_TIMEOUT);
314
+
315
+ return Promise.race([exitPromise, timeoutPromise]).then(() => {
316
+ if (!exited) {
317
+ const termination = worker.terminate?.();
318
+ return Promise.resolve(termination).catch(() => null);
319
+ }
320
+ return null;
321
+ });
322
+ });
167
323
  await Promise.all(terminations);
168
324
  this.workers = [];
169
325
  this.workerReady = [];
170
326
  }
171
327
 
328
+ async loadGitignore() {
329
+ if (!this.config.searchDirectory) {
330
+ this.gitignore = ignore();
331
+ return;
332
+ }
333
+ try {
334
+ const gitignorePath = path.join(this.config.searchDirectory, '.gitignore');
335
+ const content = await fs.readFile(gitignorePath, 'utf8');
336
+ this.gitignore = ignore().add(content);
337
+ if (this.config.verbose) console.info('[Indexer] Loaded .gitignore rules');
338
+ } catch (_e) {
339
+ // No .gitignore or error reading it
340
+ this.gitignore = ignore();
341
+ }
342
+ }
343
+
344
+ getCacheRelativePath() {
345
+ if (!this.config.cacheDirectory || !this.config.searchDirectory) return null;
346
+ const relative = path.relative(this.config.searchDirectory, this.config.cacheDirectory);
347
+ if (!relative || relative.startsWith('..') || path.isAbsolute(relative)) return null;
348
+ return normalizePath(relative);
349
+ }
350
+
172
351
  isExcluded(filePath) {
173
- return matchesExcludePatterns(filePath, this.excludeMatchers);
352
+ if (!filePath || typeof filePath !== 'string') {
353
+ return false;
354
+ }
355
+
356
+ let relative = filePath;
357
+ if (path.isAbsolute(filePath)) {
358
+ if (this.config.searchDirectory) {
359
+ relative = path.relative(this.config.searchDirectory, filePath);
360
+ if (!relative || relative.startsWith('..') || path.isAbsolute(relative)) {
361
+ return false;
362
+ }
363
+ } else {
364
+ const root = path.parse(filePath).root;
365
+ relative = filePath.slice(root.length);
366
+ }
367
+ }
368
+
369
+ relative = normalizePath(relative);
370
+
371
+ if (matchesExcludePatterns(relative, this.excludeMatchers)) return true;
372
+
373
+ if (this.gitignore.ignores(relative)) return true;
374
+
375
+ return false;
376
+ }
377
+
378
+ async replaceDeadWorker(index) {
379
+ if (this.config.verbose) console.info(`[Indexer] Replacing dead worker at index ${index}...`);
380
+
381
+ const newWorker = new Worker(new URL('../lib/embedding-worker.js', import.meta.url), {
382
+ workerData: {
383
+ workerId: index,
384
+ embeddingModel: this.config.embeddingModel,
385
+ verbose: this.config.verbose,
386
+ numThreads: 1,
387
+ },
388
+ });
389
+
390
+ // Wait for ready
391
+ await new Promise((resolve, reject) => {
392
+ const timeout = setTimeout(() => reject(new Error('Timeout')), 30000);
393
+ newWorker.once('message', (msg) => {
394
+ if (msg.type === 'ready') {
395
+ clearTimeout(timeout);
396
+ resolve();
397
+ }
398
+ });
399
+ newWorker.once('error', (err) => {
400
+ clearTimeout(timeout);
401
+ reject(err);
402
+ });
403
+ });
404
+
405
+ this.workers[index] = newWorker;
406
+ if (this.config.verbose) console.info(`[Indexer] Worker ${index} respawned successfully`);
174
407
  }
175
408
 
176
409
  /**
@@ -179,87 +412,319 @@ export class CodebaseIndexer {
179
412
  sendProgress(progress, total, message) {
180
413
  if (this.server) {
181
414
  try {
182
- this.server.sendNotification("notifications/progress", {
183
- progressToken: "indexing",
415
+ this.server.sendNotification('notifications/progress', {
416
+ progressToken: 'indexing',
184
417
  progress,
185
418
  total,
186
- message
419
+ message,
187
420
  });
188
- } catch (err) {
421
+ } catch (_err) {
189
422
  // Silently ignore if client doesn't support progress notifications
190
423
  }
191
424
  }
425
+ this.writeProgressFile(progress, total, message).catch(() => null);
426
+ }
427
+
428
+ async writeProgressFile(progress, total, message) {
429
+ if (!this.config.enableCache) return;
430
+
431
+ const payload = {
432
+ progress,
433
+ total,
434
+ message,
435
+ updatedAt: new Date().toISOString(),
436
+ indexMode: this.currentIndexMode || null,
437
+ workerCircuitOpen: !!this.workerCircuitOpen,
438
+ workersDisabledUntil: Number.isFinite(this.workersDisabledUntil)
439
+ ? this.workersDisabledUntil
440
+ : null,
441
+ };
442
+
443
+ const prev = this._lastProgress;
444
+ if (
445
+ prev &&
446
+ prev.progress === payload.progress &&
447
+ prev.total === payload.total &&
448
+ prev.message === payload.message
449
+ ) {
450
+ return;
451
+ }
452
+
453
+ this._lastProgress = payload;
454
+ try {
455
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
456
+ const progressPath = path.join(this.config.cacheDirectory, 'progress.json');
457
+ await fs.writeFile(progressPath, JSON.stringify(payload), 'utf-8');
458
+ } catch {
459
+ // ignore progress write errors
460
+ }
461
+ }
462
+
463
+ async processFilesWithWorkers(allFiles) {
464
+ const activeWorkers = this.workers
465
+ .map((worker, index) => ({ worker, index }))
466
+ .filter((entry) => entry.worker);
467
+
468
+ if (activeWorkers.length === 0) {
469
+ // Fallback: This method shouldn't be called if workers aren't available,
470
+ // but if it is, we return empty and let the caller handle legacy fallback.
471
+ return [];
472
+ }
473
+
474
+ const results = [];
475
+ const chunkSize = Math.ceil(allFiles.length / activeWorkers.length);
476
+ const workerPromises = [];
477
+ const configuredTimeout = Number.isInteger(this.config.workerBatchTimeoutMs)
478
+ ? this.config.workerBatchTimeoutMs
479
+ : 300000;
480
+ const WORKER_TIMEOUT = isTestEnv() ? 1000 : configuredTimeout;
481
+
482
+ for (let i = 0; i < activeWorkers.length; i++) {
483
+ const { worker, index: workerIndex } = activeWorkers[i];
484
+ const workerFiles = allFiles.slice(i * chunkSize, (i + 1) * chunkSize);
485
+ if (workerFiles.length === 0) continue;
486
+
487
+ if (this.config.verbose) {
488
+ console.info(`[Indexer] Worker ${workerIndex}: processing ${workerFiles.length} files`);
489
+ }
490
+
491
+ const promise = new Promise((resolve) => {
492
+ const batchId = `file-batch-${i}-${Date.now()}`;
493
+ const batchResults = [];
494
+
495
+ const killWorker = async () => {
496
+ try {
497
+ await worker.terminate?.();
498
+ } catch (_err) {
499
+ // ignore termination errors
500
+ }
501
+ this.workers[workerIndex] = null;
502
+ this.replaceDeadWorker(workerIndex).catch(() => {});
503
+ };
504
+
505
+ const handleTimeout = () => {
506
+ // Terminate first to ensure no more messages arrive
507
+ void killWorker();
508
+ worker.off('message', handler);
509
+ worker.off('error', errorHandler);
510
+ console.warn(`[Indexer] Worker ${workerIndex} timed out (files)`);
511
+ this.recordWorkerFailure(`timeout (batch ${batchId})`);
512
+ resolve([]);
513
+ };
514
+
515
+ let timeout = setTimeout(handleTimeout, WORKER_TIMEOUT);
516
+
517
+ const finalize = (results) => {
518
+ clearTimeout(timeout);
519
+ worker.off('message', handler);
520
+ worker.off('error', errorHandler);
521
+ resolve(results);
522
+ };
523
+
524
+ const handler = (msg) => {
525
+ if (msg.batchId === batchId) {
526
+ if (msg.type === 'results') {
527
+ if (Array.isArray(msg.results)) {
528
+ batchResults.push(...msg.results);
529
+ }
530
+ if (msg.done) {
531
+ finalize(batchResults);
532
+ }
533
+ } else if (msg.type === 'error') {
534
+ finalize([]);
535
+ }
536
+ }
537
+ };
538
+
539
+ const errorHandler = (err) => {
540
+ console.warn(`[Indexer] Worker ${workerIndex} crashed: ${err.message}`);
541
+ this.recordWorkerFailure(`crash (${err.message})`);
542
+ void killWorker();
543
+ finalize([]);
544
+ };
545
+
546
+ worker.once('error', errorHandler);
547
+ worker.on('message', handler);
548
+
549
+ try {
550
+ worker.postMessage({ type: 'processFiles', files: workerFiles, batchId, chunkConfig: this.config });
551
+ } catch (_error) {
552
+ finalize([]);
553
+ }
554
+ });
555
+
556
+ workerPromises.push({ promise, files: workerFiles });
557
+ }
558
+
559
+ const workerResults = await Promise.all(workerPromises.map((p) => p.promise));
560
+
561
+ // Identify failed files for retry
562
+ const failedFiles = [];
563
+ for (let i = 0; i < workerResults.length; i++) {
564
+ if (workerResults[i].length > 0) {
565
+ results.push(...workerResults[i]);
566
+ } else if (workerPromises[i].files.length > 0) {
567
+ failedFiles.push(...workerPromises[i].files);
568
+ }
569
+ }
570
+
571
+ // Pass failed files back to be handled by legacy path
572
+ if (failedFiles.length > 0) {
573
+ if (this.config.verbose) {
574
+ console.warn(`[Indexer] ${failedFiles.length} files failed in workers, falling back to main thread`);
575
+ }
576
+ // Mark these as failed in the results so the caller knows to process them manually
577
+ for (const f of failedFiles) {
578
+ results.push({ file: f.file, status: 'retry' });
579
+ }
580
+ }
581
+
582
+ return results;
192
583
  }
193
584
 
194
585
  /**
195
586
  * Process chunks using worker thread pool with timeout and error recovery
196
587
  */
197
588
  async processChunksWithWorkers(allChunks) {
198
- if (this.workers.length === 0) {
589
+ const activeWorkers = this.workers
590
+ .map((worker, index) => ({ worker, index }))
591
+ .filter((entry) => entry.worker);
592
+
593
+ if (activeWorkers.length === 0) {
199
594
  // Fallback to single-threaded processing
200
595
  return this.processChunksSingleThreaded(allChunks);
201
596
  }
202
597
 
203
598
  const results = [];
204
- const chunkSize = Math.ceil(allChunks.length / this.workers.length);
599
+ const allowSingleThreadFallback = this.config.allowSingleThreadFallback !== false;
600
+ const chunkSize = Math.ceil(allChunks.length / activeWorkers.length);
205
601
  const workerPromises = [];
206
- const WORKER_TIMEOUT = 300000; // 5 minutes per batch
602
+ const configuredTimeout = Number.isInteger(this.config.workerBatchTimeoutMs)
603
+ ? this.config.workerBatchTimeoutMs
604
+ : 300000;
605
+ const WORKER_TIMEOUT = isTestEnv() ? 1000 : configuredTimeout; // 1s in tests, configurable in prod
207
606
 
208
607
  if (this.config.verbose) {
209
- console.error(`[Indexer] Distributing ${allChunks.length} chunks across ${this.workers.length} workers (~${chunkSize} chunks each)`);
608
+ console.info(
609
+ `[Indexer] Distributing ${allChunks.length} chunks across ${activeWorkers.length} workers (~${chunkSize} chunks each)`
610
+ );
210
611
  }
211
612
 
212
- for (let i = 0; i < this.workers.length; i++) {
613
+ for (let i = 0; i < activeWorkers.length; i++) {
614
+ const { worker, index: workerIndex } = activeWorkers[i];
213
615
  const workerChunks = allChunks.slice(i * chunkSize, (i + 1) * chunkSize);
214
616
  if (workerChunks.length === 0) continue;
215
617
 
216
618
  if (this.config.verbose) {
217
- console.error(`[Indexer] Worker ${i}: processing ${workerChunks.length} chunks`);
619
+ console.info(`[Indexer] Worker ${workerIndex}: processing ${workerChunks.length} chunks`);
218
620
  }
219
621
 
220
- const promise = new Promise((resolve, reject) => {
221
- const worker = this.workers[i];
622
+ const promise = new Promise((resolve, _reject) => {
222
623
  const batchId = `batch-${i}-${Date.now()}`;
624
+ const batchResults = [];
223
625
 
224
626
  // Timeout handler
225
- const timeout = setTimeout(() => {
226
- worker.off("message", handler);
227
- console.error(`[Indexer] Worker ${i} timed out, falling back to single-threaded for this batch`);
627
+ const killWorker = async () => {
628
+ try {
629
+ await worker.terminate?.();
630
+ } catch {
631
+ // ignore terminate errors
632
+ }
633
+ this.workers[workerIndex] = null;
634
+
635
+ // Attempt to replace the dead worker asynchronously
636
+ this.replaceDeadWorker(workerIndex).catch(err => {
637
+ console.warn(`[Indexer] Failed to replace worker ${workerIndex}: ${err.message}`);
638
+ });
639
+ };
640
+
641
+ const handleTimeout = (label) => {
642
+ // Terminate first to ensure no more messages arrive
643
+ void killWorker();
644
+ worker.off('message', handler);
645
+ worker.off('error', errorHandler);
646
+ console.warn(
647
+ `[Indexer] Worker ${workerIndex} timed out, ${label}`
648
+ );
649
+ this.recordWorkerFailure(`timeout (batch ${batchId})`);
228
650
  // Return empty and let fallback handle it
229
651
  resolve([]);
230
- }, WORKER_TIMEOUT);
652
+ };
653
+
654
+ let timeout = setTimeout(
655
+ () => handleTimeout('killing worker and falling back to single-threaded for this batch'),
656
+ WORKER_TIMEOUT
657
+ );
658
+
659
+ const resetTimeout = () => {
660
+ clearTimeout(timeout);
661
+ timeout = setTimeout(
662
+ () => handleTimeout('killing worker and falling back to single-threaded for this batch'),
663
+ WORKER_TIMEOUT
664
+ );
665
+ };
666
+
667
+ let exitHandler;
668
+
669
+ const finalize = (results) => {
670
+ clearTimeout(timeout);
671
+ worker.off('message', handler);
672
+ worker.off('error', errorHandler);
673
+ if (exitHandler) worker.off('exit', exitHandler);
674
+ resolve(results);
675
+ };
231
676
 
232
677
  const handler = (msg) => {
233
678
  if (msg.batchId === batchId) {
234
- clearTimeout(timeout);
235
- worker.off("message", handler);
236
- if (msg.type === "results") {
237
- resolve(msg.results);
238
- } else if (msg.type === "error") {
239
- console.error(`[Indexer] Worker ${i} error: ${msg.error}`);
240
- resolve([]); // Return empty, don't reject - let fallback handle
679
+ resetTimeout();
680
+ if (msg.type === 'results') {
681
+ if (Array.isArray(msg.results) && msg.results.length > 0) {
682
+ batchResults.push(...msg.results);
683
+ }
684
+ if (msg.done === false) {
685
+ return;
686
+ }
687
+ finalize(batchResults);
688
+ } else if (msg.type === 'error') {
689
+ console.warn(`[Indexer] Worker ${workerIndex} error: ${msg.error}`);
690
+ finalize([]); // Return empty, don't reject - let fallback handle
241
691
  }
242
692
  }
243
693
  };
244
694
 
245
695
  // Handle worker crash
246
696
  const errorHandler = (err) => {
247
- clearTimeout(timeout);
248
- worker.off("message", handler);
249
- console.error(`[Indexer] Worker ${i} crashed: ${err.message}`);
250
- resolve([]); // Return empty, don't reject
697
+ console.warn(`[Indexer] Worker ${workerIndex} crashed: ${err.message}`);
698
+ this.recordWorkerFailure(`crash (${err.message})`);
699
+ void killWorker();
700
+ finalize([]); // Return empty, don't reject
251
701
  };
252
- worker.once("error", errorHandler);
702
+ worker.once('error', errorHandler);
703
+
704
+ exitHandler = (code) => {
705
+ if (code !== 0) {
706
+ console.warn(`[Indexer] Worker ${workerIndex} exited unexpectedly with code ${code}`);
707
+ this.recordWorkerFailure(`exit ${code}`);
708
+ void killWorker();
709
+ finalize([]);
710
+ }
711
+ };
712
+ worker.once('exit', exitHandler);
253
713
 
254
- worker.on("message", handler);
255
- worker.postMessage({ type: "process", chunks: workerChunks, batchId });
714
+ worker.on('message', handler);
715
+ try {
716
+ worker.postMessage({ type: 'process', chunks: workerChunks, batchId });
717
+ } catch (error) {
718
+ console.warn(`[Indexer] Worker ${i} postMessage failed: ${error.message}`);
719
+ finalize([]);
720
+ }
256
721
  });
257
722
 
258
723
  workerPromises.push({ promise, chunks: workerChunks });
259
724
  }
260
725
 
261
726
  // Wait for all workers with error recovery
262
- const workerResults = await Promise.all(workerPromises.map(p => p.promise));
727
+ const workerResults = await Promise.all(workerPromises.map((p) => p.promise));
263
728
 
264
729
  // Collect results and identify failed chunks that need retry
265
730
  const failedChunks = [];
@@ -273,39 +738,128 @@ export class CodebaseIndexer {
273
738
  }
274
739
 
275
740
  // Retry failed chunks with single-threaded fallback
276
- if (failedChunks.length > 0) {
277
- console.error(`[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`);
741
+ if (failedChunks.length > 0 && allowSingleThreadFallback) {
742
+ console.warn(
743
+ `[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`
744
+ );
278
745
  const retryResults = await this.processChunksSingleThreaded(failedChunks);
279
746
  results.push(...retryResults);
747
+ } else if (failedChunks.length > 0) {
748
+ console.warn(
749
+ `[Indexer] Skipping ${failedChunks.length} chunks (single-threaded fallback disabled)`
750
+ );
280
751
  }
281
752
 
282
753
  return results;
283
754
  }
284
755
 
756
+ async processChunksInChildProcess(chunks) {
757
+ const nodePath = process.execPath || 'node';
758
+ const scriptPath = fileURLToPath(new URL('../lib/embedding-process.js', import.meta.url));
759
+ const payload = {
760
+ embeddingModel: this.config.embeddingModel,
761
+ chunks,
762
+ numThreads: 1,
763
+ };
764
+
765
+ return new Promise((resolve) => {
766
+ const child = spawn(nodePath, [scriptPath], {
767
+ stdio: ['pipe', 'pipe', 'pipe'],
768
+ });
769
+
770
+ let stdout = '';
771
+ let stderr = '';
772
+ child.stdout.on('data', (chunk) => {
773
+ stdout += chunk.toString();
774
+ });
775
+ child.stderr.on('data', (chunk) => {
776
+ stderr += chunk.toString();
777
+ });
778
+
779
+ const timeoutMs = Number.isInteger(this.config.workerBatchTimeoutMs)
780
+ ? this.config.workerBatchTimeoutMs
781
+ : 120000;
782
+ const timeout = setTimeout(() => {
783
+ try {
784
+ child.kill('SIGKILL');
785
+ } catch {
786
+ // ignore
787
+ }
788
+ this.recordWorkerFailure('child process timeout');
789
+ resolve([]);
790
+ }, timeoutMs);
791
+
792
+ child.on('error', (err) => {
793
+ clearTimeout(timeout);
794
+ this.recordWorkerFailure(`child process error (${err.message})`);
795
+ resolve([]);
796
+ });
797
+
798
+ child.on('close', (code, signal) => {
799
+ clearTimeout(timeout);
800
+ if (code !== 0) {
801
+ this.recordWorkerFailure(
802
+ `child process exited (${code ?? 'null'}${signal ? `, signal=${signal}` : ''})`
803
+ );
804
+ if (stderr) {
805
+ console.warn(`[Indexer] Child process error: ${stderr.trim()}`);
806
+ }
807
+ return resolve([]);
808
+ }
809
+ try {
810
+ const parsed = JSON.parse(stdout);
811
+ resolve(parsed?.results || []);
812
+ } catch (err) {
813
+ this.recordWorkerFailure(`child process parse error (${err.message})`);
814
+ resolve([]);
815
+ }
816
+ });
817
+
818
+ child.stdin.end(JSON.stringify(payload));
819
+ });
820
+ }
821
+
285
822
  /**
286
823
  * Single-threaded chunk processing (fallback)
287
824
  */
288
825
  async processChunksSingleThreaded(chunks) {
289
826
  const results = [];
290
827
 
828
+ // Manual GC and yield loop to prevent CPU lockup
829
+ let processedSinceGc = 0;
830
+
291
831
  for (const chunk of chunks) {
832
+ // Throttle speed (balanced) - yield to event loop but don't wait unnecessarily
833
+ await delay(0);
834
+
292
835
  try {
293
- const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });
294
- results.push({
295
- file: chunk.file,
296
- startLine: chunk.startLine,
297
- endLine: chunk.endLine,
298
- content: chunk.text,
299
- vector: Array.from(output.data),
300
- success: true
301
- });
836
+ const output = await this.embedder(chunk.text, {
837
+ pooling: 'mean',
838
+ normalize: true,
839
+ });
840
+ results.push({
841
+ file: chunk.file,
842
+ startLine: chunk.startLine,
843
+ endLine: chunk.endLine,
844
+ content: chunk.text,
845
+ vector: toFloat32Array(output.data),
846
+ success: true,
847
+ });
848
+
849
+ // Periodic GC to prevent memory creep (only if flag is present)
850
+ processedSinceGc++;
851
+ // Removed manual GC call to prevent performance degradation
852
+ if (processedSinceGc >= 50) {
853
+ processedSinceGc = 0;
854
+ }
855
+
302
856
  } catch (error) {
303
857
  results.push({
304
858
  file: chunk.file,
305
859
  startLine: chunk.startLine,
306
860
  endLine: chunk.endLine,
307
861
  error: error.message,
308
- success: false
862
+ success: false,
309
863
  });
310
864
  }
311
865
  }
@@ -317,12 +871,12 @@ export class CodebaseIndexer {
317
871
  const fileName = path.basename(file);
318
872
  if (this.isExcluded(file)) {
319
873
  if (this.config.verbose) {
320
- console.error(`[Indexer] Skipped ${fileName} (excluded by pattern)`);
874
+ console.info(`[Indexer] Skipped ${fileName} (excluded by pattern)`);
321
875
  }
322
876
  return 0;
323
877
  }
324
878
  if (this.config.verbose) {
325
- console.error(`[Indexer] Processing: ${fileName}...`);
879
+ console.info(`[Indexer] Processing: ${fileName}...`);
326
880
  }
327
881
 
328
882
  try {
@@ -336,62 +890,115 @@ export class CodebaseIndexer {
336
890
 
337
891
  if (stats.size > this.config.maxFileSize) {
338
892
  if (this.config.verbose) {
339
- console.error(`[Indexer] Skipped ${fileName} (too large: ${(stats.size / 1024 / 1024).toFixed(2)}MB)`);
893
+ console.warn(
894
+ `[Indexer] Skipped ${fileName} (too large: ${(stats.size / 1024 / 1024).toFixed(2)}MB)`
895
+ );
340
896
  }
341
897
  return 0;
342
898
  }
343
899
 
344
- const content = await fs.readFile(file, "utf-8");
900
+ const content = await fs.readFile(file, 'utf-8');
345
901
  const hash = hashContent(content);
346
902
 
347
903
  // Skip if file hasn't changed
348
- if (this.cache.getFileHash(file) === hash) {
904
+ const cachedHash = typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
905
+ if (cachedHash === hash) {
349
906
  if (this.config.verbose) {
350
- console.error(`[Indexer] Skipped ${fileName} (unchanged)`);
907
+ console.info(`[Indexer] Skipped ${fileName} (unchanged)`);
351
908
  }
909
+ // Still update metadata (size, mtime) even if hash is same
910
+ this.cache.setFileHash(file, hash, stats);
352
911
  return 0;
353
912
  }
354
913
 
355
914
  if (this.config.verbose) {
356
- console.error(`[Indexer] Indexing ${fileName}...`);
915
+ console.info(`[Indexer] Indexing ${fileName}...`);
357
916
  }
358
917
 
359
- // Remove old chunks for this file
360
- this.cache.removeFileFromStore(file);
918
+ // Extract call graph data if enabled
919
+ let callData = null;
920
+ if (this.config.callGraphEnabled) {
921
+ try {
922
+ callData = extractCallData(content, file);
923
+ } catch (err) {
924
+ if (this.config.verbose) {
925
+ console.warn(
926
+ `[Indexer] Call graph extraction failed for ${fileName}: ${err.message}`
927
+ );
928
+ }
929
+ }
930
+ }
361
931
 
362
- const chunks = smartChunk(content, file, this.config);
932
+ const rawChunks = smartChunk(content, file, this.config);
933
+ const chunks = Array.isArray(rawChunks) ? rawChunks : [];
363
934
  let addedChunks = 0;
935
+ let successChunks = 0;
364
936
  let failedChunks = 0;
937
+ const newChunks = [];
365
938
 
366
- for (const chunk of chunks) {
367
- try {
368
- const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });
939
+ // Use workers for watcher-triggered embedding to keep main thread responsive
940
+ const useWorkers = this.shouldUseWorkers();
941
+ if (useWorkers && this.workers.length === 0) {
942
+ await this.initializeWorkers();
943
+ }
944
+
945
+ const chunksToProcess = chunks.map((c) => ({
946
+ file,
947
+ text: c.text,
948
+ startLine: c.startLine,
949
+ endLine: c.endLine
950
+ }));
369
951
 
370
- this.cache.addToStore({
952
+ let results = [];
953
+ if (useWorkers && this.workers.length > 0) {
954
+ results = await this.processChunksWithWorkers(chunksToProcess);
955
+ } else {
956
+ results = await this.processChunksSingleThreaded(chunksToProcess);
957
+ }
958
+
959
+ for (const result of results) {
960
+ if (result.success) {
961
+ newChunks.push({
371
962
  file,
372
- startLine: chunk.startLine,
373
- endLine: chunk.endLine,
374
- content: chunk.text,
375
- vector: Array.from(output.data)
963
+ startLine: result.startLine,
964
+ endLine: result.endLine,
965
+ content: result.content,
966
+ vector: toFloat32Array(result.vector),
376
967
  });
377
968
  addedChunks++;
378
- } catch (embeddingError) {
969
+ successChunks++;
970
+ } else {
971
+ console.warn(`[Indexer] Failed to embed chunk in ${fileName}:`, result.error);
379
972
  failedChunks++;
380
- console.error(`[Indexer] Failed to embed chunk in ${fileName}:`, embeddingError.message);
381
973
  }
382
974
  }
383
975
 
384
- if (chunks.length === 0 || failedChunks === 0) {
385
- this.cache.setFileHash(file, hash);
976
+ const totalChunks = chunks.length;
977
+ const allSucceeded = totalChunks === 0 || failedChunks === 0;
978
+
979
+ if (allSucceeded) {
980
+ this.cache.removeFileFromStore(file);
981
+ for (const chunk of newChunks) {
982
+ this.cache.addToStore(chunk);
983
+ }
984
+ this.cache.setFileHash(file, hash, stats);
985
+ if (this.config.callGraphEnabled && callData) {
986
+ this.cache.setFileCallData(file, callData);
987
+ }
386
988
  } else if (this.config.verbose) {
387
- console.error(`[Indexer] Skipped hash update for ${fileName} (${addedChunks}/${chunks.length} chunks embedded)`);
989
+ console.warn(
990
+ `[Indexer] Skipped hash update for ${fileName} (${successChunks}/${totalChunks} chunks embedded)`
991
+ );
388
992
  }
993
+
389
994
  if (this.config.verbose) {
390
- console.error(`[Indexer] Completed ${fileName} (${addedChunks} chunks)`);
995
+ console.info(`[Indexer] Completed ${fileName} (${addedChunks} chunks)`);
391
996
  }
392
997
  return addedChunks;
393
998
  } catch (error) {
394
- console.error(`[Indexer] Error indexing ${fileName}:`, error.message);
999
+ if (this.config.verbose) {
1000
+ console.warn(`[Indexer] Error indexing ${fileName}:`, error.message);
1001
+ }
395
1002
  return 0;
396
1003
  }
397
1004
  }
@@ -404,40 +1011,39 @@ export class CodebaseIndexer {
404
1011
  const startTime = Date.now();
405
1012
 
406
1013
  // Build extension filter from config
407
- const extensions = new Set(this.config.fileExtensions.map(ext => `.${ext}`));
1014
+ const extensions = new Set(this.config.fileExtensions.map((ext) => `.${ext}`));
1015
+ const allowedFileNames = new Set(this.config.fileNames || []);
408
1016
 
409
- // Extract directory names from glob patterns in config.excludePatterns
410
- // Patterns like "**/node_modules/**" -> "node_modules"
411
- const excludeDirs = new Set();
412
- for (const pattern of this.config.excludePatterns) {
413
- // Extract directory names from glob patterns
414
- const match = pattern.match(/\*\*\/([^/*]+)\/?\*?\*?$/);
415
- if (match) {
416
- excludeDirs.add(match[1]);
417
- }
418
- // Also handle patterns like "**/dirname/**"
419
- const match2 = pattern.match(/\*\*\/([^/*]+)\/\*\*$/);
420
- if (match2) {
421
- excludeDirs.add(match2[1]);
422
- }
423
- }
424
-
425
- // Always exclude cache directory
426
- excludeDirs.add(".smart-coding-cache");
1017
+ // Load .gitignore before discovery
1018
+ await this.loadGitignore();
427
1019
 
428
- if (this.config.verbose) {
429
- console.error(`[Indexer] Using ${excludeDirs.size} exclude directories from config`);
1020
+ if (!this.config.searchDirectory) {
1021
+ return [];
430
1022
  }
431
1023
 
432
1024
  const api = new fdir()
433
1025
  .withFullPaths()
434
- .exclude((dirName) => excludeDirs.has(dirName))
435
- .filter((filePath) => extensions.has(path.extname(filePath)) && !this.isExcluded(filePath))
1026
+ .exclude((dirName, dirPath) => {
1027
+ // Always exclude specific heavy folders immediately
1028
+ if (dirName === 'node_modules' || dirName === '.git' || dirName === '.smart-coding-cache') return true;
1029
+
1030
+ // Check exclusion rules for directories
1031
+ const fullPath = path.join(dirPath, dirName);
1032
+ return this.isExcluded(fullPath);
1033
+ })
1034
+ .filter((filePath) => {
1035
+ if (this.isExcluded(filePath)) return false;
1036
+
1037
+ // Check extensions/filenames
1038
+ const base = path.basename(filePath);
1039
+ const ext = path.extname(filePath);
1040
+ return (extensions.has(ext) || allowedFileNames.has(base));
1041
+ })
436
1042
  .crawl(this.config.searchDirectory);
437
1043
 
438
1044
  const files = await api.withPromise();
439
1045
 
440
- console.error(`[Indexer] File discovery: ${files.length} files in ${Date.now() - startTime}ms`);
1046
+ console.info(`[Indexer] File discovery: ${files.length} files in ${Date.now() - startTime}ms`);
441
1047
  return files;
442
1048
  }
443
1049
 
@@ -450,13 +1056,17 @@ export class CodebaseIndexer {
450
1056
  const skippedCount = { unchanged: 0, tooLarge: 0, error: 0 };
451
1057
 
452
1058
  // Process in parallel batches for speed
453
- const BATCH_SIZE = 500;
1059
+ // We fetch stats for 100 files at a time to keep IO efficient
1060
+ const STAT_BATCH_SIZE = Math.min(100, this.config.batchSize || 100);
1061
+ // Limit concurrent file reads to 50MB to prevent OOM
1062
+ const MAX_READ_BATCH_BYTES = 50 * 1024 * 1024;
454
1063
 
455
- for (let i = 0; i < files.length; i += BATCH_SIZE) {
456
- const batch = files.slice(i, i + BATCH_SIZE);
1064
+ for (let i = 0; i < files.length; i += STAT_BATCH_SIZE) {
1065
+ const batchFiles = files.slice(i, i + STAT_BATCH_SIZE);
457
1066
 
458
- const results = await Promise.all(
459
- batch.map(async (file) => {
1067
+ // 1. Get stats for all files in this batch parallel
1068
+ const fileStats = await Promise.all(
1069
+ batchFiles.map(async (file) => {
460
1070
  try {
461
1071
  const stats = await fs.stat(file);
462
1072
 
@@ -469,112 +1079,202 @@ export class CodebaseIndexer {
469
1079
  return null;
470
1080
  }
471
1081
 
472
- const content = await fs.readFile(file, "utf-8");
473
- const hash = hashContent(content);
474
-
475
- if (this.cache.getFileHash(file) === hash) {
476
- skippedCount.unchanged++;
477
- return null;
478
- }
479
-
480
- return { file, content, hash };
481
- } catch (error) {
1082
+ return { file, size: stats.size, mtimeMs: stats.mtimeMs };
1083
+ } catch (_err) {
482
1084
  skippedCount.error++;
483
1085
  return null;
484
1086
  }
485
1087
  })
486
1088
  );
487
1089
 
488
- for (const result of results) {
489
- if (result) filesToProcess.push(result);
1090
+ // 2. Process valid files in size-constrained sub-batches
1091
+ let currentReadBatch = [];
1092
+ let currentReadBytes = 0;
1093
+
1094
+ const processReadBatch = async (batch) => {
1095
+ const results = await Promise.all(
1096
+ batch.map(async ({ file, size, mtimeMs }) => {
1097
+ // Check if we have cached metadata for this file
1098
+ const cachedHash =
1099
+ typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
1100
+ const cachedMeta = this.cache.getFileMeta ? this.cache.getFileMeta(file) : null;
1101
+
1102
+ if (cachedHash && cachedMeta &&
1103
+ Number.isFinite(cachedMeta.mtimeMs) && cachedMeta.mtimeMs === mtimeMs &&
1104
+ Number.isFinite(cachedMeta.size) && cachedMeta.size === size) {
1105
+ // Metadata matches exactly, skip reading/hashing
1106
+ skippedCount.unchanged++;
1107
+ return null;
1108
+ }
1109
+
1110
+ // Suspect file: Either new, or metadata changed.
1111
+ // We pass it to indexAll with the cachedHash as 'expectedHash'
1112
+ // so workers can perform the actual hashing and unchanged check.
1113
+ return { file, hash: null, expectedHash: cachedHash, force: false, size, mtimeMs };
1114
+ })
1115
+ );
1116
+
1117
+ for (const result of results) {
1118
+ if (result) filesToProcess.push(result);
1119
+ }
1120
+ };
1121
+
1122
+ for (const item of fileStats) {
1123
+ if (!item) continue;
1124
+
1125
+ if (
1126
+ currentReadBytes + item.size > MAX_READ_BATCH_BYTES &&
1127
+ currentReadBatch.length > 0
1128
+ ) {
1129
+ await processReadBatch(currentReadBatch);
1130
+ currentReadBatch = [];
1131
+ currentReadBytes = 0;
1132
+ }
1133
+
1134
+ currentReadBatch.push(item);
1135
+ currentReadBytes += item.size;
1136
+ }
1137
+
1138
+ if (currentReadBatch.length > 0) {
1139
+ await processReadBatch(currentReadBatch);
1140
+ }
1141
+
1142
+ // Pre-warm HybridSearch cache if available
1143
+ if (this.server && this.server.hybridSearch && this.server.hybridSearch.fileModTimes) {
1144
+ for (const stat of fileStats) {
1145
+ if (stat && stat.file && typeof stat.mtimeMs === 'number') {
1146
+ this.server.hybridSearch.fileModTimes.set(stat.file, stat.mtimeMs);
1147
+ }
1148
+ }
490
1149
  }
491
1150
  }
492
1151
 
493
- console.error(`[Indexer] Pre-filter: ${filesToProcess.length} changed, ${skippedCount.unchanged} unchanged, ${skippedCount.tooLarge} too large, ${skippedCount.error} errors (${Date.now() - startTime}ms)`);
1152
+ if (this.config.verbose) {
1153
+ console.info(
1154
+ `[Indexer] Pre-filter: ${filesToProcess.length} changed, ${skippedCount.unchanged} unchanged, ${skippedCount.tooLarge} too large, ${skippedCount.error} errors (${Date.now() - startTime}ms)`
1155
+ );
1156
+ }
1157
+
1158
+
1159
+
494
1160
  return filesToProcess;
495
1161
  }
496
1162
 
497
1163
  async indexAll(force = false) {
498
- if (this.isIndexing) {
499
- console.error("[Indexer] Indexing already in progress, skipping concurrent request");
500
- return { skipped: true, reason: "Indexing already in progress" };
1164
+ if (this.isIndexing || this.processingWatchEvents) {
1165
+ console.warn('[Indexer] Indexing already in progress, skipping concurrent request');
1166
+ return {
1167
+ skipped: true,
1168
+ reason: 'Indexing already in progress or pending file updates are being applied',
1169
+ };
501
1170
  }
502
1171
 
503
1172
  this.isIndexing = true;
1173
+ let memoryTimer = null;
1174
+ const logMemory = (label) => {
1175
+ if (!this.config.verbose) return;
1176
+ const { rss, heapUsed, heapTotal } = process.memoryUsage();
1177
+ const toMb = (value) => `${(value / 1024 / 1024).toFixed(1)}MB`;
1178
+ console.info(
1179
+ `[Indexer] Memory ${label}: rss=${toMb(rss)} heap=${toMb(heapUsed)}/${toMb(heapTotal)}`,
1180
+ );
1181
+ };
504
1182
 
505
1183
  try {
1184
+ logMemory('start');
1185
+ if (this.config.verbose) {
1186
+ memoryTimer = setInterval(() => logMemory('periodic'), 15000);
1187
+ }
1188
+
506
1189
  if (force) {
507
- console.error("[Indexer] Force reindex requested: clearing cache");
508
- this.cache.setVectorStore([]);
509
- this.cache.fileHashes = new Map();
510
- await this.cache.clearCallGraphData({ removeFile: true });
1190
+ console.info('[Indexer] Force reindex requested: clearing cache');
1191
+ await this.cache.reset();
1192
+ } else {
1193
+ if (typeof this.cache.ensureLoaded === 'function') {
1194
+ await this.cache.ensureLoaded();
1195
+ }
511
1196
  }
512
1197
 
513
1198
  const totalStartTime = Date.now();
514
- console.error(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);
515
-
516
- // Step 1: Fast file discovery with fdir
517
- const files = await this.discoverFiles();
518
-
519
- if (files.length === 0) {
520
- console.error("[Indexer] No files found to index");
521
- this.sendProgress(100, 100, "No files found to index");
522
- return { skipped: false, filesProcessed: 0, chunksCreated: 0, message: "No files found to index" };
523
- }
1199
+ const indexStartedAt = new Date(totalStartTime).toISOString();
1200
+ let indexMode = force
1201
+ ? 'full'
1202
+ : this.cache.getVectorStore().length === 0
1203
+ ? 'initial'
1204
+ : 'incremental';
1205
+ this.currentIndexMode = indexMode;
1206
+ this.sendProgress(0, 100, 'Indexing started');
1207
+ console.info(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);
1208
+
1209
+ // Step 1: Fast file discovery with fdir
1210
+ const files = await this.discoverFiles();
1211
+
1212
+ if (files.length === 0) {
1213
+ console.info('[Indexer] No files found to index');
1214
+ this.sendProgress(100, 100, 'No files found to index');
1215
+ return {
1216
+ skipped: false,
1217
+ filesProcessed: 0,
1218
+ chunksCreated: 0,
1219
+ message: 'No files found to index',
1220
+ };
1221
+ }
524
1222
 
525
- // Send progress: discovery complete
526
- this.sendProgress(5, 100, `Discovered ${files.length} files`);
1223
+ // Send progress: discovery complete
1224
+ this.sendProgress(5, 100, `Discovered ${files.length} files`);
527
1225
 
528
- const currentFilesSet = new Set(files);
1226
+ const currentFilesSet = new Set(files);
529
1227
 
530
- // Step 1.5: Prune deleted or excluded files from cache
531
- if (!force) {
532
- const cachedFiles = Array.from(this.cache.fileHashes.keys());
533
- let prunedCount = 0;
1228
+ // Step 1.5: Prune deleted or excluded files from cache
1229
+ if (!force) {
1230
+ const cachedFiles =
1231
+ typeof this.cache.getFileHashKeys === 'function' ? this.cache.getFileHashKeys() : [];
1232
+ let prunedCount = 0;
534
1233
 
535
- for (const cachedFile of cachedFiles) {
536
- if (!currentFilesSet.has(cachedFile)) {
537
- this.cache.removeFileFromStore(cachedFile);
538
- this.cache.deleteFileHash(cachedFile);
539
- prunedCount++;
1234
+ for (const cachedFile of cachedFiles) {
1235
+ if (!currentFilesSet.has(cachedFile)) {
1236
+ this.cache.removeFileFromStore(cachedFile);
1237
+ this.cache.deleteFileHash(cachedFile);
1238
+ prunedCount++;
1239
+ }
540
1240
  }
541
- }
542
1241
 
543
- if (prunedCount > 0) {
544
- if (this.config.verbose) {
545
- console.error(`[Indexer] Pruned ${prunedCount} deleted/excluded files from index`);
1242
+ if (prunedCount > 0) {
1243
+ if (this.config.verbose) {
1244
+ console.info(`[Indexer] Pruned ${prunedCount} deleted/excluded files from index`);
1245
+ }
1246
+ // If we pruned files, we should save these changes even if no other files changed
546
1247
  }
547
- // If we pruned files, we should save these changes even if no other files changed
548
- }
549
1248
 
550
- const prunedCallGraph = this.cache.pruneCallGraphData(currentFilesSet);
551
- if (prunedCallGraph > 0 && this.config.verbose) {
552
- console.error(`[Indexer] Pruned ${prunedCallGraph} call-graph entries`);
1249
+ const prunedCallGraph = this.cache.pruneCallGraphData(currentFilesSet);
1250
+ if (prunedCallGraph > 0 && this.config.verbose) {
1251
+ console.info(`[Indexer] Pruned ${prunedCallGraph} call-graph entries`);
1252
+ }
553
1253
  }
554
- }
555
-
556
- // Step 2: Pre-filter unchanged files (early hash check)
557
- const filesToProcess = await this.preFilterFiles(files);
558
- const filesToProcessSet = new Set(filesToProcess.map(entry => entry.file));
559
1254
 
560
- if (filesToProcess.length === 0) {
561
- console.error("[Indexer] All files unchanged, nothing to index");
1255
+ // Step 2: Pre-filter unchanged files (early hash check)
1256
+ const filesToProcess = await this.preFilterFiles(files);
1257
+ const filesToProcessSet = new Set(filesToProcess.map((entry) => entry.file));
1258
+ const filesToProcessByFile = new Map(filesToProcess.map((entry) => [entry.file, entry]));
562
1259
 
563
- // If we have no call graph data but we have cached files, we should try to rebuild it
1260
+ // Re-index files missing call graph data (if enabled)
564
1261
  if (this.config.callGraphEnabled && this.cache.getVectorStore().length > 0) {
565
- // Check for files that are in cache but missing from call graph data
566
- const cachedFiles = new Set(this.cache.getVectorStore().map(c => c.file));
567
- const callDataFiles = new Set(this.cache.fileCallData.keys());
1262
+ const cachedFiles = new Set(this.cache.getVectorStore().map((c) => c.file));
1263
+ const callDataFiles = new Set(this.cache.getFileCallDataKeys());
568
1264
 
569
1265
  const missingCallData = [];
570
1266
  for (const file of cachedFiles) {
571
1267
  if (!callDataFiles.has(file) && currentFilesSet.has(file)) {
572
1268
  missingCallData.push(file);
1269
+ const existing = filesToProcessByFile.get(file);
1270
+ if (existing) existing.force = true;
573
1271
  }
574
1272
  }
575
1273
 
576
1274
  if (missingCallData.length > 0) {
577
- console.error(`[Indexer] Found ${missingCallData.length} files missing call graph data, re-indexing...`);
1275
+ console.info(
1276
+ `[Indexer] Found ${missingCallData.length} files missing call graph data, re-indexing...`
1277
+ );
578
1278
  const BATCH_SIZE = 100;
579
1279
  for (let i = 0; i < missingCallData.length; i += BATCH_SIZE) {
580
1280
  const batch = missingCallData.slice(i, i + BATCH_SIZE);
@@ -582,11 +1282,14 @@ export class CodebaseIndexer {
582
1282
  batch.map(async (file) => {
583
1283
  try {
584
1284
  const stats = await fs.stat(file);
1285
+ if (!stats || typeof stats.isDirectory !== 'function') {
1286
+ return null;
1287
+ }
585
1288
  if (stats.isDirectory()) return null;
586
1289
  if (stats.size > this.config.maxFileSize) return null;
587
- const content = await fs.readFile(file, "utf-8");
1290
+ const content = await fs.readFile(file, 'utf-8');
588
1291
  const hash = hashContent(content);
589
- return { file, content, hash };
1292
+ return { file, hash, force: true, size: stats.size, mtimeMs: stats.mtimeMs };
590
1293
  } catch {
591
1294
  return null;
592
1295
  }
@@ -595,198 +1298,535 @@ export class CodebaseIndexer {
595
1298
 
596
1299
  for (const result of results) {
597
1300
  if (!result) continue;
598
- if (filesToProcessSet.has(result.file)) continue;
599
- filesToProcess.push(result);
600
- filesToProcessSet.add(result.file);
1301
+ if (!filesToProcessSet.has(result.file)) {
1302
+ filesToProcess.push(result);
1303
+ filesToProcessSet.add(result.file);
1304
+ }
601
1305
  }
602
1306
  }
603
1307
  }
604
1308
  }
605
1309
 
606
- // If still empty after checking for missing call data, then we are truly done
1310
+ indexMode = force
1311
+ ? 'full'
1312
+ : this.cache.getVectorStore().length === 0
1313
+ ? 'initial'
1314
+ : filesToProcess.length === files.length
1315
+ ? 'full'
1316
+ : 'incremental';
1317
+ this.currentIndexMode = indexMode;
1318
+
607
1319
  if (filesToProcess.length === 0) {
608
- this.sendProgress(100, 100, "All files up to date");
1320
+ console.info('[Indexer] All files unchanged, nothing to index');
1321
+ this.sendProgress(100, 100, 'All files up to date');
609
1322
  await this.cache.save();
610
1323
  const vectorStore = this.cache.getVectorStore();
611
1324
  return {
612
1325
  skipped: false,
613
1326
  filesProcessed: 0,
614
1327
  chunksCreated: 0,
615
- totalFiles: new Set(vectorStore.map(v => v.file)).size,
1328
+ totalFiles: new Set(vectorStore.map((v) => v.file)).size,
616
1329
  totalChunks: vectorStore.length,
617
- message: "All files up to date"
1330
+ message: 'All files up to date',
618
1331
  };
619
1332
  }
620
- }
621
1333
 
622
- // Send progress: filtering complete
623
- this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
1334
+ // Send progress: filtering complete
1335
+ console.info(`[Indexer] Processing ${filesToProcess.length} changed files`);
1336
+ this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
1337
+
1338
+ // Step 3: Determine batch size based on project size
1339
+ // Adaptive batch size: use larger batches for larger projects to reduce overhead
1340
+ let adaptiveBatchSize = 10;
1341
+ if (files.length > 500) adaptiveBatchSize = 50;
1342
+ if (files.length > 1000) adaptiveBatchSize = 100;
1343
+ if (files.length > 5000) adaptiveBatchSize = 500;
1344
+
1345
+ if (this.config.verbose) {
1346
+ console.info(
1347
+ `[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`
1348
+ );
1349
+ }
624
1350
 
625
- // Step 3: Determine batch size based on project size
626
- const adaptiveBatchSize = files.length > 10000 ? 500 :
627
- files.length > 1000 ? 200 :
628
- this.config.batchSize || 100;
1351
+ // Step 4: Initialize worker threads (skip if explicitly disabled)
1352
+ const allowSingleThreadFallback =
1353
+ this.config.allowSingleThreadFallback !== false ||
1354
+ this.config.workerThreads === 0 ||
1355
+ isTestEnv();
1356
+ const useWorkers = this.shouldUseWorkers();
1357
+
1358
+ if (useWorkers) {
1359
+ await this.initializeWorkers();
1360
+ if (this.config.verbose && this.workers.length > 0) {
1361
+ console.info(`[Indexer] Multi-threaded mode: ${this.workers.length} workers active`);
1362
+ }
1363
+ } else if (this.config.verbose) {
1364
+ const until = this.workersDisabledUntil - Date.now();
1365
+ if (this.workersDisabledUntil && until > 0) {
1366
+ console.info(
1367
+ `[Indexer] Workers disabled for ${Math.round(until / 1000)}s; single-threaded fallback ${allowSingleThreadFallback ? 'enabled' : 'disabled'}`
1368
+ );
1369
+ } else {
1370
+ console.info(`[Indexer] Single-threaded mode (single-core system)`);
1371
+ }
1372
+ }
629
1373
 
630
- console.error(`[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`);
1374
+ const resolvedWorkerThreads = useWorkers ? this.workers.length : 0;
631
1375
 
632
- // Step 4: Initialize worker threads (always use when multi-core available)
633
- const useWorkers = os.cpus().length > 1;
1376
+ let totalChunks = 0;
1377
+ let processedFiles = 0;
634
1378
 
635
- if (useWorkers) {
636
- await this.initializeWorkers();
637
- console.error(`[Indexer] Multi-threaded mode: ${this.workers.length} workers active`);
638
- } else {
639
- console.error(`[Indexer] Single-threaded mode (single-core system)`);
640
- }
1379
+ console.info(
1380
+ `[Indexer] Embedding pass started: ${filesToProcess.length} files using ${this.config.embeddingModel}`
1381
+ );
641
1382
 
642
- let totalChunks = 0;
643
- let processedFiles = 0;
1383
+ // Step 5: Process files in adaptive batches
1384
+ for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
1385
+ const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
644
1386
 
645
- // Step 5: Process files in adaptive batches
646
- for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
647
- const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
1387
+ const allChunks = [];
1388
+ const fileStats = new Map();
1389
+ const newChunksByFile = new Map();
1390
+ const callDataByFile = new Map();
1391
+ const filesForWorkers = [];
648
1392
 
649
- // Generate all chunks for this batch
650
- const allChunks = [];
651
- const fileStats = new Map();
1393
+ // Memory safeguard
1394
+ const mem = process.memoryUsage();
1395
+ if (mem.rss > 2048 * 1024 * 1024) {
1396
+ if (global.gc) global.gc();
1397
+ }
652
1398
 
653
- for (const { file, content, hash } of batch) {
654
- // Remove old chunks for this file
655
- this.cache.removeFileFromStore(file);
1399
+ const useWorkersForBatch = useWorkers && this.workers.length > 0 && !this.config.embeddingProcessPerBatch;
1400
+
1401
+ for (const item of batch) {
1402
+ const { file, force, content: presetContent, hash: presetHash, expectedHash: presetExpectedHash, size: presetSize, mtimeMs: presetMtimeMs } = item;
1403
+ let content = presetContent;
1404
+ let liveHash = presetHash;
1405
+ let size = presetSize;
1406
+ let mtimeMs = presetMtimeMs;
1407
+ const expectedHash =
1408
+ presetExpectedHash ||
1409
+ (typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null);
1410
+
1411
+ if (useWorkersForBatch && (content === undefined || content === null)) {
1412
+ // Speed optimization: Offload reading and hashing to workers.
1413
+ // Main thread skips I/O entirely for this file.
1414
+ filesForWorkers.push({ file, content: null, force, expectedHash });
1415
+ // Initialize stats placeholder (will be updated with worker results)
1416
+ fileStats.set(file, { hash: null, totalChunks: 0, successChunks: 0, size, mtimeMs });
1417
+ continue;
1418
+ }
656
1419
 
657
- // Extract call graph data if enabled
658
- if (this.config.callGraphEnabled) {
659
- try {
660
- const callData = extractCallData(content, file);
661
- this.cache.setFileCallData(file, callData);
662
- } catch (err) {
663
- if (this.config.verbose) {
664
- console.error(`[Indexer] Call graph extraction failed for ${path.basename(file)}: ${err.message}`);
1420
+ // Read content if not provided (Legacy Path or workers disabled)
1421
+ if (content === undefined || content === null) {
1422
+ let stats = null;
1423
+ try {
1424
+ stats = await fs.stat(file);
1425
+ } catch (err) {
1426
+ if (this.config.verbose) {
1427
+ console.warn(`[Indexer] Failed to stat ${path.basename(file)}: ${err.message}`);
1428
+ }
1429
+ continue;
1430
+ }
1431
+ if (!stats || typeof stats.isDirectory !== 'function') {
1432
+ if (this.config.verbose) {
1433
+ console.warn(`[Indexer] Invalid stat result for ${path.basename(file)}`);
1434
+ }
1435
+ continue;
1436
+ }
1437
+ if (stats.isDirectory()) continue;
1438
+ if (stats.size > this.config.maxFileSize) {
1439
+ if (this.config.verbose) {
1440
+ console.warn(
1441
+ `[Indexer] Skipped ${path.basename(file)} (too large: ${(stats.size / 1024 / 1024).toFixed(2)}MB)`
1442
+ );
1443
+ }
1444
+ continue;
1445
+ }
1446
+ try {
1447
+ content = await fs.readFile(file, 'utf-8');
1448
+ } catch (err) {
1449
+ if (this.config.verbose) {
1450
+ console.warn(`[Indexer] Failed to read ${path.basename(file)}: ${err.message}`);
1451
+ }
1452
+ continue;
1453
+ }
1454
+ liveHash = hashContent(content);
1455
+ size = stats.size;
1456
+ mtimeMs = stats.mtimeMs;
1457
+ } else {
1458
+ if (typeof content !== 'string') content = String(content);
1459
+ if (!liveHash) liveHash = hashContent(content);
1460
+ if (!Number.isFinite(size)) {
1461
+ // Use character length as approximation to avoid blocking Buffer.byteLength on large strings
1462
+ size = content.length;
1463
+ }
1464
+ if (size > this.config.maxFileSize) {
1465
+ if (this.config.verbose) {
1466
+ console.warn(
1467
+ `[Indexer] Skipped ${path.basename(file)} (too large: ${(size / 1024 / 1024).toFixed(2)}MB)`
1468
+ );
1469
+ }
1470
+ continue;
665
1471
  }
666
1472
  }
667
- }
668
1473
 
669
- const chunks = smartChunk(content, file, this.config);
670
- fileStats.set(file, { hash, totalChunks: 0, successChunks: 0 });
1474
+ const cachedFileHash =
1475
+ typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
1476
+ if (!force && liveHash && cachedFileHash === liveHash) {
1477
+ if (this.config.verbose) console.info(`[Indexer] Skipped ${path.basename(file)} (unchanged)`);
1478
+ this.cache.setFileHash(file, liveHash, { size, mtimeMs });
1479
+ continue;
1480
+ }
671
1481
 
672
- for (const chunk of chunks) {
673
- allChunks.push({
674
- file,
675
- text: chunk.text,
676
- startLine: chunk.startLine,
677
- endLine: chunk.endLine
678
- });
679
- const stats = fileStats.get(file);
680
- if (stats) {
681
- stats.totalChunks++;
1482
+ if (useWorkersForBatch) {
1483
+ filesForWorkers.push({ file, content, force, expectedHash: liveHash });
1484
+ // Initialize stats placeholder (will be updated with worker results)
1485
+ fileStats.set(file, { hash: liveHash, totalChunks: 0, successChunks: 0, size, mtimeMs });
1486
+ continue;
1487
+ }
1488
+
1489
+ // Legacy / Fallback path: Chunk on main thread
1490
+ if (this.config.callGraphEnabled) {
1491
+ try {
1492
+ const callData = extractCallData(content, file);
1493
+ callDataByFile.set(file, callData);
1494
+ } catch (err) {
1495
+ if (this.config.verbose) {
1496
+ console.warn(
1497
+ `[Indexer] Call graph extraction failed for ${path.basename(file)}: ${err.message}`
1498
+ );
1499
+ }
1500
+ }
1501
+ }
1502
+
1503
+ const rawChunks = smartChunk(content, file, this.config);
1504
+ const chunks = Array.isArray(rawChunks) ? rawChunks : [];
1505
+ fileStats.set(file, { hash: liveHash, totalChunks: chunks.length, successChunks: 0, size, mtimeMs });
1506
+
1507
+ for (const chunk of chunks) {
1508
+ allChunks.push({
1509
+ file,
1510
+ text: chunk.text,
1511
+ startLine: chunk.startLine,
1512
+ endLine: chunk.endLine,
1513
+ });
682
1514
  }
683
1515
  }
684
- }
685
1516
 
686
- // Process chunks (with workers if available, otherwise single-threaded)
687
- let results;
688
- if (useWorkers && this.workers.length > 0) {
689
- results = await this.processChunksWithWorkers(allChunks);
690
- } else {
691
- results = await this.processChunksSingleThreaded(allChunks);
692
- }
1517
+ // Process files with workers (New Path)
1518
+ if (filesForWorkers.length > 0) {
1519
+ const results = await this.processFilesWithWorkers(filesForWorkers);
1520
+
1521
+ for (const res of results) {
1522
+ const stats = fileStats.get(res.file);
1523
+ if (res.status === 'indexed' && stats) {
1524
+ stats.totalChunks = res.results.length;
1525
+ stats.successChunks = res.results.length;
1526
+ if (res.hash) stats.hash = res.hash; // Update with new hash from worker
1527
+ if (res.callData) callDataByFile.set(res.file, res.callData);
1528
+
1529
+ const chunks = res.results.map(r => ({
1530
+ file: res.file,
1531
+ startLine: r.startLine,
1532
+ endLine: r.endLine,
1533
+ content: r.text,
1534
+ vector: toFloat32Array(r.vectorBuffer),
1535
+ }));
1536
+ newChunksByFile.set(res.file, chunks);
1537
+ } else if (res.status === 'unchanged' && stats) {
1538
+ // Worker found file hash matches old hash
1539
+ stats.totalChunks = 0; // Signal skip commit
1540
+ stats.successChunks = 0;
1541
+ stats.hash = res.hash;
1542
+ this.cache.setFileHash(res.file, res.hash, { size: res.size, mtimeMs: res.mtimeMs });
1543
+ if (res.callData && this.config.callGraphEnabled) {
1544
+ this.cache.setFileCallData(res.file, res.callData);
1545
+ }
1546
+ } else if ((res.status === 'retry' || res.status === 'error') && stats) {
1547
+ // Worker failed, fallback to local chunking + single threaded
1548
+ const original = filesForWorkers.find(f => f.file === res.file);
1549
+ if (original) {
1550
+ if (this.config.verbose) console.info(`[Indexer] Fallback for ${path.basename(res.file)}`);
1551
+
1552
+ if (this.config.callGraphEnabled) {
1553
+ try {
1554
+ callDataByFile.set(res.file, extractCallData(original.content, res.file));
1555
+ } catch (err) {
1556
+ if (this.config.verbose) {
1557
+ console.warn(
1558
+ `[Indexer] Call graph extraction failed for ${path.basename(res.file)}: ${err.message}`
1559
+ );
1560
+ }
1561
+ }
1562
+ }
1563
+ const fallbackChunks = smartChunk(original.content, res.file, this.config);
1564
+ const chunks = Array.isArray(fallbackChunks) ? fallbackChunks : [];
1565
+ stats.totalChunks = chunks.length;
1566
+ for (const chunk of chunks) {
1567
+ allChunks.push({
1568
+ file: res.file,
1569
+ text: chunk.text,
1570
+ startLine: chunk.startLine,
1571
+ endLine: chunk.endLine,
1572
+ });
1573
+ }
1574
+ }
1575
+ }
1576
+ }
1577
+ }
693
1578
 
694
- // Store successful results
695
- for (const result of results) {
696
- const stats = fileStats.get(result.file);
697
- if (result.success) {
698
- this.cache.addToStore({
699
- file: result.file,
700
- startLine: result.startLine,
701
- endLine: result.endLine,
702
- content: result.content,
703
- vector: result.vector
704
- });
705
- totalChunks++;
706
- if (stats) {
707
- stats.successChunks++;
1579
+ // Process chunks (Legacy Path & Fallbacks)
1580
+ if (allChunks.length > 0) {
1581
+ const chunksToProcess = allChunks.slice();
1582
+ let results = [];
1583
+ if (this.config.embeddingProcessPerBatch) {
1584
+ results = await this.processChunksInChildProcess(chunksToProcess);
1585
+ } else {
1586
+ // If we are here, either workers are disabled/full or these are retry chunks
1587
+ // Use single threaded fallback if not using child process
1588
+ results = await this.processChunksSingleThreaded(chunksToProcess);
1589
+ }
1590
+
1591
+ for (const result of results) {
1592
+ const stats = fileStats.get(result.file);
1593
+ if (result.success && stats) {
1594
+ const items = newChunksByFile.get(result.file) || [];
1595
+ items.push({
1596
+ file: result.file,
1597
+ startLine: result.startLine,
1598
+ endLine: result.endLine,
1599
+ content: result.content,
1600
+ vector: toFloat32Array(result.vector),
1601
+ });
1602
+ newChunksByFile.set(result.file, items);
1603
+ stats.successChunks++;
1604
+ }
1605
+ }
1606
+ }
1607
+
1608
+ // Commit changes to cache
1609
+ for (const [file, stats] of fileStats) {
1610
+ if (stats.totalChunks > 0 && stats.successChunks === stats.totalChunks) {
1611
+ this.cache.removeFileFromStore(file);
1612
+ const newChunks = newChunksByFile.get(file) || [];
1613
+ for (const chunk of newChunks) {
1614
+ this.cache.addToStore(chunk);
1615
+ totalChunks++;
1616
+ }
1617
+ this.cache.setFileHash(file, stats.hash, { size: stats.size, mtimeMs: stats.mtimeMs });
1618
+ const callData = callDataByFile.get(file);
1619
+ if (callData && this.config.callGraphEnabled) {
1620
+ this.cache.setFileCallData(file, callData);
1621
+ }
1622
+ } else if (stats.totalChunks === 0) {
1623
+ // File had no chunks (empty or comments only), just mark as indexed
1624
+ this.cache.setFileHash(file, stats.hash, { size: stats.size, mtimeMs: stats.mtimeMs });
1625
+ const callData = callDataByFile.get(file);
1626
+ if (callData && this.config.callGraphEnabled) {
1627
+ this.cache.setFileCallData(file, callData);
1628
+ }
1629
+ } else if (this.config.verbose) {
1630
+ console.warn(
1631
+ `[Indexer] Skipped hash update for ${path.basename(file)} (${stats.successChunks}/${stats.totalChunks} chunks embedded)`
1632
+ );
708
1633
  }
709
1634
  }
1635
+
1636
+ if (global.gc) global.gc();
1637
+
1638
+ processedFiles += batch.length;
1639
+
1640
+ // Progress indicator
1641
+ if (
1642
+ processedFiles % (adaptiveBatchSize * 2) === 0 ||
1643
+ processedFiles === filesToProcess.length
1644
+ ) {
1645
+ const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
1646
+ const rate = (processedFiles / parseFloat(elapsed)).toFixed(1);
1647
+ console.info(
1648
+ `[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec, ${elapsed}s elapsed)`
1649
+ );
1650
+ const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
1651
+ this.sendProgress(
1652
+ progressPercent,
1653
+ 100,
1654
+ `Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`
1655
+ );
1656
+ }
1657
+
1658
+ // Batch-level memory cleanup to reduce peak usage
1659
+ allChunks.length = 0;
1660
+ filesForWorkers.length = 0;
1661
+ fileStats.clear();
1662
+ newChunksByFile.clear();
1663
+ callDataByFile.clear();
1664
+ await delay(0);
1665
+ }
1666
+
1667
+ // Cleanup workers
1668
+ if (this.workers.length > 0) {
1669
+ await this.terminateWorkers();
710
1670
  }
1671
+ if (global.gc) global.gc();
1672
+
1673
+ const totalDurationMs = Date.now() - totalStartTime;
1674
+ const totalTime = (totalDurationMs / 1000).toFixed(1);
1675
+ console.info(
1676
+ `[Indexer] Embedding pass complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`
1677
+ );
1678
+
1679
+ // Send completion progress
1680
+ this.sendProgress(
1681
+ 100,
1682
+ 100,
1683
+ `Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`
1684
+ );
1685
+
1686
+ this.cache.setLastIndexDuration?.(totalDurationMs);
1687
+ this.cache.setLastIndexStats?.({
1688
+ lastIndexStartedAt: indexStartedAt,
1689
+ lastIndexEndedAt: new Date().toISOString(),
1690
+ lastDiscoveredFiles: files.length,
1691
+ lastFilesProcessed: filesToProcess.length,
1692
+ lastIndexMode: indexMode,
1693
+ lastBatchSize: adaptiveBatchSize,
1694
+ lastWorkerThreads: resolvedWorkerThreads,
1695
+ lastEmbeddingProcessPerBatch: this.config.embeddingProcessPerBatch,
1696
+ });
1697
+ await this.cache.save();
711
1698
 
712
- // Update file hashes
713
- for (const [file, stats] of fileStats) {
714
- if (stats.totalChunks === 0 || stats.successChunks === stats.totalChunks) {
715
- this.cache.setFileHash(file, stats.hash);
716
- } else if (this.config.verbose) {
717
- console.error(`[Indexer] Skipped hash update for ${path.basename(file)} (${stats.successChunks}/${stats.totalChunks} chunks embedded)`);
1699
+ if (this.config.clearCacheAfterIndex) {
1700
+ console.info(
1701
+ '[Indexer] clearCacheAfterIndex enabled; in-memory vectors will be reloaded on next query'
1702
+ );
1703
+ await this.cache.dropInMemoryVectors();
1704
+ if (this.config.verbose) {
1705
+ console.info('[Cache] Cleared in-memory vectors after indexing');
718
1706
  }
719
1707
  }
720
1708
 
721
- processedFiles += batch.length;
1709
+ // Rebuild call graph in background
1710
+ if (this.config.callGraphEnabled) {
1711
+ this.cache.rebuildCallGraph();
1712
+ }
722
1713
 
723
- // Progress indicator every batch
724
- if (processedFiles % (adaptiveBatchSize * 2) === 0 || processedFiles === filesToProcess.length) {
725
- const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
726
- const rate = (processedFiles / parseFloat(elapsed)).toFixed(0);
727
- console.error(`[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec)`);
1714
+ if (!this.config.clearCacheAfterIndex) {
1715
+ void this.cache.ensureAnnIndex().catch((error) => {
1716
+ if (this.config.verbose) {
1717
+ console.warn(`[ANN] Background ANN build failed: ${error.message}`);
1718
+ }
1719
+ });
1720
+ }
728
1721
 
729
- // Send MCP progress notification (10-95% range for batch processing)
730
- const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
731
- this.sendProgress(progressPercent, 100, `Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`);
1722
+ const vectorStore = this.cache.getVectorStore();
1723
+ return {
1724
+ skipped: false,
1725
+ filesProcessed: filesToProcess.length,
1726
+ chunksCreated: totalChunks,
1727
+ totalFiles: new Set(vectorStore.map((v) => v.file)).size,
1728
+ totalChunks: vectorStore.length,
1729
+ duration: totalTime,
1730
+ message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`,
1731
+ };
1732
+ } finally {
1733
+ if (memoryTimer) {
1734
+ clearInterval(memoryTimer);
732
1735
  }
1736
+ logMemory('end');
1737
+ this.isIndexing = false;
1738
+ try {
1739
+ await this.processPendingWatchEvents();
1740
+ } catch (error) {
1741
+ console.warn(`[Indexer] Failed to apply queued file updates: ${error.message}`);
733
1742
  }
1743
+ }
1744
+ }
734
1745
 
735
- // Cleanup workers
736
- if (useWorkers) {
737
- await this.terminateWorkers();
1746
+ enqueueWatchEvent(type, filePath) {
1747
+ // If it's a delete, it always wins
1748
+ if (type === 'unlink') {
1749
+ this.pendingWatchEvents.set(filePath, 'unlink');
1750
+ return;
738
1751
  }
739
1752
 
740
- const totalTime = ((Date.now() - totalStartTime) / 1000).toFixed(1);
741
- console.error(`[Indexer] Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
1753
+ // If we're adding/changing, it overwrites a potential unlink (file came back)
1754
+ this.pendingWatchEvents.set(filePath, type);
1755
+ }
742
1756
 
743
- // Send completion progress
744
- this.sendProgress(100, 100, `Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
1757
+ async processPendingWatchEvents() {
1758
+ if (this.processingWatchEvents || this.pendingWatchEvents.size === 0) {
1759
+ return;
1760
+ }
745
1761
 
746
- await this.cache.save();
1762
+ this.processingWatchEvents = true;
1763
+ try {
1764
+ while (this.pendingWatchEvents.size > 0) {
1765
+ const pending = Array.from(this.pendingWatchEvents.entries());
1766
+ this.pendingWatchEvents.clear();
747
1767
 
748
- // Rebuild call graph in background
749
- if (this.config.callGraphEnabled) {
750
- this.cache.rebuildCallGraph();
751
- }
1768
+ for (const [filePath, type] of pending) {
1769
+ if (this.server && this.server.hybridSearch) {
1770
+ this.server.hybridSearch.clearFileModTime(filePath);
1771
+ }
752
1772
 
753
- void this.cache.ensureAnnIndex().catch((error) => {
754
- if (this.config.verbose) {
755
- console.error(`[ANN] Background ANN build failed: ${error.message}`);
756
- }
757
- });
1773
+ if (type === 'unlink') {
1774
+ await this.cache.removeFileFromStore(filePath);
1775
+ this.cache.deleteFileHash(filePath);
1776
+ } else {
1777
+ await this.indexFile(filePath);
1778
+ }
1779
+ }
758
1780
 
759
- const vectorStore = this.cache.getVectorStore();
760
- return {
761
- skipped: false,
762
- filesProcessed: filesToProcess.length,
763
- chunksCreated: totalChunks,
764
- totalFiles: new Set(vectorStore.map(v => v.file)).size,
765
- totalChunks: vectorStore.length,
766
- duration: totalTime,
767
- message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`
768
- };
1781
+ await this.cache.save();
1782
+ }
769
1783
  } finally {
770
- this.isIndexing = false;
1784
+ this.processingWatchEvents = false;
771
1785
  }
772
1786
  }
773
1787
 
774
- setupFileWatcher() {
1788
+ async setupFileWatcher() {
775
1789
  if (!this.config.watchFiles) return;
776
1790
 
777
- const pattern = this.config.fileExtensions.map(ext => `**/*.${ext}`);
1791
+ // Close existing watcher if active to prevent leaks
1792
+ if (this.watcher) {
1793
+ await this.watcher.close();
1794
+ this.watcher = null;
1795
+ }
1796
+
1797
+ await this.loadGitignore();
1798
+
1799
+ const pattern = [
1800
+ ...this.config.fileExtensions.map((ext) => `**/*.${ext}`),
1801
+ ...(this.config.fileNames || []).map((name) => `**/${name}`),
1802
+ ];
1803
+
1804
+ const ignored = (filePath) => {
1805
+ const fullPath = path.isAbsolute(filePath)
1806
+ ? filePath
1807
+ : path.join(this.config.searchDirectory, filePath);
1808
+ return this.isExcluded(fullPath);
1809
+ };
778
1810
 
779
1811
  this.watcher = chokidar.watch(pattern, {
780
1812
  cwd: this.config.searchDirectory,
781
- ignored: this.config.excludePatterns,
1813
+ ignored,
782
1814
  persistent: true,
783
- ignoreInitial: true
1815
+ ignoreInitial: true,
784
1816
  });
785
1817
 
786
1818
  this.watcher
787
- .on("add", async (filePath) => {
1819
+ .on('add', async (filePath) => {
788
1820
  const fullPath = path.join(this.config.searchDirectory, filePath);
789
- console.error(`[Indexer] New file detected: ${filePath}`);
1821
+ console.info(`[Indexer] New file detected: ${filePath}`);
1822
+
1823
+ if (this.isIndexing || this.processingWatchEvents) {
1824
+ if (this.config.verbose) {
1825
+ console.info(`[Indexer] Queued add event during indexing: ${filePath}`);
1826
+ }
1827
+ this.enqueueWatchEvent('add', fullPath);
1828
+ return;
1829
+ }
790
1830
 
791
1831
  // Invalidate recency cache
792
1832
  if (this.server && this.server.hybridSearch) {
@@ -796,9 +1836,17 @@ export class CodebaseIndexer {
796
1836
  await this.indexFile(fullPath);
797
1837
  await this.cache.save();
798
1838
  })
799
- .on("change", async (filePath) => {
1839
+ .on('change', async (filePath) => {
800
1840
  const fullPath = path.join(this.config.searchDirectory, filePath);
801
- console.error(`[Indexer] File changed: ${filePath}`);
1841
+ console.info(`[Indexer] File changed: ${filePath}`);
1842
+
1843
+ if (this.isIndexing || this.processingWatchEvents) {
1844
+ if (this.config.verbose) {
1845
+ console.info(`[Indexer] Queued change event during indexing: ${filePath}`);
1846
+ }
1847
+ this.enqueueWatchEvent('change', fullPath);
1848
+ return;
1849
+ }
802
1850
 
803
1851
  // Invalidate recency cache
804
1852
  if (this.server && this.server.hybridSearch) {
@@ -808,46 +1856,55 @@ export class CodebaseIndexer {
808
1856
  await this.indexFile(fullPath);
809
1857
  await this.cache.save();
810
1858
  })
811
- .on("unlink", (filePath) => {
1859
+ .on('unlink', async (filePath) => {
812
1860
  const fullPath = path.join(this.config.searchDirectory, filePath);
813
- console.error(`[Indexer] File deleted: ${filePath}`);
1861
+ console.info(`[Indexer] File deleted: ${filePath}`);
1862
+
1863
+ if (this.isIndexing || this.processingWatchEvents) {
1864
+ if (this.config.verbose) {
1865
+ console.info(`[Indexer] Queued delete event during indexing: ${filePath}`);
1866
+ }
1867
+ this.enqueueWatchEvent('unlink', fullPath);
1868
+ return;
1869
+ }
814
1870
 
815
1871
  // Invalidate recency cache
816
1872
  if (this.server && this.server.hybridSearch) {
817
1873
  this.server.hybridSearch.clearFileModTime(fullPath);
818
1874
  }
819
1875
 
820
- this.cache.removeFileFromStore(fullPath);
1876
+ await this.cache.removeFileFromStore(fullPath);
821
1877
  this.cache.deleteFileHash(fullPath);
822
- this.cache.save();
1878
+ await this.cache.save();
823
1879
  });
824
1880
 
825
- console.error("[Indexer] File watcher enabled for incremental indexing");
1881
+ console.info('[Indexer] File watcher enabled for incremental indexing');
826
1882
  }
827
1883
  }
828
1884
 
829
1885
  // MCP Tool definition for this feature
830
1886
  export function getToolDefinition() {
831
1887
  return {
832
- name: "b_index_codebase",
833
- description: "Manually trigger a full reindex of the codebase. This will scan all files and update the embeddings cache. Useful after large code changes or if the index seems out of date.",
1888
+ name: 'b_index_codebase',
1889
+ description:
1890
+ 'Manually trigger a full reindex of the codebase. This will scan all files and update the embeddings cache. Useful after large code changes or if the index seems out of date.',
834
1891
  inputSchema: {
835
- type: "object",
1892
+ type: 'object',
836
1893
  properties: {
837
1894
  force: {
838
- type: "boolean",
1895
+ type: 'boolean',
839
1896
  description: "Force reindex even if files haven't changed",
840
- default: false
841
- }
842
- }
1897
+ default: false,
1898
+ },
1899
+ },
843
1900
  },
844
1901
  annotations: {
845
- title: "Reindex Codebase",
1902
+ title: 'Reindex Codebase',
846
1903
  readOnlyHint: false,
847
1904
  destructiveHint: false,
848
1905
  idempotentHint: true,
849
- openWorldHint: false
850
- }
1906
+ openWorldHint: false,
1907
+ },
851
1908
  };
852
1909
  }
853
1910
 
@@ -859,10 +1916,12 @@ export async function handleToolCall(request, indexer) {
859
1916
  // Handle case when indexing was skipped due to concurrent request
860
1917
  if (result?.skipped) {
861
1918
  return {
862
- content: [{
863
- type: "text",
864
- text: `Indexing skipped: ${result.reason}\n\nPlease wait for the current indexing operation to complete before requesting another reindex.`
865
- }]
1919
+ content: [
1920
+ {
1921
+ type: 'text',
1922
+ text: `Indexing skipped: ${result.reason}\n\nPlease wait for the current indexing operation to complete before requesting another reindex.`,
1923
+ },
1924
+ ],
866
1925
  };
867
1926
  }
868
1927
 
@@ -870,9 +1929,9 @@ export async function handleToolCall(request, indexer) {
870
1929
  const vectorStore = indexer.cache.getVectorStore();
871
1930
  const stats = {
872
1931
  totalChunks: result?.totalChunks ?? vectorStore.length,
873
- totalFiles: result?.totalFiles ?? new Set(vectorStore.map(v => v.file)).size,
1932
+ totalFiles: result?.totalFiles ?? new Set(vectorStore.map((v) => v.file)).size,
874
1933
  filesProcessed: result?.filesProcessed ?? 0,
875
- chunksCreated: result?.chunksCreated ?? 0
1934
+ chunksCreated: result?.chunksCreated ?? 0,
876
1935
  };
877
1936
 
878
1937
  let message = result?.message
@@ -886,9 +1945,11 @@ export async function handleToolCall(request, indexer) {
886
1945
  }
887
1946
 
888
1947
  return {
889
- content: [{
890
- type: "text",
891
- text: message
892
- }]
1948
+ content: [
1949
+ {
1950
+ type: 'text',
1951
+ text: message,
1952
+ },
1953
+ ],
893
1954
  };
894
1955
  }