@softerist/heuristic-mcp 3.0.17 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -112,21 +112,21 @@ export class CodebaseIndexer {
112
112
  this._lastIncrementalGcAt = 0;
113
113
  this._autoEmbeddingProcessLogged = false;
114
114
  this._heavyWorkerSafetyLogged = false;
115
- // Debounce timers for watcher events (path -> timeout ID)
115
+
116
116
  this._watcherDebounceTimers = new Map();
117
- // Files currently being indexed via watcher (path -> Promise)
117
+
118
118
  this._watcherInProgress = new Map();
119
- // Files that need a follow-up reindex after current watcher indexing finishes
119
+
120
120
  this._watcherPendingReindex = new Map();
121
- // Debounce delay in ms (consolidates rapid add/change events)
121
+
122
122
  this._watcherDebounceMs = Number.isInteger(this.config.watchDebounceMs)
123
123
  ? this.config.watchDebounceMs
124
124
  : 300;
125
- // Wait-for-stable writes (chokidar awaitWriteFinish) to reduce add+change churn
125
+
126
126
  this._watcherWriteStabilityMs = Number.isInteger(this.config.watchWriteStabilityMs)
127
127
  ? this.config.watchWriteStabilityMs
128
128
  : 1500;
129
- // Persistent embedding child process (used to avoid per-batch model reloads)
129
+
130
130
  this._embeddingProcessSessionActive = false;
131
131
  this._embeddingChild = null;
132
132
  this._embeddingChildBuffer = '';
@@ -202,6 +202,13 @@ export class CodebaseIndexer {
202
202
  return { threads, batchSize };
203
203
  }
204
204
 
205
+ getIndexCheckpointIntervalMs() {
206
+ const raw = Number(this.config.indexCheckpointIntervalMs);
207
+ if (!Number.isFinite(raw)) return 15000;
208
+ if (raw <= 0) return 0;
209
+ return Math.floor(raw);
210
+ }
211
+
205
212
  getEmbeddingProcessGcConfig() {
206
213
  const thresholdRaw = Number(this.config.embeddingProcessGcRssThresholdMb);
207
214
  const minIntervalRaw = Number(this.config.embeddingProcessGcMinIntervalMs);
@@ -311,7 +318,7 @@ export class CodebaseIndexer {
311
318
  const rel = path.relative(normalizedBase, normalizedTarget);
312
319
  return rel === '' || (!rel.startsWith('..') && !path.isAbsolute(rel));
313
320
  } catch {
314
- // Fall back to lexical check when realpath fails (e.g., deleted files).
321
+
315
322
  return this.isPathInsideWorkspace(filePath);
316
323
  }
317
324
  }
@@ -344,10 +351,16 @@ export class CodebaseIndexer {
344
351
  );
345
352
  }
346
353
 
347
- isHeavyEmbeddingModel() {
348
- const model = String(this.config.embeddingModel || '').toLowerCase();
349
- return model.includes('jina');
350
- }
354
+ isHeavyEmbeddingModel() {
355
+ const model = String(this.config.embeddingModel || '').toLowerCase();
356
+ return model.includes('jina');
357
+ }
358
+
359
+ shouldDisableHeavyModelWorkersOnWindows() {
360
+ if (process.platform !== 'win32') return false;
361
+ if (!this.isHeavyEmbeddingModel()) return false;
362
+ return this.config.workerDisableHeavyModelOnWindows !== false;
363
+ }
351
364
 
352
365
  getWorkerInferenceBatchSize({ numWorkers = null } = {}) {
353
366
  const configured =
@@ -355,7 +368,7 @@ export class CodebaseIndexer {
355
368
  ? this.config.embeddingBatchSize
356
369
  : null;
357
370
  if (configured) return Math.min(configured, 256);
358
- // Heavy models are more stable with batch=1 in multi-worker mode on some runtimes.
371
+
359
372
  if (this.isHeavyEmbeddingModel() && Number.isInteger(numWorkers) && numWorkers > 1) return 1;
360
373
  return null;
361
374
  }
@@ -478,7 +491,7 @@ export class CodebaseIndexer {
478
491
  if (this.config.verbose) {
479
492
  console.info(`[Cache] Cleared in-memory vectors after ${reason}`);
480
493
  }
481
- // Keep server RSS low after single-file updates where vector arrays can remain in old-gen.
494
+
482
495
  await this.traceIncrementalMemoryPhase(`incremental.explicitGc (${reason})`, async () => {
483
496
  this.runExplicitGc({ force: true });
484
497
  });
@@ -532,15 +545,13 @@ export class CodebaseIndexer {
532
545
  }
533
546
  }
534
547
 
535
- /**
536
- * Initialize worker thread pool for parallel embedding
537
- */
548
+
538
549
  async initializeWorkers() {
539
- // Check if we have any active workers
550
+
540
551
  const activeWorkers = this.workers.filter((w) => w !== null);
541
552
  if (activeWorkers.length > 0) return;
542
553
 
543
- // If we have workers array but they are all null, reset it
554
+
544
555
  if (this.workers.length > 0) {
545
556
  this.workers = [];
546
557
  this.workerReady = [];
@@ -552,25 +563,25 @@ export class CodebaseIndexer {
552
563
  try {
553
564
  let numWorkers =
554
565
  this.config.workerThreads === 'auto'
555
- ? Math.min(2, Math.max(1, os.cpus().length - 1)) // Cap 'auto' at 2 workers
566
+ ? Math.min(2, Math.max(1, os.cpus().length - 1))
556
567
  : typeof this.config.workerThreads === 'number'
557
568
  ? this.config.workerThreads
558
569
  : 1;
559
570
 
560
- // Heavy models can consume multiple GB per worker. Keep auto mode bounded by
561
- // existing memory guards below; do not hard-pin to 1 worker as it can hurt throughput.
562
- if (process.platform === 'win32' && this.isHeavyEmbeddingModel() && numWorkers > 1) {
563
- if (!this._heavyWorkerSafetyLogged) {
564
- console.warn(
565
- '[Indexer] Heavy model worker safety mode: forcing workers=1 on Windows to avoid native multi-worker crashes'
566
- );
567
- this._heavyWorkerSafetyLogged = true;
568
- }
569
- numWorkers = 1;
570
- }
571
-
572
- // Resource-aware scaling: check available RAM (skip in test env to avoid mocking issues)
573
- // We apply this if we have > 1 worker, regardless of whether it was 'auto' or explicit
571
+
572
+
573
+ if (this.shouldDisableHeavyModelWorkersOnWindows() && numWorkers > 0) {
574
+ if (!this._heavyWorkerSafetyLogged) {
575
+ console.warn(
576
+ '[Indexer] Heavy model worker safety mode: disabling workers on Windows to avoid native worker crashes/timeouts'
577
+ );
578
+ this._heavyWorkerSafetyLogged = true;
579
+ }
580
+ numWorkers = 0;
581
+ }
582
+
583
+
584
+
574
585
  if (numWorkers > 1 && !isTestEnv() && typeof os.freemem === 'function') {
575
586
  const freeMemGb = os.freemem() / 1024 / 1024 / 1024;
576
587
  const isHeavyModel = this.isHeavyEmbeddingModel();
@@ -587,13 +598,13 @@ export class CodebaseIndexer {
587
598
  }
588
599
  }
589
600
 
590
- // Hard memory ceiling: disable workers if projected RSS risks OOM
601
+
591
602
  if (!isTestEnv() && typeof os.totalmem === 'function') {
592
603
  const totalMemGb = os.totalmem() / 1024 / 1024 / 1024;
593
604
  const rssGb = process.memoryUsage().rss / 1024 / 1024 / 1024;
594
605
  const isHeavyModel = this.isHeavyEmbeddingModel();
595
606
  const memPerWorker = isHeavyModel ? 8.0 : 0.8;
596
- const projectedGb = rssGb + numWorkers * memPerWorker + 0.5; // 0.5GB headroom
607
+ const projectedGb = rssGb + numWorkers * memPerWorker + 0.5;
597
608
  const ceilingGb = totalMemGb * 0.85;
598
609
  if (numWorkers > 0 && projectedGb > ceilingGb) {
599
610
  if (this.config.verbose) {
@@ -605,7 +616,7 @@ export class CodebaseIndexer {
605
616
  }
606
617
  }
607
618
 
608
- // Use workers even for single worker to benefit from --expose-gc and separate heap
619
+
609
620
  if (numWorkers < 1) {
610
621
  console.info(
611
622
  '[Indexer] No workers configured, using main thread (warning: higher RAM usage)'
@@ -619,7 +630,7 @@ export class CodebaseIndexer {
619
630
  );
620
631
  }
621
632
 
622
- // Force 1 thread per worker to prevent CPU saturation (ONNX is very aggressive)
633
+
623
634
  const threadsPerWorker = 1;
624
635
 
625
636
  console.info(
@@ -682,7 +693,7 @@ export class CodebaseIndexer {
682
693
  }
683
694
  }
684
695
 
685
- // Wait for all workers to be ready
696
+
686
697
  try {
687
698
  await Promise.all(this.workerReady);
688
699
  console.info(`[Indexer] ${this.workers.length} workers ready`);
@@ -702,16 +713,14 @@ export class CodebaseIndexer {
702
713
  return this.initWorkerPromise;
703
714
  }
704
715
 
705
- /**
706
- * Terminate all worker threads
707
- */
716
+
708
717
  async terminateWorkers() {
709
718
  const WORKER_SHUTDOWN_TIMEOUT = isTestEnv() ? 50 : 5000;
710
719
  const terminations = this.workers.filter(Boolean).map((worker) => {
711
720
  try {
712
721
  worker.postMessage({ type: 'shutdown' });
713
722
  } catch {
714
- /* ignore */
723
+
715
724
  }
716
725
 
717
726
  let exited = false;
@@ -736,10 +745,7 @@ export class CodebaseIndexer {
736
745
  this.workerReady = [];
737
746
  }
738
747
 
739
- /**
740
- * Send unload message to all workers to free their model memory.
741
- * This keeps workers alive but releases the embedding model from RAM.
742
- */
748
+
743
749
  async unloadWorkersModels() {
744
750
  if (this.workers.length === 0) return { unloaded: 0 };
745
751
 
@@ -787,10 +793,7 @@ export class CodebaseIndexer {
787
793
  return { unloaded: unloadedCount };
788
794
  }
789
795
 
790
- /**
791
- * Send unload message to the embedding child process.
792
- * This frees the embedding model from RAM in the child process.
793
- */
796
+
794
797
  async unloadEmbeddingChildModel() {
795
798
  const child = this._embeddingChild;
796
799
  if (!child) return { success: true, wasLoaded: false };
@@ -816,7 +819,7 @@ export class CodebaseIndexer {
816
819
  }
817
820
  }
818
821
  } catch {
819
- // Not JSON or incomplete, keep waiting
822
+
820
823
  }
821
824
  };
822
825
 
@@ -835,17 +838,14 @@ export class CodebaseIndexer {
835
838
  });
836
839
  }
837
840
 
838
- /**
839
- * Unload embedding models from all sources (workers and child process) to free RAM.
840
- * This is called after indexing when unloadModelAfterIndex is enabled.
841
- */
841
+
842
842
  async unloadEmbeddingModels() {
843
843
  const results = { workers: 0, childUnloaded: false };
844
844
 
845
- // Unload from workers (or terminate them - termination also frees memory)
845
+
846
846
  if (this.workers.length > 0) {
847
- // Terminating workers is more reliable than unloading in-place
848
- // since it fully releases the ONNX runtime memory
847
+
848
+
849
849
  if (this.config.verbose) {
850
850
  console.info(`[Indexer] Terminating ${this.workers.length} workers to free model memory`);
851
851
  }
@@ -853,7 +853,7 @@ export class CodebaseIndexer {
853
853
  results.workers = this.workers.length;
854
854
  }
855
855
 
856
- // Unload from persistent embedding child process
856
+
857
857
  if (this._embeddingChild) {
858
858
  const childResult = await this.unloadEmbeddingChildModel();
859
859
  results.childUnloaded = childResult?.wasLoaded || false;
@@ -862,7 +862,7 @@ export class CodebaseIndexer {
862
862
  }
863
863
  }
864
864
 
865
- // Trigger GC in main process if configured
865
+
866
866
  if (this.isExplicitGcEnabled()) {
867
867
  const before = process.memoryUsage();
868
868
  this.runExplicitGc({ force: true });
@@ -889,7 +889,7 @@ export class CodebaseIndexer {
889
889
  this.gitignore = ignore().add(content);
890
890
  if (this.config.verbose) console.info('[Indexer] Loaded .gitignore rules');
891
891
  } catch (_e) {
892
- // No .gitignore or error reading it
892
+
893
893
  this.gitignore = ignore();
894
894
  }
895
895
  }
@@ -931,7 +931,7 @@ export class CodebaseIndexer {
931
931
  async replaceDeadWorker(index) {
932
932
  if (this.config.verbose) console.info(`[Indexer] Replacing dead worker at index ${index}...`);
933
933
 
934
- // Use 1 thread per worker to match initializeWorkers and prevent CPU saturation
934
+
935
935
  const threadsPerWorker = 1;
936
936
  const activeWorkerCount = this.workers.filter(Boolean).length || 1;
937
937
  const workerInferenceBatchSize = this.getWorkerInferenceBatchSize({
@@ -953,7 +953,7 @@ export class CodebaseIndexer {
953
953
  },
954
954
  });
955
955
 
956
- // Wait for ready
956
+
957
957
  await new Promise((resolve, reject) => {
958
958
  const timeout = setTimeout(() => reject(new Error('Timeout')), 30000);
959
959
  newWorker.once('message', (msg) => {
@@ -972,9 +972,7 @@ export class CodebaseIndexer {
972
972
  if (this.config.verbose) console.info(`[Indexer] Worker ${index} respawned successfully`);
973
973
  }
974
974
 
975
- /**
976
- * Send MCP progress notification to connected clients
977
- */
975
+
978
976
  sendProgress(progress, total, message) {
979
977
  if (this.server) {
980
978
  try {
@@ -985,7 +983,7 @@ export class CodebaseIndexer {
985
983
  message,
986
984
  });
987
985
  } catch (_err) {
988
- // Silently ignore if client doesn't support progress notifications
986
+
989
987
  }
990
988
  }
991
989
  this.writeProgressFile(progress, total, message).catch(() => null);
@@ -1022,15 +1020,15 @@ export class CodebaseIndexer {
1022
1020
  const progressPath = path.join(this.config.cacheDirectory, 'progress.json');
1023
1021
  await fs.writeFile(progressPath, JSON.stringify(payload), 'utf-8');
1024
1022
  } catch {
1025
- // ignore progress write errors
1023
+
1026
1024
  }
1027
1025
  }
1028
1026
 
1029
- async processFilesWithWorkers(allFiles) {
1030
- const allowedFiles = [];
1031
- for (const entry of allFiles) {
1032
- if (await this.isPathInsideWorkspaceReal(entry.file)) {
1033
- allowedFiles.push(entry);
1027
+ async processFilesWithWorkers(allFiles) {
1028
+ const allowedFiles = [];
1029
+ for (const entry of allFiles) {
1030
+ if (await this.isPathInsideWorkspaceReal(entry.file)) {
1031
+ allowedFiles.push(entry);
1034
1032
  }
1035
1033
  }
1036
1034
  if (allowedFiles.length !== allFiles.length) {
@@ -1038,30 +1036,45 @@ export class CodebaseIndexer {
1038
1036
  `[Indexer] Skipping ${allFiles.length - allowedFiles.length} file(s) outside workspace`
1039
1037
  );
1040
1038
  }
1041
- if (allowedFiles.length === 0) {
1042
- return [];
1043
- }
1044
-
1045
- // Wait for any pending worker replacements to complete before distributing work
1046
- if (this._workerReplacementPromises && this._workerReplacementPromises.size > 0) {
1047
- await Promise.all(this._workerReplacementPromises.values());
1048
- }
1049
-
1050
- const activeWorkers = this.workers
1051
- .map((worker, index) => ({ worker, index }))
1052
- .filter((entry) => entry.worker);
1053
-
1054
- if (activeWorkers.length === 0) {
1055
- // Fallback: This method shouldn't be called if workers aren't available,
1056
- // but if it is, we return empty and let the caller handle legacy fallback.
1057
- return [];
1058
- }
1059
-
1060
- const results = [];
1061
- const chunkSize = Math.ceil(allowedFiles.length / activeWorkers.length);
1062
- const workerPromises = [];
1063
- const configuredTimeout = Number.isInteger(this.config.workerBatchTimeoutMs)
1064
- ? this.config.workerBatchTimeoutMs
1039
+ if (allowedFiles.length === 0) {
1040
+ return [];
1041
+ }
1042
+
1043
+ const makeRetryResults = (files) =>
1044
+ files.map((fileEntry) => ({ file: fileEntry.file, status: 'retry' }));
1045
+
1046
+ if (this.workersDisabledUntil && Date.now() < this.workersDisabledUntil) {
1047
+ if (this.config.verbose) {
1048
+ console.warn(
1049
+ `[Indexer] Workers disabled by circuit breaker; routing ${allowedFiles.length} files to main-thread fallback`
1050
+ );
1051
+ }
1052
+ return makeRetryResults(allowedFiles);
1053
+ }
1054
+
1055
+
1056
+ if (this._workerReplacementPromises && this._workerReplacementPromises.size > 0) {
1057
+ await Promise.allSettled(this._workerReplacementPromises.values());
1058
+ }
1059
+
1060
+ const activeWorkers = this.workers
1061
+ .map((worker, index) => ({ worker, index }))
1062
+ .filter((entry) => entry.worker);
1063
+
1064
+ if (activeWorkers.length === 0) {
1065
+ if (this.config.verbose) {
1066
+ console.warn(
1067
+ `[Indexer] No active workers available; routing ${allowedFiles.length} files to main-thread fallback`
1068
+ );
1069
+ }
1070
+ return makeRetryResults(allowedFiles);
1071
+ }
1072
+
1073
+ const results = [];
1074
+ const chunkSize = Math.ceil(allowedFiles.length / activeWorkers.length);
1075
+ const workerPromises = [];
1076
+ const configuredTimeout = Number.isInteger(this.config.workerBatchTimeoutMs)
1077
+ ? this.config.workerBatchTimeoutMs
1065
1078
  : 300000;
1066
1079
  const WORKER_TIMEOUT = isTestEnv() ? 1000 : configuredTimeout;
1067
1080
 
@@ -1074,27 +1087,27 @@ export class CodebaseIndexer {
1074
1087
  console.info(`[Indexer] Worker ${workerIndex}: processing ${workerFiles.length} files`);
1075
1088
  }
1076
1089
 
1077
- const promise = new Promise((resolve) => {
1078
- const batchId = `file-batch-${i}-${Date.now()}`;
1079
- const batchResults = [];
1080
- let workerKilled = false; // Atomic guard against duplicate kills
1081
-
1090
+ const promise = new Promise((resolve) => {
1091
+ const batchId = `file-batch-${i}-${Date.now()}`;
1092
+ const batchResults = [];
1093
+ let workerKilled = false;
1094
+
1082
1095
  const killWorker = async () => {
1083
- // Atomic guard: prevent concurrent killWorker calls for same worker
1096
+
1084
1097
  if (workerKilled || this.workers[workerIndex] === null) return;
1085
1098
  workerKilled = true;
1086
- this.workers[workerIndex] = null; // Mark as dead immediately before async work
1099
+ this.workers[workerIndex] = null;
1087
1100
  try {
1088
1101
  await worker.terminate?.();
1089
1102
  } catch (_err) {
1090
- // ignore termination errors
1103
+
1091
1104
  }
1092
- // Track worker replacement to prevent concurrent replacements for the same slot
1105
+
1093
1106
  if (!this._workerReplacementPromises) {
1094
1107
  this._workerReplacementPromises = new Map();
1095
1108
  }
1096
1109
  if (!this._workerReplacementPromises.has(workerIndex)) {
1097
- // Use IIFE to ensure cleanup happens in finally block even on sync errors
1110
+
1098
1111
  const replacement = (async () => {
1099
1112
  try {
1100
1113
  await this.replaceDeadWorker(workerIndex);
@@ -1108,38 +1121,52 @@ export class CodebaseIndexer {
1108
1121
  }
1109
1122
  };
1110
1123
 
1111
- const handleTimeout = () => {
1112
- // Terminate first to ensure no more messages arrive
1113
- void killWorker();
1114
- worker.off('message', handler);
1115
- worker.off('error', errorHandler);
1116
- console.warn(`[Indexer] Worker ${workerIndex} timed out (files)`);
1117
- this.recordWorkerFailure(`timeout (batch ${batchId})`);
1118
- resolve([]);
1119
- };
1120
-
1121
- let timeout = setTimeout(handleTimeout, WORKER_TIMEOUT);
1122
-
1123
- const finalize = (results) => {
1124
- clearTimeout(timeout);
1125
- worker.off('message', handler);
1126
- worker.off('error', errorHandler);
1127
- resolve(results);
1128
- };
1129
-
1130
- const handler = (msg) => {
1131
- if (msg.batchId === batchId) {
1132
- if (msg.type === 'results') {
1133
- if (Array.isArray(msg.results)) {
1134
- batchResults.push(...msg.results);
1135
- }
1136
- if (msg.done) {
1137
- finalize(batchResults);
1138
- }
1139
- } else if (msg.type === 'error') {
1140
- finalize([]);
1141
- }
1142
- }
1124
+ let timeout = null;
1125
+ const resetTimeout = () => {
1126
+ if (timeout) clearTimeout(timeout);
1127
+ timeout = setTimeout(handleTimeout, WORKER_TIMEOUT);
1128
+ };
1129
+
1130
+ const handleTimeout = () => {
1131
+
1132
+ void killWorker();
1133
+ worker.off('message', handler);
1134
+ worker.off('error', errorHandler);
1135
+ console.warn(
1136
+ `[Indexer] Worker ${workerIndex} timed out (files, no heartbeat for ${Math.round(WORKER_TIMEOUT / 1000)}s)`
1137
+ );
1138
+ this.recordWorkerFailure(`timeout (batch ${batchId})`);
1139
+ resolve([]);
1140
+ };
1141
+
1142
+ resetTimeout();
1143
+
1144
+ const finalize = (results) => {
1145
+ if (timeout) clearTimeout(timeout);
1146
+ worker.off('message', handler);
1147
+ worker.off('error', errorHandler);
1148
+ resolve(results);
1149
+ };
1150
+
1151
+ const handler = (msg) => {
1152
+ if (msg.batchId === batchId) {
1153
+ if (msg.type === 'progress') {
1154
+ resetTimeout();
1155
+ return;
1156
+ }
1157
+ if (msg.type === 'results') {
1158
+ if (Array.isArray(msg.results)) {
1159
+ batchResults.push(...msg.results);
1160
+ }
1161
+ if (msg.done) {
1162
+ finalize(batchResults);
1163
+ } else {
1164
+ resetTimeout();
1165
+ }
1166
+ } else if (msg.type === 'error') {
1167
+ finalize([]);
1168
+ }
1169
+ }
1143
1170
  };
1144
1171
 
1145
1172
  const errorHandler = (err) => {
@@ -1169,7 +1196,7 @@ export class CodebaseIndexer {
1169
1196
 
1170
1197
  const workerResults = await Promise.all(workerPromises.map((p) => p.promise));
1171
1198
 
1172
- // Identify failed files for retry
1199
+
1173
1200
  const failedFiles = [];
1174
1201
  for (let i = 0; i < workerResults.length; i++) {
1175
1202
  if (workerResults[i].length > 0) {
@@ -1179,14 +1206,14 @@ export class CodebaseIndexer {
1179
1206
  }
1180
1207
  }
1181
1208
 
1182
- // Pass failed files back to be handled by legacy path
1209
+
1183
1210
  if (failedFiles.length > 0) {
1184
1211
  if (this.config.verbose) {
1185
1212
  console.warn(
1186
1213
  `[Indexer] ${failedFiles.length} files failed in workers, falling back to main thread`
1187
1214
  );
1188
1215
  }
1189
- // Mark these as failed in the results so the caller knows to process them manually
1216
+
1190
1217
  for (const f of failedFiles) {
1191
1218
  results.push({ file: f.file, status: 'retry' });
1192
1219
  }
@@ -1195,16 +1222,14 @@ export class CodebaseIndexer {
1195
1222
  return results;
1196
1223
  }
1197
1224
 
1198
- /**
1199
- * Process chunks using worker thread pool with timeout and error recovery
1200
- */
1225
+
1201
1226
  async processChunksWithWorkers(allChunks) {
1202
1227
  const activeWorkers = this.workers
1203
1228
  .map((worker, index) => ({ worker, index }))
1204
1229
  .filter((entry) => entry.worker);
1205
1230
 
1206
1231
  if (activeWorkers.length === 0) {
1207
- // Fallback to single-threaded processing
1232
+
1208
1233
  return this.processChunksSingleThreaded(allChunks);
1209
1234
  }
1210
1235
 
@@ -1215,7 +1240,7 @@ export class CodebaseIndexer {
1215
1240
  const configuredTimeout = Number.isInteger(this.config.workerBatchTimeoutMs)
1216
1241
  ? this.config.workerBatchTimeoutMs
1217
1242
  : 300000;
1218
- const WORKER_TIMEOUT = isTestEnv() ? 1000 : configuredTimeout; // 1s in tests, configurable in prod
1243
+ const WORKER_TIMEOUT = isTestEnv() ? 1000 : configuredTimeout;
1219
1244
 
1220
1245
  if (this.config.verbose) {
1221
1246
  console.info(
@@ -1235,21 +1260,21 @@ export class CodebaseIndexer {
1235
1260
  const promise = new Promise((resolve, _reject) => {
1236
1261
  const batchId = `batch-${i}-${Date.now()}`;
1237
1262
  const batchResults = [];
1238
- let workerKilled = false; // Atomic guard against duplicate kills
1263
+ let workerKilled = false;
1239
1264
 
1240
- // Timeout handler
1265
+
1241
1266
  const killWorker = async () => {
1242
- // Atomic guard: prevent concurrent killWorker calls for same worker
1267
+
1243
1268
  if (workerKilled || this.workers[workerIndex] === null) return;
1244
1269
  workerKilled = true;
1245
- this.workers[workerIndex] = null; // Mark as dead immediately before async work
1270
+ this.workers[workerIndex] = null;
1246
1271
  try {
1247
1272
  await worker.terminate?.();
1248
1273
  } catch {
1249
- // ignore terminate errors
1274
+
1250
1275
  }
1251
1276
 
1252
- // Track worker replacement to prevent concurrent replacements for the same slot
1277
+
1253
1278
  if (!this._workerReplacementPromises) {
1254
1279
  this._workerReplacementPromises = new Map();
1255
1280
  }
@@ -1266,14 +1291,14 @@ export class CodebaseIndexer {
1266
1291
  };
1267
1292
 
1268
1293
  const handleTimeout = (label) => {
1269
- // Terminate first to ensure no more messages arrive
1294
+
1270
1295
  void killWorker();
1271
1296
  worker.off('message', handler);
1272
1297
  worker.off('error', errorHandler);
1273
1298
  if (exitHandler) worker.off('exit', exitHandler);
1274
1299
  console.warn(`[Indexer] Worker ${workerIndex} timed out, ${label}`);
1275
1300
  this.recordWorkerFailure(`timeout (batch ${batchId})`);
1276
- // Return empty and let fallback handle it
1301
+
1277
1302
  resolve([]);
1278
1303
  };
1279
1304
 
@@ -1314,17 +1339,17 @@ export class CodebaseIndexer {
1314
1339
  finalize(batchResults);
1315
1340
  } else if (msg.type === 'error') {
1316
1341
  console.warn(`[Indexer] Worker ${workerIndex} error: ${msg.error}`);
1317
- finalize([]); // Return empty, don't reject - let fallback handle
1342
+ finalize([]);
1318
1343
  }
1319
1344
  }
1320
1345
  };
1321
1346
 
1322
- // Handle worker crash
1347
+
1323
1348
  const errorHandler = (err) => {
1324
1349
  console.warn(`[Indexer] Worker ${workerIndex} crashed: ${err.message}`);
1325
1350
  this.recordWorkerFailure(`crash (${err.message})`);
1326
1351
  void killWorker();
1327
- finalize([]); // Return empty, don't reject
1352
+ finalize([]);
1328
1353
  };
1329
1354
  worker.once('error', errorHandler);
1330
1355
 
@@ -1350,21 +1375,21 @@ export class CodebaseIndexer {
1350
1375
  workerPromises.push({ promise, chunks: workerChunks });
1351
1376
  }
1352
1377
 
1353
- // Wait for all workers with error recovery
1378
+
1354
1379
  const workerResults = await Promise.all(workerPromises.map((p) => p.promise));
1355
1380
 
1356
- // Collect results and identify failed chunks that need retry
1381
+
1357
1382
  const failedChunks = [];
1358
1383
  for (let i = 0; i < workerResults.length; i++) {
1359
1384
  if (workerResults[i].length > 0) {
1360
1385
  results.push(...workerResults[i]);
1361
1386
  } else if (workerPromises[i].chunks.length > 0) {
1362
- // Worker failed or timed out, need to retry these chunks
1387
+
1363
1388
  failedChunks.push(...workerPromises[i].chunks);
1364
1389
  }
1365
1390
  }
1366
1391
 
1367
- // Retry failed chunks with single-threaded fallback
1392
+
1368
1393
  if (failedChunks.length > 0 && allowSingleThreadFallback) {
1369
1394
  console.warn(
1370
1395
  `[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`
@@ -1534,14 +1559,14 @@ export class CodebaseIndexer {
1534
1559
  try {
1535
1560
  child.stdin.write(`${JSON.stringify({ type: 'shutdown' })}\n`);
1536
1561
  } catch {
1537
- // ignore
1562
+
1538
1563
  }
1539
1564
  await new Promise((resolve) => {
1540
1565
  const timeout = setTimeout(() => {
1541
1566
  try {
1542
1567
  child.kill('SIGKILL');
1543
1568
  } catch {
1544
- // ignore
1569
+
1545
1570
  }
1546
1571
  resolve();
1547
1572
  }, 5000);
@@ -1573,7 +1598,7 @@ export class CodebaseIndexer {
1573
1598
  this._embeddingChild = null;
1574
1599
  this._embeddingProcessSessionActive = false;
1575
1600
  this._embeddingChildStopping = false;
1576
- // Clear buffers to release memory
1601
+
1577
1602
  this._embeddingChildBuffer = '';
1578
1603
  this._embeddingChildQueue = [];
1579
1604
  if (!preserveStats) {
@@ -1644,7 +1669,7 @@ export class CodebaseIndexer {
1644
1669
  try {
1645
1670
  child.kill('SIGKILL');
1646
1671
  } catch {
1647
- // ignore
1672
+
1648
1673
  }
1649
1674
  resolve([]);
1650
1675
  }, timeoutMs);
@@ -1729,7 +1754,7 @@ export class CodebaseIndexer {
1729
1754
  try {
1730
1755
  child.kill('SIGKILL');
1731
1756
  } catch {
1732
- // ignore
1757
+
1733
1758
  }
1734
1759
  if (this.config.verbose && !closed) {
1735
1760
  const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
@@ -1775,12 +1800,12 @@ export class CodebaseIndexer {
1775
1800
  }
1776
1801
  try {
1777
1802
  const parsed = JSON.parse(stdout);
1778
- // Clear large JSON buffer immediately after parsing to release memory
1803
+
1779
1804
  stdout = '';
1780
1805
  stderr = '';
1781
1806
  resolve(this.applyEmbeddingDimensionToResults(parsed?.results || []));
1782
1807
  } catch (err) {
1783
- // Clear buffers on error too
1808
+
1784
1809
  stdout = '';
1785
1810
  stderr = '';
1786
1811
  this.recordWorkerFailure(`child process parse error (${err.message})`);
@@ -1792,17 +1817,15 @@ export class CodebaseIndexer {
1792
1817
  });
1793
1818
  }
1794
1819
 
1795
- /**
1796
- * Single-threaded chunk processing (fallback)
1797
- */
1820
+
1798
1821
  async processChunksSingleThreaded(chunks) {
1799
1822
  const results = [];
1800
1823
 
1801
- // Manual GC and yield loop to prevent CPU lockup
1824
+
1802
1825
  let processedSinceGc = 0;
1803
1826
 
1804
1827
  for (const chunk of chunks) {
1805
- // Throttle speed (balanced) - yield to event loop but don't wait unnecessarily
1828
+
1806
1829
  await delay(0);
1807
1830
 
1808
1831
  try {
@@ -1810,17 +1833,17 @@ export class CodebaseIndexer {
1810
1833
  pooling: 'mean',
1811
1834
  normalize: true,
1812
1835
  });
1813
- // CRITICAL: Deep copy to release ONNX tensor memory
1836
+
1814
1837
  let vector = toFloat32Array(output.data);
1815
1838
  if (this.config.embeddingDimension) {
1816
1839
  vector = sliceAndNormalize(vector, this.config.embeddingDimension);
1817
1840
  }
1818
- // Properly dispose tensor to release ONNX runtime memory
1841
+
1819
1842
  if (typeof output.dispose === 'function') {
1820
1843
  try {
1821
1844
  output.dispose();
1822
1845
  } catch {
1823
- /* frozen tensor */
1846
+
1824
1847
  }
1825
1848
  }
1826
1849
  results.push({
@@ -1832,7 +1855,7 @@ export class CodebaseIndexer {
1832
1855
  success: true,
1833
1856
  });
1834
1857
 
1835
- // Periodic GC to prevent memory creep
1858
+
1836
1859
  processedSinceGc++;
1837
1860
  if (processedSinceGc >= 100) {
1838
1861
  this.runExplicitGc({ minIntervalMs: 5000 });
@@ -1875,10 +1898,10 @@ export class CodebaseIndexer {
1875
1898
  }
1876
1899
 
1877
1900
  try {
1878
- // Check file size first
1901
+
1879
1902
  const stats = await fs.stat(file);
1880
1903
 
1881
- // Skip directories
1904
+
1882
1905
  if (stats.isDirectory()) {
1883
1906
  return 0;
1884
1907
  }
@@ -1895,14 +1918,14 @@ export class CodebaseIndexer {
1895
1918
  const content = await fs.readFile(file, 'utf-8');
1896
1919
  const hash = hashContent(content);
1897
1920
 
1898
- // Skip if file hasn't changed
1921
+
1899
1922
  const cachedHash =
1900
1923
  typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
1901
1924
  if (cachedHash === hash) {
1902
1925
  if (this.config.verbose) {
1903
1926
  console.info(`[Indexer] Skipped ${fileName} (unchanged)`);
1904
1927
  }
1905
- // Still update metadata (size, mtime) even if hash is same
1928
+
1906
1929
  this.cache.setFileHash(file, hash, stats);
1907
1930
  return 0;
1908
1931
  }
@@ -1911,7 +1934,7 @@ export class CodebaseIndexer {
1911
1934
  console.info(`[Indexer] Indexing ${fileName}...`);
1912
1935
  }
1913
1936
 
1914
- // Extract call graph data if enabled
1937
+
1915
1938
  let callData = null;
1916
1939
  if (this.config.callGraphEnabled) {
1917
1940
  try {
@@ -1930,7 +1953,7 @@ export class CodebaseIndexer {
1930
1953
  let failedChunks = 0;
1931
1954
  const newChunks = [];
1932
1955
 
1933
- // Use workers for watcher-triggered embedding to keep main thread responsive
1956
+
1934
1957
  let useWorkers = this.shouldUseWorkers();
1935
1958
  if (useWorkers && this.workers.length === 0) {
1936
1959
  await this.initializeWorkers();
@@ -2025,20 +2048,17 @@ export class CodebaseIndexer {
2025
2048
  }
2026
2049
  }
2027
2050
 
2028
- /**
2029
- * Discover files using fdir (3-5x faster than glob)
2030
- * Uses config.excludePatterns which includes smart patterns from ignore-patterns.js
2031
- */
2051
+
2032
2052
  async discoverFiles() {
2033
2053
  const startTime = Date.now();
2034
2054
 
2035
- // Build extension filter from config
2055
+
2036
2056
  const extensions = new Set(
2037
2057
  this.config.fileExtensions.map((ext) => `.${String(ext).toLowerCase()}`)
2038
2058
  );
2039
2059
  const allowedFileNames = new Set(this.config.fileNames || []);
2040
2060
 
2041
- // Load .gitignore before discovery
2061
+
2042
2062
  await this.loadGitignore();
2043
2063
 
2044
2064
  if (!this.config.searchDirectory) {
@@ -2048,18 +2068,18 @@ export class CodebaseIndexer {
2048
2068
  const api = new fdir()
2049
2069
  .withFullPaths()
2050
2070
  .exclude((dirName, dirPath) => {
2051
- // Always exclude specific heavy folders immediately
2071
+
2052
2072
  if (dirName === 'node_modules' || dirName === '.git' || dirName === '.smart-coding-cache')
2053
2073
  return true;
2054
2074
 
2055
- // Check exclusion rules for directories
2075
+
2056
2076
  const fullPath = path.join(dirPath, dirName);
2057
2077
  return this.isExcluded(fullPath);
2058
2078
  })
2059
2079
  .filter((filePath) => {
2060
2080
  if (this.isExcluded(filePath)) return false;
2061
2081
 
2062
- // Check extensions/filenames
2082
+
2063
2083
  const base = path.basename(filePath);
2064
2084
  const ext = path.extname(filePath).toLowerCase();
2065
2085
  return extensions.has(ext) || allowedFileNames.has(base);
@@ -2072,24 +2092,22 @@ export class CodebaseIndexer {
2072
2092
  return files;
2073
2093
  }
2074
2094
 
2075
- /**
2076
- * Pre-filter files by hash (skip unchanged files before processing)
2077
- */
2095
+
2078
2096
  async preFilterFiles(files) {
2079
2097
  const startTime = Date.now();
2080
2098
  const filesToProcess = [];
2081
2099
  const skippedCount = { unchanged: 0, tooLarge: 0, error: 0 };
2082
2100
 
2083
- // Process in parallel batches for speed
2084
- // We fetch stats for 100 files at a time to keep IO efficient
2101
+
2102
+
2085
2103
  const STAT_BATCH_SIZE = Math.min(100, this.config.batchSize || 100);
2086
- // Limit concurrent file reads to 50MB to prevent OOM
2104
+
2087
2105
  const MAX_READ_BATCH_BYTES = 50 * 1024 * 1024;
2088
2106
 
2089
2107
  for (let i = 0; i < files.length; i += STAT_BATCH_SIZE) {
2090
2108
  const batchFiles = files.slice(i, i + STAT_BATCH_SIZE);
2091
2109
 
2092
- // 1. Get stats for all files in this batch parallel
2110
+
2093
2111
  const fileStats = await Promise.all(
2094
2112
  batchFiles.map(async (file) => {
2095
2113
  try {
@@ -2112,7 +2130,7 @@ export class CodebaseIndexer {
2112
2130
  })
2113
2131
  );
2114
2132
 
2115
- // 2. Process valid files in size-constrained sub-batches
2133
+
2116
2134
  let currentReadBatch = [];
2117
2135
  let currentReadBytes = 0;
2118
2136
 
@@ -2124,7 +2142,7 @@ export class CodebaseIndexer {
2124
2142
  const processReadBatch = async (batch) => {
2125
2143
  const results = await Promise.all(
2126
2144
  batch.map(async ({ file, size, mtimeMs }) => {
2127
- // Check if we have cached metadata for this file
2145
+
2128
2146
  const cachedHash =
2129
2147
  typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
2130
2148
  const cachedMeta = this.cache.getFileMeta ? this.cache.getFileMeta(file) : null;
@@ -2137,19 +2155,19 @@ export class CodebaseIndexer {
2137
2155
  Number.isFinite(cachedMeta.size) &&
2138
2156
  cachedMeta.size === size;
2139
2157
  if (metaMatches) {
2140
- // Avoid missing rapid edits on coarse timestamp filesystems.
2158
+
2141
2159
  const now = Date.now();
2142
2160
  const isRecent = Math.abs(now - mtimeMs) <= mtimeSafeWindowMs;
2143
2161
  if (!isRecent) {
2144
- // Metadata matches exactly, skip reading/hashing
2162
+
2145
2163
  skippedCount.unchanged++;
2146
2164
  return null;
2147
2165
  }
2148
2166
  }
2149
2167
 
2150
- // Suspect file: Either new, or metadata changed.
2151
- // We pass it to indexAll with the cachedHash as 'expectedHash'
2152
- // so workers can perform the actual hashing and unchanged check.
2168
+
2169
+
2170
+
2153
2171
  return { file, hash: null, expectedHash: cachedHash, force: false, size, mtimeMs };
2154
2172
  })
2155
2173
  );
@@ -2176,7 +2194,7 @@ export class CodebaseIndexer {
2176
2194
  await processReadBatch(currentReadBatch);
2177
2195
  }
2178
2196
 
2179
- // Pre-warm HybridSearch cache if available
2197
+
2180
2198
  if (this.server && this.server.hybridSearch && this.server.hybridSearch.fileModTimes) {
2181
2199
  for (const stat of fileStats) {
2182
2200
  if (stat && stat.file && typeof stat.mtimeMs === 'number') {
@@ -2247,7 +2265,7 @@ export class CodebaseIndexer {
2247
2265
  this.sendProgress(0, 100, 'Indexing started');
2248
2266
  console.info(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);
2249
2267
 
2250
- // Step 1: Fast file discovery with fdir
2268
+
2251
2269
  const files = await this.discoverFiles();
2252
2270
 
2253
2271
  if (files.length === 0) {
@@ -2261,12 +2279,12 @@ export class CodebaseIndexer {
2261
2279
  };
2262
2280
  }
2263
2281
 
2264
- // Send progress: discovery complete
2282
+
2265
2283
  this.sendProgress(5, 100, `Discovered ${files.length} files`);
2266
2284
 
2267
2285
  const currentFilesSet = new Set(files);
2268
2286
 
2269
- // Step 1.5: Prune deleted or excluded files from cache
2287
+
2270
2288
  if (!force) {
2271
2289
  const cachedFiles =
2272
2290
  typeof this.cache.getFileHashKeys === 'function' ? this.cache.getFileHashKeys() : [];
@@ -2284,7 +2302,7 @@ export class CodebaseIndexer {
2284
2302
  if (this.config.verbose) {
2285
2303
  console.info(`[Indexer] Pruned ${prunedCount} deleted/excluded files from index`);
2286
2304
  }
2287
- // If we pruned files, we should save these changes even if no other files changed
2305
+
2288
2306
  }
2289
2307
 
2290
2308
  const prunedCallGraph = this.cache.pruneCallGraphData(currentFilesSet);
@@ -2293,12 +2311,12 @@ export class CodebaseIndexer {
2293
2311
  }
2294
2312
  }
2295
2313
 
2296
- // Step 2: Pre-filter unchanged files (early hash check)
2314
+
2297
2315
  const filesToProcess = await this.preFilterFiles(files);
2298
2316
  const filesToProcessSet = new Set(filesToProcess.map((entry) => entry.file));
2299
2317
  const filesToProcessByFile = new Map(filesToProcess.map((entry) => [entry.file, entry]));
2300
2318
 
2301
- // Re-index files missing call graph data (if enabled)
2319
+
2302
2320
  if (this.config.callGraphEnabled && this.cache.getVectorStore().length > 0) {
2303
2321
  const cachedFiles = new Set(this.cache.getVectorStore().map((c) => c.file));
2304
2322
  const callDataFiles = new Set(this.cache.getFileCallDataKeys());
@@ -2372,12 +2390,12 @@ export class CodebaseIndexer {
2372
2390
  };
2373
2391
  }
2374
2392
 
2375
- // Send progress: filtering complete
2393
+
2376
2394
  console.info(`[Indexer] Processing ${filesToProcess.length} changed files`);
2377
2395
  this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
2378
2396
 
2379
- // Step 3: Determine batch size based on project size
2380
- // Adaptive batch size: use larger batches for larger projects to reduce overhead
2397
+
2398
+
2381
2399
  let adaptiveBatchSize = 10;
2382
2400
  if (files.length > 500) adaptiveBatchSize = 50;
2383
2401
  if (files.length > 1000) adaptiveBatchSize = 100;
@@ -2389,7 +2407,7 @@ export class CodebaseIndexer {
2389
2407
  );
2390
2408
  }
2391
2409
 
2392
- // Step 4: Initialize worker threads (skip if explicitly disabled)
2410
+
2393
2411
  const allowSingleThreadFallback =
2394
2412
  this.config.allowSingleThreadFallback !== false ||
2395
2413
  this.config.workerThreads === 0 ||
@@ -2408,7 +2426,7 @@ export class CodebaseIndexer {
2408
2426
  const useEmbeddingProcessPerBatch = this.shouldUseEmbeddingProcessPerBatch(useWorkers);
2409
2427
  let embeddingRuntimeSummary = '';
2410
2428
  if (useWorkers && this.workers.length > 0) {
2411
- // Worker pool is intentionally fixed to 1 ONNX thread per worker.
2429
+
2412
2430
  const workerInferenceBatchSize =
2413
2431
  this.getWorkerInferenceBatchSize({ numWorkers: this.workers.length }) ?? 'default';
2414
2432
  embeddingRuntimeSummary =
@@ -2452,12 +2470,15 @@ export class CodebaseIndexer {
2452
2470
 
2453
2471
  let totalChunks = 0;
2454
2472
  let processedFiles = 0;
2473
+ const checkpointIntervalMs = this.getIndexCheckpointIntervalMs();
2474
+ let lastCheckpointSaveAt = Date.now();
2475
+ let checkpointSaveCount = 0;
2455
2476
 
2456
2477
  console.info(
2457
2478
  `[Indexer] Embedding pass started: ${filesToProcess.length} files using ${this.config.embeddingModel}`
2458
2479
  );
2459
2480
 
2460
- // Step 5: Process files in adaptive batches
2481
+
2461
2482
  for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
2462
2483
  const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
2463
2484
 
@@ -2467,7 +2488,7 @@ export class CodebaseIndexer {
2467
2488
  const callDataByFile = new Map();
2468
2489
  const filesForWorkers = [];
2469
2490
 
2470
- // Memory safeguard
2491
+
2471
2492
  const mem = process.memoryUsage();
2472
2493
  if (mem.rss > 2048 * 1024 * 1024) {
2473
2494
  this.runExplicitGc({ minIntervalMs: 5000 });
@@ -2495,15 +2516,15 @@ export class CodebaseIndexer {
2495
2516
  (typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null);
2496
2517
 
2497
2518
  if (useWorkersForBatch && (content === undefined || content === null)) {
2498
- // Speed optimization: Offload reading and hashing to workers.
2499
- // Main thread skips I/O entirely for this file.
2519
+
2520
+
2500
2521
  filesForWorkers.push({ file, content: null, force, expectedHash });
2501
- // Initialize stats placeholder (will be updated with worker results)
2522
+
2502
2523
  fileStats.set(file, { hash: null, totalChunks: 0, successChunks: 0, size, mtimeMs });
2503
2524
  continue;
2504
2525
  }
2505
2526
 
2506
- // Read content if not provided (Legacy Path or workers disabled)
2527
+
2507
2528
  if (content === undefined || content === null) {
2508
2529
  let stats = null;
2509
2530
  try {
@@ -2544,7 +2565,7 @@ export class CodebaseIndexer {
2544
2565
  if (typeof content !== 'string') content = String(content);
2545
2566
  if (!liveHash) liveHash = hashContent(content);
2546
2567
  if (!Number.isFinite(size)) {
2547
- // Use character length as approximation to avoid blocking Buffer.byteLength on large strings
2568
+
2548
2569
  size = content.length;
2549
2570
  }
2550
2571
  if (size > this.config.maxFileSize) {
@@ -2568,7 +2589,7 @@ export class CodebaseIndexer {
2568
2589
 
2569
2590
  if (useWorkersForBatch) {
2570
2591
  filesForWorkers.push({ file, content, force, expectedHash });
2571
- // Initialize stats placeholder (will be updated with worker results)
2592
+
2572
2593
  fileStats.set(file, {
2573
2594
  hash: liveHash,
2574
2595
  totalChunks: 0,
@@ -2579,7 +2600,7 @@ export class CodebaseIndexer {
2579
2600
  continue;
2580
2601
  }
2581
2602
 
2582
- // Legacy / Fallback path: Chunk on main thread
2603
+
2583
2604
  if (this.config.callGraphEnabled) {
2584
2605
  try {
2585
2606
  const callData = extractCallData(content, file);
@@ -2613,7 +2634,7 @@ export class CodebaseIndexer {
2613
2634
  }
2614
2635
  }
2615
2636
 
2616
- // Process files with workers (New Path)
2637
+
2617
2638
  if (filesForWorkers.length > 0) {
2618
2639
  const results = await this.processFilesWithWorkers(filesForWorkers);
2619
2640
 
@@ -2622,7 +2643,7 @@ export class CodebaseIndexer {
2622
2643
  if (res.status === 'indexed' && stats) {
2623
2644
  stats.totalChunks = res.results.length;
2624
2645
  stats.successChunks = res.results.length;
2625
- if (res.hash) stats.hash = res.hash; // Update with new hash from worker
2646
+ if (res.hash) stats.hash = res.hash;
2626
2647
  if (res.callData) callDataByFile.set(res.file, res.callData);
2627
2648
 
2628
2649
  const chunks = res.results.map((r) => ({
@@ -2634,8 +2655,8 @@ export class CodebaseIndexer {
2634
2655
  }));
2635
2656
  newChunksByFile.set(res.file, chunks);
2636
2657
  } else if (res.status === 'unchanged' && stats) {
2637
- // Worker found file hash matches old hash
2638
- stats.totalChunks = 0; // Signal skip commit
2658
+
2659
+ stats.totalChunks = 0;
2639
2660
  stats.successChunks = 0;
2640
2661
  stats.hash = res.hash;
2641
2662
  this.cache.setFileHash(res.file, res.hash, { size: res.size, mtimeMs: res.mtimeMs });
@@ -2643,7 +2664,7 @@ export class CodebaseIndexer {
2643
2664
  this.cache.setFileCallData(res.file, res.callData);
2644
2665
  }
2645
2666
  } else if ((res.status === 'retry' || res.status === 'error') && stats) {
2646
- // Worker failed, fallback to local chunking + single threaded
2667
+
2647
2668
  const original = filesForWorkers.find((f) => f.file === res.file);
2648
2669
  if (original) {
2649
2670
  if (this.config.verbose)
@@ -2714,15 +2735,15 @@ export class CodebaseIndexer {
2714
2735
  }
2715
2736
  }
2716
2737
 
2717
- // Process chunks (Legacy Path & Fallbacks)
2738
+
2718
2739
  if (allChunks.length > 0) {
2719
2740
  const chunksToProcess = allChunks.slice();
2720
2741
  let results = [];
2721
2742
  if (useEmbeddingProcessPerBatch) {
2722
2743
  results = await this.processChunksInChildProcess(chunksToProcess);
2723
2744
  } else {
2724
- // If we are here, either workers are disabled/full or these are retry chunks
2725
- // Use single threaded fallback if not using child process
2745
+
2746
+
2726
2747
  results = await this.processChunksSingleThreaded(chunksToProcess);
2727
2748
  }
2728
2749
 
@@ -2743,7 +2764,7 @@ export class CodebaseIndexer {
2743
2764
  }
2744
2765
  }
2745
2766
 
2746
- // Commit changes to cache
2767
+
2747
2768
  for (const [file, stats] of fileStats) {
2748
2769
  if (stats.totalChunks > 0 && stats.successChunks === stats.totalChunks) {
2749
2770
  this.cache.removeFileFromStore(file);
@@ -2762,7 +2783,7 @@ export class CodebaseIndexer {
2762
2783
  this.cache.setFileCallData(file, callData);
2763
2784
  }
2764
2785
  } else if (stats.totalChunks === 0) {
2765
- // File had no chunks (empty or comments only), just mark as indexed
2786
+
2766
2787
  if (typeof stats.hash === 'string' && stats.hash.length > 0) {
2767
2788
  this.cache.setFileHash(file, stats.hash, { size: stats.size, mtimeMs: stats.mtimeMs });
2768
2789
  } else if (this.config.verbose) {
@@ -2783,7 +2804,24 @@ export class CodebaseIndexer {
2783
2804
 
2784
2805
  processedFiles += batch.length;
2785
2806
 
2786
- // Progress indicator
2807
+ const shouldCheckpointSave =
2808
+ checkpointIntervalMs > 0 &&
2809
+ processedFiles < filesToProcess.length &&
2810
+ Date.now() - lastCheckpointSaveAt >= checkpointIntervalMs;
2811
+ if (shouldCheckpointSave) {
2812
+ await this.traceIncrementalMemoryPhase('indexAll.checkpointSave', async () => {
2813
+ await this.cache.save();
2814
+ });
2815
+ checkpointSaveCount += 1;
2816
+ lastCheckpointSaveAt = Date.now();
2817
+ if (this.config.verbose) {
2818
+ console.info(
2819
+ `[Indexer] Checkpoint saved (${processedFiles}/${filesToProcess.length} files processed)`
2820
+ );
2821
+ }
2822
+ }
2823
+
2824
+
2787
2825
  if (
2788
2826
  processedFiles % (adaptiveBatchSize * 2) === 0 ||
2789
2827
  processedFiles === filesToProcess.length
@@ -2802,7 +2840,7 @@ export class CodebaseIndexer {
2802
2840
  );
2803
2841
  }
2804
2842
 
2805
- // Batch-level memory cleanup to reduce peak usage
2843
+
2806
2844
  allChunks.length = 0;
2807
2845
  filesForWorkers.length = 0;
2808
2846
  fileStats.clear();
@@ -2811,7 +2849,7 @@ export class CodebaseIndexer {
2811
2849
  await delay(0);
2812
2850
  }
2813
2851
 
2814
- // Cleanup workers
2852
+
2815
2853
  if (this.workers.length > 0) {
2816
2854
  await this.terminateWorkers();
2817
2855
  }
@@ -2823,7 +2861,7 @@ export class CodebaseIndexer {
2823
2861
  `[Indexer] Embedding pass complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`
2824
2862
  );
2825
2863
 
2826
- // Send completion progress
2864
+
2827
2865
  this.sendProgress(
2828
2866
  100,
2829
2867
  100,
@@ -2840,6 +2878,8 @@ export class CodebaseIndexer {
2840
2878
  lastBatchSize: adaptiveBatchSize,
2841
2879
  lastWorkerThreads: resolvedWorkerThreads,
2842
2880
  lastEmbeddingProcessPerBatch: useEmbeddingProcessPerBatch,
2881
+ lastCheckpointIntervalMs: checkpointIntervalMs,
2882
+ lastCheckpointSaves: checkpointSaveCount,
2843
2883
  });
2844
2884
  await this.cache.save();
2845
2885
 
@@ -2857,7 +2897,7 @@ export class CodebaseIndexer {
2857
2897
  }
2858
2898
  }
2859
2899
 
2860
- // Unload embedding models to free RAM
2900
+
2861
2901
  if (this.config.unloadModelAfterIndex) {
2862
2902
  console.info(
2863
2903
  '[Indexer] unloadModelAfterIndex enabled; embedding model will be reloaded on next query'
@@ -2866,7 +2906,7 @@ export class CodebaseIndexer {
2866
2906
  }
2867
2907
  this.maybeShutdownQueryEmbeddingPool('full index');
2868
2908
 
2869
- // Rebuild call graph in background
2909
+
2870
2910
  if (this.config.callGraphEnabled) {
2871
2911
  this.cache.rebuildCallGraph();
2872
2912
  }
@@ -2906,13 +2946,13 @@ export class CodebaseIndexer {
2906
2946
  }
2907
2947
 
2908
2948
  enqueueWatchEvent(type, filePath) {
2909
- // Prevent unbounded memory growth during rapid file churn (e.g., build processes)
2949
+
2910
2950
  if (this.pendingWatchEvents.size >= MAX_PENDING_WATCH_EVENTS) {
2911
2951
  console.warn(
2912
2952
  `[Indexer] pendingWatchEvents limit reached (${MAX_PENDING_WATCH_EVENTS}), ` +
2913
2953
  `trimming oldest ${this.pendingWatchEvents.size - PENDING_WATCH_EVENTS_TRIM_SIZE} events`
2914
2954
  );
2915
- // Drop oldest events (Map iterates in insertion order)
2955
+
2916
2956
  const toRemove = this.pendingWatchEvents.size - PENDING_WATCH_EVENTS_TRIM_SIZE;
2917
2957
  let count = 0;
2918
2958
  for (const key of this.pendingWatchEvents.keys()) {
@@ -2921,13 +2961,13 @@ export class CodebaseIndexer {
2921
2961
  }
2922
2962
  }
2923
2963
 
2924
- // If it's a delete, it always wins
2964
+
2925
2965
  if (type === 'unlink') {
2926
2966
  this.pendingWatchEvents.set(filePath, 'unlink');
2927
2967
  return;
2928
2968
  }
2929
2969
 
2930
- // If we're adding/changing, it overwrites a potential unlink (file came back)
2970
+
2931
2971
  this.pendingWatchEvents.set(filePath, type);
2932
2972
  }
2933
2973
 
@@ -2974,20 +3014,17 @@ export class CodebaseIndexer {
2974
3014
  }
2975
3015
  }
2976
3016
 
2977
- /**
2978
- * Debounced file indexing for watcher events.
2979
- * Consolidates rapid add/change events and prevents concurrent indexing of the same file.
2980
- */
3017
+
2981
3018
  debouncedWatchIndexFile(fullPath, eventType) {
2982
- // Cancel any pending debounce timer for this file
3019
+
2983
3020
  const existingTimer = this._watcherDebounceTimers.get(fullPath);
2984
3021
  if (existingTimer) {
2985
3022
  clearTimeout(existingTimer);
2986
3023
  }
2987
3024
 
2988
- // If file is currently being indexed, just schedule a re-index after it completes
3025
+
2989
3026
  if (this._watcherInProgress.has(fullPath)) {
2990
- // Schedule a follow-up reindex after current one completes
3027
+
2991
3028
  this._watcherPendingReindex.set(fullPath, eventType);
2992
3029
  if (this.config.verbose) {
2993
3030
  console.info(
@@ -2997,14 +3034,14 @@ export class CodebaseIndexer {
2997
3034
  return;
2998
3035
  }
2999
3036
 
3000
- // Set a debounce timer to consolidate rapid events
3037
+
3001
3038
  const timer = setTimeout(async () => {
3002
3039
  this._watcherDebounceTimers.delete(fullPath);
3003
3040
 
3004
- // Mark file as in-progress
3041
+
3005
3042
  const indexPromise = (async () => {
3006
3043
  try {
3007
- // Invalidate recency cache
3044
+
3008
3045
  if (this.server && this.server.hybridSearch) {
3009
3046
  this.server.hybridSearch.clearFileModTime(fullPath);
3010
3047
  }
@@ -3043,7 +3080,7 @@ export class CodebaseIndexer {
3043
3080
  async setupFileWatcher() {
3044
3081
  if (!this.config.watchFiles) return;
3045
3082
 
3046
- // Close existing watcher if active to prevent leaks
3083
+
3047
3084
  if (this.watcher) {
3048
3085
  await this.watcher.close();
3049
3086
  this.watcher = null;
@@ -3099,7 +3136,7 @@ export class CodebaseIndexer {
3099
3136
  const fullPath = path.join(this.config.searchDirectory, filePath);
3100
3137
  console.info(`[Indexer] New file detected: ${filePath}`);
3101
3138
 
3102
- // Invalidate recency cache for consistency
3139
+
3103
3140
  if (this.server && this.server.hybridSearch) {
3104
3141
  this.server.hybridSearch.clearFileModTime(fullPath);
3105
3142
  }
@@ -3112,14 +3149,14 @@ export class CodebaseIndexer {
3112
3149
  return;
3113
3150
  }
3114
3151
 
3115
- // Use debounced indexing to consolidate rapid add/change events
3152
+
3116
3153
  this.debouncedWatchIndexFile(fullPath, 'add');
3117
3154
  })
3118
3155
  .on('change', (filePath) => {
3119
3156
  const fullPath = path.join(this.config.searchDirectory, filePath);
3120
3157
  console.info(`[Indexer] File changed: ${filePath}`);
3121
3158
 
3122
- // Invalidate recency cache for consistency
3159
+
3123
3160
  if (this.server && this.server.hybridSearch) {
3124
3161
  this.server.hybridSearch.clearFileModTime(fullPath);
3125
3162
  }
@@ -3132,7 +3169,7 @@ export class CodebaseIndexer {
3132
3169
  return;
3133
3170
  }
3134
3171
 
3135
- // Use debounced indexing to consolidate rapid add/change events
3172
+
3136
3173
  this.debouncedWatchIndexFile(fullPath, 'change');
3137
3174
  })
3138
3175
  .on('unlink', async (filePath) => {
@@ -3147,7 +3184,7 @@ export class CodebaseIndexer {
3147
3184
  return;
3148
3185
  }
3149
3186
 
3150
- // Invalidate recency cache
3187
+
3151
3188
  if (this.server && this.server.hybridSearch) {
3152
3189
  this.server.hybridSearch.clearFileModTime(fullPath);
3153
3190
  }
@@ -3195,7 +3232,7 @@ export class CodebaseIndexer {
3195
3232
  }
3196
3233
  }
3197
3234
 
3198
- // MCP Tool definition for this feature
3235
+
3199
3236
  export function getToolDefinition() {
3200
3237
  return {
3201
3238
  name: 'b_index_codebase',
@@ -3221,12 +3258,12 @@ export function getToolDefinition() {
3221
3258
  };
3222
3259
  }
3223
3260
 
3224
- // Tool handler
3261
+
3225
3262
  export async function handleToolCall(request, indexer) {
3226
3263
  const force = request.params.arguments?.force || false;
3227
3264
  const result = await indexer.indexAll(force);
3228
3265
 
3229
- // Handle case when indexing was skipped due to concurrent request
3266
+
3230
3267
  if (result?.skipped) {
3231
3268
  return {
3232
3269
  content: [
@@ -3238,7 +3275,7 @@ export async function handleToolCall(request, indexer) {
3238
3275
  };
3239
3276
  }
3240
3277
 
3241
- // Get current stats from cache
3278
+
3242
3279
  const vectorStore = indexer.cache.getVectorStore();
3243
3280
  const stats = {
3244
3281
  totalChunks: result?.totalChunks ?? vectorStore.length,