@colbymchenry/codegraph 0.6.8 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/README.md +179 -476
  2. package/dist/bin/codegraph.d.ts +0 -5
  3. package/dist/bin/codegraph.d.ts.map +1 -1
  4. package/dist/bin/codegraph.js +217 -237
  5. package/dist/bin/codegraph.js.map +1 -1
  6. package/dist/bin/uninstall.d.ts +0 -1
  7. package/dist/bin/uninstall.d.ts.map +1 -1
  8. package/dist/bin/uninstall.js +3 -29
  9. package/dist/bin/uninstall.js.map +1 -1
  10. package/dist/context/index.d.ts +3 -5
  11. package/dist/context/index.d.ts.map +1 -1
  12. package/dist/context/index.js +531 -52
  13. package/dist/context/index.js.map +1 -1
  14. package/dist/db/migrations.d.ts +1 -1
  15. package/dist/db/migrations.d.ts.map +1 -1
  16. package/dist/db/migrations.js +10 -1
  17. package/dist/db/migrations.js.map +1 -1
  18. package/dist/db/queries.d.ts +53 -0
  19. package/dist/db/queries.d.ts.map +1 -1
  20. package/dist/db/queries.js +244 -14
  21. package/dist/db/queries.js.map +1 -1
  22. package/dist/db/schema.sql +1 -16
  23. package/dist/extraction/dfm-extractor.d.ts +31 -0
  24. package/dist/extraction/dfm-extractor.d.ts.map +1 -0
  25. package/dist/extraction/dfm-extractor.js +151 -0
  26. package/dist/extraction/dfm-extractor.js.map +1 -0
  27. package/dist/extraction/grammars.d.ts +9 -1
  28. package/dist/extraction/grammars.d.ts.map +1 -1
  29. package/dist/extraction/grammars.js +34 -2
  30. package/dist/extraction/grammars.js.map +1 -1
  31. package/dist/extraction/index.d.ts +7 -1
  32. package/dist/extraction/index.d.ts.map +1 -1
  33. package/dist/extraction/index.js +373 -22
  34. package/dist/extraction/index.js.map +1 -1
  35. package/dist/extraction/languages/c-cpp.d.ts +4 -0
  36. package/dist/extraction/languages/c-cpp.d.ts.map +1 -0
  37. package/dist/extraction/languages/c-cpp.js +126 -0
  38. package/dist/extraction/languages/c-cpp.js.map +1 -0
  39. package/dist/extraction/languages/csharp.d.ts +3 -0
  40. package/dist/extraction/languages/csharp.d.ts.map +1 -0
  41. package/dist/extraction/languages/csharp.js +72 -0
  42. package/dist/extraction/languages/csharp.js.map +1 -0
  43. package/dist/extraction/languages/dart.d.ts +3 -0
  44. package/dist/extraction/languages/dart.d.ts.map +1 -0
  45. package/dist/extraction/languages/dart.js +192 -0
  46. package/dist/extraction/languages/dart.js.map +1 -0
  47. package/dist/extraction/languages/go.d.ts +3 -0
  48. package/dist/extraction/languages/go.d.ts.map +1 -0
  49. package/dist/extraction/languages/go.js +58 -0
  50. package/dist/extraction/languages/go.js.map +1 -0
  51. package/dist/extraction/languages/index.d.ts +10 -0
  52. package/dist/extraction/languages/index.d.ts.map +1 -0
  53. package/dist/extraction/languages/index.js +43 -0
  54. package/dist/extraction/languages/index.js.map +1 -0
  55. package/dist/extraction/languages/java.d.ts +3 -0
  56. package/dist/extraction/languages/java.d.ts.map +1 -0
  57. package/dist/extraction/languages/java.js +64 -0
  58. package/dist/extraction/languages/java.js.map +1 -0
  59. package/dist/extraction/languages/javascript.d.ts +3 -0
  60. package/dist/extraction/languages/javascript.d.ts.map +1 -0
  61. package/dist/extraction/languages/javascript.js +90 -0
  62. package/dist/extraction/languages/javascript.js.map +1 -0
  63. package/dist/extraction/languages/kotlin.d.ts +3 -0
  64. package/dist/extraction/languages/kotlin.d.ts.map +1 -0
  65. package/dist/extraction/languages/kotlin.js +253 -0
  66. package/dist/extraction/languages/kotlin.js.map +1 -0
  67. package/dist/extraction/languages/pascal.d.ts +3 -0
  68. package/dist/extraction/languages/pascal.d.ts.map +1 -0
  69. package/dist/extraction/languages/pascal.js +66 -0
  70. package/dist/extraction/languages/pascal.js.map +1 -0
  71. package/dist/extraction/languages/php.d.ts +3 -0
  72. package/dist/extraction/languages/php.d.ts.map +1 -0
  73. package/dist/extraction/languages/php.js +107 -0
  74. package/dist/extraction/languages/php.js.map +1 -0
  75. package/dist/extraction/languages/python.d.ts +3 -0
  76. package/dist/extraction/languages/python.d.ts.map +1 -0
  77. package/dist/extraction/languages/python.js +56 -0
  78. package/dist/extraction/languages/python.js.map +1 -0
  79. package/dist/extraction/languages/ruby.d.ts +3 -0
  80. package/dist/extraction/languages/ruby.d.ts.map +1 -0
  81. package/dist/extraction/languages/ruby.js +114 -0
  82. package/dist/extraction/languages/ruby.js.map +1 -0
  83. package/dist/extraction/languages/rust.d.ts +3 -0
  84. package/dist/extraction/languages/rust.d.ts.map +1 -0
  85. package/dist/extraction/languages/rust.js +109 -0
  86. package/dist/extraction/languages/rust.js.map +1 -0
  87. package/dist/extraction/languages/swift.d.ts +3 -0
  88. package/dist/extraction/languages/swift.d.ts.map +1 -0
  89. package/dist/extraction/languages/swift.js +91 -0
  90. package/dist/extraction/languages/swift.js.map +1 -0
  91. package/dist/extraction/languages/typescript.d.ts +3 -0
  92. package/dist/extraction/languages/typescript.d.ts.map +1 -0
  93. package/dist/extraction/languages/typescript.js +129 -0
  94. package/dist/extraction/languages/typescript.js.map +1 -0
  95. package/dist/extraction/liquid-extractor.d.ts +52 -0
  96. package/dist/extraction/liquid-extractor.d.ts.map +1 -0
  97. package/dist/extraction/liquid-extractor.js +313 -0
  98. package/dist/extraction/liquid-extractor.js.map +1 -0
  99. package/dist/extraction/parse-worker.d.ts +8 -0
  100. package/dist/extraction/parse-worker.d.ts.map +1 -0
  101. package/dist/extraction/parse-worker.js +57 -0
  102. package/dist/extraction/parse-worker.js.map +1 -0
  103. package/dist/extraction/svelte-extractor.d.ts +56 -0
  104. package/dist/extraction/svelte-extractor.d.ts.map +1 -0
  105. package/dist/extraction/svelte-extractor.js +272 -0
  106. package/dist/extraction/svelte-extractor.js.map +1 -0
  107. package/dist/extraction/tree-sitter-helpers.d.ts +28 -0
  108. package/dist/extraction/tree-sitter-helpers.d.ts.map +1 -0
  109. package/dist/extraction/tree-sitter-helpers.js +103 -0
  110. package/dist/extraction/tree-sitter-helpers.js.map +1 -0
  111. package/dist/extraction/tree-sitter-types.d.ts +179 -0
  112. package/dist/extraction/tree-sitter-types.d.ts.map +1 -0
  113. package/dist/extraction/tree-sitter-types.js +10 -0
  114. package/dist/extraction/tree-sitter-types.js.map +1 -0
  115. package/dist/extraction/tree-sitter.d.ts +67 -125
  116. package/dist/extraction/tree-sitter.d.ts.map +1 -1
  117. package/dist/extraction/tree-sitter.js +1052 -1855
  118. package/dist/extraction/tree-sitter.js.map +1 -1
  119. package/dist/graph/traversal.d.ts.map +1 -1
  120. package/dist/graph/traversal.js +27 -3
  121. package/dist/graph/traversal.js.map +1 -1
  122. package/dist/index.d.ts +29 -53
  123. package/dist/index.d.ts.map +1 -1
  124. package/dist/index.js +88 -114
  125. package/dist/index.js.map +1 -1
  126. package/dist/installer/claude-md-template.d.ts +1 -1
  127. package/dist/installer/claude-md-template.d.ts.map +1 -1
  128. package/dist/installer/claude-md-template.js +15 -15
  129. package/dist/installer/config-writer.d.ts +1 -10
  130. package/dist/installer/config-writer.d.ts.map +1 -1
  131. package/dist/installer/config-writer.js +0 -79
  132. package/dist/installer/config-writer.js.map +1 -1
  133. package/dist/installer/index.d.ts +3 -4
  134. package/dist/installer/index.d.ts.map +1 -1
  135. package/dist/installer/index.js +118 -116
  136. package/dist/installer/index.js.map +1 -1
  137. package/dist/mcp/index.d.ts +5 -0
  138. package/dist/mcp/index.d.ts.map +1 -1
  139. package/dist/mcp/index.js +25 -1
  140. package/dist/mcp/index.js.map +1 -1
  141. package/dist/mcp/tools.d.ts +33 -0
  142. package/dist/mcp/tools.d.ts.map +1 -1
  143. package/dist/mcp/tools.js +432 -21
  144. package/dist/mcp/tools.js.map +1 -1
  145. package/dist/resolution/frameworks/csharp.js +29 -84
  146. package/dist/resolution/frameworks/csharp.js.map +1 -1
  147. package/dist/resolution/frameworks/express.js +44 -48
  148. package/dist/resolution/frameworks/express.js.map +1 -1
  149. package/dist/resolution/frameworks/go.js +34 -70
  150. package/dist/resolution/frameworks/go.js.map +1 -1
  151. package/dist/resolution/frameworks/java.js +29 -87
  152. package/dist/resolution/frameworks/java.js.map +1 -1
  153. package/dist/resolution/frameworks/laravel.js +6 -6
  154. package/dist/resolution/frameworks/laravel.js.map +1 -1
  155. package/dist/resolution/frameworks/python.js +33 -98
  156. package/dist/resolution/frameworks/python.js.map +1 -1
  157. package/dist/resolution/frameworks/react.js +53 -76
  158. package/dist/resolution/frameworks/react.js.map +1 -1
  159. package/dist/resolution/frameworks/ruby.js +12 -24
  160. package/dist/resolution/frameworks/ruby.js.map +1 -1
  161. package/dist/resolution/frameworks/rust.js +26 -66
  162. package/dist/resolution/frameworks/rust.js.map +1 -1
  163. package/dist/resolution/frameworks/svelte.js +11 -31
  164. package/dist/resolution/frameworks/svelte.js.map +1 -1
  165. package/dist/resolution/frameworks/swift.js +42 -160
  166. package/dist/resolution/frameworks/swift.js.map +1 -1
  167. package/dist/resolution/index.d.ts +19 -6
  168. package/dist/resolution/index.d.ts.map +1 -1
  169. package/dist/resolution/index.js +300 -141
  170. package/dist/resolution/index.js.map +1 -1
  171. package/dist/resolution/name-matcher.d.ts +5 -0
  172. package/dist/resolution/name-matcher.d.ts.map +1 -1
  173. package/dist/resolution/name-matcher.js +148 -8
  174. package/dist/resolution/name-matcher.js.map +1 -1
  175. package/dist/resolution/types.d.ts +1 -1
  176. package/dist/resolution/types.d.ts.map +1 -1
  177. package/dist/search/query-utils.d.ts +26 -1
  178. package/dist/search/query-utils.d.ts.map +1 -1
  179. package/dist/search/query-utils.js +209 -9
  180. package/dist/search/query-utils.js.map +1 -1
  181. package/dist/sync/index.d.ts +2 -4
  182. package/dist/sync/index.d.ts.map +1 -1
  183. package/dist/sync/index.js +4 -3
  184. package/dist/sync/index.js.map +1 -1
  185. package/dist/sync/watcher.d.ts +81 -0
  186. package/dist/sync/watcher.d.ts.map +1 -0
  187. package/dist/sync/watcher.js +184 -0
  188. package/dist/sync/watcher.js.map +1 -0
  189. package/dist/types.d.ts +2 -0
  190. package/dist/types.d.ts.map +1 -1
  191. package/dist/types.js.map +1 -1
  192. package/dist/ui/shimmer-progress.d.ts +11 -0
  193. package/dist/ui/shimmer-progress.d.ts.map +1 -0
  194. package/dist/ui/shimmer-progress.js +90 -0
  195. package/dist/ui/shimmer-progress.js.map +1 -0
  196. package/dist/ui/shimmer-worker.d.ts +2 -0
  197. package/dist/ui/shimmer-worker.d.ts.map +1 -0
  198. package/dist/ui/shimmer-worker.js +112 -0
  199. package/dist/ui/shimmer-worker.js.map +1 -0
  200. package/dist/ui/types.d.ts +17 -0
  201. package/dist/ui/types.d.ts.map +1 -0
  202. package/dist/ui/types.js +3 -0
  203. package/dist/ui/types.js.map +1 -0
  204. package/dist/vectors/embedder.js +1 -1
  205. package/dist/vectors/embedder.js.map +1 -1
  206. package/package.json +7 -12
  207. package/scripts/postinstall.js +0 -68
@@ -45,6 +45,7 @@ exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammar
45
45
  exports.hashContent = hashContent;
46
46
  exports.shouldIncludeFile = shouldIncludeFile;
47
47
  exports.scanDirectory = scanDirectory;
48
+ exports.scanDirectoryAsync = scanDirectoryAsync;
48
49
  const fs = __importStar(require("fs"));
49
50
  const fsp = __importStar(require("fs/promises"));
50
51
  const path = __importStar(require("path"));
@@ -60,6 +61,21 @@ const picomatch_1 = __importDefault(require("picomatch"));
60
61
  * File reads are I/O-bound; batching overlaps I/O wait with CPU parse work.
61
62
  */
62
63
  const FILE_IO_BATCH_SIZE = 10;
64
+ // PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
65
+ /**
66
+ * Maximum time (ms) to wait for a single file to parse in the worker thread.
67
+ * If tree-sitter hangs or WASM runs out of memory, this prevents the entire
68
+ * indexing run from freezing. The worker is restarted after a timeout.
69
+ */
70
+ const PARSE_TIMEOUT_MS = 10_000;
71
+ /**
72
+ * Number of files to parse before recycling the worker thread.
73
+ * WASM linear memory can grow but NEVER shrink (WebAssembly spec limitation).
74
+ * The only way to reclaim tree-sitter's WASM heap is to destroy the entire
75
+ * V8 isolate by terminating the worker thread and spawning a fresh one.
76
+ * This interval balances memory usage against the cost of reloading grammars.
77
+ */
78
+ const WORKER_RECYCLE_INTERVAL = 250;
63
79
  /**
64
80
  * Calculate SHA256 hash of file contents
65
81
  */
@@ -98,6 +114,21 @@ function shouldIncludeFile(filePath, config) {
98
114
  */
99
115
  function getGitVisibleFiles(rootDir) {
100
116
  try {
117
+ // Check if the project directory is gitignored by a parent repo.
118
+ // When rootDir lives inside a parent git repo that ignores it,
119
+ // `git ls-files` returns nothing — fall back to filesystem walk.
120
+ const gitRoot = (0, child_process_1.execFileSync)('git', ['rev-parse', '--show-toplevel'], { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] }).trim();
121
+ if (path.resolve(gitRoot) !== path.resolve(rootDir)) {
122
+ try {
123
+ // git check-ignore exits 0 if the path IS ignored, 1 if not
124
+ (0, child_process_1.execFileSync)('git', ['check-ignore', '-q', path.resolve(rootDir)], { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] });
125
+ // Directory is gitignored by parent repo — fall back to filesystem walk
126
+ return null;
127
+ }
128
+ catch {
129
+ // Not ignored — safe to use git ls-files
130
+ }
131
+ }
101
132
  // -c = cached (tracked), -o = others (untracked), --exclude-standard = respect .gitignore
102
133
  const output = (0, child_process_1.execFileSync)('git', ['ls-files', '-co', '--exclude-standard'], { cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] });
103
134
  const files = new Set();
@@ -177,6 +208,30 @@ function scanDirectory(rootDir, config, onProgress) {
177
208
  // Fallback: walk filesystem for non-git projects
178
209
  return scanDirectoryWalk(rootDir, config, onProgress);
179
210
  }
/**
 * Asynchronous counterpart of scanDirectory.
 *
 * When getGitVisibleFiles() yields a file listing, every entry is filtered
 * through shouldIncludeFile() and progress is reported once per accepted file.
 * After every 100th accepted file the function awaits a setImmediate tick so
 * that other event-loop work (such as progress rendering) gets a chance to run.
 * When no git listing is available the work is delegated to scanDirectoryWalk().
 *
 * @param {string} rootDir - Directory to scan.
 * @param {object} config - Forwarded unchanged to shouldIncludeFile / scanDirectoryWalk.
 * @param {(count: number, filePath: string) => void} [onProgress] - Called after each accepted file.
 * @returns {Promise<string[]>} Accepted file paths in listing order (git branch);
 *   otherwise whatever scanDirectoryWalk returns.
 */
async function scanDirectoryAsync(rootDir, config, onProgress) {
    const visible = getGitVisibleFiles(rootDir);
    if (!visible) {
        // No usable git listing: use the filesystem walk instead.
        return scanDirectoryWalk(rootDir, config, onProgress);
    }
    const accepted = [];
    for (const candidate of visible) {
        if (!shouldIncludeFile(candidate, config)) {
            continue;
        }
        accepted.push(candidate);
        onProgress?.(accepted.length, candidate);
        // Yield every 100 files so worker threads can render progress
        if (accepted.length % 100 === 0) {
            await new Promise((resume) => setImmediate(resume));
        }
    }
    return accepted;
}
180
235
  /**
181
236
  * Filesystem walk fallback for non-git projects.
182
237
  */
@@ -285,21 +340,25 @@ class ExtractionOrchestrator {
285
340
  /**
286
341
  * Index all files in the project
287
342
  */
288
- async indexAll(onProgress, signal) {
343
+ async indexAll(onProgress, signal, verbose) {
289
344
  await (0, grammars_1.initGrammars)();
290
345
  const startTime = Date.now();
291
346
  const errors = [];
292
347
  let filesIndexed = 0;
293
348
  let filesSkipped = 0;
349
+ let filesErrored = 0;
294
350
  let totalNodes = 0;
295
351
  let totalEdges = 0;
352
+ const log = verbose
353
+ ? (msg) => { console.log(`[worker] ${msg}`); }
354
+ : (_msg) => { };
296
355
  // Phase 1: Scan for files
297
356
  onProgress?.({
298
357
  phase: 'scanning',
299
358
  current: 0,
300
359
  total: 0,
301
360
  });
302
- const files = scanDirectory(this.rootDir, this.config, (current, file) => {
361
+ const files = await scanDirectoryAsync(this.rootDir, this.config, (current, file) => {
303
362
  onProgress?.({
304
363
  phase: 'scanning',
305
364
  current,
@@ -312,26 +371,164 @@ class ExtractionOrchestrator {
312
371
  success: false,
313
372
  filesIndexed: 0,
314
373
  filesSkipped: 0,
374
+ filesErrored: 0,
315
375
  nodesCreated: 0,
316
376
  edgesCreated: 0,
317
377
  errors: [{ message: 'Aborted', severity: 'error' }],
318
378
  durationMs: Date.now() - startTime,
319
379
  };
320
380
  }
321
- // Load only the grammars needed for languages actually present in the project.
322
- // This avoids compiling all 16+ WASM grammar modules upfront, which can cause
323
- // V8 WASM Zone OOM on large codebases (see issue #54).
324
- const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
325
- await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
326
- // Phase 2: Parse files (read in parallel batches, parse/store sequentially)
381
+ // Phase 2: Parse files in a worker thread (keeps main thread unblocked for UI)
327
382
  const total = files.length;
328
383
  let processed = 0;
384
+ // Emit parsing phase immediately so the progress bar appears during worker setup.
385
+ // The yield lets the shimmer worker flush the phase transition to stdout before
386
+ // the main thread starts synchronous grammar detection work.
387
+ onProgress?.({
388
+ phase: 'parsing',
389
+ current: 0,
390
+ total,
391
+ });
392
+ await new Promise(resolve => setImmediate(resolve));
393
+ // Detect needed languages and load grammars in the parse worker
394
+ const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
395
+ // .h files default to 'c' but may be C++ — ensure cpp grammar is loaded when c is needed
396
+ if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
397
+ neededLanguages.push('cpp');
398
+ }
399
+ // Try to use a worker thread for parsing (keeps main thread unblocked for UI).
400
+ // Falls back to in-process parsing if the compiled worker is unavailable (e.g. tests).
401
+ const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
402
+ const useWorker = fs.existsSync(parseWorkerPath);
403
+ let WorkerClass = null;
404
+ if (useWorker) {
405
+ const { Worker } = await Promise.resolve().then(() => __importStar(require('worker_threads')));
406
+ WorkerClass = Worker;
407
+ }
408
+ else {
409
+ // In-process fallback: load grammars locally
410
+ await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
411
+ }
412
+ // --- Worker lifecycle management ---
413
+ // The worker can crash (OOM in WASM) or hang on pathological files.
414
+ // We track pending parse promises and handle both cases:
415
+ // - Timeout: terminate + restart the worker, reject the timed-out request
416
+ // - Crash: reject all pending promises, restart for remaining files
417
+ let parseWorker = null;
418
+ let nextId = 0;
419
+ let workerParseCount = 0;
420
+ const pendingParses = new Map();
/**
 * Fail every outstanding parse request with the same reason.
 * For each entry in pendingParses the timeout timer is cancelled and the
 * entry is removed before its promise is rejected.
 *
 * @param {string} reason - Message of the Error each pending promise is rejected with.
 */
function rejectAllPending(reason) {
    pendingParses.forEach((entry, requestId) => {
        clearTimeout(entry.timer);
        pendingParses.delete(requestId);
        entry.reject(new Error(reason));
    });
}
/**
 * Wire the long-lived 'message', 'error' and 'exit' listeners onto a parse worker.
 *
 * - 'message': a `parse-result` carrying an id settles the matching pending request.
 * - 'error':   logs a warning and rejects every pending request.
 * - 'exit':    a non-zero exit with requests still pending is logged and rejects
 *              them; in all cases, if `w` is still the current worker, the shared
 *              reference and parse counter are reset so a new worker is spawned.
 *
 * @param {object} w - Worker instance to observe.
 */
function attachWorkerHandlers(w) {
    const settleParse = (msg) => {
        if (msg.type !== 'parse-result' || msg.id === undefined) {
            return;
        }
        const waiting = pendingParses.get(msg.id);
        if (!waiting) {
            // Request already settled (e.g. timed out) — nothing to do.
            return;
        }
        clearTimeout(waiting.timer);
        pendingParses.delete(msg.id);
        waiting.resolve(msg.result);
    };
    const failOnError = (err) => {
        (0, errors_1.logWarn)('Parse worker error', { error: err.message });
        rejectAllPending(`Worker error: ${err.message}`);
    };
    const handleExit = (code) => {
        const diedWithWork = code !== 0 && pendingParses.size > 0;
        if (diedWithWork) {
            (0, errors_1.logWarn)('Parse worker exited unexpectedly', { code });
            rejectAllPending(`Worker exited with code ${code}`);
        }
        // Forget the worker only if it is still the active one; a replacement
        // may already have been installed. Resetting the count gives the
        // fresh worker a full cycle before recycling.
        if (parseWorker === w) {
            parseWorker = null;
            workerParseCount = 0;
        }
    };
    w.on('message', settleParse);
    w.on('error', failOnError);
    w.on('exit', handleExit);
}
/**
 * Return the active parse worker, spawning and initialising one if needed.
 *
 * A new worker is sent a `load-grammars` request for `neededLanguages` and is
 * only handed out once it answers with `grammars-loaded`. The wait also
 * settles (by rejecting) when the worker emits 'error' or 'exit' first, so a
 * worker that dies during start-up cannot leave the caller waiting forever.
 * On any initialisation failure the worker is terminated and the shared
 * reference is cleared, so the next call spawns a fresh one instead of
 * reusing a worker whose grammars never loaded.
 *
 * @returns {Promise<object>} The ready worker.
 * @throws {Error} If the worker replies with an unexpected message, errors,
 *   or exits before its grammars are loaded.
 */
async function ensureWorker() {
    if (parseWorker) {
        return parseWorker;
    }
    log('Spawning new parse worker...');
    // Keep a local handle: the shared `parseWorker` variable can be cleared
    // by the exit handler while we are still waiting on this instance.
    const worker = new WorkerClass(parseWorkerPath);
    parseWorker = worker;
    attachWorkerHandlers(worker);
    try {
        // Load grammars in the new worker
        await new Promise((resolve, reject) => {
            const detach = () => {
                worker.off('message', onMessage);
                worker.off('error', onError);
                worker.off('exit', onExit);
            };
            const onMessage = (msg) => {
                detach();
                if (msg.type === 'grammars-loaded') {
                    resolve();
                }
                else {
                    reject(new Error(`Unexpected message: ${msg.type}`));
                }
            };
            const onError = (err) => {
                detach();
                reject(new Error(`Worker error: ${err.message}`));
            };
            const onExit = (code) => {
                detach();
                reject(new Error(`Worker exited with code ${code}`));
            };
            worker.on('message', onMessage);
            worker.on('error', onError);
            worker.on('exit', onExit);
            worker.postMessage({ type: 'load-grammars', languages: neededLanguages });
        });
    }
    catch (err) {
        // Never leave a half-initialised worker cached as the active one.
        if (parseWorker === worker) {
            parseWorker = null;
        }
        // Fire-and-forget: we do not wait for the broken worker to shut down.
        worker.terminate().catch(() => { });
        throw err;
    }
    return worker;
}
474
+ if (WorkerClass) {
475
+ await ensureWorker();
476
+ }
/**
 * Retire the current parse worker, if any.
 *
 * Logs the number of parses handled and the process RSS, clears the shared
 * worker reference and parse counter (so ensureWorker() spawns a fresh worker
 * on its next call), then asks the old worker to terminate without waiting
 * for the outcome.
 */
function recycleWorker() {
    const retiring = parseWorker;
    if (!retiring) {
        return;
    }
    const rssMb = Math.round(process.memoryUsage().rss / 1024 / 1024);
    log(`Recycling worker after ${workerParseCount} parses (heap: ${rssMb}MB RSS)`);
    parseWorker = null;
    workerParseCount = 0;
    // Fire-and-forget: worker.terminate() can hang if WASM is stuck
    retiring.terminate().catch(() => { });
}
/**
 * Parse one file, in the worker thread when available, otherwise in-process.
 *
 * Worker path: recycles the worker once WORKER_RECYCLE_INTERVAL parses have
 * been issued, obtains a ready worker, registers the request in pendingParses
 * with a timeout, and posts a `parse` message. The returned promise is
 * settled by the worker's message handler, by the timeout, or by
 * rejectAllPending().
 *
 * @param {string} filePath - Path of the file being parsed (sent to the worker as-is).
 * @param {string} content - File contents.
 * @returns {Promise<object>} The extraction result.
 * @throws {Error} `Parse timed out after <n>ms` on timeout; any error raised
 *   while spawning the worker or posting the request.
 */
async function requestParse(filePath, content) {
    if (!WorkerClass) {
        // In-process fallback
        return (0, tree_sitter_1.extractFromSource)(filePath, content, (0, grammars_1.detectLanguage)(filePath, content));
    }
    // Recycle the worker before the next parse if we've hit the threshold.
    // recycleWorker() is synchronous, so there is nothing to await.
    if (workerParseCount >= WORKER_RECYCLE_INTERVAL) {
        recycleWorker();
    }
    const worker = await ensureWorker();
    const id = nextId++;
    workerParseCount++;
    // Scale timeout for large files: base timeout + 10s per 100,000 characters
    // (content.length counts UTF-16 code units, not bytes).
    const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
            pendingParses.delete(id);
            log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
            // Only forget the shared worker if it is still the one that
            // timed out; a replacement may already be active.
            if (parseWorker === worker) {
                parseWorker = null;
                workerParseCount = 0;
            }
            // Reject FIRST — worker.terminate() can hang if WASM is stuck
            reject(new Error(`Parse timed out after ${timeoutMs}ms`));
            // Fire-and-forget: kill the stuck worker in the background
            worker.terminate().catch(() => { });
        }, timeoutMs);
        pendingParses.set(id, { resolve, reject, timer });
        try {
            worker.postMessage({ type: 'parse', id, filePath, content });
        }
        catch (err) {
            // The request never reached the worker: drop its bookkeeping so a
            // stale timer cannot later kill a worker serving another file.
            clearTimeout(timer);
            pendingParses.delete(id);
            reject(err);
        }
    });
}
329
523
  for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
330
524
  if (signal?.aborted) {
525
+ if (parseWorker)
526
+ parseWorker.terminate().catch(() => { });
331
527
  return {
332
528
  success: false,
333
529
  filesIndexed,
334
530
  filesSkipped,
531
+ filesErrored,
335
532
  nodesCreated: totalNodes,
336
533
  edgesCreated: totalEdges,
337
534
  errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
@@ -355,20 +552,23 @@ class ExtractionOrchestrator {
355
552
  return { filePath: fp, content: null, stats: null, error: err };
356
553
  }
357
554
  }));
358
- // Parse and store sequentially
555
+ // Send to worker for parsing, store results on main thread
359
556
  for (const { filePath, content, stats, error } of fileContents) {
360
557
  if (signal?.aborted) {
558
+ if (parseWorker)
559
+ parseWorker.terminate().catch(() => { });
361
560
  return {
362
561
  success: false,
363
562
  filesIndexed,
364
563
  filesSkipped,
564
+ filesErrored,
365
565
  nodesCreated: totalNodes,
366
566
  edgesCreated: totalEdges,
367
567
  errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
368
568
  durationMs: Date.now() - startTime,
369
569
  };
370
570
  }
371
- processed++;
571
+ // Report progress before parsing (show current file being worked on)
372
572
  onProgress?.({
373
573
  phase: 'parsing',
374
574
  current: processed,
@@ -376,14 +576,44 @@ class ExtractionOrchestrator {
376
576
  currentFile: filePath,
377
577
  });
378
578
  if (error || content === null || stats === null) {
579
+ processed++;
580
+ filesErrored++;
379
581
  errors.push({
380
582
  message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
583
+ filePath,
584
+ severity: 'error',
585
+ code: 'read_error',
586
+ });
587
+ continue;
588
+ }
589
+ // Parse in worker thread (main thread stays unblocked).
590
+ // Wrapped in try/catch to handle worker timeouts and crashes gracefully.
591
+ let result;
592
+ try {
593
+ result = await requestParse(filePath, content);
594
+ }
595
+ catch (parseErr) {
596
+ processed++;
597
+ filesErrored++;
598
+ errors.push({
599
+ message: parseErr instanceof Error ? parseErr.message : String(parseErr),
600
+ filePath,
381
601
  severity: 'error',
602
+ code: 'parse_error',
382
603
  });
383
604
  continue;
384
605
  }
385
- const result = await this.indexFileWithContent(filePath, content, stats);
606
+ processed++;
607
+ // Store in database on main thread (SQLite is not thread-safe)
608
+ if (result.nodes.length > 0 || result.errors.length === 0) {
609
+ const language = (0, grammars_1.detectLanguage)(filePath, content);
610
+ this.storeExtractionResult(filePath, content, language, stats, result);
611
+ }
386
612
  if (result.errors.length > 0) {
613
+ for (const err of result.errors) {
614
+ if (!err.filePath)
615
+ err.filePath = filePath;
616
+ }
387
617
  errors.push(...result.errors);
388
618
  }
389
619
  if (result.nodes.length > 0) {
@@ -391,22 +621,130 @@ class ExtractionOrchestrator {
391
621
  totalNodes += result.nodes.length;
392
622
  totalEdges += result.edges.length;
393
623
  }
394
- else if (result.errors.length === 0) {
624
+ else if (result.errors.some((e) => e.severity === 'error')) {
625
+ filesErrored++;
626
+ }
627
+ else {
395
628
  filesSkipped++;
396
629
  }
397
630
  }
398
631
  }
399
- // Phase 3: Resolve references
632
+ // Report 100% so the progress bar doesn't hang at 99%
400
633
  onProgress?.({
401
- phase: 'resolving',
402
- current: 0,
403
- total: 1,
634
+ phase: 'parsing',
635
+ current: total,
636
+ total,
404
637
  });
405
- // TODO: Implement reference resolution in Phase 3
638
+ // Yield so the shimmer worker's buffered stdout writes can flush.
639
+ // Worker thread stdout is proxied through the main thread's event loop,
640
+ // so synchronous work here blocks the animation from rendering.
641
+ await new Promise(resolve => setImmediate(resolve));
642
+ // Retry pass: files that failed due to WASM memory corruption may succeed
643
+ // on a fresh worker with a clean heap. Recycle before each attempt so
644
+ // every file gets the absolute cleanest WASM state possible.
645
+ const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
646
+ (e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
647
+ if (retryableErrors.length > 0 && WorkerClass) {
648
+ log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
649
+ const stillFailing = [];
650
+ for (const errEntry of retryableErrors) {
651
+ const filePath = errEntry.filePath;
652
+ if (signal?.aborted)
653
+ break;
654
+ // Fresh worker for every retry — maximum WASM headroom
655
+ recycleWorker();
656
+ let content;
657
+ try {
658
+ const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
659
+ if (!fullPath)
660
+ continue;
661
+ content = await fsp.readFile(fullPath, 'utf-8');
662
+ }
663
+ catch {
664
+ continue;
665
+ }
666
+ let result;
667
+ try {
668
+ result = await requestParse(filePath, content);
669
+ }
670
+ catch {
671
+ stillFailing.push(errEntry);
672
+ continue;
673
+ }
674
+ if (result.nodes.length > 0 || result.errors.length === 0) {
675
+ const language = (0, grammars_1.detectLanguage)(filePath, content);
676
+ const stats = await fsp.stat(path.join(this.rootDir, filePath));
677
+ this.storeExtractionResult(filePath, content, language, stats, result);
678
+ const idx = errors.indexOf(errEntry);
679
+ if (idx >= 0)
680
+ errors.splice(idx, 1);
681
+ filesErrored--;
682
+ filesIndexed++;
683
+ totalNodes += result.nodes.length;
684
+ totalEdges += result.edges.length;
685
+ log(`Retry OK: ${filePath} (${result.nodes.length} nodes)`);
686
+ }
687
+ }
688
+ // Last resort: for files that still crash on a clean worker, strip
689
+ // comment-only lines to reduce WASM memory pressure. Many compiler
690
+ // test files are 90%+ comments (CHECK directives) that don't contribute
691
+ // code nodes but consume parser memory.
692
+ if (stillFailing.length > 0) {
693
+ log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
694
+ for (const errEntry of stillFailing) {
695
+ const filePath = errEntry.filePath;
696
+ if (signal?.aborted)
697
+ break;
698
+ recycleWorker();
699
+ let fullContent;
700
+ try {
701
+ const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
702
+ if (!fullPath)
703
+ continue;
704
+ fullContent = await fsp.readFile(fullPath, 'utf-8');
705
+ }
706
+ catch {
707
+ continue;
708
+ }
709
+ // Strip lines that are entirely comments (preserving line numbers
710
+ // by replacing with empty lines so node positions stay correct)
711
+ const stripped = fullContent
712
+ .split('\n')
713
+ .map(line => /^\s*\/\//.test(line) ? '' : line)
714
+ .join('\n');
715
+ let result;
716
+ try {
717
+ result = await requestParse(filePath, stripped);
718
+ }
719
+ catch {
720
+ continue;
721
+ }
722
+ if (result.nodes.length > 0 || result.errors.length === 0) {
723
+ const language = (0, grammars_1.detectLanguage)(filePath, fullContent);
724
+ const stats = await fsp.stat(path.join(this.rootDir, filePath));
725
+ this.storeExtractionResult(filePath, fullContent, language, stats, result);
726
+ const idx = errors.indexOf(errEntry);
727
+ if (idx >= 0)
728
+ errors.splice(idx, 1);
729
+ filesErrored--;
730
+ filesIndexed++;
731
+ totalNodes += result.nodes.length;
732
+ totalEdges += result.edges.length;
733
+ log(`Retry (stripped) OK: ${filePath} (${result.nodes.length} nodes)`);
734
+ }
735
+ }
736
+ }
737
+ }
738
+ // Shut down parse worker and clear any pending timers
739
+ rejectAllPending('Indexing complete');
740
+ if (parseWorker) {
741
+ parseWorker.terminate().catch(() => { });
742
+ }
406
743
  return {
407
- success: errors.filter((e) => e.severity === 'error').length === 0,
744
+ success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
408
745
  filesIndexed,
409
746
  filesSkipped,
747
+ filesErrored,
410
748
  nodesCreated: totalNodes,
411
749
  edgesCreated: totalEdges,
412
750
  errors,
@@ -421,6 +759,7 @@ class ExtractionOrchestrator {
421
759
  const errors = [];
422
760
  let filesIndexed = 0;
423
761
  let filesSkipped = 0;
762
+ let filesErrored = 0;
424
763
  let totalNodes = 0;
425
764
  let totalEdges = 0;
426
765
  for (const filePath of filePaths) {
@@ -433,14 +772,18 @@ class ExtractionOrchestrator {
433
772
  totalNodes += result.nodes.length;
434
773
  totalEdges += result.edges.length;
435
774
  }
775
+ else if (result.errors.some((e) => e.severity === 'error')) {
776
+ filesErrored++;
777
+ }
436
778
  else {
437
779
  filesSkipped++;
438
780
  }
439
781
  }
440
782
  return {
441
- success: errors.filter((e) => e.severity === 'error').length === 0,
783
+ success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
442
784
  filesIndexed,
443
785
  filesSkipped,
786
+ filesErrored,
444
787
  nodesCreated: totalNodes,
445
788
  edgesCreated: totalEdges,
446
789
  errors,
@@ -457,7 +800,7 @@ class ExtractionOrchestrator {
457
800
  nodes: [],
458
801
  edges: [],
459
802
  unresolvedReferences: [],
460
- errors: [{ message: `Path traversal blocked: ${relativePath}`, severity: 'error' }],
803
+ errors: [{ message: `Path traversal blocked: ${relativePath}`, filePath: relativePath, severity: 'error', code: 'path_traversal' }],
461
804
  durationMs: 0,
462
805
  };
463
806
  }
@@ -476,7 +819,9 @@ class ExtractionOrchestrator {
476
819
  errors: [
477
820
  {
478
821
  message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
822
+ filePath: relativePath,
479
823
  severity: 'error',
824
+ code: 'read_error',
480
825
  },
481
826
  ],
482
827
  durationMs: 0,
@@ -497,7 +842,7 @@ class ExtractionOrchestrator {
497
842
  nodes: [],
498
843
  edges: [],
499
844
  unresolvedReferences: [],
500
- errors: [{ message: 'Path traversal blocked', severity: 'error' }],
845
+ errors: [{ message: 'Path traversal blocked', filePath: relativePath, severity: 'error', code: 'path_traversal' }],
501
846
  durationMs: 0,
502
847
  };
503
848
  }
@@ -510,14 +855,16 @@ class ExtractionOrchestrator {
510
855
  errors: [
511
856
  {
512
857
  message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
858
+ filePath: relativePath,
513
859
  severity: 'warning',
860
+ code: 'size_exceeded',
514
861
  },
515
862
  ],
516
863
  durationMs: 0,
517
864
  };
518
865
  }
519
866
  // Detect language
520
- const language = (0, grammars_1.detectLanguage)(relativePath);
867
+ const language = (0, grammars_1.detectLanguage)(relativePath, content);
521
868
  if (!(0, grammars_1.isLanguageSupported)(language)) {
522
869
  return {
523
870
  nodes: [],
@@ -700,6 +1047,10 @@ class ExtractionOrchestrator {
700
1047
  // Load only grammars needed for changed files
701
1048
  if (filesToIndex.length > 0) {
702
1049
  const neededLanguages = [...new Set(filesToIndex.map((f) => (0, grammars_1.detectLanguage)(f)))];
1050
+ // .h files default to 'c' but may be C++ — ensure cpp grammar is loaded
1051
+ if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
1052
+ neededLanguages.push('cpp');
1053
+ }
703
1054
  await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
704
1055
  }
705
1056
  // Index changed files