@colbymchenry/codegraph 0.6.6 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. package/README.md +180 -502
  2. package/dist/bin/codegraph.d.ts +0 -5
  3. package/dist/bin/codegraph.d.ts.map +1 -1
  4. package/dist/bin/codegraph.js +217 -263
  5. package/dist/bin/codegraph.js.map +1 -1
  6. package/dist/bin/uninstall.d.ts +0 -1
  7. package/dist/bin/uninstall.d.ts.map +1 -1
  8. package/dist/bin/uninstall.js +3 -29
  9. package/dist/bin/uninstall.js.map +1 -1
  10. package/dist/config.d.ts.map +1 -1
  11. package/dist/config.js +0 -3
  12. package/dist/config.js.map +1 -1
  13. package/dist/context/index.d.ts +3 -5
  14. package/dist/context/index.d.ts.map +1 -1
  15. package/dist/context/index.js +497 -46
  16. package/dist/context/index.js.map +1 -1
  17. package/dist/db/migrations.d.ts +1 -1
  18. package/dist/db/migrations.d.ts.map +1 -1
  19. package/dist/db/migrations.js +10 -1
  20. package/dist/db/migrations.js.map +1 -1
  21. package/dist/db/queries.d.ts +53 -0
  22. package/dist/db/queries.d.ts.map +1 -1
  23. package/dist/db/queries.js +244 -24
  24. package/dist/db/queries.js.map +1 -1
  25. package/dist/db/schema.sql +1 -16
  26. package/dist/errors.d.ts +1 -1
  27. package/dist/errors.d.ts.map +1 -1
  28. package/dist/errors.js +1 -7
  29. package/dist/errors.js.map +1 -1
  30. package/dist/extraction/dfm-extractor.d.ts +31 -0
  31. package/dist/extraction/dfm-extractor.d.ts.map +1 -0
  32. package/dist/extraction/dfm-extractor.js +151 -0
  33. package/dist/extraction/dfm-extractor.js.map +1 -0
  34. package/dist/extraction/grammars.d.ts +9 -1
  35. package/dist/extraction/grammars.d.ts.map +1 -1
  36. package/dist/extraction/grammars.js +34 -2
  37. package/dist/extraction/grammars.js.map +1 -1
  38. package/dist/extraction/index.d.ts +7 -1
  39. package/dist/extraction/index.d.ts.map +1 -1
  40. package/dist/extraction/index.js +373 -29
  41. package/dist/extraction/index.js.map +1 -1
  42. package/dist/extraction/languages/c-cpp.d.ts +4 -0
  43. package/dist/extraction/languages/c-cpp.d.ts.map +1 -0
  44. package/dist/extraction/languages/c-cpp.js +126 -0
  45. package/dist/extraction/languages/c-cpp.js.map +1 -0
  46. package/dist/extraction/languages/csharp.d.ts +3 -0
  47. package/dist/extraction/languages/csharp.d.ts.map +1 -0
  48. package/dist/extraction/languages/csharp.js +72 -0
  49. package/dist/extraction/languages/csharp.js.map +1 -0
  50. package/dist/extraction/languages/dart.d.ts +3 -0
  51. package/dist/extraction/languages/dart.d.ts.map +1 -0
  52. package/dist/extraction/languages/dart.js +192 -0
  53. package/dist/extraction/languages/dart.js.map +1 -0
  54. package/dist/extraction/languages/go.d.ts +3 -0
  55. package/dist/extraction/languages/go.d.ts.map +1 -0
  56. package/dist/extraction/languages/go.js +58 -0
  57. package/dist/extraction/languages/go.js.map +1 -0
  58. package/dist/extraction/languages/index.d.ts +10 -0
  59. package/dist/extraction/languages/index.d.ts.map +1 -0
  60. package/dist/extraction/languages/index.js +43 -0
  61. package/dist/extraction/languages/index.js.map +1 -0
  62. package/dist/extraction/languages/java.d.ts +3 -0
  63. package/dist/extraction/languages/java.d.ts.map +1 -0
  64. package/dist/extraction/languages/java.js +64 -0
  65. package/dist/extraction/languages/java.js.map +1 -0
  66. package/dist/extraction/languages/javascript.d.ts +3 -0
  67. package/dist/extraction/languages/javascript.d.ts.map +1 -0
  68. package/dist/extraction/languages/javascript.js +90 -0
  69. package/dist/extraction/languages/javascript.js.map +1 -0
  70. package/dist/extraction/languages/kotlin.d.ts +3 -0
  71. package/dist/extraction/languages/kotlin.d.ts.map +1 -0
  72. package/dist/extraction/languages/kotlin.js +253 -0
  73. package/dist/extraction/languages/kotlin.js.map +1 -0
  74. package/dist/extraction/languages/pascal.d.ts +3 -0
  75. package/dist/extraction/languages/pascal.d.ts.map +1 -0
  76. package/dist/extraction/languages/pascal.js +66 -0
  77. package/dist/extraction/languages/pascal.js.map +1 -0
  78. package/dist/extraction/languages/php.d.ts +3 -0
  79. package/dist/extraction/languages/php.d.ts.map +1 -0
  80. package/dist/extraction/languages/php.js +107 -0
  81. package/dist/extraction/languages/php.js.map +1 -0
  82. package/dist/extraction/languages/python.d.ts +3 -0
  83. package/dist/extraction/languages/python.d.ts.map +1 -0
  84. package/dist/extraction/languages/python.js +56 -0
  85. package/dist/extraction/languages/python.js.map +1 -0
  86. package/dist/extraction/languages/ruby.d.ts +3 -0
  87. package/dist/extraction/languages/ruby.d.ts.map +1 -0
  88. package/dist/extraction/languages/ruby.js +114 -0
  89. package/dist/extraction/languages/ruby.js.map +1 -0
  90. package/dist/extraction/languages/rust.d.ts +3 -0
  91. package/dist/extraction/languages/rust.d.ts.map +1 -0
  92. package/dist/extraction/languages/rust.js +109 -0
  93. package/dist/extraction/languages/rust.js.map +1 -0
  94. package/dist/extraction/languages/swift.d.ts +3 -0
  95. package/dist/extraction/languages/swift.d.ts.map +1 -0
  96. package/dist/extraction/languages/swift.js +91 -0
  97. package/dist/extraction/languages/swift.js.map +1 -0
  98. package/dist/extraction/languages/typescript.d.ts +3 -0
  99. package/dist/extraction/languages/typescript.d.ts.map +1 -0
  100. package/dist/extraction/languages/typescript.js +129 -0
  101. package/dist/extraction/languages/typescript.js.map +1 -0
  102. package/dist/extraction/liquid-extractor.d.ts +52 -0
  103. package/dist/extraction/liquid-extractor.d.ts.map +1 -0
  104. package/dist/extraction/liquid-extractor.js +313 -0
  105. package/dist/extraction/liquid-extractor.js.map +1 -0
  106. package/dist/extraction/parse-worker.d.ts +8 -0
  107. package/dist/extraction/parse-worker.d.ts.map +1 -0
  108. package/dist/extraction/parse-worker.js +57 -0
  109. package/dist/extraction/parse-worker.js.map +1 -0
  110. package/dist/extraction/svelte-extractor.d.ts +47 -0
  111. package/dist/extraction/svelte-extractor.d.ts.map +1 -0
  112. package/dist/extraction/svelte-extractor.js +230 -0
  113. package/dist/extraction/svelte-extractor.js.map +1 -0
  114. package/dist/extraction/tree-sitter-helpers.d.ts +28 -0
  115. package/dist/extraction/tree-sitter-helpers.d.ts.map +1 -0
  116. package/dist/extraction/tree-sitter-helpers.js +103 -0
  117. package/dist/extraction/tree-sitter-helpers.js.map +1 -0
  118. package/dist/extraction/tree-sitter-types.d.ts +179 -0
  119. package/dist/extraction/tree-sitter-types.d.ts.map +1 -0
  120. package/dist/extraction/tree-sitter-types.js +10 -0
  121. package/dist/extraction/tree-sitter-types.js.map +1 -0
  122. package/dist/extraction/tree-sitter.d.ts +67 -125
  123. package/dist/extraction/tree-sitter.d.ts.map +1 -1
  124. package/dist/extraction/tree-sitter.js +1052 -1860
  125. package/dist/extraction/tree-sitter.js.map +1 -1
  126. package/dist/graph/traversal.d.ts.map +1 -1
  127. package/dist/graph/traversal.js +20 -2
  128. package/dist/graph/traversal.js.map +1 -1
  129. package/dist/index.d.ts +29 -53
  130. package/dist/index.d.ts.map +1 -1
  131. package/dist/index.js +88 -117
  132. package/dist/index.js.map +1 -1
  133. package/dist/installer/claude-md-template.d.ts +1 -1
  134. package/dist/installer/claude-md-template.d.ts.map +1 -1
  135. package/dist/installer/claude-md-template.js +15 -15
  136. package/dist/installer/config-writer.d.ts +2 -13
  137. package/dist/installer/config-writer.d.ts.map +1 -1
  138. package/dist/installer/config-writer.js +4 -87
  139. package/dist/installer/config-writer.js.map +1 -1
  140. package/dist/installer/index.d.ts +3 -4
  141. package/dist/installer/index.d.ts.map +1 -1
  142. package/dist/installer/index.js +118 -127
  143. package/dist/installer/index.js.map +1 -1
  144. package/dist/mcp/index.d.ts +5 -0
  145. package/dist/mcp/index.d.ts.map +1 -1
  146. package/dist/mcp/index.js +25 -4
  147. package/dist/mcp/index.js.map +1 -1
  148. package/dist/mcp/tools.d.ts +33 -0
  149. package/dist/mcp/tools.d.ts.map +1 -1
  150. package/dist/mcp/tools.js +405 -26
  151. package/dist/mcp/tools.js.map +1 -1
  152. package/dist/mcp/transport.d.ts.map +1 -1
  153. package/dist/mcp/transport.js +0 -2
  154. package/dist/mcp/transport.js.map +1 -1
  155. package/dist/resolution/frameworks/csharp.js +29 -84
  156. package/dist/resolution/frameworks/csharp.js.map +1 -1
  157. package/dist/resolution/frameworks/express.js +44 -48
  158. package/dist/resolution/frameworks/express.js.map +1 -1
  159. package/dist/resolution/frameworks/go.js +34 -70
  160. package/dist/resolution/frameworks/go.js.map +1 -1
  161. package/dist/resolution/frameworks/java.js +29 -87
  162. package/dist/resolution/frameworks/java.js.map +1 -1
  163. package/dist/resolution/frameworks/laravel.js +6 -6
  164. package/dist/resolution/frameworks/laravel.js.map +1 -1
  165. package/dist/resolution/frameworks/python.js +33 -98
  166. package/dist/resolution/frameworks/python.js.map +1 -1
  167. package/dist/resolution/frameworks/react.js +53 -76
  168. package/dist/resolution/frameworks/react.js.map +1 -1
  169. package/dist/resolution/frameworks/ruby.js +12 -24
  170. package/dist/resolution/frameworks/ruby.js.map +1 -1
  171. package/dist/resolution/frameworks/rust.js +26 -66
  172. package/dist/resolution/frameworks/rust.js.map +1 -1
  173. package/dist/resolution/frameworks/svelte.js +11 -31
  174. package/dist/resolution/frameworks/svelte.js.map +1 -1
  175. package/dist/resolution/frameworks/swift.js +42 -160
  176. package/dist/resolution/frameworks/swift.js.map +1 -1
  177. package/dist/resolution/index.d.ts +19 -6
  178. package/dist/resolution/index.d.ts.map +1 -1
  179. package/dist/resolution/index.js +300 -144
  180. package/dist/resolution/index.js.map +1 -1
  181. package/dist/resolution/name-matcher.d.ts +5 -0
  182. package/dist/resolution/name-matcher.d.ts.map +1 -1
  183. package/dist/resolution/name-matcher.js +148 -8
  184. package/dist/resolution/name-matcher.js.map +1 -1
  185. package/dist/resolution/types.d.ts +1 -1
  186. package/dist/resolution/types.d.ts.map +1 -1
  187. package/dist/search/query-utils.d.ts +26 -1
  188. package/dist/search/query-utils.d.ts.map +1 -1
  189. package/dist/search/query-utils.js +209 -9
  190. package/dist/search/query-utils.js.map +1 -1
  191. package/dist/sync/index.d.ts +2 -4
  192. package/dist/sync/index.d.ts.map +1 -1
  193. package/dist/sync/index.js +4 -3
  194. package/dist/sync/index.js.map +1 -1
  195. package/dist/sync/watcher.d.ts +81 -0
  196. package/dist/sync/watcher.d.ts.map +1 -0
  197. package/dist/sync/watcher.js +184 -0
  198. package/dist/sync/watcher.js.map +1 -0
  199. package/dist/types.d.ts +2 -2
  200. package/dist/types.d.ts.map +1 -1
  201. package/dist/types.js +0 -1
  202. package/dist/types.js.map +1 -1
  203. package/dist/ui/shimmer-progress.d.ts +11 -0
  204. package/dist/ui/shimmer-progress.d.ts.map +1 -0
  205. package/dist/ui/shimmer-progress.js +90 -0
  206. package/dist/ui/shimmer-progress.js.map +1 -0
  207. package/dist/ui/shimmer-worker.d.ts +2 -0
  208. package/dist/ui/shimmer-worker.d.ts.map +1 -0
  209. package/dist/ui/shimmer-worker.js +112 -0
  210. package/dist/ui/shimmer-worker.js.map +1 -0
  211. package/dist/ui/types.d.ts +17 -0
  212. package/dist/ui/types.d.ts.map +1 -0
  213. package/dist/ui/types.js +3 -0
  214. package/dist/ui/types.js.map +1 -0
  215. package/dist/vectors/embedder.js +1 -1
  216. package/dist/vectors/embedder.js.map +1 -1
  217. package/dist/visualizer/server.d.ts.map +1 -1
  218. package/dist/visualizer/server.js +3 -11
  219. package/dist/visualizer/server.js.map +1 -1
  220. package/package.json +7 -12
  221. package/scripts/postinstall.js +0 -68
@@ -45,6 +45,7 @@ exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammar
45
45
  exports.hashContent = hashContent;
46
46
  exports.shouldIncludeFile = shouldIncludeFile;
47
47
  exports.scanDirectory = scanDirectory;
48
+ exports.scanDirectoryAsync = scanDirectoryAsync;
48
49
  const fs = __importStar(require("fs"));
49
50
  const fsp = __importStar(require("fs/promises"));
50
51
  const path = __importStar(require("path"));
@@ -53,7 +54,6 @@ const child_process_1 = require("child_process");
53
54
  const tree_sitter_1 = require("./tree-sitter");
54
55
  const grammars_1 = require("./grammars");
55
56
  const errors_1 = require("../errors");
56
- const sentry_1 = require("../sentry");
57
57
  const utils_1 = require("../utils");
58
58
  const picomatch_1 = __importDefault(require("picomatch"));
59
59
  /**
@@ -61,6 +61,21 @@ const picomatch_1 = __importDefault(require("picomatch"));
61
61
  * File reads are I/O-bound; batching overlaps I/O wait with CPU parse work.
62
62
  */
63
63
  const FILE_IO_BATCH_SIZE = 10;
64
+ // PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
65
+ /**
66
+ * Maximum time (ms) to wait for a single file to parse in the worker thread.
67
+ * If tree-sitter hangs or WASM runs out of memory, this prevents the entire
68
+ * indexing run from freezing. The worker is restarted after a timeout.
69
+ */
70
+ const PARSE_TIMEOUT_MS = 10_000;
71
+ /**
72
+ * Number of files to parse before recycling the worker thread.
73
+ * WASM linear memory can grow but NEVER shrink (WebAssembly spec limitation).
74
+ * The only way to reclaim tree-sitter's WASM heap is to destroy the entire
75
+ * V8 isolate by terminating the worker thread and spawning a fresh one.
76
+ * This interval balances memory usage against the cost of reloading grammars.
77
+ */
78
+ const WORKER_RECYCLE_INTERVAL = 250;
64
79
  /**
65
80
  * Calculate SHA256 hash of file contents
66
81
  */
@@ -99,6 +114,21 @@ function shouldIncludeFile(filePath, config) {
99
114
  */
100
115
  function getGitVisibleFiles(rootDir) {
101
116
  try {
117
+ // Check if the project directory is gitignored by a parent repo.
118
+ // When rootDir lives inside a parent git repo that ignores it,
119
+ // `git ls-files` returns nothing — fall back to filesystem walk.
120
+ const gitRoot = (0, child_process_1.execFileSync)('git', ['rev-parse', '--show-toplevel'], { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] }).trim();
121
+ if (path.resolve(gitRoot) !== path.resolve(rootDir)) {
122
+ try {
123
+ // git check-ignore exits 0 if the path IS ignored, 1 if not
124
+ (0, child_process_1.execFileSync)('git', ['check-ignore', '-q', path.resolve(rootDir)], { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] });
125
+ // Directory is gitignored by parent repo — fall back to filesystem walk
126
+ return null;
127
+ }
128
+ catch {
129
+ // Not ignored — safe to use git ls-files
130
+ }
131
+ }
102
132
  // -c = cached (tracked), -o = others (untracked), --exclude-standard = respect .gitignore
103
133
  const output = (0, child_process_1.execFileSync)('git', ['ls-files', '-co', '--exclude-standard'], { cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] });
104
134
  const files = new Set();
@@ -178,6 +208,30 @@ function scanDirectory(rootDir, config, onProgress) {
178
208
  // Fallback: walk filesystem for non-git projects
179
209
  return scanDirectoryWalk(rootDir, config, onProgress);
180
210
  }
211
+ /**
212
+ * Async variant of scanDirectory that yields to the event loop periodically,
213
+ * allowing worker threads to receive and render progress messages.
214
+ */
215
+ async function scanDirectoryAsync(rootDir, config, onProgress) {
216
+ const gitFiles = getGitVisibleFiles(rootDir);
217
+ if (gitFiles) {
218
+ const files = [];
219
+ let count = 0;
220
+ for (const filePath of gitFiles) {
221
+ if (shouldIncludeFile(filePath, config)) {
222
+ files.push(filePath);
223
+ count++;
224
+ onProgress?.(count, filePath);
225
+ // Yield every 100 files so worker threads can render progress
226
+ if (count % 100 === 0) {
227
+ await new Promise(r => setImmediate(r));
228
+ }
229
+ }
230
+ }
231
+ return files;
232
+ }
233
+ return scanDirectoryWalk(rootDir, config, onProgress);
234
+ }
181
235
  /**
182
236
  * Filesystem walk fallback for non-git projects.
183
237
  */
@@ -210,7 +264,6 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
210
264
  entries = fs.readdirSync(dir, { withFileTypes: true });
211
265
  }
212
266
  catch (error) {
213
- (0, sentry_1.captureException)(error, { operation: 'walk-directory', dir });
214
267
  (0, errors_1.logDebug)('Skipping unreadable directory', { dir, error: String(error) });
215
268
  return;
216
269
  }
@@ -287,21 +340,25 @@ class ExtractionOrchestrator {
287
340
  /**
288
341
  * Index all files in the project
289
342
  */
290
- async indexAll(onProgress, signal) {
343
+ async indexAll(onProgress, signal, verbose) {
291
344
  await (0, grammars_1.initGrammars)();
292
345
  const startTime = Date.now();
293
346
  const errors = [];
294
347
  let filesIndexed = 0;
295
348
  let filesSkipped = 0;
349
+ let filesErrored = 0;
296
350
  let totalNodes = 0;
297
351
  let totalEdges = 0;
352
+ const log = verbose
353
+ ? (msg) => { console.log(`[worker] ${msg}`); }
354
+ : (_msg) => { };
298
355
  // Phase 1: Scan for files
299
356
  onProgress?.({
300
357
  phase: 'scanning',
301
358
  current: 0,
302
359
  total: 0,
303
360
  });
304
- const files = scanDirectory(this.rootDir, this.config, (current, file) => {
361
+ const files = await scanDirectoryAsync(this.rootDir, this.config, (current, file) => {
305
362
  onProgress?.({
306
363
  phase: 'scanning',
307
364
  current,
@@ -314,26 +371,164 @@ class ExtractionOrchestrator {
314
371
  success: false,
315
372
  filesIndexed: 0,
316
373
  filesSkipped: 0,
374
+ filesErrored: 0,
317
375
  nodesCreated: 0,
318
376
  edgesCreated: 0,
319
377
  errors: [{ message: 'Aborted', severity: 'error' }],
320
378
  durationMs: Date.now() - startTime,
321
379
  };
322
380
  }
323
- // Load only the grammars needed for languages actually present in the project.
324
- // This avoids compiling all 16+ WASM grammar modules upfront, which can cause
325
- // V8 WASM Zone OOM on large codebases (see issue #54).
326
- const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
327
- await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
328
- // Phase 2: Parse files (read in parallel batches, parse/store sequentially)
381
+ // Phase 2: Parse files in a worker thread (keeps main thread unblocked for UI)
329
382
  const total = files.length;
330
383
  let processed = 0;
384
+ // Emit parsing phase immediately so the progress bar appears during worker setup.
385
+ // The yield lets the shimmer worker flush the phase transition to stdout before
386
+ // the main thread starts synchronous grammar detection work.
387
+ onProgress?.({
388
+ phase: 'parsing',
389
+ current: 0,
390
+ total,
391
+ });
392
+ await new Promise(resolve => setImmediate(resolve));
393
+ // Detect needed languages and load grammars in the parse worker
394
+ const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
395
+ // .h files default to 'c' but may be C++ — ensure cpp grammar is loaded when c is needed
396
+ if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
397
+ neededLanguages.push('cpp');
398
+ }
399
+ // Try to use a worker thread for parsing (keeps main thread unblocked for UI).
400
+ // Falls back to in-process parsing if the compiled worker is unavailable (e.g. tests).
401
+ const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
402
+ const useWorker = fs.existsSync(parseWorkerPath);
403
+ let WorkerClass = null;
404
+ if (useWorker) {
405
+ const { Worker } = await Promise.resolve().then(() => __importStar(require('worker_threads')));
406
+ WorkerClass = Worker;
407
+ }
408
+ else {
409
+ // In-process fallback: load grammars locally
410
+ await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
411
+ }
412
+ // --- Worker lifecycle management ---
413
+ // The worker can crash (OOM in WASM) or hang on pathological files.
414
+ // We track pending parse promises and handle both cases:
415
+ // - Timeout: terminate + restart the worker, reject the timed-out request
416
+ // - Crash: reject all pending promises, restart for remaining files
417
+ let parseWorker = null;
418
+ let nextId = 0;
419
+ let workerParseCount = 0;
420
+ const pendingParses = new Map();
421
+ function rejectAllPending(reason) {
422
+ for (const [id, pending] of pendingParses) {
423
+ clearTimeout(pending.timer);
424
+ pendingParses.delete(id);
425
+ pending.reject(new Error(reason));
426
+ }
427
+ }
428
+ function attachWorkerHandlers(w) {
429
+ w.on('message', (msg) => {
430
+ if (msg.type === 'parse-result' && msg.id !== undefined) {
431
+ const pending = pendingParses.get(msg.id);
432
+ if (pending) {
433
+ clearTimeout(pending.timer);
434
+ pendingParses.delete(msg.id);
435
+ pending.resolve(msg.result);
436
+ }
437
+ }
438
+ });
439
+ w.on('error', (err) => {
440
+ (0, errors_1.logWarn)('Parse worker error', { error: err.message });
441
+ rejectAllPending(`Worker error: ${err.message}`);
442
+ });
443
+ w.on('exit', (code) => {
444
+ if (code !== 0 && pendingParses.size > 0) {
445
+ (0, errors_1.logWarn)('Parse worker exited unexpectedly', { code });
446
+ rejectAllPending(`Worker exited with code ${code}`);
447
+ }
448
+ // Clear reference so we know to respawn, reset count so
449
+ // the fresh worker gets a full cycle before recycling.
450
+ if (parseWorker === w) {
451
+ parseWorker = null;
452
+ workerParseCount = 0;
453
+ }
454
+ });
455
+ }
456
+ async function ensureWorker() {
457
+ if (parseWorker)
458
+ return parseWorker;
459
+ log('Spawning new parse worker...');
460
+ parseWorker = new WorkerClass(parseWorkerPath);
461
+ attachWorkerHandlers(parseWorker);
462
+ // Load grammars in the new worker
463
+ await new Promise((resolve, reject) => {
464
+ parseWorker.once('message', (msg) => {
465
+ if (msg.type === 'grammars-loaded')
466
+ resolve();
467
+ else
468
+ reject(new Error(`Unexpected message: ${msg.type}`));
469
+ });
470
+ parseWorker.postMessage({ type: 'load-grammars', languages: neededLanguages });
471
+ });
472
+ return parseWorker;
473
+ }
474
+ if (WorkerClass) {
475
+ await ensureWorker();
476
+ }
477
+ /**
478
+ * Recycle the worker thread to reclaim WASM memory.
479
+ * Terminates the current worker and clears the reference so
480
+ * ensureWorker() will spawn a fresh one on the next call.
481
+ */
482
+ function recycleWorker() {
483
+ if (!parseWorker)
484
+ return;
485
+ log(`Recycling worker after ${workerParseCount} parses (heap: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB RSS)`);
486
+ const w = parseWorker;
487
+ parseWorker = null;
488
+ workerParseCount = 0;
489
+ // Fire-and-forget: worker.terminate() can hang if WASM is stuck
490
+ w.terminate().catch(() => { });
491
+ }
492
+ async function requestParse(filePath, content) {
493
+ if (!WorkerClass) {
494
+ // In-process fallback
495
+ return (0, tree_sitter_1.extractFromSource)(filePath, content, (0, grammars_1.detectLanguage)(filePath, content));
496
+ }
497
+ // Recycle the worker before the next parse if we've hit the threshold.
498
+ // This destroys the WASM linear memory (which can grow but never shrink)
499
+ // and starts a fresh worker with a clean heap.
500
+ if (workerParseCount >= WORKER_RECYCLE_INTERVAL) {
501
+ await recycleWorker();
502
+ }
503
+ const worker = await ensureWorker();
504
+ const id = nextId++;
505
+ workerParseCount++;
506
+ // Scale timeout for large files: base 10s + 10s per 100KB
507
+ const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
508
+ return new Promise((resolve, reject) => {
509
+ const timer = setTimeout(() => {
510
+ pendingParses.delete(id);
511
+ log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
512
+ // Reject FIRST — worker.terminate() can hang if WASM is stuck
513
+ parseWorker = null;
514
+ workerParseCount = 0;
515
+ reject(new Error(`Parse timed out after ${timeoutMs}ms`));
516
+ // Fire-and-forget: kill the stuck worker in the background
517
+ worker.terminate().catch(() => { });
518
+ }, timeoutMs);
519
+ pendingParses.set(id, { resolve, reject, timer });
520
+ worker.postMessage({ type: 'parse', id, filePath, content });
521
+ });
522
+ }
331
523
  for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
332
524
  if (signal?.aborted) {
525
+ if (parseWorker)
526
+ parseWorker.terminate().catch(() => { });
333
527
  return {
334
528
  success: false,
335
529
  filesIndexed,
336
530
  filesSkipped,
531
+ filesErrored,
337
532
  nodesCreated: totalNodes,
338
533
  edgesCreated: totalEdges,
339
534
  errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
@@ -357,20 +552,23 @@ class ExtractionOrchestrator {
357
552
  return { filePath: fp, content: null, stats: null, error: err };
358
553
  }
359
554
  }));
360
- // Parse and store sequentially
555
+ // Send to worker for parsing, store results on main thread
361
556
  for (const { filePath, content, stats, error } of fileContents) {
362
557
  if (signal?.aborted) {
558
+ if (parseWorker)
559
+ parseWorker.terminate().catch(() => { });
363
560
  return {
364
561
  success: false,
365
562
  filesIndexed,
366
563
  filesSkipped,
564
+ filesErrored,
367
565
  nodesCreated: totalNodes,
368
566
  edgesCreated: totalEdges,
369
567
  errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
370
568
  durationMs: Date.now() - startTime,
371
569
  };
372
570
  }
373
- processed++;
571
+ // Report progress before parsing (show current file being worked on)
374
572
  onProgress?.({
375
573
  phase: 'parsing',
376
574
  current: processed,
@@ -378,14 +576,44 @@ class ExtractionOrchestrator {
378
576
  currentFile: filePath,
379
577
  });
380
578
  if (error || content === null || stats === null) {
579
+ processed++;
580
+ filesErrored++;
381
581
  errors.push({
382
582
  message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
583
+ filePath,
584
+ severity: 'error',
585
+ code: 'read_error',
586
+ });
587
+ continue;
588
+ }
589
+ // Parse in worker thread (main thread stays unblocked).
590
+ // Wrapped in try/catch to handle worker timeouts and crashes gracefully.
591
+ let result;
592
+ try {
593
+ result = await requestParse(filePath, content);
594
+ }
595
+ catch (parseErr) {
596
+ processed++;
597
+ filesErrored++;
598
+ errors.push({
599
+ message: parseErr instanceof Error ? parseErr.message : String(parseErr),
600
+ filePath,
383
601
  severity: 'error',
602
+ code: 'parse_error',
384
603
  });
385
604
  continue;
386
605
  }
387
- const result = await this.indexFileWithContent(filePath, content, stats);
606
+ processed++;
607
+ // Store in database on main thread (SQLite is not thread-safe)
608
+ if (result.nodes.length > 0 || result.errors.length === 0) {
609
+ const language = (0, grammars_1.detectLanguage)(filePath, content);
610
+ this.storeExtractionResult(filePath, content, language, stats, result);
611
+ }
388
612
  if (result.errors.length > 0) {
613
+ for (const err of result.errors) {
614
+ if (!err.filePath)
615
+ err.filePath = filePath;
616
+ }
389
617
  errors.push(...result.errors);
390
618
  }
391
619
  if (result.nodes.length > 0) {
@@ -393,22 +621,130 @@ class ExtractionOrchestrator {
393
621
  totalNodes += result.nodes.length;
394
622
  totalEdges += result.edges.length;
395
623
  }
396
- else if (result.errors.length === 0) {
624
+ else if (result.errors.some((e) => e.severity === 'error')) {
625
+ filesErrored++;
626
+ }
627
+ else {
397
628
  filesSkipped++;
398
629
  }
399
630
  }
400
631
  }
401
- // Phase 3: Resolve references
632
+ // Report 100% so the progress bar doesn't hang at 99%
402
633
  onProgress?.({
403
- phase: 'resolving',
404
- current: 0,
405
- total: 1,
634
+ phase: 'parsing',
635
+ current: total,
636
+ total,
406
637
  });
407
- // TODO: Implement reference resolution in Phase 3
638
+ // Yield so the shimmer worker's buffered stdout writes can flush.
639
+ // Worker thread stdout is proxied through the main thread's event loop,
640
+ // so synchronous work here blocks the animation from rendering.
641
+ await new Promise(resolve => setImmediate(resolve));
642
+ // Retry pass: files that failed due to WASM memory corruption may succeed
643
+ // on a fresh worker with a clean heap. Recycle before each attempt so
644
+ // every file gets the absolute cleanest WASM state possible.
645
+ const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
646
+ (e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
647
+ if (retryableErrors.length > 0 && WorkerClass) {
648
+ log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
649
+ const stillFailing = [];
650
+ for (const errEntry of retryableErrors) {
651
+ const filePath = errEntry.filePath;
652
+ if (signal?.aborted)
653
+ break;
654
+ // Fresh worker for every retry — maximum WASM headroom
655
+ recycleWorker();
656
+ let content;
657
+ try {
658
+ const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
659
+ if (!fullPath)
660
+ continue;
661
+ content = await fsp.readFile(fullPath, 'utf-8');
662
+ }
663
+ catch {
664
+ continue;
665
+ }
666
+ let result;
667
+ try {
668
+ result = await requestParse(filePath, content);
669
+ }
670
+ catch {
671
+ stillFailing.push(errEntry);
672
+ continue;
673
+ }
674
+ if (result.nodes.length > 0 || result.errors.length === 0) {
675
+ const language = (0, grammars_1.detectLanguage)(filePath, content);
676
+ const stats = await fsp.stat(path.join(this.rootDir, filePath));
677
+ this.storeExtractionResult(filePath, content, language, stats, result);
678
+ const idx = errors.indexOf(errEntry);
679
+ if (idx >= 0)
680
+ errors.splice(idx, 1);
681
+ filesErrored--;
682
+ filesIndexed++;
683
+ totalNodes += result.nodes.length;
684
+ totalEdges += result.edges.length;
685
+ log(`Retry OK: ${filePath} (${result.nodes.length} nodes)`);
686
+ }
687
+ }
688
+ // Last resort: for files that still crash on a clean worker, strip
689
+ // comment-only lines to reduce WASM memory pressure. Many compiler
690
+ // test files are 90%+ comments (CHECK directives) that don't contribute
691
+ // code nodes but consume parser memory.
692
+ if (stillFailing.length > 0) {
693
+ log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
694
+ for (const errEntry of stillFailing) {
695
+ const filePath = errEntry.filePath;
696
+ if (signal?.aborted)
697
+ break;
698
+ recycleWorker();
699
+ let fullContent;
700
+ try {
701
+ const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
702
+ if (!fullPath)
703
+ continue;
704
+ fullContent = await fsp.readFile(fullPath, 'utf-8');
705
+ }
706
+ catch {
707
+ continue;
708
+ }
709
+ // Strip lines that are entirely comments (preserving line numbers
710
+ // by replacing with empty lines so node positions stay correct)
711
+ const stripped = fullContent
712
+ .split('\n')
713
+ .map(line => /^\s*\/\//.test(line) ? '' : line)
714
+ .join('\n');
715
+ let result;
716
+ try {
717
+ result = await requestParse(filePath, stripped);
718
+ }
719
+ catch {
720
+ continue;
721
+ }
722
+ if (result.nodes.length > 0 || result.errors.length === 0) {
723
+ const language = (0, grammars_1.detectLanguage)(filePath, fullContent);
724
+ const stats = await fsp.stat(path.join(this.rootDir, filePath));
725
+ this.storeExtractionResult(filePath, fullContent, language, stats, result);
726
+ const idx = errors.indexOf(errEntry);
727
+ if (idx >= 0)
728
+ errors.splice(idx, 1);
729
+ filesErrored--;
730
+ filesIndexed++;
731
+ totalNodes += result.nodes.length;
732
+ totalEdges += result.edges.length;
733
+ log(`Retry (stripped) OK: ${filePath} (${result.nodes.length} nodes)`);
734
+ }
735
+ }
736
+ }
737
+ }
738
+ // Shut down parse worker and clear any pending timers
739
+ rejectAllPending('Indexing complete');
740
+ if (parseWorker) {
741
+ parseWorker.terminate().catch(() => { });
742
+ }
408
743
  return {
409
- success: errors.filter((e) => e.severity === 'error').length === 0,
744
+ success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
410
745
  filesIndexed,
411
746
  filesSkipped,
747
+ filesErrored,
412
748
  nodesCreated: totalNodes,
413
749
  edgesCreated: totalEdges,
414
750
  errors,
@@ -423,6 +759,7 @@ class ExtractionOrchestrator {
423
759
  const errors = [];
424
760
  let filesIndexed = 0;
425
761
  let filesSkipped = 0;
762
+ let filesErrored = 0;
426
763
  let totalNodes = 0;
427
764
  let totalEdges = 0;
428
765
  for (const filePath of filePaths) {
@@ -435,14 +772,18 @@ class ExtractionOrchestrator {
435
772
  totalNodes += result.nodes.length;
436
773
  totalEdges += result.edges.length;
437
774
  }
775
+ else if (result.errors.some((e) => e.severity === 'error')) {
776
+ filesErrored++;
777
+ }
438
778
  else {
439
779
  filesSkipped++;
440
780
  }
441
781
  }
442
782
  return {
443
- success: errors.filter((e) => e.severity === 'error').length === 0,
783
+ success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
444
784
  filesIndexed,
445
785
  filesSkipped,
786
+ filesErrored,
446
787
  nodesCreated: totalNodes,
447
788
  edgesCreated: totalEdges,
448
789
  errors,
@@ -459,7 +800,7 @@ class ExtractionOrchestrator {
459
800
  nodes: [],
460
801
  edges: [],
461
802
  unresolvedReferences: [],
462
- errors: [{ message: `Path traversal blocked: ${relativePath}`, severity: 'error' }],
803
+ errors: [{ message: `Path traversal blocked: ${relativePath}`, filePath: relativePath, severity: 'error', code: 'path_traversal' }],
463
804
  durationMs: 0,
464
805
  };
465
806
  }
@@ -471,7 +812,6 @@ class ExtractionOrchestrator {
471
812
  content = await fsp.readFile(fullPath, 'utf-8');
472
813
  }
473
814
  catch (error) {
474
- (0, sentry_1.captureException)(error, { operation: 'extract-file', filePath: fullPath });
475
815
  return {
476
816
  nodes: [],
477
817
  edges: [],
@@ -479,7 +819,9 @@ class ExtractionOrchestrator {
479
819
  errors: [
480
820
  {
481
821
  message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
822
+ filePath: relativePath,
482
823
  severity: 'error',
824
+ code: 'read_error',
483
825
  },
484
826
  ],
485
827
  durationMs: 0,
@@ -500,7 +842,7 @@ class ExtractionOrchestrator {
500
842
  nodes: [],
501
843
  edges: [],
502
844
  unresolvedReferences: [],
503
- errors: [{ message: 'Path traversal blocked', severity: 'error' }],
845
+ errors: [{ message: 'Path traversal blocked', filePath: relativePath, severity: 'error', code: 'path_traversal' }],
504
846
  durationMs: 0,
505
847
  };
506
848
  }
@@ -513,14 +855,16 @@ class ExtractionOrchestrator {
513
855
  errors: [
514
856
  {
515
857
  message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
858
+ filePath: relativePath,
516
859
  severity: 'warning',
860
+ code: 'size_exceeded',
517
861
  },
518
862
  ],
519
863
  durationMs: 0,
520
864
  };
521
865
  }
522
866
  // Detect language
523
- const language = (0, grammars_1.detectLanguage)(relativePath);
867
+ const language = (0, grammars_1.detectLanguage)(relativePath, content);
524
868
  if (!(0, grammars_1.isLanguageSupported)(language)) {
525
869
  return {
526
870
  nodes: [],
@@ -635,7 +979,6 @@ class ExtractionOrchestrator {
635
979
  content = fs.readFileSync(fullPath, 'utf-8');
636
980
  }
637
981
  catch (error) {
638
- (0, sentry_1.captureException)(error, { operation: 'sync-read-file', filePath });
639
982
  (0, errors_1.logDebug)('Skipping unreadable file during sync', { filePath, error: String(error) });
640
983
  continue;
641
984
  }
@@ -684,7 +1027,6 @@ class ExtractionOrchestrator {
684
1027
  content = fs.readFileSync(fullPath, 'utf-8');
685
1028
  }
686
1029
  catch (error) {
687
- (0, sentry_1.captureException)(error, { operation: 'sync-read-file', filePath });
688
1030
  (0, errors_1.logDebug)('Skipping unreadable file during sync', { filePath, error: String(error) });
689
1031
  continue;
690
1032
  }
@@ -705,6 +1047,10 @@ class ExtractionOrchestrator {
705
1047
  // Load only grammars needed for changed files
706
1048
  if (filesToIndex.length > 0) {
707
1049
  const neededLanguages = [...new Set(filesToIndex.map((f) => (0, grammars_1.detectLanguage)(f)))];
1050
+ // .h files default to 'c' but may be C++ — ensure cpp grammar is loaded
1051
+ if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
1052
+ neededLanguages.push('cpp');
1053
+ }
708
1054
  await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
709
1055
  }
710
1056
  // Index changed files
@@ -756,7 +1102,6 @@ class ExtractionOrchestrator {
756
1102
  content = fs.readFileSync(fullPath, 'utf-8');
757
1103
  }
758
1104
  catch (error) {
759
- (0, sentry_1.captureException)(error, { operation: 'detect-changes-read-file', filePath });
760
1105
  (0, errors_1.logDebug)('Skipping unreadable file while detecting changes', { filePath, error: String(error) });
761
1106
  continue;
762
1107
  }
@@ -800,7 +1145,6 @@ class ExtractionOrchestrator {
800
1145
  content = fs.readFileSync(fullPath, 'utf-8');
801
1146
  }
802
1147
  catch (error) {
803
- (0, sentry_1.captureException)(error, { operation: 'detect-changes-read-file', filePath });
804
1148
  (0, errors_1.logDebug)('Skipping unreadable file while detecting changes', { filePath, error: String(error) });
805
1149
  continue;
806
1150
  }