@colbymchenry/codegraph-darwin-x64 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/lib/dist/bin/codegraph.js +79 -52
  2. package/lib/dist/bin/codegraph.js.map +1 -1
  3. package/lib/dist/bin/command-supervision.d.ts +12 -0
  4. package/lib/dist/bin/command-supervision.d.ts.map +1 -0
  5. package/lib/dist/bin/command-supervision.js +76 -0
  6. package/lib/dist/bin/command-supervision.js.map +1 -0
  7. package/lib/dist/db/queries.d.ts.map +1 -1
  8. package/lib/dist/db/queries.js +10 -2
  9. package/lib/dist/db/queries.js.map +1 -1
  10. package/lib/dist/directory.d.ts +32 -0
  11. package/lib/dist/directory.d.ts.map +1 -1
  12. package/lib/dist/directory.js +83 -0
  13. package/lib/dist/directory.js.map +1 -1
  14. package/lib/dist/extraction/index.d.ts +13 -1
  15. package/lib/dist/extraction/index.d.ts.map +1 -1
  16. package/lib/dist/extraction/index.js +219 -213
  17. package/lib/dist/extraction/index.js.map +1 -1
  18. package/lib/dist/extraction/parse-pool.d.ts +126 -0
  19. package/lib/dist/extraction/parse-pool.d.ts.map +1 -0
  20. package/lib/dist/extraction/parse-pool.js +319 -0
  21. package/lib/dist/extraction/parse-pool.js.map +1 -0
  22. package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
  23. package/lib/dist/extraction/tree-sitter.js +48 -19
  24. package/lib/dist/extraction/tree-sitter.js.map +1 -1
  25. package/lib/dist/mcp/daemon-paths.d.ts +30 -3
  26. package/lib/dist/mcp/daemon-paths.d.ts.map +1 -1
  27. package/lib/dist/mcp/daemon-paths.js +50 -10
  28. package/lib/dist/mcp/daemon-paths.js.map +1 -1
  29. package/lib/dist/mcp/daemon-registry.d.ts.map +1 -1
  30. package/lib/dist/mcp/daemon-registry.js +7 -3
  31. package/lib/dist/mcp/daemon-registry.js.map +1 -1
  32. package/lib/dist/mcp/daemon.d.ts +38 -0
  33. package/lib/dist/mcp/daemon.d.ts.map +1 -1
  34. package/lib/dist/mcp/daemon.js +164 -31
  35. package/lib/dist/mcp/daemon.js.map +1 -1
  36. package/lib/dist/mcp/engine.d.ts +17 -0
  37. package/lib/dist/mcp/engine.d.ts.map +1 -1
  38. package/lib/dist/mcp/engine.js +73 -1
  39. package/lib/dist/mcp/engine.js.map +1 -1
  40. package/lib/dist/mcp/index.d.ts.map +1 -1
  41. package/lib/dist/mcp/index.js +25 -43
  42. package/lib/dist/mcp/index.js.map +1 -1
  43. package/lib/dist/mcp/ppid-watchdog.d.ts +18 -0
  44. package/lib/dist/mcp/ppid-watchdog.d.ts.map +1 -1
  45. package/lib/dist/mcp/ppid-watchdog.js +37 -0
  46. package/lib/dist/mcp/ppid-watchdog.js.map +1 -1
  47. package/lib/dist/mcp/query-pool.d.ts +94 -0
  48. package/lib/dist/mcp/query-pool.d.ts.map +1 -0
  49. package/lib/dist/mcp/query-pool.js +297 -0
  50. package/lib/dist/mcp/query-pool.js.map +1 -0
  51. package/lib/dist/mcp/query-worker.d.ts +24 -0
  52. package/lib/dist/mcp/query-worker.d.ts.map +1 -0
  53. package/lib/dist/mcp/query-worker.js +87 -0
  54. package/lib/dist/mcp/query-worker.js.map +1 -0
  55. package/lib/dist/mcp/tools.d.ts +57 -0
  56. package/lib/dist/mcp/tools.d.ts.map +1 -1
  57. package/lib/dist/mcp/tools.js +147 -37
  58. package/lib/dist/mcp/tools.js.map +1 -1
  59. package/lib/dist/project-config.d.ts +20 -0
  60. package/lib/dist/project-config.d.ts.map +1 -1
  61. package/lib/dist/project-config.js +42 -2
  62. package/lib/dist/project-config.js.map +1 -1
  63. package/lib/dist/resolution/c-fnptr-synthesizer.d.ts +0 -28
  64. package/lib/dist/resolution/c-fnptr-synthesizer.d.ts.map +1 -1
  65. package/lib/dist/resolution/c-fnptr-synthesizer.js +765 -79
  66. package/lib/dist/resolution/c-fnptr-synthesizer.js.map +1 -1
  67. package/lib/dist/resolution/name-matcher.d.ts.map +1 -1
  68. package/lib/dist/resolution/name-matcher.js +44 -0
  69. package/lib/dist/resolution/name-matcher.js.map +1 -1
  70. package/lib/node_modules/.package-lock.json +1 -1
  71. package/lib/package.json +1 -1
  72. package/package.json +1 -1
@@ -51,9 +51,11 @@ exports.scanDirectoryAsync = scanDirectoryAsync;
51
51
  const fs = __importStar(require("fs"));
52
52
  const fsp = __importStar(require("fs/promises"));
53
53
  const path = __importStar(require("path"));
54
+ const os = __importStar(require("os"));
54
55
  const crypto = __importStar(require("crypto"));
55
56
  const child_process_1 = require("child_process");
56
57
  const tree_sitter_1 = require("./tree-sitter");
58
+ const parse_pool_1 = require("./parse-pool");
57
59
  const grammars_1 = require("./grammars");
58
60
  const project_config_1 = require("../project-config");
59
61
  const directory_1 = require("../directory");
@@ -265,6 +267,19 @@ function loadIncludeIgnoredMatcher(rootDir) {
265
267
  const patterns = (0, project_config_1.loadIncludeIgnoredPatterns)(rootDir);
266
268
  return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
267
269
  }
270
+ /**
271
+ * Matcher for the project's `codegraph.json` `exclude` patterns — paths to keep
272
+ * OUT of the index even when git-tracked, which `.gitignore` cannot do (#999).
273
+ * The escape hatch for a committed vendor/theme/SDK directory. Returns `null`
274
+ * when nothing is excluded (the zero-config default → no overhead). Matched
275
+ * against project-root-relative paths, so it applies uniformly across the whole
276
+ * workspace, including inside embedded repos (excluding `static/` means gone
277
+ * everywhere). Built once per scan/sync/scope operation from the scan root.
278
+ */
279
+ function loadExcludeMatcher(rootDir) {
280
+ const patterns = (0, project_config_1.loadExcludePatterns)(rootDir);
281
+ return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
282
+ }
268
283
  /**
269
284
  * `git ls-files --directory` collapses a wholly-untracked/ignored directory into
270
285
  * one entry — and when the command's own cwd is such a directory (the indexed
@@ -404,14 +419,27 @@ function findNestedGitRepos(absDir, relPrefix) {
404
419
  */
405
420
  class ScopeIgnore {
406
421
  rootMatcher;
422
+ exclude;
407
423
  embedded;
408
424
  defaults = defaultsOnlyIgnore();
409
- constructor(rootMatcher, embedded) {
425
+ constructor(rootMatcher, embedded,
426
+ /**
427
+ * Project `codegraph.json` `exclude` patterns (#999), matched against the
428
+ * full root-relative path. Wins over everything else — an explicit user
429
+ * exclude applies even to tracked files and even inside embedded repos.
430
+ */
431
+ exclude = null) {
410
432
  this.rootMatcher = rootMatcher;
433
+ this.exclude = exclude;
411
434
  // Longest root first so paths in nested embedded repos hit the innermost matcher.
412
435
  this.embedded = [...embedded].sort((a, b) => b.root.length - a.root.length);
413
436
  }
414
437
  ignores(rel) {
438
+ // User `exclude` (#999) is checked first and against the full root-relative
439
+ // path: it must drop git-TRACKED paths (which `.gitignore` can't) and apply
440
+ // everywhere, including ancestors of embedded repos.
441
+ if (this.exclude && this.exclude.ignores(rel))
442
+ return true;
415
443
  for (const { root, matcher } of this.embedded) {
416
444
  if (rel.startsWith(root)) {
417
445
  const inner = rel.slice(root.length);
@@ -438,7 +466,7 @@ exports.ScopeIgnore = ScopeIgnore;
438
466
  */
439
467
  function buildScopeIgnore(rootDir, embeddedRoots) {
440
468
  const roots = embeddedRoots ? [...embeddedRoots] : discoverEmbeddedRepoRoots(rootDir);
441
- return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })));
469
+ return new ScopeIgnore(buildDefaultIgnore(rootDir), roots.map((root) => ({ root, matcher: buildDefaultIgnore(path.join(rootDir, root)) })), loadExcludeMatcher(rootDir));
442
470
  }
443
471
  /**
444
472
  * Standalone discovery of every embedded repo root under `rootDir` (relative,
@@ -637,14 +665,14 @@ function getGitChangedFiles(rootDir) {
637
665
  // Custom extension → language overrides from the project's codegraph.json,
638
666
  // so change detection sees the same custom-extension files the full index does.
639
667
  const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
640
- collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir));
668
+ collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir), loadExcludeMatcher(rootDir));
641
669
  return changes;
642
670
  }
643
671
  catch {
644
672
  return null;
645
673
  }
646
674
  }
647
- function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null) {
675
+ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null, exclude = null) {
648
676
  const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
649
677
  // This repo's own ignore rules — built-in defaults (#407) plus its .gitignore.
650
678
  // Change detection must exclude the SAME files the full index does, but git
@@ -683,6 +711,12 @@ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null
683
711
  // index — match against the repo-relative path, same as the full scan. (#766)
684
712
  if (ig.ignores(rel))
685
713
  continue;
714
+ // User `codegraph.json` `exclude` (#999) is project-root-relative, so it's
715
+ // matched against the full path — sync must not re-add a tracked file the
716
+ // full index now keeps out. Deletions above stay unfiltered so a file that
717
+ // WAS indexed before an exclude was added still cleans itself out.
718
+ if (exclude && exclude.ignores(filePath))
719
+ continue;
686
720
  if (statusCode === '??') {
687
721
  out.added.push(filePath);
688
722
  }
@@ -697,11 +731,11 @@ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null
697
731
  // and they are left alone (#970, #976), mirroring the full-index scan.
698
732
  for (const rel of untrackedDirs) {
699
733
  for (const repoRel of findNestedGitRepos(path.join(repoDir, rel), rel)) {
700
- collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored);
734
+ collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored, exclude);
701
735
  }
702
736
  }
703
737
  for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
704
- collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored);
738
+ collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored, exclude);
705
739
  }
706
740
  }
707
741
  /**
@@ -860,7 +894,14 @@ function scanDirectoryWalk(rootDir, onProgress) {
860
894
  }
861
895
  // Seed a base matcher with the built-in default ignores (merged with the root
862
896
  // .gitignore so a negation can override). Nested .gitignores still layer per-dir.
863
- walk(rootDir, [{ dir: rootDir, ig: buildDefaultIgnore(rootDir) }]);
897
+ const baseMatchers = [{ dir: rootDir, ig: buildDefaultIgnore(rootDir) }];
898
+ // Project `codegraph.json` `exclude` patterns (#999), rooted at the project so
899
+ // `isIgnored` matches them against root-relative paths — same coverage the
900
+ // git path gets via ScopeIgnore, for non-git projects.
901
+ const exclude = loadExcludeMatcher(rootDir);
902
+ if (exclude)
903
+ baseMatchers.push({ dir: rootDir, ig: exclude });
904
+ walk(rootDir, baseMatchers);
864
905
  return files;
865
906
  }
866
907
  /**
@@ -1022,148 +1063,153 @@ class ExtractionOrchestrator {
1022
1063
  if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
1023
1064
  neededLanguages.push('cpp');
1024
1065
  }
1025
- // Try to use a worker thread for parsing (keeps main thread unblocked for UI).
1026
- // Falls back to in-process parsing if the compiled worker is unavailable (e.g. tests).
1066
+ // Parse files on a pool of worker threads (keeps the main thread free for UI
1067
+ // and uses every core). Falls back to in-process parsing when the compiled
1068
+ // worker is unavailable (e.g. running from source in tests).
1027
1069
  const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
1028
1070
  const useWorker = fs.existsSync(parseWorkerPath);
1029
- let WorkerClass = null;
1071
+ let pool = null;
1030
1072
  if (useWorker) {
1031
- const { Worker } = await Promise.resolve().then(() => __importStar(require('worker_threads')));
1032
- WorkerClass = Worker;
1073
+ // CODEGRAPH_PARSE_WORKERS: explicit worker count; 1 = the old single-worker
1074
+ // behaviour (the conservative rollback). Unset → clamp(cores-1, 1, 8).
1075
+ const poolSize = (0, parse_pool_1.resolveParsePoolSize)(process.env.CODEGRAPH_PARSE_WORKERS, os.cpus().length);
1076
+ pool = new parse_pool_1.ParseWorkerPool({
1077
+ languages: neededLanguages,
1078
+ size: poolSize,
1079
+ workerScriptPath: parseWorkerPath,
1080
+ recycleInterval: WORKER_RECYCLE_INTERVAL,
1081
+ parseTimeoutMs: PARSE_TIMEOUT_MS,
1082
+ log,
1083
+ });
1084
+ log(`Parse worker pool: ${poolSize} worker(s)`);
1033
1085
  }
1034
1086
  else {
1035
- // In-process fallback: load grammars locally
1087
+ // In-process fallback: load grammars locally and parse on the main thread.
1036
1088
  await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
1037
1089
  }
1038
- // --- Worker lifecycle management ---
1039
- // The worker can crash (OOM in WASM) or hang on pathological files.
1040
- // We track pending parse promises and handle both cases:
1041
- // - Timeout: terminate + restart the worker, reject the timed-out request
1042
- // - Crash: reject all pending promises, restart for remaining files
1043
- let parseWorker = null;
1044
- let nextId = 0;
1045
- let workerParseCount = 0;
1046
- const pendingParses = new Map();
1047
- function rejectAllPending(reason) {
1048
- for (const [id, pending] of pendingParses) {
1049
- clearTimeout(pending.timer);
1050
- pendingParses.delete(id);
1051
- pending.reject(new Error(reason));
1090
+ /**
1091
+ * Parse one file: on the pool when available (the promise REJECTS on a worker
1092
+ * crash/timeout — the caller records it and the retry pass re-attempts), or
1093
+ * in-process synchronously as the no-worker fallback. The language is resolved
1094
+ * here on the main thread, where the codegraph.json overrides are loaded.
1095
+ */
1096
+ const parseFile = (filePath, content) => {
1097
+ const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1098
+ if (!pool)
1099
+ return Promise.resolve((0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames));
1100
+ return pool.requestParse({ filePath, content, language, frameworkNames });
1101
+ };
1102
+ // --- Bounded rolling-window dispatch, ordered commit ---
1103
+ // Reads stay batched/parallel; parses run concurrently across the pool; the
1104
+ // SQLite store stays on the main thread (it isn't thread-safe). Crucially we
1105
+ // COMMIT results in original file order, not parse-completion order: the
1106
+ // resolution phase (run after indexing) resolves an ambiguous reference to one
1107
+ // of several same-named candidates by the nodes' DB insertion order, so a
1108
+ // stable commit order keeps the resulting graph deterministic — byte-identical
1109
+ // to the single-worker path — instead of drifting with parse timing. The
1110
+ // `completed` buffer holds at most ~windowSize out-of-order results, so memory
1111
+ // stays bounded.
1112
+ const windowSize = pool ? Math.max(4, pool.size * 2) : 1;
1113
+ const inFlight = new Set();
1114
+ const completed = new Map();
1115
+ let nextSeq = 0; // file-order sequence assigned at dispatch
1116
+ let nextToStore = 0; // cursor: next sequence to commit
1117
+ let aborted = false;
1118
+ const storeResult = (filePath, content, stats, result) => {
1119
+ processed++;
1120
+ // Store in database on main thread (SQLite is not thread-safe)
1121
+ if (result.nodes.length > 0 || result.errors.length === 0) {
1122
+ const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1123
+ this.storeExtractionResult(filePath, content, language, stats, result);
1052
1124
  }
1053
- }
1054
- function attachWorkerHandlers(w) {
1055
- w.on('message', (msg) => {
1056
- if (msg.type === 'parse-result' && msg.id !== undefined) {
1057
- const pending = pendingParses.get(msg.id);
1058
- if (pending) {
1059
- clearTimeout(pending.timer);
1060
- pendingParses.delete(msg.id);
1061
- pending.resolve(msg.result);
1062
- }
1125
+ if (result.errors.length > 0) {
1126
+ for (const err of result.errors) {
1127
+ if (!err.filePath)
1128
+ err.filePath = filePath;
1063
1129
  }
1064
- });
1065
- w.on('error', (err) => {
1066
- (0, errors_1.logWarn)('Parse worker error', { error: err.message });
1067
- rejectAllPending(`Worker error: ${err.message}`);
1068
- });
1069
- w.on('exit', (code) => {
1070
- if (code !== 0 && pendingParses.size > 0) {
1071
- (0, errors_1.logWarn)('Parse worker exited unexpectedly', { code });
1072
- rejectAllPending(`Worker exited with code ${code}`);
1130
+ errors.push(...result.errors);
1131
+ }
1132
+ if (result.nodes.length > 0) {
1133
+ filesIndexed++;
1134
+ totalNodes += result.nodes.length;
1135
+ totalEdges += result.edges.length;
1136
+ }
1137
+ else if (result.errors.some((e) => e.severity === 'error')) {
1138
+ filesErrored++;
1139
+ }
1140
+ else {
1141
+ // Files with no symbols but no errors (yaml, twig, properties) are
1142
+ // tracked at the file level — count them as indexed so the CLI doesn't
1143
+ // misleadingly report "No files found to index".
1144
+ const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1145
+ if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
1146
+ filesIndexed++;
1073
1147
  }
1074
- // Clear reference so we know to respawn, reset count so
1075
- // the fresh worker gets a full cycle before recycling.
1076
- if (parseWorker === w) {
1077
- parseWorker = null;
1078
- workerParseCount = 0;
1148
+ else {
1149
+ filesSkipped++;
1079
1150
  }
1151
+ }
1152
+ onProgress?.({ phase: 'parsing', current: processed, total, currentFile: filePath });
1153
+ };
1154
+ const recordParseFailure = (filePath, err) => {
1155
+ processed++;
1156
+ filesErrored++;
1157
+ errors.push({
1158
+ message: err instanceof Error ? err.message : String(err),
1159
+ filePath,
1160
+ severity: 'error',
1161
+ code: 'parse_error',
1080
1162
  });
1081
- }
1082
- async function ensureWorker() {
1083
- if (parseWorker)
1084
- return parseWorker;
1085
- log('Spawning new parse worker...');
1086
- parseWorker = new WorkerClass(parseWorkerPath);
1087
- attachWorkerHandlers(parseWorker);
1088
- // Load grammars in the new worker
1089
- await new Promise((resolve, reject) => {
1090
- parseWorker.once('message', (msg) => {
1091
- if (msg.type === 'grammars-loaded')
1092
- resolve();
1093
- else
1094
- reject(new Error(`Unexpected message: ${msg.type}`));
1095
- });
1096
- parseWorker.postMessage({ type: 'load-grammars', languages: neededLanguages });
1097
- });
1098
- return parseWorker;
1099
- }
1100
- if (WorkerClass) {
1101
- await ensureWorker();
1102
- }
1103
- /**
1104
- * Recycle the worker thread to reclaim WASM memory.
1105
- * Terminates the current worker and clears the reference so
1106
- * ensureWorker() will spawn a fresh one on the next call.
1107
- */
1108
- function recycleWorker() {
1109
- if (!parseWorker)
1163
+ onProgress?.({ phase: 'parsing', current: processed, total });
1164
+ };
1165
+ // Commit buffered parses to the DB in file order, advancing the cursor over
1166
+ // contiguous completed results. Runs after each parse settles (and once more
1167
+ // after the drain). storeResult / recordParseFailure run here single-threaded,
1168
+ // so shared counters and SQLite writes never race despite parallel parsing.
1169
+ const flushOrdered = () => {
1170
+ if (aborted)
1110
1171
  return;
1111
- log(`Recycling worker after ${workerParseCount} parses (heap: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB RSS)`);
1112
- const w = parseWorker;
1113
- parseWorker = null;
1114
- workerParseCount = 0;
1115
- // Fire-and-forget: worker.terminate() can hang if WASM is stuck
1116
- w.terminate().catch(() => { });
1117
- }
1118
- async function requestParse(filePath, content) {
1119
- // Resolve the language on the main thread (where the project's
1120
- // codegraph.json overrides are loaded) and hand it to the worker, so the
1121
- // worker never needs the override map itself.
1122
- const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1123
- if (!WorkerClass) {
1124
- // In-process fallback
1125
- return (0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames);
1172
+ while (completed.has(nextToStore)) {
1173
+ const item = completed.get(nextToStore);
1174
+ completed.delete(nextToStore);
1175
+ nextToStore++;
1176
+ if (item.ok)
1177
+ storeResult(item.filePath, item.content, item.stats, item.result);
1178
+ else
1179
+ recordParseFailure(item.filePath, item.err);
1126
1180
  }
1127
- // Recycle the worker before the next parse if we've hit the threshold.
1128
- // This destroys the WASM linear memory (which can grow but never shrink)
1129
- // and starts a fresh worker with a clean heap.
1130
- if (workerParseCount >= WORKER_RECYCLE_INTERVAL) {
1131
- await recycleWorker();
1181
+ };
1182
+ // Dispatch one file's parse (parses run concurrently across the pool), tagged
1183
+ // with its file-order sequence so flushOrdered commits results in order. The
1184
+ // backpressure below bounds how far parsing runs ahead of the in-order commit.
1185
+ const feed = async (filePath, content, stats) => {
1186
+ const seq = nextSeq++;
1187
+ const p = (async () => {
1188
+ try {
1189
+ const result = await parseFile(filePath, content);
1190
+ completed.set(seq, { ok: true, filePath, content, stats, result });
1191
+ }
1192
+ catch (parseErr) {
1193
+ completed.set(seq, { ok: false, filePath, err: parseErr });
1194
+ }
1195
+ flushOrdered();
1196
+ })();
1197
+ const tracked = p.finally(() => { inFlight.delete(tracked); });
1198
+ inFlight.add(tracked);
1199
+ // Backpressure on the dispatched-but-not-yet-committed count (in-flight +
1200
+ // buffered), not just in-flight: a slow file sitting at the commit cursor
1201
+ // lets later parses finish and buffer, which would otherwise grow without
1202
+ // bound. Wait for parses to settle (each may advance the cursor) until the
1203
+ // window has room. `inFlight.size > 0` guards against an empty race — the
1204
+ // cursor file is always still in flight when the window is full.
1205
+ while (nextSeq - nextToStore >= windowSize && inFlight.size > 0) {
1206
+ await Promise.race(inFlight);
1132
1207
  }
1133
- const worker = await ensureWorker();
1134
- const id = nextId++;
1135
- workerParseCount++;
1136
- // Scale timeout for large files: base 10s + 10s per 100KB
1137
- const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
1138
- return new Promise((resolve, reject) => {
1139
- const timer = setTimeout(() => {
1140
- pendingParses.delete(id);
1141
- log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
1142
- // Reject FIRST — worker.terminate() can hang if WASM is stuck
1143
- parseWorker = null;
1144
- workerParseCount = 0;
1145
- reject(new Error(`Parse timed out after ${timeoutMs}ms`));
1146
- // Fire-and-forget: kill the stuck worker in the background
1147
- worker.terminate().catch(() => { });
1148
- }, timeoutMs);
1149
- pendingParses.set(id, { resolve, reject, timer });
1150
- worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames, language });
1151
- });
1152
- }
1208
+ };
1153
1209
  for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
1154
1210
  if (signal?.aborted) {
1155
- if (parseWorker)
1156
- parseWorker.terminate().catch(() => { });
1157
- return {
1158
- success: false,
1159
- filesIndexed,
1160
- filesSkipped,
1161
- filesErrored,
1162
- nodesCreated: totalNodes,
1163
- edgesCreated: totalEdges,
1164
- errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
1165
- durationMs: Date.now() - startTime,
1166
- };
1211
+ aborted = true;
1212
+ break;
1167
1213
  }
1168
1214
  const batch = files.slice(i, i + FILE_IO_BATCH_SIZE);
1169
1215
  // Read files in parallel (with path validation before any I/O)
@@ -1185,29 +1231,13 @@ class ExtractionOrchestrator {
1185
1231
  return { filePath: fp, content: null, stats: null, error: err };
1186
1232
  }
1187
1233
  }));
1188
- // Send to worker for parsing, store results on main thread
1234
+ // Dispatch each readable file into the bounded parse window; the window
1235
+ // commits results on the main thread in original file order.
1189
1236
  for (const { filePath, content, stats, error } of fileContents) {
1190
1237
  if (signal?.aborted) {
1191
- if (parseWorker)
1192
- parseWorker.terminate().catch(() => { });
1193
- return {
1194
- success: false,
1195
- filesIndexed,
1196
- filesSkipped,
1197
- filesErrored,
1198
- nodesCreated: totalNodes,
1199
- edgesCreated: totalEdges,
1200
- errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
1201
- durationMs: Date.now() - startTime,
1202
- };
1238
+ aborted = true;
1239
+ break;
1203
1240
  }
1204
- // Report progress before parsing (show current file being worked on)
1205
- onProgress?.({
1206
- phase: 'parsing',
1207
- current: processed,
1208
- total,
1209
- currentFile: filePath,
1210
- });
1211
1241
  if (error || content === null || stats === null) {
1212
1242
  processed++;
1213
1243
  filesErrored++;
@@ -1217,6 +1247,7 @@ class ExtractionOrchestrator {
1217
1247
  severity: 'error',
1218
1248
  code: 'read_error',
1219
1249
  });
1250
+ onProgress?.({ phase: 'parsing', current: processed, total });
1220
1251
  continue;
1221
1252
  }
1222
1253
  // Honour MAX_FILE_SIZE. Without this check, vendored generated
@@ -1236,57 +1267,32 @@ class ExtractionOrchestrator {
1236
1267
  onProgress?.({ phase: 'parsing', current: processed, total });
1237
1268
  continue;
1238
1269
  }
1239
- // Parse in worker thread (main thread stays unblocked).
1240
- // Wrapped in try/catch to handle worker timeouts and crashes gracefully.
1241
- let result;
1242
- try {
1243
- result = await requestParse(filePath, content);
1244
- }
1245
- catch (parseErr) {
1246
- processed++;
1247
- filesErrored++;
1248
- errors.push({
1249
- message: parseErr instanceof Error ? parseErr.message : String(parseErr),
1250
- filePath,
1251
- severity: 'error',
1252
- code: 'parse_error',
1253
- });
1254
- continue;
1255
- }
1256
- processed++;
1257
- // Store in database on main thread (SQLite is not thread-safe)
1258
- if (result.nodes.length > 0 || result.errors.length === 0) {
1259
- const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1260
- this.storeExtractionResult(filePath, content, language, stats, result);
1261
- }
1262
- if (result.errors.length > 0) {
1263
- for (const err of result.errors) {
1264
- if (!err.filePath)
1265
- err.filePath = filePath;
1266
- }
1267
- errors.push(...result.errors);
1268
- }
1269
- if (result.nodes.length > 0) {
1270
- filesIndexed++;
1271
- totalNodes += result.nodes.length;
1272
- totalEdges += result.edges.length;
1273
- }
1274
- else if (result.errors.some((e) => e.severity === 'error')) {
1275
- filesErrored++;
1276
- }
1277
- else {
1278
- // Files with no symbols but no errors (yaml, twig, properties) are
1279
- // tracked at the file level — count them as indexed so the CLI
1280
- // doesn't misleadingly report "No files found to index".
1281
- const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1282
- if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
1283
- filesIndexed++;
1284
- }
1285
- else {
1286
- filesSkipped++;
1287
- }
1288
- }
1270
+ // Parse on the pool (main thread stays unblocked). Errors/timeouts are
1271
+ // handled inside feed() via recordParseFailure, feeding the retry pass.
1272
+ await feed(filePath, content, stats);
1289
1273
  }
1274
+ if (aborted)
1275
+ break;
1276
+ }
1277
+ // Drain parses still in flight (skip on abort — we tear down below instead),
1278
+ // then commit any results the cursor hasn't reached yet.
1279
+ if (!aborted) {
1280
+ await Promise.all(inFlight);
1281
+ flushOrdered();
1282
+ }
1283
+ if (signal?.aborted || aborted) {
1284
+ if (pool)
1285
+ await pool.destroy();
1286
+ return {
1287
+ success: false,
1288
+ filesIndexed,
1289
+ filesSkipped,
1290
+ filesErrored,
1291
+ nodesCreated: totalNodes,
1292
+ edgesCreated: totalEdges,
1293
+ errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
1294
+ durationMs: Date.now() - startTime,
1295
+ };
1290
1296
  }
1291
1297
  // Report 100% so the progress bar doesn't hang at 99%
1292
1298
  onProgress?.({
@@ -1303,15 +1309,17 @@ class ExtractionOrchestrator {
1303
1309
  // every file gets the absolute cleanest WASM state possible.
1304
1310
  const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
1305
1311
  (e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
1306
- if (retryableErrors.length > 0 && WorkerClass) {
1312
+ if (retryableErrors.length > 0 && pool) {
1307
1313
  log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
1314
+ // Fresh WASM heaps for the retry phase. A retry that still crashes its
1315
+ // worker makes the pool respawn it, so later retries keep landing on clean
1316
+ // workers too.
1317
+ pool.recycleAll();
1308
1318
  const stillFailing = [];
1309
1319
  for (const errEntry of retryableErrors) {
1310
1320
  const filePath = errEntry.filePath;
1311
1321
  if (signal?.aborted)
1312
1322
  break;
1313
- // Fresh worker for every retry — maximum WASM headroom
1314
- recycleWorker();
1315
1323
  let content;
1316
1324
  try {
1317
1325
  const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
@@ -1324,7 +1332,7 @@ class ExtractionOrchestrator {
1324
1332
  }
1325
1333
  let result;
1326
1334
  try {
1327
- result = await requestParse(filePath, content);
1335
+ result = await parseFile(filePath, content);
1328
1336
  }
1329
1337
  catch {
1330
1338
  stillFailing.push(errEntry);
@@ -1350,11 +1358,11 @@ class ExtractionOrchestrator {
1350
1358
  // code nodes but consume parser memory.
1351
1359
  if (stillFailing.length > 0) {
1352
1360
  log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
1361
+ pool.recycleAll();
1353
1362
  for (const errEntry of stillFailing) {
1354
1363
  const filePath = errEntry.filePath;
1355
1364
  if (signal?.aborted)
1356
1365
  break;
1357
- recycleWorker();
1358
1366
  let fullContent;
1359
1367
  try {
1360
1368
  const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
@@ -1373,7 +1381,7 @@ class ExtractionOrchestrator {
1373
1381
  .join('\n');
1374
1382
  let result;
1375
1383
  try {
1376
- result = await requestParse(filePath, stripped);
1384
+ result = await parseFile(filePath, stripped);
1377
1385
  }
1378
1386
  catch {
1379
1387
  continue;
@@ -1394,11 +1402,9 @@ class ExtractionOrchestrator {
1394
1402
  }
1395
1403
  }
1396
1404
  }
1397
- // Shut down parse worker and clear any pending timers
1398
- rejectAllPending('Indexing complete');
1399
- if (parseWorker) {
1400
- parseWorker.terminate().catch(() => { });
1401
- }
1405
+ // Shut down the parse worker pool.
1406
+ if (pool)
1407
+ await pool.destroy();
1402
1408
  return {
1403
1409
  success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
1404
1410
  filesIndexed,