@optave/codegraph 3.9.4 → 3.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/README.md +10 -10
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +3 -2
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/csharp.js +8 -1
  7. package/dist/ast-analysis/rules/csharp.js.map +1 -1
  8. package/dist/ast-analysis/rules/go.d.ts.map +1 -1
  9. package/dist/ast-analysis/rules/go.js +4 -1
  10. package/dist/ast-analysis/rules/go.js.map +1 -1
  11. package/dist/ast-analysis/rules/index.d.ts +6 -0
  12. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  13. package/dist/ast-analysis/rules/index.js +151 -4
  14. package/dist/ast-analysis/rules/index.js.map +1 -1
  15. package/dist/ast-analysis/rules/java.d.ts.map +1 -1
  16. package/dist/ast-analysis/rules/java.js +5 -1
  17. package/dist/ast-analysis/rules/java.js.map +1 -1
  18. package/dist/ast-analysis/rules/php.d.ts.map +1 -1
  19. package/dist/ast-analysis/rules/php.js +6 -1
  20. package/dist/ast-analysis/rules/php.js.map +1 -1
  21. package/dist/ast-analysis/rules/python.d.ts.map +1 -1
  22. package/dist/ast-analysis/rules/python.js +5 -1
  23. package/dist/ast-analysis/rules/python.js.map +1 -1
  24. package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
  25. package/dist/ast-analysis/rules/ruby.js +4 -1
  26. package/dist/ast-analysis/rules/ruby.js.map +1 -1
  27. package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
  28. package/dist/ast-analysis/rules/rust.js +5 -1
  29. package/dist/ast-analysis/rules/rust.js.map +1 -1
  30. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
  31. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  32. package/dist/ast-analysis/visitors/ast-store-visitor.js +129 -37
  33. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  34. package/dist/cli/commands/watch.d.ts.map +1 -1
  35. package/dist/cli/commands/watch.js +2 -0
  36. package/dist/cli/commands/watch.js.map +1 -1
  37. package/dist/cli.js +24 -1
  38. package/dist/cli.js.map +1 -1
  39. package/dist/domain/graph/builder/context.d.ts +2 -0
  40. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  41. package/dist/domain/graph/builder/context.js.map +1 -1
  42. package/dist/domain/graph/builder/helpers.d.ts +13 -2
  43. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  44. package/dist/domain/graph/builder/helpers.js +30 -4
  45. package/dist/domain/graph/builder/helpers.js.map +1 -1
  46. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  47. package/dist/domain/graph/builder/pipeline.js +141 -3
  48. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  49. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  50. package/dist/domain/graph/builder/stages/collect-files.js +58 -26
  51. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  52. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/detect-changes.js +54 -45
  54. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  56. package/dist/domain/graph/builder/stages/finalize.js +17 -0
  57. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  58. package/dist/domain/graph/journal.d.ts +15 -0
  59. package/dist/domain/graph/journal.d.ts.map +1 -1
  60. package/dist/domain/graph/journal.js +283 -28
  61. package/dist/domain/graph/journal.js.map +1 -1
  62. package/dist/domain/graph/watcher.d.ts +17 -0
  63. package/dist/domain/graph/watcher.d.ts.map +1 -1
  64. package/dist/domain/graph/watcher.js +23 -7
  65. package/dist/domain/graph/watcher.js.map +1 -1
  66. package/dist/domain/parser.d.ts +53 -4
  67. package/dist/domain/parser.d.ts.map +1 -1
  68. package/dist/domain/parser.js +278 -80
  69. package/dist/domain/parser.js.map +1 -1
  70. package/dist/domain/search/generator.d.ts.map +1 -1
  71. package/dist/domain/search/generator.js +28 -2
  72. package/dist/domain/search/generator.js.map +1 -1
  73. package/dist/domain/search/models.js +1 -1
  74. package/dist/domain/wasm-worker-entry.d.ts +24 -0
  75. package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
  76. package/dist/domain/wasm-worker-entry.js +644 -0
  77. package/dist/domain/wasm-worker-entry.js.map +1 -0
  78. package/dist/domain/wasm-worker-pool.d.ts +59 -0
  79. package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
  80. package/dist/domain/wasm-worker-pool.js +312 -0
  81. package/dist/domain/wasm-worker-pool.js.map +1 -0
  82. package/dist/domain/wasm-worker-protocol.d.ts +65 -0
  83. package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
  84. package/dist/domain/wasm-worker-protocol.js +13 -0
  85. package/dist/domain/wasm-worker-protocol.js.map +1 -0
  86. package/dist/extractors/javascript.js +146 -2
  87. package/dist/extractors/javascript.js.map +1 -1
  88. package/dist/features/ast.d.ts.map +1 -1
  89. package/dist/features/ast.js +11 -9
  90. package/dist/features/ast.js.map +1 -1
  91. package/dist/features/boundaries.d.ts +2 -2
  92. package/dist/features/boundaries.d.ts.map +1 -1
  93. package/dist/features/boundaries.js +2 -31
  94. package/dist/features/boundaries.js.map +1 -1
  95. package/dist/features/snapshot.d.ts.map +1 -1
  96. package/dist/features/snapshot.js +99 -13
  97. package/dist/features/snapshot.js.map +1 -1
  98. package/dist/graph/algorithms/louvain.d.ts.map +1 -1
  99. package/dist/graph/algorithms/louvain.js +2 -4
  100. package/dist/graph/algorithms/louvain.js.map +1 -1
  101. package/dist/infrastructure/config.d.ts.map +1 -1
  102. package/dist/infrastructure/config.js +12 -2
  103. package/dist/infrastructure/config.js.map +1 -1
  104. package/dist/shared/globs.d.ts +40 -0
  105. package/dist/shared/globs.d.ts.map +1 -0
  106. package/dist/shared/globs.js +126 -0
  107. package/dist/shared/globs.js.map +1 -0
  108. package/dist/types.d.ts +26 -1
  109. package/dist/types.d.ts.map +1 -1
  110. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  111. package/grammars/tree-sitter-erlang.wasm +0 -0
  112. package/package.json +7 -7
  113. package/src/ast-analysis/engine.ts +11 -1
  114. package/src/ast-analysis/rules/csharp.ts +8 -1
  115. package/src/ast-analysis/rules/go.ts +4 -1
  116. package/src/ast-analysis/rules/index.ts +181 -4
  117. package/src/ast-analysis/rules/java.ts +5 -1
  118. package/src/ast-analysis/rules/php.ts +6 -1
  119. package/src/ast-analysis/rules/python.ts +5 -1
  120. package/src/ast-analysis/rules/ruby.ts +4 -1
  121. package/src/ast-analysis/rules/rust.ts +5 -1
  122. package/src/ast-analysis/visitors/ast-store-visitor.ts +129 -34
  123. package/src/cli/commands/watch.ts +2 -0
  124. package/src/cli.ts +31 -8
  125. package/src/domain/graph/builder/context.ts +2 -0
  126. package/src/domain/graph/builder/helpers.ts +53 -3
  127. package/src/domain/graph/builder/pipeline.ts +162 -3
  128. package/src/domain/graph/builder/stages/collect-files.ts +56 -26
  129. package/src/domain/graph/builder/stages/detect-changes.ts +57 -49
  130. package/src/domain/graph/builder/stages/finalize.ts +16 -0
  131. package/src/domain/graph/journal.ts +284 -27
  132. package/src/domain/graph/watcher.ts +29 -9
  133. package/src/domain/parser.ts +288 -73
  134. package/src/domain/search/generator.ts +34 -2
  135. package/src/domain/search/models.ts +1 -1
  136. package/src/domain/wasm-worker-entry.ts +798 -0
  137. package/src/domain/wasm-worker-pool.ts +330 -0
  138. package/src/domain/wasm-worker-protocol.ts +81 -0
  139. package/src/extractors/javascript.ts +149 -2
  140. package/src/features/ast.ts +22 -9
  141. package/src/features/boundaries.ts +2 -27
  142. package/src/features/snapshot.ts +93 -14
  143. package/src/graph/algorithms/louvain.ts +2 -4
  144. package/src/infrastructure/config.ts +12 -2
  145. package/src/shared/globs.ts +121 -0
  146. package/src/types.ts +26 -1
@@ -0,0 +1,330 @@
1
+ /**
2
+ * WASM parse worker pool with crash recovery.
3
+ *
4
+ * The WASM grammar can trigger uncatchable V8 fatal errors (#965) that kill
5
+ * whichever thread is running it. Running parses in a worker_thread means the
6
+ * crash kills only the worker — the pool detects the exit, marks the in-flight
7
+ * file as skipped, respawns the worker, and continues with the rest.
8
+ *
9
+ * This is a single-worker pool; dispatch is sequential. Multi-worker parallelism
10
+ * is a future optimization — correctness of crash isolation does not depend on
11
+ * it. Sequential dispatch also simplifies attribution of a crash to a single
12
+ * "in-flight" file.
13
+ */
14
+
15
+ import fs from 'node:fs';
16
+ import path from 'node:path';
17
+ import { fileURLToPath, pathToFileURL } from 'node:url';
18
+ import { Worker } from 'node:worker_threads';
19
+ import { debug, warn } from '../infrastructure/logger.js';
20
+ import type { ASTNodeRow, ExtractorOutput, TypeMapEntry } from '../types.js';
21
+ import type {
22
+ SerializedExtractorOutput,
23
+ WorkerAnalysisOpts,
24
+ WorkerRequest,
25
+ WorkerResponse,
26
+ } from './wasm-worker-protocol.js';
27
+
28
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
29
+
30
/**
 * Locate the compiled worker entry script (`wasm-worker-entry.js`).
 *
 * The worker is always started from compiled JavaScript. Design rationale
 * carried over from the original note: the worker_threads loader does not
 * apply vitest/ts-node transforms or rewrite `.js` specifiers to `.ts`, so the
 * worker's relative `.js` imports would not resolve from inside `src/`.
 *
 * Lookup order:
 *   1. `wasm-worker-entry.js` next to this module (the dist build).
 *   2. `<package root>/dist/domain/wasm-worker-entry.js`, where the package
 *      root is whatever precedes the last `/src/` segment of this module's
 *      path (running from source, e.g. tests/dev).
 *
 * @returns A `file:` URL pointing at the first candidate that exists.
 * @throws Error when no candidate exists. The message lists every path that
 *   was actually probed so a missing build is easy to diagnose.
 */
function resolveWorkerEntry(): URL {
  const selfPath = fileURLToPath(import.meta.url);
  // Every location probed, in order — reported verbatim if nothing is found.
  const searched: string[] = [];

  // Fast path: sibling .js (dist build).
  const siblingJs = path.join(path.dirname(selfPath), 'wasm-worker-entry.js');
  searched.push(siblingJs);
  if (fs.existsSync(siblingJs)) return pathToFileURL(siblingJs);

  // Running from src/ — fall back to the compiled copy under dist/.
  const srcIdx = selfPath.lastIndexOf(`${path.sep}src${path.sep}`);
  if (srcIdx !== -1) {
    const repoRoot = selfPath.slice(0, srcIdx);
    const distJs = path.join(repoRoot, 'dist', 'domain', 'wasm-worker-entry.js');
    searched.push(distJs);
    if (fs.existsSync(distJs)) return pathToFileURL(distJs);
  }

  throw new Error(
    `wasm-worker-entry.js not found — run \`npm run build\` to generate dist/. Searched: ${searched.join(', ')}`,
  );
}
67
+
68
+ interface PendingJob {
69
+ id: number;
70
+ filePath: string;
71
+ code: string;
72
+ opts: WorkerAnalysisOpts;
73
+ resolve: (out: ExtractorOutput | null) => void;
74
+ /** setTimeout handle — fires if the worker hangs in a non-crashing loop. */
75
+ timeoutHandle: NodeJS.Timeout | null;
76
+ }
77
+
78
+ /**
79
+ * Per-file watchdog deadline. A parse that takes longer than this is assumed
80
+ * to be hung (e.g. WASM grammar stuck in an infinite loop rather than
81
+ * crashing). We terminate the worker, skip the file, and continue.
82
+ *
83
+ * 60s is comfortably above worst-case real parses seen in CI (~12s for the
84
+ * slowest fixture) while still giving the build a definite upper bound
85
+ * instead of stalling forever.
86
+ */
87
+ const WORKER_PARSE_TIMEOUT_MS = 60_000;
88
+
89
/**
 * Convert the worker's wire-format result back into an `ExtractorOutput`.
 *
 * The only structural change is `typeMap`: it arrives as an array of
 * `[key, value]` tuples and is rebuilt into a `Map`. The optional fields
 * (`_langId`, `_lineCount`, `dataflow`, `astNodes`) are copied only when they
 * are not `undefined`, so absent fields stay absent on the result.
 *
 * @param ser Serialized output from the worker, or a falsy value.
 * @returns The rebuilt output, or `null` when `ser` is falsy.
 */
function deserializeResult(ser: SerializedExtractorOutput | null): ExtractorOutput | null {
  if (!ser) return null;

  const typeMap = new Map<string, TypeMapEntry>();
  for (const [key, entry] of ser.typeMap) {
    typeMap.set(key, entry);
  }

  const out: ExtractorOutput = {
    definitions: ser.definitions,
    calls: ser.calls,
    imports: ser.imports,
    classes: ser.classes,
    exports: ser.exports,
    typeMap,
  };

  const { _langId, _lineCount, dataflow, astNodes } = ser;
  if (_langId !== undefined) {
    out._langId = _langId;
  }
  if (_lineCount !== undefined) {
    out._lineCount = _lineCount;
  }
  if (dataflow !== undefined) {
    out.dataflow = dataflow;
  }
  if (astNodes !== undefined) {
    // Known type mismatch: `ExtractorOutput.astNodes` is declared as
    // ASTNodeRow[], while the wire format carries the simpler
    // {line, kind, name, text?, receiver?} shape — hence the double cast.
    out.astNodes = astNodes as unknown as ASTNodeRow[];
  }
  return out;
}
111
+
112
/**
 * Single-worker pool that runs WASM parses in a `worker_threads` Worker and
 * recovers when that worker exits or hangs.
 *
 * Jobs are dispatched strictly one at a time (`inFlight`), so a worker exit can
 * be attributed to exactly one file. A file whose job was in flight when the
 * worker exited, or whose parse hit the watchdog timeout, is recorded in
 * `crashedFiles` and answered with `null` on later `parse()` calls.
 */
export class WasmWorkerPool {
  /** Live worker thread; `null` before the first job and after every exit. */
  private worker: Worker | null = null;
  /** Id assigned to the next job; responses are matched to jobs by id. */
  private nextId = 1;
  /** Jobs waiting for the worker, in submission order. */
  private queue: PendingJob[] = [];
  /** The one job currently posted to the worker, if any. */
  private inFlight: PendingJob | null = null;
  /** Set by `dispose()`; a disposed pool answers every `parse()` with `null`. */
  private disposed = false;
  /** filePaths that already caused one worker crash — skipped rather than retried. */
  private crashedFiles = new Set<string>();
  /**
   * Tracks the id of the job whose timeout fired and triggered `terminate()`.
   * Node timers are delivered before poll-phase I/O, so `onTimeout` can fire in
   * the same loop iteration that already has the worker's response queued. In
   * that race, `onMessage` resolves the timed-out job and starts the next one
   * BEFORE `onExit` arrives for the earlier `terminate()` — so the `inFlight`
   * job `onExit` sees is the innocent next job, not the one that actually hung.
   * `onExit` uses this field to detect the mismatch and re-queue the new job
   * instead of silently discarding it.
   */
  private timedOutJobId: number | null = null;

  /**
   * Parse a single file via the worker.
   *
   * @param filePath Path of the file; also the key used for crash blacklisting.
   * @param code Source text sent to the worker.
   * @param opts Analysis switches forwarded to the worker unchanged.
   * @returns The fully pre-computed ExtractorOutput, or `null` if the pool is
   *   disposed, the file is blacklisted, the worker crashed/timed out on this
   *   file, or the worker reported a soft error. The promise rejects if the
   *   worker cannot be spawned (the spawn error propagates out of `pump()`).
   */
  parse(filePath: string, code: string, opts: WorkerAnalysisOpts): Promise<ExtractorOutput | null> {
    if (this.disposed) return Promise.resolve(null);
    if (this.crashedFiles.has(filePath)) return Promise.resolve(null);
    return new Promise((resolve) => {
      const job: PendingJob = {
        id: this.nextId++,
        filePath,
        code,
        opts,
        resolve,
        timeoutHandle: null,
      };
      this.queue.push(job);
      this.pump();
    });
  }

  /** Terminate the worker and drain pending jobs with null results. */
  async dispose(): Promise<void> {
    this.disposed = true;
    const pending = this.queue.splice(0);
    const inFlight = this.inFlight;
    // Clear state first so the 'exit' event raised by terminate() below finds
    // nothing in flight and nothing to re-pump.
    this.inFlight = null;
    this.timedOutJobId = null;
    for (const j of pending) j.resolve(null);
    if (inFlight) {
      if (inFlight.timeoutHandle) clearTimeout(inFlight.timeoutHandle);
      inFlight.resolve(null);
    }
    if (this.worker) {
      try {
        await this.worker.terminate();
      } catch (e: unknown) {
        debug(`WasmWorkerPool dispose: terminate failed: ${(e as Error).message}`);
      }
      this.worker = null;
    }
  }

  /**
   * Dispatch the next queued job if the pool is idle. No-op when disposed,
   * when a job is already in flight, or when the queue is empty.
   *
   * @throws Whatever `ensureWorker()` throws when the worker cannot be
   *   spawned. The in-flight slot is released first so the pool is not left
   *   wedged with a job that no worker will ever answer.
   */
  private pump(): void {
    if (this.disposed) return;
    if (this.inFlight) return;
    const next = this.queue.shift();
    if (!next) return;
    this.inFlight = next;
    let worker: Worker;
    try {
      worker = this.ensureWorker();
    } catch (e: unknown) {
      // Spawn failed: no worker exists, so neither 'message' nor 'exit' will
      // ever clear `inFlight`, and the watchdog has not been armed yet.
      // Release the slot before propagating; otherwise every later parse()
      // would queue behind this job and never settle.
      this.inFlight = null;
      throw e;
    }
    const req: WorkerRequest = {
      type: 'parse',
      id: next.id,
      filePath: next.filePath,
      code: next.code,
      opts: next.opts,
    };
    // Arm the hang watchdog BEFORE posting so we can't race a fast reply.
    next.timeoutHandle = setTimeout(() => this.onTimeout(next.id), WORKER_PARSE_TIMEOUT_MS);
    worker.postMessage(req);
  }

  /**
   * Called when the per-job watchdog fires. Terminate the worker so the
   * hang stops consuming CPU; `onExit` will then resolve the in-flight job
   * with `null` and blacklist the file via `crashedFiles`.
   */
  private onTimeout(jobId: number): void {
    const job = this.inFlight;
    if (!job || job.id !== jobId) return; // already resolved
    warn(
      `WASM worker parse timed out after ${WORKER_PARSE_TIMEOUT_MS}ms on ${job.filePath} — terminating worker and skipping file`,
    );
    this.crashedFiles.add(job.filePath);
    // Record which job we're terminating so onExit can distinguish this
    // terminate-induced exit from a crash on a different (innocent) job that
    // got pumped in between — see `timedOutJobId` field comment.
    this.timedOutJobId = jobId;
    const w = this.worker;
    if (w) {
      w.terminate().catch((e: unknown) => {
        debug(`WasmWorkerPool onTimeout: terminate failed: ${(e as Error).message}`);
      });
      // onExit will fire and clean up `inFlight` + resolve the job.
    }
  }

  /** Return the live worker, spawning it and wiring its events on first use. */
  private ensureWorker(): Worker {
    if (this.worker) return this.worker;
    const w = new Worker(resolveWorkerEntry());
    this.worker = w;
    w.on('message', (msg: WorkerResponse) => this.onMessage(msg));
    w.on('error', (err: unknown) => this.onError(err));
    w.on('exit', (code) => this.onExit(code));
    return w;
  }

  /**
   * Handle a worker response: settle the matching in-flight job and dispatch
   * the next one. Responses whose id does not match the in-flight job are
   * logged and ignored.
   */
  private onMessage(msg: WorkerResponse): void {
    const job = this.inFlight;
    if (!job || job.id !== msg.id) {
      debug(`WasmWorkerPool: stale or unmatched response id=${msg.id}`);
      return;
    }
    if (job.timeoutHandle) {
      clearTimeout(job.timeoutHandle);
      job.timeoutHandle = null;
    }
    // If a terminate() is pending for this same job (response + timeout raced
    // in the same loop tick — timers fire before poll-phase I/O), delay
    // pumping the next job until `onExit` runs. Otherwise the upcoming exit
    // would land on an innocent follow-up job. `onExit` clears
    // `timedOutJobId` and calls pump() itself once the worker is fully gone.
    const terminatePending = this.timedOutJobId === job.id;
    this.inFlight = null;
    if (msg.ok) {
      job.resolve(deserializeResult(msg.result));
    } else {
      warn(`WASM worker soft error on ${job.filePath}: ${msg.error}`);
      job.resolve(null);
    }
    if (!terminatePending) this.pump();
  }

  /** Log a worker 'error' event; all cleanup is left to the 'exit' handler. */
  private onError(err: unknown): void {
    // 'error' fires for uncaught exceptions inside the worker — not always fatal
    // (Node may still follow with 'exit'). Log and let 'exit' handle cleanup.
    const msg = err instanceof Error ? err.message : String(err);
    debug(`WASM worker 'error' event: ${msg}`);
  }

  /**
   * Handle worker exit. Drops the worker reference so the next `pump()`
   * respawns lazily, then takes one of three paths: nothing was in flight;
   * the in-flight job is not the one whose timeout caused the exit (re-queue
   * it); or the in-flight job is blamed (blacklist the file, resolve `null`).
   */
  private onExit(code: number): void {
    const crashed = this.inFlight;
    this.worker = null;
    const timedOutJobId = this.timedOutJobId;
    this.timedOutJobId = null;
    if (!crashed) {
      // Clean exit with no in-flight job — e.g. shutdown, or the race where
      // `onMessage` already resolved the timed-out job (and deferred pump()
      // because a terminate was in flight). Nothing to crash; just pump the
      // queue so any waiting jobs get dispatched on a fresh worker.
      if (code !== 0) {
        debug(`WASM worker exited with code ${code}, no job in flight`);
      }
      if (timedOutJobId !== null) this.pump();
      return;
    }
    if (timedOutJobId !== null && crashed.id !== timedOutJobId) {
      // Defensive: a terminate() we issued for a different (earlier) job is
      // what triggered this exit, but somehow an innocent follow-up job ended
      // up in-flight. `onMessage` normally defers pumping when a terminate is
      // pending, so this path should not trigger — but if it does, re-queue
      // the follow-up rather than silently discarding a valid parse.
      if (crashed.timeoutHandle) {
        clearTimeout(crashed.timeoutHandle);
        crashed.timeoutHandle = null;
      }
      this.inFlight = null;
      this.queue.unshift(crashed);
      this.pump();
      return;
    }
    if (crashed.timeoutHandle) {
      clearTimeout(crashed.timeoutHandle);
      crashed.timeoutHandle = null;
    }
    this.inFlight = null;
    if (code === 0) {
      // Clean exit mid-job — could be our own terminate() from onTimeout,
      // or an unexpected worker shutdown. In either case the file is
      // skipped (crashedFiles was already set in onTimeout if that was the cause).
      warn(`WASM worker exited cleanly mid-job on ${crashed.filePath} — skipping`);
    } else {
      warn(
        `WASM worker crashed (exit ${code}) parsing ${crashed.filePath} — skipping file and restarting worker`,
      );
    }
    this.crashedFiles.add(crashed.filePath);
    crashed.resolve(null);
    // Respawn lazily on the next pump()
    this.pump();
  }
}
315
+
316
/** Lazily created pool singleton; `null` until first use and after disposal. */
let _sharedPool: WasmWorkerPool | null = null;

/**
 * Return the shared pool for this process, creating it on first call.
 * Repeated calls return the same instance until it is disposed.
 */
export function getWasmWorkerPool(): WasmWorkerPool {
  if (_sharedPool === null) {
    _sharedPool = new WasmWorkerPool();
  }
  return _sharedPool;
}

/**
 * Dispose the shared pool, if one exists (used by tests + `disposeParsers`).
 * The singleton slot is cleared before awaiting disposal, so a concurrent
 * `getWasmWorkerPool()` call gets a fresh pool rather than the dying one.
 */
export async function disposeWasmWorkerPool(): Promise<void> {
  const pool = _sharedPool;
  if (pool === null) return;
  _sharedPool = null;
  await pool.dispose();
}
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Message protocol between the main thread and the WASM parse worker.
3
+ *
4
+ * The worker owns every tree-sitter WASM call. Fatal V8 aborts from the
5
+ * grammar (#965) kill only the worker — the main thread respawns it and
6
+ * skips the file that crashed.
7
+ *
8
+ * The worker returns fully pre-computed ExtractorOutput — matching what the
9
+ * native engine's parseFilesFull emits — so the main thread never holds a
10
+ * live Tree. The `_tree` field is never populated by this pipeline.
11
+ */
12
+
13
+ import type {
14
+ Call,
15
+ ClassRelation,
16
+ DataflowResult,
17
+ Definition,
18
+ Export,
19
+ Import,
20
+ LanguageId,
21
+ TypeMapEntry,
22
+ } from '../types.js';
23
+
24
+ export interface WorkerAnalysisOpts {
25
+ ast: boolean;
26
+ complexity: boolean;
27
+ cfg: boolean;
28
+ dataflow: boolean;
29
+ }
30
+
31
+ export interface WorkerParseRequest {
32
+ type: 'parse';
33
+ id: number;
34
+ filePath: string;
35
+ code: string;
36
+ opts: WorkerAnalysisOpts;
37
+ }
38
+
39
+ export type WorkerRequest = WorkerParseRequest;
40
+
41
+ /**
42
+ * Serialized ExtractorOutput shape. Identical to ExtractorOutput except:
43
+ * - `_tree` is never set (cannot cross worker boundary).
44
+ * - `typeMap` is encoded as an array of [key, value] tuples. Structured
45
+ * clone supports Map natively in Node 22, but the tuple form keeps the
46
+ * wire format language-agnostic and matches the native engine's form.
47
+ */
48
+ export interface SerializedExtractorOutput {
49
+ definitions: Definition[];
50
+ calls: Call[];
51
+ imports: Import[];
52
+ classes: ClassRelation[];
53
+ exports: Export[];
54
+ typeMap: Array<[string, TypeMapEntry]>;
55
+ _langId?: LanguageId;
56
+ _lineCount?: number;
57
+ dataflow?: DataflowResult;
58
+ astNodes?: Array<{
59
+ line: number;
60
+ kind: string;
61
+ name: string;
62
+ text?: string;
63
+ receiver?: string;
64
+ }>;
65
+ }
66
+
67
+ export interface WorkerParseResponseOk {
68
+ type: 'result';
69
+ id: number;
70
+ ok: true;
71
+ result: SerializedExtractorOutput | null;
72
+ }
73
+
74
+ export interface WorkerParseResponseErr {
75
+ type: 'result';
76
+ id: number;
77
+ ok: false;
78
+ error: string;
79
+ }
80
+
81
+ export type WorkerResponse = WorkerParseResponseOk | WorkerParseResponseErr;
@@ -1182,7 +1182,11 @@ function handleVarDeclaratorTypeMap(
1182
1182
  const obj = fn.childForFieldName('object');
1183
1183
  if (obj && obj.type === 'identifier') {
1184
1184
  const objName = obj.text;
1185
- if (objName[0]! !== objName[0]!.toLowerCase() && !BUILTIN_GLOBALS.has(objName)) {
1185
+ if (
1186
+ objName[0] &&
1187
+ objName[0] !== objName[0].toLowerCase() &&
1188
+ !BUILTIN_GLOBALS.has(objName)
1189
+ ) {
1186
1190
  setTypeMapEntry(typeMap, nameN.text, objName, 0.7);
1187
1191
  }
1188
1192
  }
@@ -1277,16 +1281,159 @@ function extractSubscriptCallInfo(fn: TreeSitterNode, callNode: TreeSitterNode):
1277
1281
  return null;
1278
1282
  }
1279
1283
 
1284
+ /**
1285
+ * Callee names that idiomatically accept callback references. Used to gate
1286
+ * member_expression args in {@link extractCallbackReferenceCalls}: arguments
1287
+ * like `user.id` are only emitted as dynamic callback calls when the callee
1288
+ * is a known callback-accepting API (router/middleware, promises, array
1289
+ * methods, event emitters, scheduling APIs). This avoids false positives
1290
+ * from plain property reads passed as data, e.g. `store.set(user.id, user)`.
1291
+ *
1292
+ * Identifier args (e.g. `router.use(handleToken)`) are always emitted — the
1293
+ * collateral damage of dropping them is larger than the FP risk, since plain
1294
+ * identifier data args rarely collide with real function names.
1295
+ */
1296
+ const CALLBACK_ACCEPTING_CALLEES: ReadonlySet<string> = new Set([
1297
+ // Express / router / middleware
1298
+ 'use',
1299
+ 'get',
1300
+ 'post',
1301
+ 'put',
1302
+ 'delete',
1303
+ 'patch',
1304
+ 'options',
1305
+ 'head',
1306
+ 'all',
1307
+ // Promises
1308
+ 'then',
1309
+ 'catch',
1310
+ 'finally',
1311
+ // Array iteration / reduction
1312
+ 'map',
1313
+ 'filter',
1314
+ 'forEach',
1315
+ 'find',
1316
+ 'findIndex',
1317
+ 'findLast',
1318
+ 'findLastIndex',
1319
+ 'some',
1320
+ 'every',
1321
+ 'reduce',
1322
+ 'reduceRight',
1323
+ 'flatMap',
1324
+ 'sort',
1325
+ // Event emitters / DOM
1326
+ 'on',
1327
+ 'once',
1328
+ 'off',
1329
+ 'addListener',
1330
+ 'removeListener',
1331
+ 'addEventListener',
1332
+ 'removeEventListener',
1333
+ 'subscribe',
1334
+ 'unsubscribe',
1335
+ // Scheduling / plain function callbacks
1336
+ 'setTimeout',
1337
+ 'setInterval',
1338
+ 'setImmediate',
1339
+ 'queueMicrotask',
1340
+ 'requestAnimationFrame',
1341
+ 'requestIdleCallback',
1342
+ 'nextTick',
1343
+ // Commander / yargs / hooks
1344
+ 'action',
1345
+ 'command',
1346
+ ]);
1347
+
1348
+ /**
1349
+ * HTTP-verb callees that double as Map/cache/repository method names (`get`,
1350
+ * `post`, `put`, `delete`, `patch`, `options`, `head`, `all`). Express/router
1351
+ * invocations always take a string-literal route path as the first argument
1352
+ * (`app.get('/path', handler)`), whereas Map-like APIs pass values/keys
1353
+ * (`cache.get(user.id)`). Requiring a string-literal first arg keeps real
1354
+ * route handlers covered while dropping the Map/cache false-positive surface.
1355
+ *
1356
+ * `use` and `all` without a path are legitimate middleware registrations, so
1357
+ * `use` is intentionally excluded here — it stays in the general allowlist.
1358
+ */
1359
+ const HTTP_VERB_CALLEES: ReadonlySet<string> = new Set([
1360
+ 'get',
1361
+ 'post',
1362
+ 'put',
1363
+ 'delete',
1364
+ 'patch',
1365
+ 'options',
1366
+ 'head',
1367
+ 'all',
1368
+ ]);
1369
+
1370
/**
 * Return the trailing name of a call's callee, for callback-eligibility checks.
 *
 * - `foo(...)`        → `foo` (callee is an `identifier`)
 * - `obj.method(...)` → `method` (the `property` field of a `member_expression`)
 *
 * Any other callee shape (computed subscripts, IIFEs, ...) yields `null`, as
 * does a call node without a `function` field or a member expression without
 * a `property` field.
 *
 * Note carried over from the original: optional chaining
 * (`obj?.method(...)`) is expected to need no special handling because the
 * tree-sitter JavaScript/TypeScript grammars still parse `obj?.method` as a
 * `member_expression`.
 */
function extractCalleeName(callNode: TreeSitterNode): string | null {
  const callee = callNode.childForFieldName('function');
  if (!callee) return null;

  switch (callee.type) {
    case 'identifier':
      return callee.text;
    case 'member_expression': {
      const property = callee.childForFieldName('property');
      if (!property) return null;
      return property.text;
    }
    default:
      return null;
  }
}
1390
+
1391
/**
 * True iff the first real argument of an arguments node is a string literal
 * (`string` or `template_string`).
 * Used to distinguish Express/router route handlers (`app.get('/path', h)`)
 * from Map/cache APIs that reuse the same verb names (`cache.get(user.id)`).
 *
 * Punctuation tokens (`(`, `,`, `)`) and `comment` nodes are skipped, so a
 * leading comment such as `app.get(/* route *\/ '/path', h)` does not hide the
 * route path.
 *
 * @param argsNode The call's `arguments` node.
 * @returns `false` when there are no arguments or the first one is not a
 *   string literal.
 */
function firstArgIsStringLiteral(argsNode: TreeSitterNode): boolean {
  for (let i = 0; i < argsNode.childCount; i++) {
    const child = argsNode.child(i);
    if (!child) continue;
    const kind = child.type;
    // Skip parens, commas and comments; the first remaining child is the first arg.
    if (kind === '(' || kind === ',' || kind === ')' || kind === 'comment') continue;
    return kind === 'string' || kind === 'template_string';
  }
  return false;
}
1406
+
1280
1407
  /**
1281
1408
  * Extract Call entries for named function references passed as arguments.
1282
1409
  * e.g. `router.use(handleToken, checkAuth)` yields calls to handleToken and checkAuth.
1283
1410
  * `app.use(auth.validate)` yields a call to validate with receiver auth.
1284
1411
  * Skips literals, objects, arrays, anonymous functions, and call expressions (already handled).
1412
+ *
1413
+ * To avoid false positives where plain property reads are passed as data
1414
+ * (e.g. `store.set(user.id, user)` — `user.id` is a value, not a callback),
1415
+ * member_expression args are only emitted when the callee is in
1416
+ * {@link CALLBACK_ACCEPTING_CALLEES}. Identifier args are always emitted.
1417
+ *
1418
+ * HTTP-verb callees (`get`, `post`, `put`, `delete`, `patch`, `options`,
1419
+ * `head`, `all`) double as Map/cache/repository method names, so their
1420
+ * member-expr args are only emitted when the first argument is a string
1421
+ * literal route path — matching Express/router shape and skipping
1422
+ * `cache.get(user.id)`-style calls.
1285
1423
  */
1286
1424
  function extractCallbackReferenceCalls(callNode: TreeSitterNode): Call[] {
1287
1425
  const args = callNode.childForFieldName('arguments') || findChild(callNode, 'arguments');
1288
1426
  if (!args) return [];
1289
1427
 
1428
+ const calleeName = extractCalleeName(callNode);
1429
+ let memberExprArgsAllowed = calleeName !== null && CALLBACK_ACCEPTING_CALLEES.has(calleeName);
1430
+ if (memberExprArgsAllowed && calleeName !== null && HTTP_VERB_CALLEES.has(calleeName)) {
1431
+ // HTTP verbs require a string-literal route path to be treated as a
1432
+ // callback-accepting API; otherwise `cache.get(user.id)` etc. would
1433
+ // still emit `id` as a dynamic call.
1434
+ memberExprArgsAllowed = firstArgIsStringLiteral(args);
1435
+ }
1436
+
1290
1437
  const result: Call[] = [];
1291
1438
  const callLine = callNode.startPosition.row + 1;
1292
1439
 
@@ -1296,7 +1443,7 @@ function extractCallbackReferenceCalls(callNode: TreeSitterNode): Call[] {
1296
1443
 
1297
1444
  if (child.type === 'identifier') {
1298
1445
  result.push({ name: child.text, line: callLine, dynamic: true });
1299
- } else if (child.type === 'member_expression') {
1446
+ } else if (child.type === 'member_expression' && memberExprArgsAllowed) {
1300
1447
  const prop = child.childForFieldName('property');
1301
1448
  const obj = child.childForFieldName('object');
1302
1449
  if (prop) {
@@ -1,5 +1,9 @@
1
1
  import path from 'node:path';
2
- import { AST_TYPE_MAPS } from '../ast-analysis/rules/index.js';
2
+ import {
3
+ AST_STRING_CONFIGS,
4
+ AST_TYPE_MAPS,
5
+ astStopRecurseKinds,
6
+ } from '../ast-analysis/rules/index.js';
3
7
  import { buildExtensionSet } from '../ast-analysis/shared.js';
4
8
  import { walkWithVisitors } from '../ast-analysis/visitor.js';
5
9
  import { createAstStoreVisitor } from '../ast-analysis/visitors/ast-store-visitor.js';
@@ -22,8 +26,6 @@ const KIND_ICONS: Record<string, string> = {
22
26
  await: '\u22B3', // ⊳
23
27
  };
24
28
 
25
- const JS_TS_AST_TYPES = AST_TYPE_MAPS.get('javascript');
26
-
27
29
  const WALK_EXTENSIONS = buildExtensionSet(AST_TYPE_MAPS);
28
30
 
29
31
  // ─── Helpers ──────────────────────────────────────────────────────────
@@ -171,9 +173,10 @@ function collectFileAstRows(
171
173
 
172
174
  // WASM fallback — walk tree if available
173
175
  const ext = path.extname(relPath).toLowerCase();
174
- if (WALK_EXTENSIONS.has(ext) && symbols._tree) {
176
+ const langId = symbols._langId || '';
177
+ if ((WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(langId)) && symbols._tree) {
175
178
  const rows: AstRow[] = [];
176
- walkAst(symbols._tree.rootNode, defs, relPath, rows, nodeIdMap);
179
+ walkAst(symbols._tree.rootNode, defs, relPath, rows, nodeIdMap, langId);
177
180
  return rows;
178
181
  }
179
182
 
@@ -226,13 +229,23 @@ function walkAst(
226
229
  relPath: string,
227
230
  rows: AstRow[],
228
231
  nodeIdMap: Map<string, number>,
232
+ langId: string,
229
233
  ): void {
230
- if (!JS_TS_AST_TYPES) {
231
- debug('ast-store: JS_TS_AST_TYPES not available — skipping walk');
234
+ const astTypeMap = AST_TYPE_MAPS.get(langId);
235
+ if (!astTypeMap) {
236
+ debug(`ast-store: no astTypes for langId=${langId} — skipping walk`);
232
237
  return;
233
238
  }
234
- const visitor = createAstStoreVisitor(JS_TS_AST_TYPES, defs, relPath, nodeIdMap);
235
- const results = walkWithVisitors(rootNode, [visitor], 'javascript');
239
+ const stringConfig = AST_STRING_CONFIGS.get(langId);
240
+ const visitor = createAstStoreVisitor(
241
+ astTypeMap,
242
+ defs,
243
+ relPath,
244
+ nodeIdMap,
245
+ stringConfig,
246
+ astStopRecurseKinds(langId),
247
+ );
248
+ const results = walkWithVisitors(rootNode, [visitor], langId);
236
249
  const collected = (results['ast-store'] || []) as AstRow[];
237
250
  rows.push(...collected);
238
251
  }
@@ -1,34 +1,9 @@
1
1
  import { isTestFile } from '../infrastructure/test-filter.js';
2
2
  import { BoundaryError } from '../shared/errors.js';
3
+ import { globToRegex } from '../shared/globs.js';
3
4
  import type { BetterSqlite3Database } from '../types.js';
4
5
 
5
- // ─── Glob-to-Regex ───────────────────────────────────────────────────
6
-
7
- export function globToRegex(pattern: string): RegExp {
8
- let re = '';
9
- let i = 0;
10
- while (i < pattern.length) {
11
- const ch = pattern[i] as string;
12
- if (ch === '*' && pattern[i + 1] === '*') {
13
- re += '.*';
14
- i += 2;
15
- if (pattern[i] === '/') i++;
16
- } else if (ch === '*') {
17
- re += '[^/]*';
18
- i++;
19
- } else if (ch === '?') {
20
- re += '[^/]';
21
- i++;
22
- } else if (/[.+^${}()|[\]\\]/.test(ch)) {
23
- re += `\\${ch}`;
24
- i++;
25
- } else {
26
- re += ch;
27
- i++;
28
- }
29
- }
30
- return new RegExp(`^${re}$`);
31
- }
6
+ export { globToRegex };
32
7
 
33
8
  // ─── Presets ─────────────────────────────────────────────────────────
34
9