@optave/codegraph 3.9.4 → 3.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/README.md +10 -10
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +3 -2
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/csharp.js +8 -1
  7. package/dist/ast-analysis/rules/csharp.js.map +1 -1
  8. package/dist/ast-analysis/rules/go.d.ts.map +1 -1
  9. package/dist/ast-analysis/rules/go.js +4 -1
  10. package/dist/ast-analysis/rules/go.js.map +1 -1
  11. package/dist/ast-analysis/rules/index.d.ts +6 -0
  12. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  13. package/dist/ast-analysis/rules/index.js +151 -4
  14. package/dist/ast-analysis/rules/index.js.map +1 -1
  15. package/dist/ast-analysis/rules/java.d.ts.map +1 -1
  16. package/dist/ast-analysis/rules/java.js +5 -1
  17. package/dist/ast-analysis/rules/java.js.map +1 -1
  18. package/dist/ast-analysis/rules/php.d.ts.map +1 -1
  19. package/dist/ast-analysis/rules/php.js +6 -1
  20. package/dist/ast-analysis/rules/php.js.map +1 -1
  21. package/dist/ast-analysis/rules/python.d.ts.map +1 -1
  22. package/dist/ast-analysis/rules/python.js +5 -1
  23. package/dist/ast-analysis/rules/python.js.map +1 -1
  24. package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
  25. package/dist/ast-analysis/rules/ruby.js +4 -1
  26. package/dist/ast-analysis/rules/ruby.js.map +1 -1
  27. package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
  28. package/dist/ast-analysis/rules/rust.js +5 -1
  29. package/dist/ast-analysis/rules/rust.js.map +1 -1
  30. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
  31. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  32. package/dist/ast-analysis/visitors/ast-store-visitor.js +129 -37
  33. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  34. package/dist/cli/commands/watch.d.ts.map +1 -1
  35. package/dist/cli/commands/watch.js +2 -0
  36. package/dist/cli/commands/watch.js.map +1 -1
  37. package/dist/cli.js +24 -1
  38. package/dist/cli.js.map +1 -1
  39. package/dist/domain/graph/builder/context.d.ts +2 -0
  40. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  41. package/dist/domain/graph/builder/context.js.map +1 -1
  42. package/dist/domain/graph/builder/helpers.d.ts +13 -2
  43. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  44. package/dist/domain/graph/builder/helpers.js +30 -4
  45. package/dist/domain/graph/builder/helpers.js.map +1 -1
  46. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  47. package/dist/domain/graph/builder/pipeline.js +141 -3
  48. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  49. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  50. package/dist/domain/graph/builder/stages/collect-files.js +58 -26
  51. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  52. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/detect-changes.js +54 -45
  54. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  56. package/dist/domain/graph/builder/stages/finalize.js +17 -0
  57. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  58. package/dist/domain/graph/journal.d.ts +15 -0
  59. package/dist/domain/graph/journal.d.ts.map +1 -1
  60. package/dist/domain/graph/journal.js +283 -28
  61. package/dist/domain/graph/journal.js.map +1 -1
  62. package/dist/domain/graph/watcher.d.ts +17 -0
  63. package/dist/domain/graph/watcher.d.ts.map +1 -1
  64. package/dist/domain/graph/watcher.js +23 -7
  65. package/dist/domain/graph/watcher.js.map +1 -1
  66. package/dist/domain/parser.d.ts +53 -4
  67. package/dist/domain/parser.d.ts.map +1 -1
  68. package/dist/domain/parser.js +278 -80
  69. package/dist/domain/parser.js.map +1 -1
  70. package/dist/domain/search/generator.d.ts.map +1 -1
  71. package/dist/domain/search/generator.js +28 -2
  72. package/dist/domain/search/generator.js.map +1 -1
  73. package/dist/domain/search/models.js +1 -1
  74. package/dist/domain/wasm-worker-entry.d.ts +24 -0
  75. package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
  76. package/dist/domain/wasm-worker-entry.js +644 -0
  77. package/dist/domain/wasm-worker-entry.js.map +1 -0
  78. package/dist/domain/wasm-worker-pool.d.ts +59 -0
  79. package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
  80. package/dist/domain/wasm-worker-pool.js +312 -0
  81. package/dist/domain/wasm-worker-pool.js.map +1 -0
  82. package/dist/domain/wasm-worker-protocol.d.ts +65 -0
  83. package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
  84. package/dist/domain/wasm-worker-protocol.js +13 -0
  85. package/dist/domain/wasm-worker-protocol.js.map +1 -0
  86. package/dist/extractors/javascript.js +146 -2
  87. package/dist/extractors/javascript.js.map +1 -1
  88. package/dist/features/ast.d.ts.map +1 -1
  89. package/dist/features/ast.js +11 -9
  90. package/dist/features/ast.js.map +1 -1
  91. package/dist/features/boundaries.d.ts +2 -2
  92. package/dist/features/boundaries.d.ts.map +1 -1
  93. package/dist/features/boundaries.js +2 -31
  94. package/dist/features/boundaries.js.map +1 -1
  95. package/dist/features/snapshot.d.ts.map +1 -1
  96. package/dist/features/snapshot.js +99 -13
  97. package/dist/features/snapshot.js.map +1 -1
  98. package/dist/graph/algorithms/louvain.d.ts.map +1 -1
  99. package/dist/graph/algorithms/louvain.js +2 -4
  100. package/dist/graph/algorithms/louvain.js.map +1 -1
  101. package/dist/infrastructure/config.d.ts.map +1 -1
  102. package/dist/infrastructure/config.js +12 -2
  103. package/dist/infrastructure/config.js.map +1 -1
  104. package/dist/shared/globs.d.ts +40 -0
  105. package/dist/shared/globs.d.ts.map +1 -0
  106. package/dist/shared/globs.js +126 -0
  107. package/dist/shared/globs.js.map +1 -0
  108. package/dist/types.d.ts +26 -1
  109. package/dist/types.d.ts.map +1 -1
  110. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  111. package/grammars/tree-sitter-erlang.wasm +0 -0
  112. package/package.json +7 -7
  113. package/src/ast-analysis/engine.ts +11 -1
  114. package/src/ast-analysis/rules/csharp.ts +8 -1
  115. package/src/ast-analysis/rules/go.ts +4 -1
  116. package/src/ast-analysis/rules/index.ts +181 -4
  117. package/src/ast-analysis/rules/java.ts +5 -1
  118. package/src/ast-analysis/rules/php.ts +6 -1
  119. package/src/ast-analysis/rules/python.ts +5 -1
  120. package/src/ast-analysis/rules/ruby.ts +4 -1
  121. package/src/ast-analysis/rules/rust.ts +5 -1
  122. package/src/ast-analysis/visitors/ast-store-visitor.ts +129 -34
  123. package/src/cli/commands/watch.ts +2 -0
  124. package/src/cli.ts +31 -8
  125. package/src/domain/graph/builder/context.ts +2 -0
  126. package/src/domain/graph/builder/helpers.ts +53 -3
  127. package/src/domain/graph/builder/pipeline.ts +162 -3
  128. package/src/domain/graph/builder/stages/collect-files.ts +56 -26
  129. package/src/domain/graph/builder/stages/detect-changes.ts +57 -49
  130. package/src/domain/graph/builder/stages/finalize.ts +16 -0
  131. package/src/domain/graph/journal.ts +284 -27
  132. package/src/domain/graph/watcher.ts +29 -9
  133. package/src/domain/parser.ts +288 -73
  134. package/src/domain/search/generator.ts +34 -2
  135. package/src/domain/search/models.ts +1 -1
  136. package/src/domain/wasm-worker-entry.ts +798 -0
  137. package/src/domain/wasm-worker-pool.ts +330 -0
  138. package/src/domain/wasm-worker-protocol.ts +81 -0
  139. package/src/extractors/javascript.ts +149 -2
  140. package/src/features/ast.ts +22 -9
  141. package/src/features/boundaries.ts +2 -27
  142. package/src/features/snapshot.ts +93 -14
  143. package/src/graph/algorithms/louvain.ts +2 -4
  144. package/src/infrastructure/config.ts +12 -2
  145. package/src/shared/globs.ts +121 -0
  146. package/src/types.ts +26 -1
@@ -7,7 +7,7 @@ import { DbError } from '../../shared/errors.js';
7
7
  import { createParseTreeCache, getActiveEngine } from '../parser.js';
8
8
  import { type IncrementalStmts, rebuildFile } from './builder/incremental.js';
9
9
  import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js';
10
- import { appendJournalEntries } from './journal.js';
10
+ import { appendJournalEntriesAndStampHeader } from './journal.js';
11
11
 
12
12
  function shouldIgnorePath(filePath: string): boolean {
13
13
  const parts = filePath.split(path.sep);
@@ -100,7 +100,7 @@ function writeJournalAndChangeEvents(rootDir: string, updates: RebuildResult[]):
100
100
  deleted: r.deleted || false,
101
101
  }));
102
102
  try {
103
- appendJournalEntries(rootDir, entries);
103
+ appendJournalEntriesAndStampHeader(rootDir, entries, Date.now());
104
104
  } catch (e: unknown) {
105
105
  debug(`Journal write failed (non-fatal): ${(e as Error).message}`);
106
106
  }
@@ -165,8 +165,8 @@ interface WatcherContext {
165
165
  }
166
166
 
167
167
  /** Initialize DB, engine, cache, and statements for watch mode. */
168
- function setupWatcher(rootDir: string, opts: { engine?: string }): WatcherContext {
169
- const dbPath = path.join(rootDir, '.codegraph', 'graph.db');
168
+ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string }): WatcherContext {
169
+ const dbPath = opts.dbPath ?? path.join(rootDir, '.codegraph', 'graph.db');
170
170
  if (!fs.existsSync(dbPath)) {
171
171
  throw new DbError('No graph.db found. Run `codegraph build` first.', { file: dbPath });
172
172
  }
@@ -274,17 +274,37 @@ function startNativeWatcher(ctx: WatcherContext): () => void {
274
274
  return () => watcher.close();
275
275
  }
276
276
 
277
+ /**
278
+ * Build journal entries for a pending-path set, detecting deletions by
279
+ * existence check.
280
+ *
281
+ * `ctx.pending` is an untyped `Set<string>` — it carries no event-type
282
+ * metadata. Without this check, a file deleted during the watch session
283
+ * would be journaled as "changed", causing the next incremental build to
284
+ * try to re-parse a non-existent file instead of removing it from the graph.
285
+ * Mirrors the deletion detection in `rebuildFile` (see builder/incremental.ts).
286
+ *
287
+ * Exported for unit-testing; prefer `setupShutdownHandler` in production paths.
288
+ */
289
+ export function buildFlushEntriesFromPending(
290
+ rootDir: string,
291
+ pending: Iterable<string>,
292
+ ): Array<{ file: string; deleted: boolean }> {
293
+ return [...pending].map((filePath) => ({
294
+ file: normalizePath(path.relative(rootDir, filePath)),
295
+ deleted: !fs.existsSync(filePath),
296
+ }));
297
+ }
298
+
277
299
  /** Register SIGINT handler to flush journal and clean up. */
278
300
  function setupShutdownHandler(ctx: WatcherContext, cleanup: () => void): void {
279
301
  process.once('SIGINT', () => {
280
302
  info('Stopping watcher...');
281
303
  cleanup();
282
304
  if (ctx.pending.size > 0) {
283
- const entries = [...ctx.pending].map((filePath) => ({
284
- file: normalizePath(path.relative(ctx.rootDir, filePath)),
285
- }));
305
+ const entries = buildFlushEntriesFromPending(ctx.rootDir, ctx.pending);
286
306
  try {
287
- appendJournalEntries(ctx.rootDir, entries);
307
+ appendJournalEntriesAndStampHeader(ctx.rootDir, entries, Date.now());
288
308
  } catch (e: unknown) {
289
309
  debug(`Journal flush on exit failed (non-fatal): ${(e as Error).message}`);
290
310
  }
@@ -297,7 +317,7 @@ function setupShutdownHandler(ctx: WatcherContext, cleanup: () => void): void {
297
317
 
298
318
  export async function watchProject(
299
319
  rootDir: string,
300
- opts: { engine?: string; poll?: boolean; pollInterval?: number } = {},
320
+ opts: { engine?: string; poll?: boolean; pollInterval?: number; dbPath?: string } = {},
301
321
  ): Promise<void> {
302
322
  const ctx = setupWatcher(rootDir, opts);
303
323
 
@@ -13,6 +13,24 @@ import type {
13
13
  LanguageRegistryEntry,
14
14
  TypeMapEntry,
15
15
  } from '../types.js';
16
+ import { disposeWasmWorkerPool, getWasmWorkerPool } from './wasm-worker-pool.js';
17
+ import type { WorkerAnalysisOpts } from './wasm-worker-protocol.js';
18
+
19
+ /** Default worker opts: run all analyses so output matches parseFilesFull. */
20
+ const FULL_ANALYSIS: WorkerAnalysisOpts = {
21
+ ast: true,
22
+ complexity: true,
23
+ cfg: true,
24
+ dataflow: true,
25
+ };
26
+
27
+ /** Extract-only opts: skip visitor walk for typeMap backfill / similar fast paths. */
28
+ const EXTRACT_ONLY: WorkerAnalysisOpts = {
29
+ ast: false,
30
+ complexity: false,
31
+ cfg: false,
32
+ dataflow: false,
33
+ };
16
34
 
17
35
  // Re-export all extractors for backward compatibility
18
36
  export {
@@ -262,7 +280,7 @@ function disposeMapEntries(entries: Iterable<[string, any]>, label: string): voi
262
280
  }
263
281
  }
264
282
 
265
- export function disposeParsers(): void {
283
+ export async function disposeParsers(): Promise<void> {
266
284
  if (_cachedParsers) {
267
285
  disposeMapEntries(_cachedParsers, 'parser');
268
286
  _cachedParsers = null;
@@ -276,6 +294,7 @@ export function disposeParsers(): void {
276
294
  _initialized = false;
277
295
  _allParsersLoaded = false;
278
296
  _loadingPromises.clear();
297
+ await disposeWasmWorkerPool();
279
298
  }
280
299
 
281
300
  export function getParser(parsers: Map<string, Parser | null>, filePath: string): Parser | null {
@@ -286,33 +305,33 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
286
305
  }
287
306
 
288
307
  /**
289
- * Pre-parse files missing `_tree` via WASM so downstream phases (CFG, dataflow)
290
- * don't each need to create parsers and re-parse independently.
291
- * Only parses files whose extension is in SUPPORTED_EXTENSIONS.
308
+ * Backfill missing AST-analysis data (astNodes, dataflow, def.complexity,
309
+ * def.cfg) via the WASM worker pool for files that were parsed by the native
310
+ * engine but are missing one or more analyses.
311
+ *
312
+ * Historically this function populated `symbols._tree` so the main-thread
313
+ * visitor walk in `ast-analysis/engine.ts` could run. After the worker-isolation
314
+ * refactor (#965), the worker runs every visitor itself and returns pre-computed
315
+ * analysis data — `_tree` is never set on the main thread.
316
+ *
317
+ * Name is preserved for caller compatibility; the function now ensures
318
+ * *analysis data* rather than *trees*.
292
319
  */
293
320
  export async function ensureWasmTrees(
294
321
  fileSymbols: Map<string, any>,
295
322
  rootDir: string,
296
323
  ): Promise<void> {
297
- // Single pass: collect absolute paths for files that need parsing
298
- const filePaths: string[] = [];
324
+ // Collect files that still need analysis data and are parseable by WASM.
325
+ const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
299
326
  for (const [relPath, symbols] of fileSymbols) {
300
- if (!symbols._tree && _extToLang.has(path.extname(relPath).toLowerCase())) {
301
- filePaths.push(path.join(rootDir, relPath));
302
- }
327
+ if (symbols._tree) continue; // legacy path — leave existing trees alone
328
+ if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
329
+ pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
303
330
  }
304
- if (filePaths.length === 0) return;
305
- const parsers = await ensureParsersForFiles(filePaths);
331
+ if (pending.length === 0) return;
306
332
 
307
- for (const [relPath, symbols] of fileSymbols) {
308
- if (symbols._tree) continue;
309
- const ext = path.extname(relPath).toLowerCase();
310
- const entry = _extToLang.get(ext);
311
- if (!entry) continue;
312
- const parser = parsers.get(entry.id);
313
- if (!parser) continue;
314
-
315
- const absPath = path.join(rootDir, relPath);
333
+ const pool = getWasmWorkerPool();
334
+ for (const { relPath, absPath, symbols } of pending) {
316
335
  let code: string;
317
336
  try {
318
337
  code = fs.readFileSync(absPath, 'utf-8');
@@ -320,11 +339,45 @@ export async function ensureWasmTrees(
320
339
  debug(`ensureWasmTrees: cannot read ${relPath}: ${(e as Error).message}`);
321
340
  continue;
322
341
  }
323
- try {
324
- symbols._tree = parser.parse(code);
325
- symbols._langId = entry.id;
326
- } catch (e: unknown) {
327
- debug(`ensureWasmTrees: parse failed for ${relPath}: ${(e as Error).message}`);
342
+ const output = await pool.parse(absPath, code, FULL_ANALYSIS);
343
+ if (!output) continue; // worker crashed or returned null — skip silently
344
+ mergeAnalysisData(symbols, output);
345
+ }
346
+ }
347
+
348
+ /**
349
+ * Merge pre-computed analysis data from a worker result onto existing symbols.
350
+ * Only fills gaps — never overwrites fields the caller already populated.
351
+ * Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
352
+ * per-definition complexity and cfg.
353
+ */
354
+ function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
355
+ if (!symbols._langId && worker._langId) symbols._langId = worker._langId;
356
+ if (!symbols._lineCount && worker._lineCount) symbols._lineCount = worker._lineCount;
357
+ if (!Array.isArray(symbols.astNodes) && Array.isArray(worker.astNodes)) {
358
+ symbols.astNodes = worker.astNodes;
359
+ }
360
+ if (!symbols.dataflow && worker.dataflow) symbols.dataflow = worker.dataflow;
361
+ if (worker.typeMap && worker.typeMap.size > 0) {
362
+ if (!symbols.typeMap || !(symbols.typeMap instanceof Map)) {
363
+ symbols.typeMap = new Map(worker.typeMap);
364
+ } else {
365
+ for (const [k, v] of worker.typeMap) {
366
+ if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
367
+ }
368
+ }
369
+ }
370
+ const existingDefs: any[] = Array.isArray(symbols.definitions) ? symbols.definitions : [];
371
+ const workerDefs: any[] = Array.isArray(worker.definitions) ? worker.definitions : [];
372
+ // Index existing defs by (kind, name, line) — mirrors engine.ts matching key.
373
+ const byKey = new Map<string, any>();
374
+ for (const d of existingDefs) byKey.set(`${d.kind}|${d.name}|${d.line}`, d);
375
+ for (const wd of workerDefs) {
376
+ const existing = byKey.get(`${wd.kind}|${wd.name}|${wd.line}`);
377
+ if (!existing) continue;
378
+ if (!existing.complexity && wd.complexity) existing.complexity = wd.complexity;
379
+ if ((!existing.cfg || !Array.isArray(existing.cfg.blocks)) && wd.cfg?.blocks) {
380
+ existing.cfg = wd.cfg;
328
381
  }
329
382
  }
330
383
  }
@@ -338,6 +391,149 @@ export function isWasmAvailable(): boolean {
338
391
  );
339
392
  }
340
393
 
394
+ /**
395
+ * Return the set of lowercase file extensions whose WASM grammar is actually
396
+ * installed on disk. Used to scope engine-parity backfill to files that WASM
397
+ * can recover — languages without an installed grammar are skipped by both
398
+ * engines, so they don't represent a native-engine drop.
399
+ *
400
+ * Cached on first call; the grammars directory is shipped immutable.
401
+ */
402
+ let _installedWasmExts: Set<string> | null = null;
403
+ export function getInstalledWasmExtensions(): Set<string> {
404
+ if (_installedWasmExts) return _installedWasmExts;
405
+ const exts = new Set<string>();
406
+ for (const entry of LANGUAGE_REGISTRY) {
407
+ if (fs.existsSync(grammarPath(entry.grammarFile))) {
408
+ for (const ext of entry.extensions) exts.add(ext.toLowerCase());
409
+ }
410
+ }
411
+ _installedWasmExts = exts;
412
+ return exts;
413
+ }
414
+
415
+ /**
416
+ * Lowercase file extensions covered by the native Rust addon.
417
+ *
418
+ * Mirrors `LanguageKind::from_extension` in
419
+ * `crates/codegraph-core/src/parser_registry.rs`. Used to classify why the
420
+ * native orchestrator dropped a file: extensions outside this set are a
421
+ * legitimate parser limit (no Rust extractor exists), while extensions inside
422
+ * it indicate a real native bug (parse/read/extract failure).
423
+ *
424
+ * Keep this list in sync with the Rust enum — the native addon is a separate
425
+ * npm package, so JS has no runtime way to discover its language coverage.
426
+ */
427
+ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
428
+ '.js',
429
+ '.jsx',
430
+ '.mjs',
431
+ '.cjs',
432
+ '.ts',
433
+ '.tsx',
434
+ '.py',
435
+ '.pyi',
436
+ '.tf',
437
+ '.hcl',
438
+ '.go',
439
+ '.rs',
440
+ '.java',
441
+ '.cs',
442
+ '.rb',
443
+ '.rake',
444
+ '.gemspec',
445
+ '.php',
446
+ '.phtml',
447
+ '.c',
448
+ '.h',
449
+ '.cpp',
450
+ '.cc',
451
+ '.cxx',
452
+ '.hpp',
453
+ '.kt',
454
+ '.kts',
455
+ '.swift',
456
+ '.scala',
457
+ '.sh',
458
+ '.bash',
459
+ '.ex',
460
+ '.exs',
461
+ '.lua',
462
+ '.dart',
463
+ '.zig',
464
+ '.hs',
465
+ '.ml',
466
+ '.mli',
467
+ ]);
468
+
469
+ /**
470
+ * Classification for a file the native orchestrator dropped.
471
+ * - `unsupported-by-native`: extension has no Rust extractor (legitimate parser limit).
472
+ * - `native-extractor-failure`: extension is supported by native but the file was
473
+ * still dropped — points at a real bug (read error, parse failure, extractor crash).
474
+ */
475
+ export type NativeDropReason = 'unsupported-by-native' | 'native-extractor-failure';
476
+
477
+ export interface NativeDropClassification {
478
+ /** Per-reason → per-extension → list of relative paths that hit that bucket. */
479
+ byReason: Record<NativeDropReason, Map<string, string[]>>;
480
+ /** Total file count per reason. */
481
+ totals: Record<NativeDropReason, number>;
482
+ }
483
+
484
+ /**
485
+ * Group the missing files (relative paths) by drop reason and extension so the
486
+ * caller can log per-extension counts and a sample path. Pure function — no
487
+ * I/O, safe to unit-test independently of the build pipeline.
488
+ */
489
+ export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClassification {
490
+ const byReason: Record<NativeDropReason, Map<string, string[]>> = {
491
+ 'unsupported-by-native': new Map(),
492
+ 'native-extractor-failure': new Map(),
493
+ };
494
+ const totals: Record<NativeDropReason, number> = {
495
+ 'unsupported-by-native': 0,
496
+ 'native-extractor-failure': 0,
497
+ };
498
+ for (const rel of relPaths) {
499
+ const ext = path.extname(rel).toLowerCase();
500
+ const reason: NativeDropReason = NATIVE_SUPPORTED_EXTENSIONS.has(ext)
501
+ ? 'native-extractor-failure'
502
+ : 'unsupported-by-native';
503
+ const bucket = byReason[reason];
504
+ let list = bucket.get(ext);
505
+ if (!list) {
506
+ list = [];
507
+ bucket.set(ext, list);
508
+ }
509
+ list.push(rel);
510
+ totals[reason]++;
511
+ }
512
+ return { byReason, totals };
513
+ }
514
+
515
+ /**
516
+ * Render `{ ext → paths[] }` as `ext (n: sample.ext, ...)` slices for log lines.
517
+ * Caps at 3 sample paths per extension and 6 extensions total to keep warnings
518
+ * readable when many languages are dropped at once. Extensions are sorted by
519
+ * descending file count so the loudest offender shows up first; ties keep
520
+ * insertion order. Pure function — safe to unit-test independently.
521
+ */
522
+ export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
523
+ const MAX_EXTS = 6;
524
+ const MAX_SAMPLES = 3;
525
+ const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);
526
+ const shown = entries.slice(0, MAX_EXTS).map(([ext, paths]) => {
527
+ const sample = paths.slice(0, MAX_SAMPLES).join(', ');
528
+ const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
529
+ return `${ext} (${paths.length}: ${sample}${more})`;
530
+ });
531
+ if (entries.length > MAX_EXTS) {
532
+ shown.push(`+${entries.length - MAX_EXTS} more extension(s)`);
533
+ }
534
+ return shown.join('; ');
535
+ }
536
+
341
537
  // ── Unified API ──────────────────────────────────────────────────────────────
342
538
 
343
539
  function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine {
@@ -721,23 +917,13 @@ async function backfillTypeMap(
721
917
  return { typeMap: new Map(), backfilled: false };
722
918
  }
723
919
  }
724
- const parsers = await ensureParsersForFiles([filePath]);
725
- const extracted = wasmExtractSymbols(parsers, filePath, code);
726
- try {
727
- if (!extracted || extracted.symbols.typeMap.size === 0) {
728
- return { typeMap: new Map(), backfilled: false };
729
- }
730
- return { typeMap: extracted.symbols.typeMap, backfilled: true };
731
- } finally {
732
- // Free the WASM tree to prevent memory accumulation across repeated builds
733
- if (extracted?.tree && typeof extracted.tree.delete === 'function') {
734
- try {
735
- extracted.tree.delete();
736
- } catch (e) {
737
- debug(`backfillTypeMap: WASM tree cleanup failed: ${toErrorMessage(e)}`);
738
- }
739
- }
920
+ const pool = getWasmWorkerPool();
921
+ // Extract-only no visitor walk, we only need the typeMap from this pass.
922
+ const output = await pool.parse(filePath, code, EXTRACT_ONLY);
923
+ if (!output || output.typeMap.size === 0) {
924
+ return { typeMap: new Map(), backfilled: false };
740
925
  }
926
+ return { typeMap: output.typeMap, backfilled: true };
741
927
  }
742
928
 
743
929
  /**
@@ -765,7 +951,16 @@ function wasmExtractSymbols(
765
951
  if (!entry) return null;
766
952
  const query = _queryCache.get(entry.id) ?? undefined;
767
953
  // Query (web-tree-sitter) is structurally compatible with TreeSitterQuery at runtime
768
- const symbols = entry.extractor(tree as any, filePath, query as any);
954
+ let symbols: ExtractorOutput | null;
955
+ try {
956
+ symbols = entry.extractor(tree as any, filePath, query as any);
957
+ } catch (e: unknown) {
958
+ warn(`Extractor error in ${filePath}: ${(e as Error).message}`);
959
+ // Free WASM tree to prevent memory leak — web-tree-sitter trees are backed
960
+ // by WASM linear memory and are not garbage-collected automatically.
961
+ if (typeof (tree as any).delete === 'function') (tree as any).delete();
962
+ return null;
963
+ }
769
964
  return symbols ? { symbols, tree, langId: entry.id } : null;
770
965
  }
771
966
 
@@ -796,10 +991,9 @@ export async function parseFileAuto(
796
991
  return patched;
797
992
  }
798
993
 
799
- // WASM path
800
- const parsers = await ensureParsersForFiles([filePath]);
801
- const extracted = wasmExtractSymbols(parsers, filePath, source);
802
- return extracted ? extracted.symbols : null;
994
+ // WASM path — dispatch to isolated worker
995
+ const pool = getWasmWorkerPool();
996
+ return pool.parse(filePath, source, FULL_ANALYSIS);
803
997
  }
804
998
 
805
999
  /** Backfill typeMap via WASM for TS/TSX files parsed by the native engine. */
@@ -812,40 +1006,44 @@ async function backfillTypeMapBatch(
812
1006
  );
813
1007
  if (tsFiles.length === 0) return;
814
1008
 
815
- const parsers = await ensureParsersForFiles(tsFiles.map((f) => f.filePath));
1009
+ const pool = getWasmWorkerPool();
816
1010
  for (const { filePath, relPath } of tsFiles) {
817
- let extracted: WasmExtractResult | null | undefined;
1011
+ let code: string;
818
1012
  try {
819
- const code = fs.readFileSync(filePath, 'utf-8');
820
- extracted = wasmExtractSymbols(parsers, filePath, code);
821
- if (extracted?.symbols && extracted.symbols.typeMap.size > 0) {
822
- const symbols = result.get(relPath);
823
- if (!symbols) continue;
824
- symbols.typeMap = extracted.symbols.typeMap;
825
- symbols._typeMapBackfilled = true;
826
- }
1013
+ code = fs.readFileSync(filePath, 'utf-8');
827
1014
  } catch (e) {
828
- debug(`batchExtract: typeMap backfill failed: ${toErrorMessage(e)}`);
829
- } finally {
830
- if (extracted?.tree && typeof extracted.tree.delete === 'function') {
831
- try {
832
- extracted.tree.delete();
833
- } catch (e) {
834
- debug(`batchExtract: WASM tree cleanup failed: ${toErrorMessage(e)}`);
835
- }
836
- }
1015
+ debug(`batchExtract: cannot read ${filePath}: ${toErrorMessage(e)}`);
1016
+ continue;
837
1017
  }
1018
+ const output = await pool.parse(filePath, code, EXTRACT_ONLY);
1019
+ if (!output || output.typeMap.size === 0) continue;
1020
+ const symbols = result.get(relPath);
1021
+ if (!symbols) continue;
1022
+ symbols.typeMap = output.typeMap;
1023
+ symbols._typeMapBackfilled = true;
838
1024
  }
839
1025
  }
840
1026
 
841
- /** Parse files via WASM engine, returning a Map<relPath, symbols>. */
1027
+ /**
1028
+ * Parse files via WASM engine, returning a Map<relPath, symbols>.
1029
+ *
1030
+ * Each file is dispatched to the WASM worker pool. The worker parses, extracts,
1031
+ * and runs all AST analyses (complexity, CFG, dataflow, ast-store) in its own
1032
+ * thread, returning fully pre-computed ExtractorOutput. V8 fatal errors from
1033
+ * tree-sitter WASM (#965) kill only the worker — the pool skips the file and
1034
+ * restarts the worker for the next one.
1035
+ *
1036
+ * `_tree` is NEVER set by this path. All downstream analyses operate on the
1037
+ * pre-computed `astNodes` / `dataflow` / `def.complexity` / `def.cfg` fields.
1038
+ */
842
1039
  async function parseFilesWasm(
843
1040
  filePaths: string[],
844
1041
  rootDir: string,
845
1042
  ): Promise<Map<string, ExtractorOutput>> {
846
1043
  const result = new Map<string, ExtractorOutput>();
847
- const parsers = await ensureParsersForFiles(filePaths);
1044
+ const pool = getWasmWorkerPool();
848
1045
  for (const filePath of filePaths) {
1046
+ if (!_extToLang.has(path.extname(filePath).toLowerCase())) continue;
849
1047
  let code: string;
850
1048
  try {
851
1049
  code = fs.readFileSync(filePath, 'utf-8');
@@ -853,13 +1051,10 @@ async function parseFilesWasm(
853
1051
  warn(`Skipping ${path.relative(rootDir, filePath)}: ${(err as Error).message}`);
854
1052
  continue;
855
1053
  }
856
- const extracted = wasmExtractSymbols(parsers, filePath, code);
857
- if (extracted) {
1054
+ const output = await pool.parse(filePath, code, FULL_ANALYSIS);
1055
+ if (output) {
858
1056
  const relPath = path.relative(rootDir, filePath).split(path.sep).join('/');
859
- extracted.symbols._tree = extracted.tree;
860
- extracted.symbols._langId = extracted.langId;
861
- extracted.symbols._lineCount = code.split('\n').length;
862
- result.set(relPath, extracted.symbols);
1057
+ result.set(relPath, output);
863
1058
  }
864
1059
  }
865
1060
  return result;
@@ -884,8 +1079,10 @@ export async function parseFilesAuto(
884
1079
  ? native.parseFilesFull(filePaths, rootDir)
885
1080
  : native.parseFiles(filePaths, rootDir, true, true);
886
1081
  const needsTypeMap: { filePath: string; relPath: string }[] = [];
1082
+ const nativeParsed = new Set<string>();
887
1083
  for (const r of nativeResults) {
888
1084
  if (!r) continue;
1085
+ nativeParsed.add(r.file);
889
1086
  const patched = patchNativeResult(r);
890
1087
  const relPath = path.relative(rootDir, r.file).split(path.sep).join('/');
891
1088
  result.set(relPath, patched);
@@ -901,6 +1098,24 @@ export async function parseFilesAuto(
901
1098
  if (needsTypeMap.length > 0) {
902
1099
  await backfillTypeMapBatch(needsTypeMap, result);
903
1100
  }
1101
+
1102
+ // Engine parity: native may silently drop files whose extensions are in
1103
+ // SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
1104
+ // extractor/grammar is missing or fails. WASM handles these — fall back so
1105
+ // both engines process the same file set (#967). Restrict to installed WASM
1106
+ // grammars so we don't warn about files that neither engine can parse.
1107
+ const installedExts = getInstalledWasmExtensions();
1108
+ const dropped = filePaths.filter(
1109
+ (f) => !nativeParsed.has(f) && installedExts.has(path.extname(f).toLowerCase()),
1110
+ );
1111
+ if (dropped.length > 0) {
1112
+ warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
1113
+ const wasmResults = await parseFilesWasm(dropped, rootDir);
1114
+ for (const [relPath, symbols] of wasmResults) {
1115
+ result.set(relPath, symbols);
1116
+ }
1117
+ }
1118
+
904
1119
  return result;
905
1120
  }
906
1121
 
@@ -1,6 +1,6 @@
1
1
  import fs from 'node:fs';
2
2
  import path from 'node:path';
3
- import { closeDb, findDbPath, openDb } from '../../db/index.js';
3
+ import { closeDb, findDbPath, getBuildMeta, openDb } from '../../db/index.js';
4
4
  import { warn } from '../../infrastructure/logger.js';
5
5
  import { DbError } from '../../shared/errors.js';
6
6
  import type { BetterSqlite3Database, NodeRow } from '../../types.js';
@@ -73,6 +73,21 @@ export async function buildEmbeddings(
73
73
  const db = openDb(dbPath) as BetterSqlite3Database;
74
74
  initEmbeddingsSchema(db);
75
75
 
76
+ // Prefer the repo root recorded at build time — embed may be invoked from a
77
+ // different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
78
+ // positional rootDir will be wrong in that case. For legacy DBs without
79
+ // root_dir metadata, fall back to `<dbParent>` only when the DB lives at
80
+ // the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
81
+ // the caller-provided rootDir (which may be an explicit positional arg).
82
+ // `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
83
+ // conventional-layout check is required to keep the rootDir path reachable.
84
+ const metaRoot = getBuildMeta(db, 'root_dir');
85
+ const resolvedDbPath = path.resolve(dbPath);
86
+ const dbDirName = path.basename(path.dirname(resolvedDbPath));
87
+ const dbParent =
88
+ dbDirName === '.codegraph' ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
89
+ const resolvedRoot = metaRoot || dbParent || rootDir;
90
+
76
91
  db.exec('DELETE FROM embeddings');
77
92
  db.exec('DELETE FROM embedding_meta');
78
93
  db.exec('DELETE FROM fts_index');
@@ -98,13 +113,17 @@ export async function buildEmbeddings(
98
113
  const config = getModelConfig(modelKey);
99
114
  const contextWindow = config.contextWindow;
100
115
  let overflowCount = 0;
116
+ let filesRead = 0;
117
+ let filesSkipped = 0;
101
118
 
102
119
  for (const [file, fileNodes] of byFile) {
103
- const fullPath = path.isAbsolute(file) ? file : path.join(rootDir, file);
120
+ const fullPath = path.isAbsolute(file) ? file : path.join(resolvedRoot, file);
104
121
  let lines: string[];
105
122
  try {
106
123
  lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
124
+ filesRead++;
107
125
  } catch (err: unknown) {
126
+ filesSkipped++;
108
127
  warn(`Cannot read ${file} for embeddings: ${(err as Error).message}`);
109
128
  continue;
110
129
  }
@@ -136,6 +155,19 @@ export async function buildEmbeddings(
136
155
  );
137
156
  }
138
157
 
158
+ // If there were symbols to embed but every file failed to read, the DB was
159
+ // almost certainly built from a different location than the current cwd.
160
+ // Surface this clearly instead of emitting a silent "Stored 0 embeddings".
161
+ if (byFile.size > 0 && filesRead === 0) {
162
+ closeDb(db);
163
+ throw new DbError(
164
+ `embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
165
+ `Tried resolving against: ${resolvedRoot}\n` +
166
+ 'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
167
+ { file: dbPath },
168
+ );
169
+ }
170
+
139
171
  console.log(`Embedding ${texts.length} symbols...`);
140
172
  const { vectors, dim } = await embed(texts, modelKey);
141
173
 
@@ -253,7 +253,7 @@ export async function embed(
253
253
  }
254
254
 
255
255
  if (texts.length > batchSize) {
256
- process.stdout.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
256
+ process.stderr.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
257
257
  }
258
258
  }
259
259