@optave/codegraph 3.9.3 → 3.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/README.md +10 -10
  2. package/dist/ast-analysis/visitor.d.ts.map +1 -1
  3. package/dist/ast-analysis/visitor.js +14 -0
  4. package/dist/ast-analysis/visitor.js.map +1 -1
  5. package/dist/cli/commands/watch.d.ts.map +1 -1
  6. package/dist/cli/commands/watch.js +2 -0
  7. package/dist/cli/commands/watch.js.map +1 -1
  8. package/dist/cli.js +24 -1
  9. package/dist/cli.js.map +1 -1
  10. package/dist/domain/graph/builder/context.d.ts +17 -0
  11. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  12. package/dist/domain/graph/builder/context.js +7 -0
  13. package/dist/domain/graph/builder/context.js.map +1 -1
  14. package/dist/domain/graph/builder/helpers.d.ts +13 -2
  15. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  16. package/dist/domain/graph/builder/helpers.js +30 -4
  17. package/dist/domain/graph/builder/helpers.js.map +1 -1
  18. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  19. package/dist/domain/graph/builder/pipeline.js +221 -51
  20. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  21. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  22. package/dist/domain/graph/builder/stages/build-edges.js +67 -6
  23. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  24. package/dist/domain/graph/builder/stages/build-structure.js +2 -2
  25. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  26. package/dist/domain/graph/builder/stages/collect-files.js +58 -26
  27. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  28. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  29. package/dist/domain/graph/builder/stages/detect-changes.js +105 -55
  30. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  31. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  32. package/dist/domain/graph/builder/stages/finalize.js +27 -4
  33. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  34. package/dist/domain/graph/builder/stages/run-analyses.d.ts.map +1 -1
  35. package/dist/domain/graph/builder/stages/run-analyses.js +5 -20
  36. package/dist/domain/graph/builder/stages/run-analyses.js.map +1 -1
  37. package/dist/domain/graph/journal.d.ts +15 -0
  38. package/dist/domain/graph/journal.d.ts.map +1 -1
  39. package/dist/domain/graph/journal.js +283 -28
  40. package/dist/domain/graph/journal.js.map +1 -1
  41. package/dist/domain/graph/watcher.d.ts +17 -0
  42. package/dist/domain/graph/watcher.d.ts.map +1 -1
  43. package/dist/domain/graph/watcher.js +23 -7
  44. package/dist/domain/graph/watcher.js.map +1 -1
  45. package/dist/domain/parser.d.ts +13 -4
  46. package/dist/domain/parser.d.ts.map +1 -1
  47. package/dist/domain/parser.js +174 -80
  48. package/dist/domain/parser.js.map +1 -1
  49. package/dist/domain/search/generator.d.ts.map +1 -1
  50. package/dist/domain/search/generator.js +28 -2
  51. package/dist/domain/search/generator.js.map +1 -1
  52. package/dist/domain/wasm-worker-entry.d.ts +24 -0
  53. package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
  54. package/dist/domain/wasm-worker-entry.js +643 -0
  55. package/dist/domain/wasm-worker-entry.js.map +1 -0
  56. package/dist/domain/wasm-worker-pool.d.ts +59 -0
  57. package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
  58. package/dist/domain/wasm-worker-pool.js +312 -0
  59. package/dist/domain/wasm-worker-pool.js.map +1 -0
  60. package/dist/domain/wasm-worker-protocol.d.ts +65 -0
  61. package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
  62. package/dist/domain/wasm-worker-protocol.js +13 -0
  63. package/dist/domain/wasm-worker-protocol.js.map +1 -0
  64. package/dist/extractors/javascript.js +265 -1
  65. package/dist/extractors/javascript.js.map +1 -1
  66. package/dist/features/boundaries.d.ts +2 -2
  67. package/dist/features/boundaries.d.ts.map +1 -1
  68. package/dist/features/boundaries.js +2 -31
  69. package/dist/features/boundaries.js.map +1 -1
  70. package/dist/features/snapshot.d.ts.map +1 -1
  71. package/dist/features/snapshot.js +99 -13
  72. package/dist/features/snapshot.js.map +1 -1
  73. package/dist/features/structure.d.ts.map +1 -1
  74. package/dist/features/structure.js +14 -1
  75. package/dist/features/structure.js.map +1 -1
  76. package/dist/graph/algorithms/louvain.d.ts.map +1 -1
  77. package/dist/graph/algorithms/louvain.js +2 -4
  78. package/dist/graph/algorithms/louvain.js.map +1 -1
  79. package/dist/infrastructure/config.d.ts.map +1 -1
  80. package/dist/infrastructure/config.js +12 -2
  81. package/dist/infrastructure/config.js.map +1 -1
  82. package/dist/shared/globs.d.ts +40 -0
  83. package/dist/shared/globs.d.ts.map +1 -0
  84. package/dist/shared/globs.js +126 -0
  85. package/dist/shared/globs.js.map +1 -0
  86. package/dist/types.d.ts +26 -1
  87. package/dist/types.d.ts.map +1 -1
  88. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  89. package/package.json +7 -7
  90. package/src/ast-analysis/visitor.ts +15 -0
  91. package/src/cli/commands/watch.ts +2 -0
  92. package/src/cli.ts +31 -8
  93. package/src/domain/graph/builder/context.ts +19 -0
  94. package/src/domain/graph/builder/helpers.ts +53 -3
  95. package/src/domain/graph/builder/pipeline.ts +235 -49
  96. package/src/domain/graph/builder/stages/build-edges.ts +80 -6
  97. package/src/domain/graph/builder/stages/build-structure.ts +2 -2
  98. package/src/domain/graph/builder/stages/collect-files.ts +56 -26
  99. package/src/domain/graph/builder/stages/detect-changes.ts +118 -61
  100. package/src/domain/graph/builder/stages/finalize.ts +27 -4
  101. package/src/domain/graph/builder/stages/run-analyses.ts +5 -26
  102. package/src/domain/graph/journal.ts +284 -27
  103. package/src/domain/graph/watcher.ts +29 -9
  104. package/src/domain/parser.ts +166 -73
  105. package/src/domain/search/generator.ts +34 -2
  106. package/src/domain/wasm-worker-entry.ts +788 -0
  107. package/src/domain/wasm-worker-pool.ts +330 -0
  108. package/src/domain/wasm-worker-protocol.ts +81 -0
  109. package/src/extractors/javascript.ts +290 -1
  110. package/src/features/boundaries.ts +2 -27
  111. package/src/features/snapshot.ts +93 -14
  112. package/src/features/structure.ts +17 -1
  113. package/src/graph/algorithms/louvain.ts +2 -4
  114. package/src/infrastructure/config.ts +12 -2
  115. package/src/shared/globs.ts +121 -0
  116. package/src/types.ts +26 -1
@@ -13,6 +13,24 @@ import type {
13
13
  LanguageRegistryEntry,
14
14
  TypeMapEntry,
15
15
  } from '../types.js';
16
+ import { disposeWasmWorkerPool, getWasmWorkerPool } from './wasm-worker-pool.js';
17
+ import type { WorkerAnalysisOpts } from './wasm-worker-protocol.js';
18
+
19
/**
 * Worker analysis options with every pass enabled (ast, complexity, cfg,
 * dataflow), so that worker output matches what `parseFilesFull` produces.
 */
const FULL_ANALYSIS: WorkerAnalysisOpts = { ast: true, complexity: true, cfg: true, dataflow: true };

/**
 * Worker analysis options with every pass disabled. Used on fast paths such as
 * the typeMap backfill, where only the extractor output is needed and the
 * visitor walk can be skipped.
 */
const EXTRACT_ONLY: WorkerAnalysisOpts = { ast: false, complexity: false, cfg: false, dataflow: false };
16
34
 
17
35
  // Re-export all extractors for backward compatibility
18
36
  export {
@@ -262,7 +280,7 @@ function disposeMapEntries(entries: Iterable<[string, any]>, label: string): voi
262
280
  }
263
281
  }
264
282
 
265
- export function disposeParsers(): void {
283
+ export async function disposeParsers(): Promise<void> {
266
284
  if (_cachedParsers) {
267
285
  disposeMapEntries(_cachedParsers, 'parser');
268
286
  _cachedParsers = null;
@@ -276,6 +294,7 @@ export function disposeParsers(): void {
276
294
  _initialized = false;
277
295
  _allParsersLoaded = false;
278
296
  _loadingPromises.clear();
297
+ await disposeWasmWorkerPool();
279
298
  }
280
299
 
281
300
  export function getParser(parsers: Map<string, Parser | null>, filePath: string): Parser | null {
@@ -286,33 +305,33 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
286
305
  }
287
306
 
288
307
  /**
289
- * Pre-parse files missing `_tree` via WASM so downstream phases (CFG, dataflow)
290
- * don't each need to create parsers and re-parse independently.
291
- * Only parses files whose extension is in SUPPORTED_EXTENSIONS.
308
+ * Backfill missing AST-analysis data (astNodes, dataflow, def.complexity,
309
+ * def.cfg) via the WASM worker pool for files that were parsed by the native
310
+ * engine but are missing one or more analyses.
311
+ *
312
+ * Historically this function populated `symbols._tree` so the main-thread
313
+ * visitor walk in `ast-analysis/engine.ts` could run. After the worker-isolation
314
+ * refactor (#965), the worker runs every visitor itself and returns pre-computed
315
+ * analysis data — `_tree` is never set on the main thread.
316
+ *
317
+ * Name is preserved for caller compatibility; the function now ensures
318
+ * *analysis data* rather than *trees*.
292
319
  */
293
320
  export async function ensureWasmTrees(
294
321
  fileSymbols: Map<string, any>,
295
322
  rootDir: string,
296
323
  ): Promise<void> {
297
- // Single pass: collect absolute paths for files that need parsing
298
- const filePaths: string[] = [];
324
+ // Collect files that still need analysis data and are parseable by WASM.
325
+ const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
299
326
  for (const [relPath, symbols] of fileSymbols) {
300
- if (!symbols._tree && _extToLang.has(path.extname(relPath).toLowerCase())) {
301
- filePaths.push(path.join(rootDir, relPath));
302
- }
327
+ if (symbols._tree) continue; // legacy path — leave existing trees alone
328
+ if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
329
+ pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
303
330
  }
304
- if (filePaths.length === 0) return;
305
- const parsers = await ensureParsersForFiles(filePaths);
331
+ if (pending.length === 0) return;
306
332
 
307
- for (const [relPath, symbols] of fileSymbols) {
308
- if (symbols._tree) continue;
309
- const ext = path.extname(relPath).toLowerCase();
310
- const entry = _extToLang.get(ext);
311
- if (!entry) continue;
312
- const parser = parsers.get(entry.id);
313
- if (!parser) continue;
314
-
315
- const absPath = path.join(rootDir, relPath);
333
+ const pool = getWasmWorkerPool();
334
+ for (const { relPath, absPath, symbols } of pending) {
316
335
  let code: string;
317
336
  try {
318
337
  code = fs.readFileSync(absPath, 'utf-8');
@@ -320,11 +339,45 @@ export async function ensureWasmTrees(
320
339
  debug(`ensureWasmTrees: cannot read ${relPath}: ${(e as Error).message}`);
321
340
  continue;
322
341
  }
323
- try {
324
- symbols._tree = parser.parse(code);
325
- symbols._langId = entry.id;
326
- } catch (e: unknown) {
327
- debug(`ensureWasmTrees: parse failed for ${relPath}: ${(e as Error).message}`);
342
+ const output = await pool.parse(absPath, code, FULL_ANALYSIS);
343
+ if (!output) continue; // worker crashed or returned null — skip silently
344
+ mergeAnalysisData(symbols, output);
345
+ }
346
+ }
347
+
348
/**
 * Merge pre-computed analysis data from a worker result onto existing symbols.
 *
 * Only fills gaps — a field the caller already populated is never overwritten:
 * - `_langId`, `_lineCount` and `dataflow` are copied only when falsy on `symbols`.
 * - `astNodes` is copied only when `symbols.astNodes` is not already an array.
 * - `typeMap` entries are added only for keys missing from `symbols.typeMap`;
 *   a missing or non-Map `symbols.typeMap` is replaced by a copy of the worker's.
 * - per-definition `complexity` and `cfg` are copied onto the existing
 *   definition that shares the same (kind, name, line).
 *
 * Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
 * per-definition complexity and cfg.
 *
 * @param symbols - Existing per-file symbols; mutated in place.
 * @param worker - Worker-produced extractor output for the same file.
 */
function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
  if (!symbols._langId && worker._langId) symbols._langId = worker._langId;
  if (!symbols._lineCount && worker._lineCount) symbols._lineCount = worker._lineCount;
  if (!Array.isArray(symbols.astNodes) && Array.isArray(worker.astNodes)) {
    symbols.astNodes = worker.astNodes;
  }
  if (!symbols.dataflow && worker.dataflow) symbols.dataflow = worker.dataflow;

  if (worker.typeMap && worker.typeMap.size > 0) {
    if (symbols.typeMap instanceof Map) {
      for (const [k, v] of worker.typeMap) {
        if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
      }
    } else {
      symbols.typeMap = new Map(worker.typeMap);
    }
  }

  const existingDefs: any[] = Array.isArray(symbols.definitions) ? symbols.definitions : [];
  const workerDefs: any[] = Array.isArray(worker.definitions) ? worker.definitions : [];

  // Index existing defs by (kind, name, line) — mirrors engine.ts matching key.
  const defKey = (d: any): string => `${d.kind}|${d.name}|${d.line}`;
  const byKey = new Map<string, any>();
  for (const d of existingDefs) {
    // Tolerate holes in the definitions array instead of throwing on `d.kind`.
    if (d != null) byKey.set(defKey(d), d);
  }

  for (const wd of workerDefs) {
    if (wd == null) continue;
    const existing = byKey.get(defKey(wd));
    if (!existing) continue;
    if (!existing.complexity && wd.complexity) existing.complexity = wd.complexity;
    // Apply the same validity test to both sides: a cfg only counts when its
    // `blocks` is an array, so a malformed worker cfg is never installed.
    const hasCfg = Array.isArray(existing.cfg?.blocks);
    if (!hasCfg && Array.isArray(wd.cfg?.blocks)) existing.cfg = wd.cfg;
  }
}
@@ -338,6 +391,27 @@ export function isWasmAvailable(): boolean {
338
391
  );
339
392
  }
340
393
 
394
/** Memoized result of `getInstalledWasmExtensions`; stays `null` until the first call. */
let _installedWasmExts: Set<string> | null = null;

/**
 * Return the set of lowercase file extensions whose WASM grammar file exists
 * on disk.
 *
 * Used to scope engine-parity backfill to files that WASM can recover —
 * languages without an installed grammar are skipped by both engines, so they
 * don't represent a native-engine drop.
 *
 * The result is computed once and cached for subsequent calls; the grammars
 * directory is shipped immutable.
 *
 * @returns The cached set of installed extensions (same instance on every call).
 */
export function getInstalledWasmExtensions(): Set<string> {
  if (_installedWasmExts !== null) return _installedWasmExts;

  const installed = new Set<string>();
  for (const { grammarFile, extensions } of LANGUAGE_REGISTRY) {
    if (!fs.existsSync(grammarPath(grammarFile))) continue;
    for (const extension of extensions) installed.add(extension.toLowerCase());
  }

  _installedWasmExts = installed;
  return installed;
}
414
+
341
415
  // ── Unified API ──────────────────────────────────────────────────────────────
342
416
 
343
417
  function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine {
@@ -721,23 +795,13 @@ async function backfillTypeMap(
721
795
  return { typeMap: new Map(), backfilled: false };
722
796
  }
723
797
  }
724
- const parsers = await ensureParsersForFiles([filePath]);
725
- const extracted = wasmExtractSymbols(parsers, filePath, code);
726
- try {
727
- if (!extracted || extracted.symbols.typeMap.size === 0) {
728
- return { typeMap: new Map(), backfilled: false };
729
- }
730
- return { typeMap: extracted.symbols.typeMap, backfilled: true };
731
- } finally {
732
- // Free the WASM tree to prevent memory accumulation across repeated builds
733
- if (extracted?.tree && typeof extracted.tree.delete === 'function') {
734
- try {
735
- extracted.tree.delete();
736
- } catch (e) {
737
- debug(`backfillTypeMap: WASM tree cleanup failed: ${toErrorMessage(e)}`);
738
- }
739
- }
798
+ const pool = getWasmWorkerPool();
799
+ // Extract-only: no visitor walk — we only need the typeMap from this pass.
800
+ const output = await pool.parse(filePath, code, EXTRACT_ONLY);
801
+ if (!output || output.typeMap.size === 0) {
802
+ return { typeMap: new Map(), backfilled: false };
740
803
  }
804
+ return { typeMap: output.typeMap, backfilled: true };
741
805
  }
742
806
 
743
807
  /**
@@ -765,7 +829,16 @@ function wasmExtractSymbols(
765
829
  if (!entry) return null;
766
830
  const query = _queryCache.get(entry.id) ?? undefined;
767
831
  // Query (web-tree-sitter) is structurally compatible with TreeSitterQuery at runtime
768
- const symbols = entry.extractor(tree as any, filePath, query as any);
832
+ let symbols: ExtractorOutput | null;
833
+ try {
834
+ symbols = entry.extractor(tree as any, filePath, query as any);
835
+ } catch (e: unknown) {
836
+ warn(`Extractor error in ${filePath}: ${(e as Error).message}`);
837
+ // Free WASM tree to prevent memory leak — web-tree-sitter trees are backed
838
+ // by WASM linear memory and are not garbage-collected automatically.
839
+ if (typeof (tree as any).delete === 'function') (tree as any).delete();
840
+ return null;
841
+ }
769
842
  return symbols ? { symbols, tree, langId: entry.id } : null;
770
843
  }
771
844
 
@@ -796,10 +869,9 @@ export async function parseFileAuto(
796
869
  return patched;
797
870
  }
798
871
 
799
- // WASM path
800
- const parsers = await ensureParsersForFiles([filePath]);
801
- const extracted = wasmExtractSymbols(parsers, filePath, source);
802
- return extracted ? extracted.symbols : null;
872
+ // WASM path — dispatch to isolated worker
873
+ const pool = getWasmWorkerPool();
874
+ return pool.parse(filePath, source, FULL_ANALYSIS);
803
875
  }
804
876
 
805
877
  /** Backfill typeMap via WASM for TS/TSX files parsed by the native engine. */
@@ -812,40 +884,44 @@ async function backfillTypeMapBatch(
812
884
  );
813
885
  if (tsFiles.length === 0) return;
814
886
 
815
- const parsers = await ensureParsersForFiles(tsFiles.map((f) => f.filePath));
887
+ const pool = getWasmWorkerPool();
816
888
  for (const { filePath, relPath } of tsFiles) {
817
- let extracted: WasmExtractResult | null | undefined;
889
+ let code: string;
818
890
  try {
819
- const code = fs.readFileSync(filePath, 'utf-8');
820
- extracted = wasmExtractSymbols(parsers, filePath, code);
821
- if (extracted?.symbols && extracted.symbols.typeMap.size > 0) {
822
- const symbols = result.get(relPath);
823
- if (!symbols) continue;
824
- symbols.typeMap = extracted.symbols.typeMap;
825
- symbols._typeMapBackfilled = true;
826
- }
891
+ code = fs.readFileSync(filePath, 'utf-8');
827
892
  } catch (e) {
828
- debug(`batchExtract: typeMap backfill failed: ${toErrorMessage(e)}`);
829
- } finally {
830
- if (extracted?.tree && typeof extracted.tree.delete === 'function') {
831
- try {
832
- extracted.tree.delete();
833
- } catch (e) {
834
- debug(`batchExtract: WASM tree cleanup failed: ${toErrorMessage(e)}`);
835
- }
836
- }
893
+ debug(`batchExtract: cannot read ${filePath}: ${toErrorMessage(e)}`);
894
+ continue;
837
895
  }
896
+ const output = await pool.parse(filePath, code, EXTRACT_ONLY);
897
+ if (!output || output.typeMap.size === 0) continue;
898
+ const symbols = result.get(relPath);
899
+ if (!symbols) continue;
900
+ symbols.typeMap = output.typeMap;
901
+ symbols._typeMapBackfilled = true;
838
902
  }
839
903
  }
840
904
 
841
- /** Parse files via WASM engine, returning a Map<relPath, symbols>. */
905
+ /**
906
+ * Parse files via WASM engine, returning a Map<relPath, symbols>.
907
+ *
908
+ * Each file is dispatched to the WASM worker pool. The worker parses, extracts,
909
+ * and runs all AST analyses (complexity, CFG, dataflow, ast-store) in its own
910
+ * thread, returning fully pre-computed ExtractorOutput. V8 fatal errors from
911
+ * tree-sitter WASM (#965) kill only the worker — the pool skips the file and
912
+ * restarts the worker for the next one.
913
+ *
914
+ * `_tree` is NEVER set by this path. All downstream analyses operate on the
915
+ * pre-computed `astNodes` / `dataflow` / `def.complexity` / `def.cfg` fields.
916
+ */
842
917
  async function parseFilesWasm(
843
918
  filePaths: string[],
844
919
  rootDir: string,
845
920
  ): Promise<Map<string, ExtractorOutput>> {
846
921
  const result = new Map<string, ExtractorOutput>();
847
- const parsers = await ensureParsersForFiles(filePaths);
922
+ const pool = getWasmWorkerPool();
848
923
  for (const filePath of filePaths) {
924
+ if (!_extToLang.has(path.extname(filePath).toLowerCase())) continue;
849
925
  let code: string;
850
926
  try {
851
927
  code = fs.readFileSync(filePath, 'utf-8');
@@ -853,13 +929,10 @@ async function parseFilesWasm(
853
929
  warn(`Skipping ${path.relative(rootDir, filePath)}: ${(err as Error).message}`);
854
930
  continue;
855
931
  }
856
- const extracted = wasmExtractSymbols(parsers, filePath, code);
857
- if (extracted) {
932
+ const output = await pool.parse(filePath, code, FULL_ANALYSIS);
933
+ if (output) {
858
934
  const relPath = path.relative(rootDir, filePath).split(path.sep).join('/');
859
- extracted.symbols._tree = extracted.tree;
860
- extracted.symbols._langId = extracted.langId;
861
- extracted.symbols._lineCount = code.split('\n').length;
862
- result.set(relPath, extracted.symbols);
935
+ result.set(relPath, output);
863
936
  }
864
937
  }
865
938
  return result;
@@ -884,8 +957,10 @@ export async function parseFilesAuto(
884
957
  ? native.parseFilesFull(filePaths, rootDir)
885
958
  : native.parseFiles(filePaths, rootDir, true, true);
886
959
  const needsTypeMap: { filePath: string; relPath: string }[] = [];
960
+ const nativeParsed = new Set<string>();
887
961
  for (const r of nativeResults) {
888
962
  if (!r) continue;
963
+ nativeParsed.add(r.file);
889
964
  const patched = patchNativeResult(r);
890
965
  const relPath = path.relative(rootDir, r.file).split(path.sep).join('/');
891
966
  result.set(relPath, patched);
@@ -901,6 +976,24 @@ export async function parseFilesAuto(
901
976
  if (needsTypeMap.length > 0) {
902
977
  await backfillTypeMapBatch(needsTypeMap, result);
903
978
  }
979
+
980
+ // Engine parity: native may silently drop files whose extensions are in
981
+ // SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
982
+ // extractor/grammar is missing or fails. WASM handles these — fall back so
983
+ // both engines process the same file set (#967). Restrict to installed WASM
984
+ // grammars so we don't warn about files that neither engine can parse.
985
+ const installedExts = getInstalledWasmExtensions();
986
+ const dropped = filePaths.filter(
987
+ (f) => !nativeParsed.has(f) && installedExts.has(path.extname(f).toLowerCase()),
988
+ );
989
+ if (dropped.length > 0) {
990
+ warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
991
+ const wasmResults = await parseFilesWasm(dropped, rootDir);
992
+ for (const [relPath, symbols] of wasmResults) {
993
+ result.set(relPath, symbols);
994
+ }
995
+ }
996
+
904
997
  return result;
905
998
  }
906
999
 
@@ -1,6 +1,6 @@
1
1
  import fs from 'node:fs';
2
2
  import path from 'node:path';
3
- import { closeDb, findDbPath, openDb } from '../../db/index.js';
3
+ import { closeDb, findDbPath, getBuildMeta, openDb } from '../../db/index.js';
4
4
  import { warn } from '../../infrastructure/logger.js';
5
5
  import { DbError } from '../../shared/errors.js';
6
6
  import type { BetterSqlite3Database, NodeRow } from '../../types.js';
@@ -73,6 +73,21 @@ export async function buildEmbeddings(
73
73
  const db = openDb(dbPath) as BetterSqlite3Database;
74
74
  initEmbeddingsSchema(db);
75
75
 
76
+ // Prefer the repo root recorded at build time — embed may be invoked from a
77
+ // different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
78
+ // positional rootDir will be wrong in that case. For legacy DBs without
79
+ // root_dir metadata, fall back to `<dbParent>` only when the DB lives at
80
+ // the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
81
+ // the caller-provided rootDir (which may be an explicit positional arg).
82
+ // `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
83
+ // conventional-layout check is required to keep the rootDir path reachable.
84
+ const metaRoot = getBuildMeta(db, 'root_dir');
85
+ const resolvedDbPath = path.resolve(dbPath);
86
+ const dbDirName = path.basename(path.dirname(resolvedDbPath));
87
+ const dbParent =
88
+ dbDirName === '.codegraph' ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
89
+ const resolvedRoot = metaRoot || dbParent || rootDir;
90
+
76
91
  db.exec('DELETE FROM embeddings');
77
92
  db.exec('DELETE FROM embedding_meta');
78
93
  db.exec('DELETE FROM fts_index');
@@ -98,13 +113,17 @@ export async function buildEmbeddings(
98
113
  const config = getModelConfig(modelKey);
99
114
  const contextWindow = config.contextWindow;
100
115
  let overflowCount = 0;
116
+ let filesRead = 0;
117
+ let filesSkipped = 0;
101
118
 
102
119
  for (const [file, fileNodes] of byFile) {
103
- const fullPath = path.isAbsolute(file) ? file : path.join(rootDir, file);
120
+ const fullPath = path.isAbsolute(file) ? file : path.join(resolvedRoot, file);
104
121
  let lines: string[];
105
122
  try {
106
123
  lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
124
+ filesRead++;
107
125
  } catch (err: unknown) {
126
+ filesSkipped++;
108
127
  warn(`Cannot read ${file} for embeddings: ${(err as Error).message}`);
109
128
  continue;
110
129
  }
@@ -136,6 +155,19 @@ export async function buildEmbeddings(
136
155
  );
137
156
  }
138
157
 
158
+ // If there were symbols to embed but every file failed to read, the DB was
159
+ // almost certainly built from a different location than the current cwd.
160
+ // Surface this clearly instead of emitting a silent "Stored 0 embeddings".
161
+ if (byFile.size > 0 && filesRead === 0) {
162
+ closeDb(db);
163
+ throw new DbError(
164
+ `embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
165
+ `Tried resolving against: ${resolvedRoot}\n` +
166
+ 'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
167
+ { file: dbPath },
168
+ );
169
+ }
170
+
139
171
  console.log(`Embedding ${texts.length} symbols...`);
140
172
  const { vectors, dim } = await embed(texts, modelKey);
141
173