@optave/codegraph 3.10.0 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/README.md +13 -13
  2. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  3. package/dist/ast-analysis/rules/index.js +77 -0
  4. package/dist/ast-analysis/rules/index.js.map +1 -1
  5. package/dist/cli/commands/audit.js +1 -1
  6. package/dist/cli/commands/audit.js.map +1 -1
  7. package/dist/cli/commands/build.d.ts.map +1 -1
  8. package/dist/cli/commands/build.js +2 -0
  9. package/dist/cli/commands/build.js.map +1 -1
  10. package/dist/cli/commands/check.js +1 -1
  11. package/dist/cli/commands/check.js.map +1 -1
  12. package/dist/cli/commands/children.js +1 -1
  13. package/dist/cli/commands/children.js.map +1 -1
  14. package/dist/cli/commands/diff-impact.js +1 -1
  15. package/dist/cli/commands/diff-impact.js.map +1 -1
  16. package/dist/cli/commands/roles.js +1 -1
  17. package/dist/cli/commands/roles.js.map +1 -1
  18. package/dist/cli/commands/structure.js +1 -1
  19. package/dist/cli/commands/structure.js.map +1 -1
  20. package/dist/cli/shared/options.js +1 -1
  21. package/dist/cli/shared/options.js.map +1 -1
  22. package/dist/db/connection.d.ts.map +1 -1
  23. package/dist/db/connection.js +8 -0
  24. package/dist/db/connection.js.map +1 -1
  25. package/dist/domain/graph/builder/incremental.d.ts +0 -6
  26. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  27. package/dist/domain/graph/builder/incremental.js +6 -23
  28. package/dist/domain/graph/builder/incremental.js.map +1 -1
  29. package/dist/domain/graph/builder/pipeline.d.ts +44 -0
  30. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  31. package/dist/domain/graph/builder/pipeline.js +181 -39
  32. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  33. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  34. package/dist/domain/graph/builder/stages/build-edges.js +8 -2
  35. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  36. package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
  37. package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
  38. package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
  39. package/dist/domain/graph/watcher.d.ts.map +1 -1
  40. package/dist/domain/graph/watcher.js +23 -18
  41. package/dist/domain/graph/watcher.js.map +1 -1
  42. package/dist/domain/parser.d.ts.map +1 -1
  43. package/dist/domain/parser.js +27 -1
  44. package/dist/domain/parser.js.map +1 -1
  45. package/dist/domain/search/models.d.ts +16 -0
  46. package/dist/domain/search/models.d.ts.map +1 -1
  47. package/dist/domain/search/models.js +35 -1
  48. package/dist/domain/search/models.js.map +1 -1
  49. package/dist/domain/wasm-worker-entry.js +8 -1
  50. package/dist/domain/wasm-worker-entry.js.map +1 -1
  51. package/dist/extractors/c.js +25 -6
  52. package/dist/extractors/c.js.map +1 -1
  53. package/dist/extractors/cpp.js +47 -6
  54. package/dist/extractors/cpp.js.map +1 -1
  55. package/dist/extractors/cuda.js +90 -14
  56. package/dist/extractors/cuda.js.map +1 -1
  57. package/dist/extractors/elixir.js +83 -3
  58. package/dist/extractors/elixir.js.map +1 -1
  59. package/dist/extractors/erlang.js +56 -20
  60. package/dist/extractors/erlang.js.map +1 -1
  61. package/dist/extractors/fsharp.d.ts +7 -0
  62. package/dist/extractors/fsharp.d.ts.map +1 -1
  63. package/dist/extractors/fsharp.js +94 -0
  64. package/dist/extractors/fsharp.js.map +1 -1
  65. package/dist/extractors/gleam.js +6 -2
  66. package/dist/extractors/gleam.js.map +1 -1
  67. package/dist/extractors/groovy.js +41 -1
  68. package/dist/extractors/groovy.js.map +1 -1
  69. package/dist/extractors/haskell.js +48 -4
  70. package/dist/extractors/haskell.js.map +1 -1
  71. package/dist/extractors/julia.js +172 -41
  72. package/dist/extractors/julia.js.map +1 -1
  73. package/dist/extractors/kotlin.js +4 -0
  74. package/dist/extractors/kotlin.js.map +1 -1
  75. package/dist/extractors/objc.js +184 -47
  76. package/dist/extractors/objc.js.map +1 -1
  77. package/dist/extractors/python.js +7 -4
  78. package/dist/extractors/python.js.map +1 -1
  79. package/dist/extractors/r.js +93 -52
  80. package/dist/extractors/r.js.map +1 -1
  81. package/dist/extractors/scala.d.ts.map +1 -1
  82. package/dist/extractors/scala.js +18 -32
  83. package/dist/extractors/scala.js.map +1 -1
  84. package/dist/extractors/solidity.js +18 -9
  85. package/dist/extractors/solidity.js.map +1 -1
  86. package/dist/extractors/verilog.js +80 -15
  87. package/dist/extractors/verilog.js.map +1 -1
  88. package/dist/mcp/tool-registry.d.ts.map +1 -1
  89. package/dist/mcp/tool-registry.js +4 -0
  90. package/dist/mcp/tool-registry.js.map +1 -1
  91. package/dist/mcp/tools/semantic-search.d.ts +1 -0
  92. package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
  93. package/dist/mcp/tools/semantic-search.js +1 -0
  94. package/dist/mcp/tools/semantic-search.js.map +1 -1
  95. package/dist/types.d.ts +15 -1
  96. package/dist/types.d.ts.map +1 -1
  97. package/grammars/tree-sitter-erlang.wasm +0 -0
  98. package/grammars/tree-sitter-fsharp.wasm +0 -0
  99. package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
  100. package/grammars/tree-sitter-gleam.wasm +0 -0
  101. package/package.json +10 -10
  102. package/src/ast-analysis/rules/index.ts +87 -0
  103. package/src/cli/commands/audit.ts +1 -1
  104. package/src/cli/commands/build.ts +2 -0
  105. package/src/cli/commands/check.ts +1 -1
  106. package/src/cli/commands/children.ts +1 -1
  107. package/src/cli/commands/diff-impact.ts +1 -1
  108. package/src/cli/commands/roles.ts +1 -1
  109. package/src/cli/commands/structure.ts +1 -1
  110. package/src/cli/shared/options.ts +1 -1
  111. package/src/db/connection.ts +8 -0
  112. package/src/domain/graph/builder/incremental.ts +6 -41
  113. package/src/domain/graph/builder/pipeline.ts +222 -37
  114. package/src/domain/graph/builder/stages/build-edges.ts +9 -2
  115. package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
  116. package/src/domain/graph/watcher.ts +21 -23
  117. package/src/domain/parser.ts +27 -1
  118. package/src/domain/search/models.ts +36 -1
  119. package/src/domain/wasm-worker-entry.ts +8 -1
  120. package/src/extractors/c.ts +27 -8
  121. package/src/extractors/cpp.ts +50 -8
  122. package/src/extractors/cuda.ts +90 -16
  123. package/src/extractors/elixir.ts +75 -3
  124. package/src/extractors/erlang.ts +63 -20
  125. package/src/extractors/fsharp.ts +104 -0
  126. package/src/extractors/gleam.ts +7 -2
  127. package/src/extractors/groovy.ts +45 -1
  128. package/src/extractors/haskell.ts +45 -4
  129. package/src/extractors/julia.ts +164 -43
  130. package/src/extractors/kotlin.ts +4 -0
  131. package/src/extractors/objc.ts +171 -47
  132. package/src/extractors/python.ts +5 -3
  133. package/src/extractors/r.ts +88 -48
  134. package/src/extractors/scala.ts +24 -36
  135. package/src/extractors/solidity.ts +17 -8
  136. package/src/extractors/verilog.ts +83 -15
  137. package/src/mcp/tool-registry.ts +5 -0
  138. package/src/mcp/tools/semantic-search.ts +2 -0
  139. package/src/types.ts +15 -0
@@ -9,7 +9,7 @@
9
9
  */
10
10
  import fs from 'node:fs';
11
11
  import path from 'node:path';
12
- import { bulkNodeIdsByFile } from '../../../db/index.js';
12
+ import { bulkNodeIdsByFile, purgeFileData } from '../../../db/index.js';
13
13
  import { debug, warn } from '../../../infrastructure/logger.js';
14
14
  import { normalizePath } from '../../../shared/constants.js';
15
15
  import type {
@@ -29,8 +29,6 @@ export interface IncrementalStmts {
29
29
  insertNode: { run: (...params: unknown[]) => unknown };
30
30
  insertEdge: { run: (...params: unknown[]) => unknown };
31
31
  getNodeId: { get: (...params: unknown[]) => { id: number } | undefined };
32
- deleteEdgesForFile: { run: (...params: unknown[]) => unknown };
33
- deleteNodes: { run: (...params: unknown[]) => unknown };
34
32
  countNodes: { get: (...params: unknown[]) => { c: number } | undefined };
35
33
  listSymbols: { all: (...params: unknown[]) => unknown[] };
36
34
  findNodeInFile: { all: (...params: unknown[]) => unknown[] };
@@ -208,40 +206,6 @@ function rebuildDirContainment(
208
206
  return 0;
209
207
  }
210
208
 
211
- // ── Ancillary table cleanup ────────────────────────────────────────────
212
-
213
- function purgeAncillaryData(db: BetterSqlite3Database, relPath: string): void {
214
- const tryExec = (sql: string, ...args: string[]): void => {
215
- try {
216
- db.prepare(sql).run(...args);
217
- } catch (err: unknown) {
218
- if (!(err as Error | undefined)?.message?.includes('no such table')) throw err;
219
- }
220
- };
221
- tryExec(
222
- 'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
223
- relPath,
224
- );
225
- tryExec(
226
- 'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
227
- relPath,
228
- );
229
- tryExec(
230
- 'DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
231
- relPath,
232
- );
233
- tryExec(
234
- 'DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
235
- relPath,
236
- );
237
- tryExec(
238
- 'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) OR target_id IN (SELECT id FROM nodes WHERE file = ?)',
239
- relPath,
240
- relPath,
241
- );
242
- tryExec('DELETE FROM ast_nodes WHERE file = ?', relPath);
243
- }
244
-
245
209
  // ── Import edge building ────────────────────────────────────────────────
246
210
 
247
211
  // Lazily-cached prepared statements for barrel resolution (avoid re-preparing in hot loops)
@@ -547,10 +511,11 @@ export async function rebuildFile(
547
511
  // Find reverse-deps BEFORE purging (edges still reference the old nodes)
548
512
  const reverseDeps = findReverseDeps(db, relPath);
549
513
 
550
- // Purge ancillary tables, then edges, then nodes
551
- purgeAncillaryData(db, relPath);
552
- stmts.deleteEdgesForFile.run(relPath);
553
- stmts.deleteNodes.run(relPath);
514
+ // Purge ancillary tables (incl. embeddings), edges, and nodes in one pass.
515
+ // Embeddings must be purged before nodes — better-sqlite3 enforces foreign
516
+ // keys by default, and `embeddings.node_id` references `nodes.id`. Issue #1176.
517
+ // `purgeHashes: false` preserves file_hashes for the next incremental build.
518
+ purgeFileData(db, relPath, { purgeHashes: false });
554
519
 
555
520
  if (!fs.existsSync(filePath)) {
556
521
  if (cache) (cache as { remove(p: string): void }).remove(filePath);
@@ -15,6 +15,7 @@ import {
15
15
  initSchema,
16
16
  MIGRATIONS,
17
17
  openDb,
18
+ purgeFilesData,
18
19
  releaseAdvisoryLock,
19
20
  setBuildMeta,
20
21
  } from '../../../db/index.js';
@@ -38,6 +39,7 @@ import {
38
39
  formatDropExtensionSummary,
39
40
  getActiveEngine,
40
41
  getInstalledWasmExtensions,
42
+ NATIVE_SUPPORTED_EXTENSIONS,
41
43
  parseFilesWasmForBackfill,
42
44
  } from '../../parser.js';
43
45
  import { writeJournalHeader } from '../journal.js';
@@ -165,7 +167,9 @@ function loadAliases(ctx: PipelineContext): void {
165
167
 
166
168
  function setupPipeline(ctx: PipelineContext): void {
167
169
  ctx.rootDir = path.resolve(ctx.rootDir);
168
- ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db');
170
+ ctx.dbPath = ctx.opts.dbPath
171
+ ? path.resolve(ctx.opts.dbPath)
172
+ : path.join(ctx.rootDir, '.codegraph', 'graph.db');
169
173
 
170
174
  // Detect whether native engine is available.
171
175
  const enginePref = ctx.opts.engine || 'auto';
@@ -183,6 +187,16 @@ function setupPipeline(ctx: PipelineContext): void {
183
187
  initSchema(ctx.db);
184
188
 
185
189
  ctx.config = loadConfig(ctx.rootDir);
190
+ // Merge caller-supplied excludes on top of the file-config excludes so
191
+ // programmatic callers (e.g. benchmark scripts) can extend exclusion
192
+ // without mutating .codegraphrc.json. Native orchestrator picks this up
193
+ // automatically — it reads exclude off the serialized ctx.config below.
194
+ if (ctx.opts.exclude?.length) {
195
+ ctx.config = {
196
+ ...ctx.config,
197
+ exclude: [...(ctx.config.exclude ?? []), ...ctx.opts.exclude],
198
+ };
199
+ }
186
200
  ctx.incremental =
187
201
  ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false;
188
202
 
@@ -658,6 +672,15 @@ async function tryNativeOrchestrator(
658
672
 
659
673
  if (result.earlyExit) {
660
674
  info('No changes detected');
675
+ // Even on no-op rebuilds, dropped-language files added since the last
676
+ // full build are still missing from `nodes`/`file_hashes` (#1083), and
677
+ // WASM-only files deleted from disk leave stale rows behind (#1073).
678
+ // The orchestrator's file_collector skipped them, so its earlyExit
679
+ // doesn't imply DB consistency. Run the gap repair before returning.
680
+ const gap = detectDroppedLanguageGap(ctx);
681
+ if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
682
+ await backfillNativeDroppedFiles(ctx, gap);
683
+ }
661
684
  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
662
685
  return 'early-exit';
663
686
  }
@@ -753,37 +776,163 @@ async function tryNativeOrchestrator(
753
776
  // stale native binaries). WASM handles those — backfill via WASM so both
754
777
  // engines process the same file set (#967).
755
778
  //
756
- // Runs on full builds and on incrementals when the orchestrator reports
757
- // any file activity (removals or changes). The orchestrator's
758
- // `detect_removed_files` filter (#1070) skips files outside its narrower
759
- // file_collector, so on a current binary a no-op rebuild reports
760
- // `removedCount=0` and `changedCount=0`, making the backfill call pure
761
- // overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
762
- // binaries lacking the filter still report `removedCount>0` and get the
763
- // gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
764
- // narrows (but does not fully close) the gap where a brand-new
765
- // unsupported-extension file is added on an otherwise-quiet incremental
766
- // — see #1091 for the residual gap.
779
+ // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
780
+ // both gating and the backfill itself. On dirty incrementals/full builds
781
+ // the orchestrator signals trigger backfill, so the walk happens once
782
+ // (instead of redundantly inside backfill). On quiet incrementals we
783
+ // still pay the walk so we can detect brand-new files in dropped-language
784
+ // extensions — a gap that the orchestrator's `detect_removed_files`
785
+ // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
786
+ // because the expensive part (WASM re-parse of the missing set) is
787
+ // gated below.
767
788
  const removedCount = result.removedCount ?? 0;
768
789
  const changedCount = result.changedCount ?? 0;
769
- if (result.isFullBuild || removedCount > 0 || changedCount > 0) {
770
- await backfillNativeDroppedFiles(ctx);
790
+ const gap = detectDroppedLanguageGap(ctx);
791
+ if (
792
+ result.isFullBuild ||
793
+ removedCount > 0 ||
794
+ changedCount > 0 ||
795
+ gap.missingAbs.length > 0 ||
796
+ gap.staleRel.length > 0
797
+ ) {
798
+ await backfillNativeDroppedFiles(ctx, gap);
771
799
  }
772
800
 
773
801
  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
774
802
  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
775
803
  }
776
804
 
805
+ /** Files the native orchestrator silently dropped — the working set for backfill. */
806
+ interface DroppedLanguageGap {
807
+ /** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
808
+ missingRel: string[];
809
+ /** Absolute paths, aligned by index with `missingRel`. */
810
+ missingAbs: string[];
811
+ /**
812
+ * Relative paths of WASM-only files present in DB but absent from disk (#1073).
813
+ * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
814
+ * backfill must purge them. Always disjoint from `missingRel`.
815
+ */
816
+ staleRel: string[];
817
+ }
818
+
777
819
  /**
778
- * Backfill files that the native orchestrator silently dropped during parse.
779
- * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
820
+ * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
821
+ * is pure and unit-testable independently of `getInstalledWasmExtensions` and
822
+ * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
823
+ */
824
+ export interface WasmOnlyStaleFilesInput {
825
+ /** Distinct `file` values from the `nodes` table. */
826
+ existingNodes: ReadonlySet<string>;
827
+ /** Distinct `file` values from the `file_hashes` table. */
828
+ existingHashes: ReadonlySet<string>;
829
+ /** Relative paths currently on disk (from `collectFilesUtil`). */
830
+ expected: ReadonlySet<string>;
831
+ /** Lowercased extensions whose WASM grammar is installed. */
832
+ installedExts: ReadonlySet<string>;
833
+ /** Extensions covered by the Rust addon — Rust owns deletion for these. */
834
+ nativeSupported: ReadonlySet<string>;
835
+ }
836
+
837
+ /**
838
+ * Compute the WASM-only files present in the DB but missing from disk (#1073).
839
+ *
840
+ * Returns relative paths that:
841
+ * - appear in `existingNodes` or `existingHashes` (in DB),
842
+ * - are absent from `expected` (not on disk),
843
+ * - have an extension installed for WASM, AND
844
+ * - have an extension NOT covered by `nativeSupported` — Rust's
845
+ * `purge_changed_files` handles deletion for natively-supported extensions
846
+ * via its own `detect_removed_files`, so the caller must not double-purge.
847
+ *
848
+ * Extensions are lowercased before lookup to match the registry and Rust's
849
+ * `LanguageKind::from_extension` (which normalises case for the languages
850
+ * where both cases are conventional, e.g. R's `.r` / `.R`).
851
+ *
852
+ * DB paths are forced to forward slashes before comparison with `expected`
853
+ * (which is always normalised). The on-disk invariant is that DB rows are
854
+ * written with forward slashes, but a stale row written by older code on
855
+ * Windows could carry back-slashes — normalising here makes the comparison
856
+ * platform-safe and prevents false-positive purges of live rows. We replace
857
+ * `\\` explicitly (rather than calling `normalizePath`, which only touches
858
+ * `path.sep`) so the defence works when running on POSIX against a DB that
859
+ * was migrated from Windows.
860
+ *
861
+ * Exported for unit testing.
780
862
  */
781
- async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
782
- // Compute the missing-file set FIRST, before any expensive DB handoff.
783
- // NativeDbProxy supports .prepare().all(), so the upfront query works
784
- // whether ctx.db is a proxy or a real better-sqlite3 connection. On
785
- // incremental no-op rebuilds nothing is missing, so we want to early-return
786
- // without paying the close-native / reopen-better-sqlite3 cost.
863
+ export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
864
+ const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
865
+ const stale: string[] = [];
866
+ const seen = new Set<string>();
867
+ const consider = (rawRel: string): void => {
868
+ const rel = rawRel.replace(/\\/g, '/');
869
+ if (expected.has(rel) || seen.has(rel)) return;
870
+ const ext = path.extname(rel).toLowerCase();
871
+ if (nativeSupported.has(ext)) return;
872
+ if (!installedExts.has(ext)) return;
873
+ seen.add(rel);
874
+ // Push the ORIGINAL raw path (not the normalised form) so the eventual
875
+ // `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
876
+ // matches the actual stored row. The dedup `seen` set keeps the
877
+ // normalised form so a file written once with `\` and once with `/`
878
+ // is still treated as one entry — but the value the SQL sees has to
879
+ // be byte-identical to what's on disk in the DB.
880
+ stale.push(rawRel);
881
+ };
882
+ for (const rel of existingNodes) consider(rel);
883
+ for (const rel of existingHashes) consider(rel);
884
+ return stale;
885
+ }
886
+
887
+ /**
888
+ * Group relative paths by their lowercased extension. Shape matches the bucket
889
+ * type that `formatDropExtensionSummary` consumes, so callers can render a
890
+ * log-friendly per-extension summary without going through `classifyNativeDrops`
891
+ * when the reason is already known (e.g. the stale-purge path where every path
892
+ * is guaranteed `unsupported-by-native`).
893
+ */
894
+ function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
895
+ const buckets = new Map<string, string[]>();
896
+ for (const rel of relPaths) {
897
+ const ext = path.extname(rel).toLowerCase();
898
+ let list = buckets.get(ext);
899
+ if (!list) {
900
+ list = [];
901
+ buckets.set(ext, list);
902
+ }
903
+ list.push(rel);
904
+ }
905
+ return buckets;
906
+ }
907
+
908
+ /**
909
+ * Detect files the native orchestrator silently dropped.
910
+ *
911
+ * Walks the filesystem and compares against `nodes` + `file_hashes`. A file
912
+ * is "missing" if it's absent from EITHER table — both must be present for
913
+ * the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
914
+ * legacy DBs where `nodes` was populated but `file_hashes` was not).
915
+ *
916
+ * Restricted to files with an installed WASM grammar; extensions in
917
+ * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
918
+ * installs) can't be parsed by either engine, so they're not a native
919
+ * regression — excluding them keeps the warn count in
920
+ * `backfillNativeDroppedFiles` meaningful.
921
+ *
922
+ * Also detects WASM-only files deleted from disk (#1073). Rust's
923
+ * `detect_removed_files` filter (#1070) skips files outside its supported
924
+ * extensions, so deletions of WASM-only languages don't reach the native
925
+ * purge path; the rest of the backfill only inserts rows, so without this
926
+ * step stale `nodes`/`file_hashes` rows would linger across incremental
927
+ * rebuilds until the next full rebuild.
928
+ *
929
+ * Cheap (no DB handoff, no parsing): used both to gate the backfill call
930
+ * and as its working set. NativeDbProxy supports `.prepare().all()`, so
931
+ * this works whether `ctx.db` is a proxy or a real better-sqlite3
932
+ * connection — letting us skip the close-native / reopen-better-sqlite3
933
+ * cost when there's nothing to backfill.
934
+ */
935
+ function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
787
936
  const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
788
937
  const expected = new Set(
789
938
  collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
@@ -794,12 +943,6 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
794
943
  .all() as Array<{ file: string }>;
795
944
  const existingNodes = new Set(existingNodeRows.map((r) => r.file));
796
945
 
797
- // Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
798
- // (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
799
- // (e.g. a DB written by old code where `nodes` was populated but
800
- // `file_hashes` was not). Treating "in nodes but not in file_hashes" as
801
- // missing closes the gap so the backfill repairs the file_hashes row even
802
- // when the node row already exists.
803
946
  let existingHashes = new Set<string>();
804
947
  try {
805
948
  const existingHashRows = ctx.db
@@ -810,27 +953,49 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
810
953
  // file_hashes table may not exist on legacy DBs; treat as fully missing
811
954
  // so the backfill writes rows on the upsert path below.
812
955
  debug(
813
- `backfillNativeDroppedFiles: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
956
+ `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
814
957
  );
815
958
  }
816
959
 
817
- // Restrict backfill to files with an installed WASM grammar. Extensions in
818
- // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
819
- // minimal installs) can't be parsed by either engine, so they're not a
820
- // native regression — excluding them keeps the warn count meaningful.
821
960
  const installedExts = getInstalledWasmExtensions();
822
961
  const missingRel: string[] = [];
823
962
  const missingAbs: string[] = [];
824
963
  for (const rel of expected) {
825
- // A file is "missing" if it's absent from EITHER nodes OR file_hashes.
826
- // Both must be present for fast-skip to work correctly.
827
964
  if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
828
965
  const ext = path.extname(rel).toLowerCase();
829
966
  if (!installedExts.has(ext)) continue;
830
967
  missingRel.push(rel);
831
968
  missingAbs.push(path.join(ctx.rootDir, rel));
832
969
  }
833
- if (missingAbs.length === 0) return;
970
+
971
+ const staleRel = computeWasmOnlyStaleFiles({
972
+ existingNodes,
973
+ existingHashes,
974
+ expected,
975
+ installedExts,
976
+ nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
977
+ });
978
+
979
+ return { missingRel, missingAbs, staleRel };
980
+ }
981
+
982
+ /**
983
+ * Backfill files that the native orchestrator silently dropped during parse.
984
+ * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
985
+ *
986
+ * Also purges stale rows for WASM-only files deleted from disk (#1073), which
987
+ * Rust's `detect_removed_files` filter (#1070) skips.
988
+ *
989
+ * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
990
+ * can use the same scan for both gating and the actual backfill — avoiding
991
+ * a redundant fs walk when the orchestrator's signals already triggered.
992
+ */
993
+ async function backfillNativeDroppedFiles(
994
+ ctx: PipelineContext,
995
+ gap: DroppedLanguageGap,
996
+ ): Promise<void> {
997
+ const { missingRel, missingAbs, staleRel } = gap;
998
+ if (missingAbs.length === 0 && staleRel.length === 0) return;
834
999
 
835
1000
  // Now that we know there's work to do, hand off to better-sqlite3 (needed
836
1001
  // for the INSERT path below).
@@ -840,6 +1005,26 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
840
1005
  ctx.nativeFirstProxy = false;
841
1006
  }
842
1007
 
1008
+ const dbConn = ctx.db as unknown as BetterSqlite3Database;
1009
+
1010
+ // Purge WASM-only files that were deleted from disk (#1073). Rust's
1011
+ // detect_removed_files skips them and the insert path below never visits
1012
+ // them, so without this their rows would persist across rebuilds until the
1013
+ // next full rebuild reset the DB.
1014
+ if (staleRel.length > 0) {
1015
+ // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
1016
+ // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
1017
+ // always bucket 100% into `unsupported-by-native`. Build the extension
1018
+ // summary directly to avoid a redundant classification pass.
1019
+ const staleByExt = groupByExtension(staleRel);
1020
+ info(
1021
+ `Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`,
1022
+ );
1023
+ purgeFilesData(dbConn, staleRel);
1024
+ }
1025
+
1026
+ if (missingAbs.length === 0) return;
1027
+
843
1028
  // Classify drops so users see per-extension reasons instead of just a count
844
1029
  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
845
1030
  // extractor); `native-extractor-failure` indicates a real native bug since
@@ -888,7 +1073,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
888
1073
  exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
889
1074
  }
890
1075
  }
891
- const db = ctx.db as unknown as BetterSqlite3Database;
1076
+ const db = dbConn;
892
1077
  batchInsertNodes(db, rows);
893
1078
 
894
1079
  // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
@@ -770,9 +770,11 @@ function reconnectReverseDepEdges(ctx: PipelineContext): void {
770
770
  * their import targets. Falls back to loading ALL nodes for full builds or
771
771
  * larger incremental changes.
772
772
  */
773
+ const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
774
+
773
775
  function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } {
774
776
  const { db, fileSymbols, isFullBuild, batchResolved } = ctx;
775
- const nodeKindFilter = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
777
+ const nodeKindFilter = NODE_KIND_FILTER_SQL;
776
778
 
777
779
  // Gate: only scope for small incremental on large codebases
778
780
  if (!isFullBuild && fileSymbols.size <= ctx.config.build.smallFilesThreshold) {
@@ -816,8 +818,13 @@ function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolea
816
818
  function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void {
817
819
  if (!scopedLoad) return;
818
820
  const { db } = ctx;
821
+ // Match the upfront kind filter exactly. Using `kind != 'file'` here lets
822
+ // parameters, properties, and other non-definition kinds leak into call
823
+ // resolution, producing bogus call edges like `parser.ts → <a parameter
824
+ // with the same name>` (#1174 follow-up). Calls only ever target the
825
+ // definition kinds, so the fallback's filter must agree with `loadNodes`.
819
826
  const fallbackStmt = db.prepare(
820
- `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND kind != 'file'`,
827
+ `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND ${NODE_KIND_FILTER_SQL}`,
821
828
  );
822
829
  const originalGet = ctx.nodesByName.get.bind(ctx.nodesByName);
823
830
  ctx.nodesByName.get = (name: string) => {
@@ -33,15 +33,23 @@ function buildReexportMap(ctx: PipelineContext): void {
33
33
  }
34
34
 
35
35
  /**
36
- * Find barrel files related to changed files for scoped re-parsing.
37
- * For small incremental builds (<=smallFilesThreshold files), only barrels that re-export from
38
- * or are imported by the changed files. For larger changes, all barrels.
36
+ * Find barrel files related to `fromRelPaths` for scoped re-parsing.
37
+ * For small frontiers (<=smallFilesThreshold files), only barrels that re-export from
38
+ * or are imported by `fromRelPaths`. For larger frontiers, all barrels.
39
+ *
40
+ * `firstPass` gates the reexport-from DB scan: re-parsed barrels haven't
41
+ * changed content, so subsequent passes can't surface new reexport-from
42
+ * candidates and only need to follow imports of newly-merged barrels
43
+ * (mirrors the Rust orchestrator's seed-only `collect_reexport_from_barrels`).
39
44
  */
40
- function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> {
45
+ function findBarrelCandidates(
46
+ ctx: PipelineContext,
47
+ fromRelPaths: readonly string[],
48
+ firstPass: boolean,
49
+ ): Array<{ file: string }> {
41
50
  const { db, fileSymbols, rootDir, aliases } = ctx;
42
- const changedRelPaths = new Set<string>(fileSymbols.keys());
43
51
 
44
- if (changedRelPaths.size <= ctx.config.build.smallFilesThreshold) {
52
+ if (fromRelPaths.length <= ctx.config.build.smallFilesThreshold) {
45
53
  const allBarrelFiles = new Set(
46
54
  (
47
55
  db
@@ -56,9 +64,9 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> {
56
64
 
57
65
  const barrels = new Set<string>();
58
66
 
59
- // Find barrels imported by changed files using parsed import data
67
+ // Find barrels imported by `fromRelPaths` using parsed import data
60
68
  // (can't query DB edges -- they were purged for the changed files).
61
- for (const relPath of changedRelPaths) {
69
+ for (const relPath of fromRelPaths) {
62
70
  const symbols = fileSymbols.get(relPath);
63
71
  if (!symbols) continue;
64
72
  for (const imp of symbols.imports) {
@@ -71,16 +79,17 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> {
71
79
  }
72
80
  }
73
81
 
74
- // Also find barrels that re-export from the changed files
75
- const reexportSourceStmt = db.prepare(
76
- `SELECT DISTINCT n1.file FROM edges e
77
- JOIN nodes n1 ON e.source_id = n1.id
78
- JOIN nodes n2 ON e.target_id = n2.id
79
- WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?`,
80
- );
81
- for (const relPath of changedRelPaths) {
82
- for (const row of reexportSourceStmt.all(relPath) as Array<{ file: string }>) {
83
- barrels.add(row.file);
82
+ if (firstPass) {
83
+ const reexportSourceStmt = db.prepare(
84
+ `SELECT DISTINCT n1.file FROM edges e
85
+ JOIN nodes n1 ON e.source_id = n1.id
86
+ JOIN nodes n2 ON e.target_id = n2.id
87
+ WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?`,
88
+ );
89
+ for (const relPath of fromRelPaths) {
90
+ for (const row of reexportSourceStmt.all(relPath) as Array<{ file: string }>) {
91
+ barrels.add(row.file);
92
+ }
84
93
  }
85
94
  }
86
95
  return [...barrels].map((file) => ({ file }));
@@ -95,11 +104,22 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> {
95
104
  .all() as Array<{ file: string }>;
96
105
  }
97
106
 
98
- /** Re-parse barrel files and update fileSymbols/reexportMap with fresh data. */
107
+ /**
108
+ * Re-parse barrel files and update fileSymbols/reexportMap with fresh data.
109
+ * Returns the relative paths of newly-merged files so the caller can scan
110
+ * them for the next level of barrel candidates.
111
+ *
112
+ * A re-parsed file is marked `barrel-only` only when it really is one (the
113
+ * `isBarrelFile` check — reexports >= ownDefs). The previous unconditional
114
+ * `.add(relPath)` caused hybrid barrels with many local defs (e.g. a file
115
+ * with one `export type ... from` and dozens of internal functions) to drop
116
+ * all their non-reexport imports in build-edges, since the barrel-only branch
117
+ * skips them (#1174).
118
+ */
99
119
  async function reparseBarrelFiles(
100
120
  ctx: PipelineContext,
101
121
  barrelCandidates: Array<{ file: string }>,
102
- ): Promise<void> {
122
+ ): Promise<string[]> {
103
123
  const { db, fileSymbols, rootDir, engineOpts } = ctx;
104
124
 
105
125
  const barrelPaths: string[] = [];
@@ -109,18 +129,27 @@ async function reparseBarrelFiles(
109
129
  }
110
130
  }
111
131
 
112
- if (barrelPaths.length === 0) return;
132
+ if (barrelPaths.length === 0) return [];
113
133
 
134
+ // Preserve `contains` and `parameter_of` — those are emitted by insertNodes,
135
+ // which only runs on the original (changed + reverse-dep) fileSymbols. Barrel
136
+ // candidates are merged here *after* insertNodes, so wiping those kinds
137
+ // would permanently drop them (mirrors the Rust orchestrator's Stage 6b
138
+ // delete in build_pipeline.rs).
114
139
  const deleteOutgoingEdges = db.prepare(
115
- 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
140
+ `DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)
141
+ AND kind NOT IN ('contains', 'parameter_of')`,
116
142
  );
117
143
 
144
+ const added: string[] = [];
118
145
  try {
119
146
  const barrelSymbols = await parseFilesAuto(barrelPaths, rootDir, engineOpts);
120
147
  for (const [relPath, fileSym] of barrelSymbols) {
121
148
  deleteOutgoingEdges.run(relPath);
122
149
  fileSymbols.set(relPath, fileSym);
123
- ctx.barrelOnlyFiles.add(relPath);
150
+ if (isBarrelFile(ctx, relPath)) {
151
+ ctx.barrelOnlyFiles.add(relPath);
152
+ }
124
153
  const reexports = fileSym.imports.filter((imp: Import) => imp.reexport);
125
154
  if (reexports.length > 0) {
126
155
  ctx.reexportMap.set(
@@ -132,10 +161,12 @@ async function reparseBarrelFiles(
132
161
  })),
133
162
  );
134
163
  }
164
+ added.push(relPath);
135
165
  }
136
166
  } catch (e: unknown) {
137
167
  debug(`Barrel re-parse failed (non-fatal): ${(e as Error).message}`);
138
168
  }
169
+ return added;
139
170
  }
140
171
 
141
172
  export async function resolveImports(ctx: PipelineContext): Promise<void> {
@@ -156,8 +187,31 @@ export async function resolveImports(ctx: PipelineContext): Promise<void> {
156
187
 
157
188
  ctx.barrelOnlyFiles = new Set<string>();
158
189
  if (!isFullBuild) {
159
- const barrelCandidates = findBarrelCandidates(ctx);
160
- await reparseBarrelFiles(ctx, barrelCandidates);
190
+ // Iteratively discover and re-parse barrel chains. A barrel that imports
191
+ // another barrel (e.g. `parser.ts → extractors/index.ts → extractors/<lang>.ts`)
192
+ // needs both loaded so build-edges can emit the barrel-through edges from
193
+ // the first barrel to the leaf targets. Without iteration, only the first
194
+ // level of barrels gets merged into fileSymbols; the deeper chain has no
195
+ // entry in reexportMap and the resolver silently drops the affected edges
196
+ // on every incremental rebuild (#1174).
197
+ //
198
+ // Convergence is guaranteed because fileSymbols grows monotonically and
199
+ // is bounded by the set of barrel files in the project — each iteration
200
+ // either adds a previously-unseen barrel or terminates.
201
+ //
202
+ // Subsequent passes only walk newly-merged barrels' imports (`frontier`
203
+ // = paths returned by reparseBarrelFiles), matching the Rust
204
+ // orchestrator's `&newly_added` slice. Without this, every pass would
205
+ // re-query the DB for every key in `fileSymbols`.
206
+ let frontier: readonly string[] = [...fileSymbols.keys()];
207
+ let firstPass = true;
208
+ while (frontier.length > 0) {
209
+ const barrelCandidates = findBarrelCandidates(ctx, frontier, firstPass);
210
+ const added = await reparseBarrelFiles(ctx, barrelCandidates);
211
+ if (added.length === 0) break;
212
+ frontier = added;
213
+ firstPass = false;
214
+ }
161
215
  }
162
216
  }
163
217