@optave/codegraph 3.9.6 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/README.md +26 -12
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +1 -1
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/index.js +77 -0
  7. package/dist/ast-analysis/rules/index.js.map +1 -1
  8. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  9. package/dist/ast-analysis/visitors/ast-store-visitor.js +50 -8
  10. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  11. package/dist/cli/commands/audit.js +1 -1
  12. package/dist/cli/commands/audit.js.map +1 -1
  13. package/dist/cli/commands/build.d.ts.map +1 -1
  14. package/dist/cli/commands/build.js +2 -0
  15. package/dist/cli/commands/build.js.map +1 -1
  16. package/dist/cli/commands/check.js +1 -1
  17. package/dist/cli/commands/check.js.map +1 -1
  18. package/dist/cli/commands/children.js +1 -1
  19. package/dist/cli/commands/children.js.map +1 -1
  20. package/dist/cli/commands/diff-impact.js +1 -1
  21. package/dist/cli/commands/diff-impact.js.map +1 -1
  22. package/dist/cli/commands/roles.js +1 -1
  23. package/dist/cli/commands/roles.js.map +1 -1
  24. package/dist/cli/commands/structure.js +1 -1
  25. package/dist/cli/commands/structure.js.map +1 -1
  26. package/dist/cli/shared/options.js +1 -1
  27. package/dist/cli/shared/options.js.map +1 -1
  28. package/dist/db/connection.d.ts.map +1 -1
  29. package/dist/db/connection.js +8 -0
  30. package/dist/db/connection.js.map +1 -1
  31. package/dist/domain/graph/builder/context.d.ts +10 -0
  32. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  33. package/dist/domain/graph/builder/context.js +10 -0
  34. package/dist/domain/graph/builder/context.js.map +1 -1
  35. package/dist/domain/graph/builder/helpers.d.ts +7 -2
  36. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  37. package/dist/domain/graph/builder/helpers.js +7 -2
  38. package/dist/domain/graph/builder/helpers.js.map +1 -1
  39. package/dist/domain/graph/builder/incremental.d.ts +0 -6
  40. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  41. package/dist/domain/graph/builder/incremental.js +6 -23
  42. package/dist/domain/graph/builder/incremental.js.map +1 -1
  43. package/dist/domain/graph/builder/pipeline.d.ts +44 -0
  44. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  45. package/dist/domain/graph/builder/pipeline.js +348 -42
  46. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  47. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  48. package/dist/domain/graph/builder/stages/build-edges.js +8 -2
  49. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  50. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  51. package/dist/domain/graph/builder/stages/collect-files.js +8 -0
  52. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  53. package/dist/domain/graph/builder/stages/detect-changes.d.ts +24 -0
  54. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  55. package/dist/domain/graph/builder/stages/detect-changes.js +117 -3
  56. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  57. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  58. package/dist/domain/graph/builder/stages/finalize.js +9 -6
  59. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  60. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +30 -0
  61. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  62. package/dist/domain/graph/builder/stages/insert-nodes.js +36 -13
  63. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  64. package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
  65. package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
  66. package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
  67. package/dist/domain/graph/watcher.d.ts.map +1 -1
  68. package/dist/domain/graph/watcher.js +23 -18
  69. package/dist/domain/graph/watcher.js.map +1 -1
  70. package/dist/domain/parser.d.ts +14 -1
  71. package/dist/domain/parser.d.ts.map +1 -1
  72. package/dist/domain/parser.js +104 -11
  73. package/dist/domain/parser.js.map +1 -1
  74. package/dist/domain/search/models.d.ts +16 -0
  75. package/dist/domain/search/models.d.ts.map +1 -1
  76. package/dist/domain/search/models.js +36 -2
  77. package/dist/domain/search/models.js.map +1 -1
  78. package/dist/domain/wasm-worker-entry.js +20 -13
  79. package/dist/domain/wasm-worker-entry.js.map +1 -1
  80. package/dist/extractors/c.js +25 -6
  81. package/dist/extractors/c.js.map +1 -1
  82. package/dist/extractors/cpp.js +47 -6
  83. package/dist/extractors/cpp.js.map +1 -1
  84. package/dist/extractors/cuda.js +90 -14
  85. package/dist/extractors/cuda.js.map +1 -1
  86. package/dist/extractors/elixir.js +83 -3
  87. package/dist/extractors/elixir.js.map +1 -1
  88. package/dist/extractors/erlang.js +56 -20
  89. package/dist/extractors/erlang.js.map +1 -1
  90. package/dist/extractors/fsharp.d.ts +7 -0
  91. package/dist/extractors/fsharp.d.ts.map +1 -1
  92. package/dist/extractors/fsharp.js +94 -0
  93. package/dist/extractors/fsharp.js.map +1 -1
  94. package/dist/extractors/gleam.js +6 -2
  95. package/dist/extractors/gleam.js.map +1 -1
  96. package/dist/extractors/groovy.js +41 -1
  97. package/dist/extractors/groovy.js.map +1 -1
  98. package/dist/extractors/haskell.js +48 -4
  99. package/dist/extractors/haskell.js.map +1 -1
  100. package/dist/extractors/julia.js +172 -41
  101. package/dist/extractors/julia.js.map +1 -1
  102. package/dist/extractors/kotlin.js +4 -0
  103. package/dist/extractors/kotlin.js.map +1 -1
  104. package/dist/extractors/objc.js +184 -47
  105. package/dist/extractors/objc.js.map +1 -1
  106. package/dist/extractors/python.js +7 -4
  107. package/dist/extractors/python.js.map +1 -1
  108. package/dist/extractors/r.js +93 -52
  109. package/dist/extractors/r.js.map +1 -1
  110. package/dist/extractors/scala.d.ts.map +1 -1
  111. package/dist/extractors/scala.js +18 -32
  112. package/dist/extractors/scala.js.map +1 -1
  113. package/dist/extractors/solidity.js +18 -9
  114. package/dist/extractors/solidity.js.map +1 -1
  115. package/dist/extractors/verilog.js +80 -15
  116. package/dist/extractors/verilog.js.map +1 -1
  117. package/dist/infrastructure/config.d.ts +1 -0
  118. package/dist/infrastructure/config.d.ts.map +1 -1
  119. package/dist/infrastructure/config.js +1 -0
  120. package/dist/infrastructure/config.js.map +1 -1
  121. package/dist/mcp/server.d.ts.map +1 -1
  122. package/dist/mcp/server.js +14 -8
  123. package/dist/mcp/server.js.map +1 -1
  124. package/dist/mcp/tool-registry.d.ts +1 -1
  125. package/dist/mcp/tool-registry.d.ts.map +1 -1
  126. package/dist/mcp/tool-registry.js +23 -5
  127. package/dist/mcp/tool-registry.js.map +1 -1
  128. package/dist/mcp/tools/semantic-search.d.ts +1 -0
  129. package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
  130. package/dist/mcp/tools/semantic-search.js +1 -0
  131. package/dist/mcp/tools/semantic-search.js.map +1 -1
  132. package/dist/types.d.ts +16 -1
  133. package/dist/types.d.ts.map +1 -1
  134. package/grammars/tree-sitter-erlang.wasm +0 -0
  135. package/grammars/tree-sitter-fsharp.wasm +0 -0
  136. package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
  137. package/grammars/tree-sitter-gleam.wasm +0 -0
  138. package/package.json +11 -10
  139. package/src/ast-analysis/engine.ts +3 -1
  140. package/src/ast-analysis/rules/index.ts +87 -0
  141. package/src/ast-analysis/visitors/ast-store-visitor.ts +45 -9
  142. package/src/cli/commands/audit.ts +1 -1
  143. package/src/cli/commands/build.ts +2 -0
  144. package/src/cli/commands/check.ts +1 -1
  145. package/src/cli/commands/children.ts +1 -1
  146. package/src/cli/commands/diff-impact.ts +1 -1
  147. package/src/cli/commands/roles.ts +1 -1
  148. package/src/cli/commands/structure.ts +1 -1
  149. package/src/cli/shared/options.ts +1 -1
  150. package/src/db/connection.ts +8 -0
  151. package/src/domain/graph/builder/context.ts +10 -0
  152. package/src/domain/graph/builder/helpers.ts +8 -3
  153. package/src/domain/graph/builder/incremental.ts +6 -41
  154. package/src/domain/graph/builder/pipeline.ts +404 -41
  155. package/src/domain/graph/builder/stages/build-edges.ts +9 -2
  156. package/src/domain/graph/builder/stages/collect-files.ts +9 -0
  157. package/src/domain/graph/builder/stages/detect-changes.ts +130 -4
  158. package/src/domain/graph/builder/stages/finalize.ts +9 -6
  159. package/src/domain/graph/builder/stages/insert-nodes.ts +38 -14
  160. package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
  161. package/src/domain/graph/watcher.ts +21 -23
  162. package/src/domain/parser.ts +110 -10
  163. package/src/domain/search/models.ts +37 -2
  164. package/src/domain/wasm-worker-entry.ts +20 -13
  165. package/src/extractors/c.ts +27 -8
  166. package/src/extractors/cpp.ts +50 -8
  167. package/src/extractors/cuda.ts +90 -16
  168. package/src/extractors/elixir.ts +75 -3
  169. package/src/extractors/erlang.ts +63 -20
  170. package/src/extractors/fsharp.ts +104 -0
  171. package/src/extractors/gleam.ts +7 -2
  172. package/src/extractors/groovy.ts +45 -1
  173. package/src/extractors/haskell.ts +45 -4
  174. package/src/extractors/julia.ts +164 -43
  175. package/src/extractors/kotlin.ts +4 -0
  176. package/src/extractors/objc.ts +171 -47
  177. package/src/extractors/python.ts +5 -3
  178. package/src/extractors/r.ts +88 -48
  179. package/src/extractors/scala.ts +24 -36
  180. package/src/extractors/solidity.ts +17 -8
  181. package/src/extractors/verilog.ts +83 -15
  182. package/src/infrastructure/config.ts +1 -0
  183. package/src/mcp/server.ts +16 -9
  184. package/src/mcp/tool-registry.ts +28 -5
  185. package/src/mcp/tools/semantic-search.ts +2 -0
  186. package/src/types.ts +16 -0
@@ -9,11 +9,13 @@ import path from 'node:path';
9
9
  import { performance } from 'node:perf_hooks';
10
10
  import {
11
11
  acquireAdvisoryLock,
12
+ closeDb,
12
13
  closeDbPair,
13
14
  getBuildMeta,
14
15
  initSchema,
15
16
  MIGRATIONS,
16
17
  openDb,
18
+ purgeFilesData,
17
19
  releaseAdvisoryLock,
18
20
  setBuildMeta,
19
21
  } from '../../../db/index.js';
@@ -37,17 +39,26 @@ import {
37
39
  formatDropExtensionSummary,
38
40
  getActiveEngine,
39
41
  getInstalledWasmExtensions,
40
- parseFilesAuto,
42
+ NATIVE_SUPPORTED_EXTENSIONS,
43
+ parseFilesWasmForBackfill,
41
44
  } from '../../parser.js';
45
+ import { writeJournalHeader } from '../journal.js';
42
46
  import { setWorkspaces } from '../resolve.js';
43
47
  import { PipelineContext } from './context.js';
44
- import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
48
+ import {
49
+ batchInsertNodes,
50
+ collectFiles as collectFilesUtil,
51
+ fileHash,
52
+ fileStat,
53
+ loadPathAliases,
54
+ readFileSafe,
55
+ } from './helpers.js';
45
56
  import { NativeDbProxy } from './native-db-proxy.js';
46
57
  import { buildEdges } from './stages/build-edges.js';
47
58
  import { buildStructure } from './stages/build-structure.js';
48
59
  // Pipeline stages
49
60
  import { collectFiles } from './stages/collect-files.js';
50
- import { detectChanges } from './stages/detect-changes.js';
61
+ import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
51
62
  import { finalize } from './stages/finalize.js';
52
63
  import { insertNodes } from './stages/insert-nodes.js';
53
64
  import { parseFiles } from './stages/parse-files.js';
@@ -67,9 +78,14 @@ function initializeEngine(ctx: PipelineContext): void {
67
78
  suspendJsDb: undefined,
68
79
  resumeJsDb: undefined,
69
80
  };
70
- const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts);
81
+ const {
82
+ name: engineName,
83
+ version: engineVersion,
84
+ binaryVersion: nativeBinaryVersion,
85
+ } = getActiveEngine(ctx.engineOpts);
71
86
  ctx.engineName = engineName as 'native' | 'wasm';
72
87
  ctx.engineVersion = engineVersion;
88
+ ctx.nativeBinaryVersion = nativeBinaryVersion;
73
89
  info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
74
90
  }
75
91
 
@@ -96,13 +112,15 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
96
112
  );
97
113
  ctx.forceFullRebuild = true;
98
114
  }
99
- // When the native engine is active, the Rust addon's version (ctx.engineVersion)
100
- // is written into codegraph_version by setBuildMeta after a native orchestrator
101
- // build. The check must compare against the same version, otherwise JS and Rust
102
- // fight over which version to record causing every incremental build to be
103
- // promoted to a full rebuild when npm and crate versions diverge.
115
+ // When the native engine is active, the Rust orchestrator writes
116
+ // build_meta.codegraph_version = CARGO_PKG_VERSION (the binary's own value).
117
+ // Compare against the same value here so a CI hot-swap that leaves the
118
+ // platform package.json behind doesn't trigger a perpetual full-rebuild
119
+ // loop on every incremental (#1066).
104
120
  const effectiveVersion =
105
- ctx.engineName === 'native' && ctx.engineVersion ? ctx.engineVersion : CODEGRAPH_VERSION;
121
+ ctx.engineName === 'native' && ctx.nativeBinaryVersion
122
+ ? ctx.nativeBinaryVersion
123
+ : CODEGRAPH_VERSION;
106
124
  const prevVersion = meta('codegraph_version');
107
125
  if (prevVersion && prevVersion !== effectiveVersion) {
108
126
  info(
@@ -149,7 +167,9 @@ function loadAliases(ctx: PipelineContext): void {
149
167
 
150
168
  function setupPipeline(ctx: PipelineContext): void {
151
169
  ctx.rootDir = path.resolve(ctx.rootDir);
152
- ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db');
170
+ ctx.dbPath = ctx.opts.dbPath
171
+ ? path.resolve(ctx.opts.dbPath)
172
+ : path.join(ctx.rootDir, '.codegraph', 'graph.db');
153
173
 
154
174
  // Detect whether native engine is available.
155
175
  const enginePref = ctx.opts.engine || 'auto';
@@ -167,6 +187,16 @@ function setupPipeline(ctx: PipelineContext): void {
167
187
  initSchema(ctx.db);
168
188
 
169
189
  ctx.config = loadConfig(ctx.rootDir);
190
+ // Merge caller-supplied excludes on top of the file-config excludes so
191
+ // programmatic callers (e.g. benchmark scripts) can extend exclusion
192
+ // without mutating .codegraphrc.json. Native orchestrator picks this up
193
+ // automatically — it reads exclude off the serialized ctx.config below.
194
+ if (ctx.opts.exclude?.length) {
195
+ ctx.config = {
196
+ ...ctx.config,
197
+ exclude: [...(ctx.config.exclude ?? []), ...ctx.opts.exclude],
198
+ };
199
+ }
170
200
  ctx.incremental =
171
201
  ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false;
172
202
 
@@ -642,6 +672,15 @@ async function tryNativeOrchestrator(
642
672
 
643
673
  if (result.earlyExit) {
644
674
  info('No changes detected');
675
+ // Even on no-op rebuilds, dropped-language files added since the last
676
+ // full build are still missing from `nodes`/`file_hashes` (#1083), and
677
+ // WASM-only files deleted from disk leave stale rows behind (#1073).
678
+ // The orchestrator's file_collector skipped them, so its earlyExit
679
+ // doesn't imply DB consistency. Run the gap repair before returning.
680
+ const gap = detectDroppedLanguageGap(ctx);
681
+ if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
682
+ await backfillNativeDroppedFiles(ctx, gap);
683
+ }
645
684
  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
646
685
  return 'early-exit';
647
686
  }
@@ -656,16 +695,24 @@ async function tryNativeOrchestrator(
656
695
  const p = result.phases;
657
696
 
658
697
  // Sync build_meta so JS-side version/engine checks work on next build.
659
- // Use the Rust addon version as codegraph_version when the native
660
- // orchestrator performed the build the Rust side's check_version_mismatch
661
- // compares this value against CARGO_PKG_VERSION. Writing the JS
662
- // CODEGRAPH_VERSION here would create a permanent mismatch whenever the
663
- // npm package version diverges from the Rust crate version, forcing every
664
- // subsequent native build to be a full rebuild (no incremental).
698
+ // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
699
+ // platform package.json version (ctx.engineVersion). The Rust side's
700
+ // check_version_mismatch compares against CARGO_PKG_VERSION; writing
701
+ // the package.json value would create a permanent mismatch whenever
702
+ // the binary and platform package.json diverge — e.g., CI hot-swap
703
+ // via ci-install-native.mjs (#1066) — forcing every subsequent build
704
+ // to be a full rebuild.
705
+ //
706
+ // When the native addon doesn't expose engineVersion() (older addon),
707
+ // fall back to CODEGRAPH_VERSION — same fallback used by both
708
+ // checkEngineSchemaMismatch (read path) and persistBuildMetadata
709
+ // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
710
+ // here would re-introduce the asymmetry this PR fixes for that case.
711
+ const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
665
712
  setBuildMeta(ctx.db, {
666
713
  engine: ctx.engineName,
667
- engine_version: ctx.engineVersion || '',
668
- codegraph_version: ctx.engineVersion || CODEGRAPH_VERSION,
714
+ engine_version: nativeVersionForMeta,
715
+ codegraph_version: nativeVersionForMeta,
669
716
  schema_version: String(ctx.schemaVersion),
670
717
  built_at: new Date().toISOString(),
671
718
  });
@@ -729,53 +776,253 @@ async function tryNativeOrchestrator(
729
776
  // stale native binaries). WASM handles those — backfill via WASM so both
730
777
  // engines process the same file set (#967).
731
778
  //
732
- // Only runs on full builds: incremental builds only touch changed files,
733
- // which are parsed through parseFilesAuto (which has its own per-file
734
- // backfill), so a full filesystem scan here would be wasted work.
735
- if (result.isFullBuild) {
736
- await backfillNativeDroppedFiles(ctx);
779
+ // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
780
+ // both gating and the backfill itself. On dirty incrementals/full builds
781
+ // the orchestrator signals trigger backfill, so the walk happens once
782
+ // (instead of redundantly inside backfill). On quiet incrementals we
783
+ // still pay the walk so we can detect brand-new files in dropped-language
784
+ // extensions — a gap that the orchestrator's `detect_removed_files`
785
+ // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
786
+ // because the expensive part (WASM re-parse of the missing set) is
787
+ // gated below.
788
+ const removedCount = result.removedCount ?? 0;
789
+ const changedCount = result.changedCount ?? 0;
790
+ const gap = detectDroppedLanguageGap(ctx);
791
+ if (
792
+ result.isFullBuild ||
793
+ removedCount > 0 ||
794
+ changedCount > 0 ||
795
+ gap.missingAbs.length > 0 ||
796
+ gap.staleRel.length > 0
797
+ ) {
798
+ await backfillNativeDroppedFiles(ctx, gap);
737
799
  }
738
800
 
739
801
  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
740
802
  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
741
803
  }
742
804
 
805
/**
 * Files the native orchestrator silently dropped — the working set for backfill.
 *
 * Produced by `detectDroppedLanguageGap` and consumed by
 * `backfillNativeDroppedFiles`, so one scan serves both the gating decision
 * and the backfill itself.
 */
interface DroppedLanguageGap {
  /** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
  missingRel: string[];
  /** Absolute paths, aligned by index with `missingRel`. */
  missingAbs: string[];
  /**
   * Relative paths of WASM-only files present in DB but absent from disk (#1073).
   * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
   * backfill must purge them. Always disjoint from `missingRel`.
   */
  staleRel: string[];
}
818
+
743
819
  /**
744
- * Backfill files that the native orchestrator silently dropped during parse.
745
- * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
820
+ * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
821
+ * is pure and unit-testable independently of `getInstalledWasmExtensions` and
822
+ * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
746
823
  */
747
- async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
748
- // Needs a real better-sqlite3 connection for INSERT.
749
- if (ctx.nativeFirstProxy) {
750
- closeNativeDb(ctx, 'pre-parity-backfill');
751
- ctx.db = openDb(ctx.dbPath);
752
- ctx.nativeFirstProxy = false;
824
export interface WasmOnlyStaleFilesInput {
  /** Distinct `file` values from the `nodes` table. */
  existingNodes: ReadonlySet<string>;
  /** Distinct `file` values from the `file_hashes` table. */
  existingHashes: ReadonlySet<string>;
  /** Relative paths currently on disk (from `collectFilesUtil`). */
  expected: ReadonlySet<string>;
  /** Lowercased extensions whose WASM grammar is installed. */
  installedExts: ReadonlySet<string>;
  /** Extensions covered by the Rust addon — Rust owns deletion for these. */
  nativeSupported: ReadonlySet<string>;
}

/**
 * Compute the WASM-only files present in the DB but missing from disk (#1073).
 *
 * Returns relative paths that:
 *   - appear in `existingNodes` or `existingHashes` (in DB),
 *   - are absent from `expected` (not on disk),
 *   - have an extension installed for WASM, AND
 *   - have an extension NOT covered by `nativeSupported` — Rust's
 *     `purge_changed_files` handles deletion for natively-supported extensions
 *     via its own `detect_removed_files`, so the caller must not double-purge.
 *
 * Extensions are lowercased before lookup to match the registry and Rust's
 * `LanguageKind::from_extension` (which normalises case for the languages
 * where both cases are conventional, e.g. R's `.r` / `.R`).
 *
 * DB paths are forced to forward slashes before comparison with `expected`
 * (which is always normalised). The on-disk invariant is that DB rows are
 * written with forward slashes, but a stale row written by older code on
 * Windows could carry back-slashes — normalising here makes the comparison
 * platform-safe and prevents false-positive purges of live rows. Back-slashes
 * are replaced explicitly (rather than via `normalizePath`, which only touches
 * `path.sep`) so the defence works when running on POSIX against a DB that
 * was migrated from Windows.
 *
 * Results are ordered by first appearance: `existingNodes` in iteration order,
 * then any additional paths from `existingHashes`. When the same file is
 * stored under two spellings (`\` and `/`), only the first spelling seen is
 * returned.
 *
 * Exported for unit testing.
 *
 * @param input - Pre-fetched DB/disk/extension sets; none of them is mutated.
 * @returns Stored (raw, un-normalised) paths of the rows to purge.
 */
export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
  const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
  const stale: string[] = [];
  const seen = new Set<string>();
  const consider = (rawRel: string): void => {
    const rel = rawRel.replace(/\\/g, '/');
    if (expected.has(rel) || seen.has(rel)) return;
    const ext = path.extname(rel).toLowerCase();
    if (nativeSupported.has(ext)) return;
    if (!installedExts.has(ext)) return;
    seen.add(rel);
    // Push the ORIGINAL raw path (not the normalised form) so the eventual
    // `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
    // matches the actual stored row. The dedup `seen` set keeps the
    // normalised form so a file written once with `\` and once with `/`
    // is still treated as one entry — but the value the SQL sees has to
    // be byte-identical to what is stored in the DB.
    stale.push(rawRel);
  };
  for (const rel of existingNodes) consider(rel);
  for (const rel of existingHashes) consider(rel);
  return stale;
}
886
+
887
/**
 * Group relative paths by their lowercased extension. Shape matches the bucket
 * type that `formatDropExtensionSummary` consumes, so callers can render a
 * log-friendly per-extension summary without going through `classifyNativeDrops`
 * when the reason is already known (e.g. the stale-purge path where every path
 * is guaranteed `unsupported-by-native`).
 *
 * @param relPaths - Paths to bucket; each path is stored unchanged.
 * @returns Map keyed by lowercased extension (including the leading dot, or
 *   `''` when `path.extname` finds none), with keys and bucket contents in
 *   first-seen order.
 */
function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
  const buckets = new Map<string, string[]>();
  for (const rel of relPaths) {
    const ext = path.extname(rel).toLowerCase();
    let list = buckets.get(ext);
    if (!list) {
      list = [];
      buckets.set(ext, list);
    }
    list.push(rel);
  }
  return buckets;
}
754
907
 
908
+ /**
909
+ * Detect files the native orchestrator silently dropped.
910
+ *
911
+ * Walks the filesystem and compares against `nodes` + `file_hashes`. A file
912
+ * is "missing" if it's absent from EITHER table — both must be present for
913
+ * the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
914
+ * legacy DBs where `nodes` was populated but `file_hashes` was not).
915
+ *
916
+ * Restricted to files with an installed WASM grammar; extensions in
917
+ * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
918
+ * installs) can't be parsed by either engine, so they're not a native
919
+ * regression — excluding them keeps the warn count in
920
+ * `backfillNativeDroppedFiles` meaningful.
921
+ *
922
+ * Also detects WASM-only files deleted from disk (#1073). Rust's
923
+ * `detect_removed_files` filter (#1070) skips files outside its supported
924
+ * extensions, so deletions of WASM-only languages don't reach the native
925
+ * purge path; the rest of the backfill only inserts rows, so without this
926
+ * step stale `nodes`/`file_hashes` rows would linger across incremental
927
+ * rebuilds until the next full rebuild.
928
+ *
929
+ * Cheap (no DB handoff, no parsing): used both to gate the backfill call
930
+ * and as its working set. NativeDbProxy supports `.prepare().all()`, so
931
+ * this works whether `ctx.db` is a proxy or a real better-sqlite3
932
+ * connection — letting us skip the close-native / reopen-better-sqlite3
933
+ * cost when there's nothing to backfill.
934
+ */
935
+ function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
755
936
  const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
756
937
  const expected = new Set(
757
938
  collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
758
939
  );
759
940
 
760
- const existingRows = ctx.db
941
+ const existingNodeRows = ctx.db
761
942
  .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
762
943
  .all() as Array<{ file: string }>;
763
- const existing = new Set(existingRows.map((r) => r.file));
944
+ const existingNodes = new Set(existingNodeRows.map((r) => r.file));
945
+
946
+ let existingHashes = new Set<string>();
947
+ try {
948
+ const existingHashRows = ctx.db
949
+ .prepare('SELECT DISTINCT file FROM file_hashes')
950
+ .all() as Array<{ file: string }>;
951
+ existingHashes = new Set(existingHashRows.map((r) => r.file));
952
+ } catch (e) {
953
+ // file_hashes table may not exist on legacy DBs; treat as fully missing
954
+ // so the backfill writes rows on the upsert path below.
955
+ debug(
956
+ `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
957
+ );
958
+ }
764
959
 
765
- // Restrict backfill to files with an installed WASM grammar. Extensions in
766
- // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
767
- // minimal installs) can't be parsed by either engine, so they're not a
768
- // native regression — excluding them keeps the warn count meaningful.
769
960
  const installedExts = getInstalledWasmExtensions();
770
961
  const missingRel: string[] = [];
771
962
  const missingAbs: string[] = [];
772
963
  for (const rel of expected) {
773
- if (existing.has(rel)) continue;
964
+ if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
774
965
  const ext = path.extname(rel).toLowerCase();
775
966
  if (!installedExts.has(ext)) continue;
776
967
  missingRel.push(rel);
777
968
  missingAbs.push(path.join(ctx.rootDir, rel));
778
969
  }
970
+
971
+ const staleRel = computeWasmOnlyStaleFiles({
972
+ existingNodes,
973
+ existingHashes,
974
+ expected,
975
+ installedExts,
976
+ nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
977
+ });
978
+
979
+ return { missingRel, missingAbs, staleRel };
980
+ }
981
+
982
+ /**
983
+ * Backfill files that the native orchestrator silently dropped during parse.
984
+ * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
985
+ *
986
+ * Also purges stale rows for WASM-only files deleted from disk (#1073), which
987
+ * Rust's `detect_removed_files` filter (#1070) skips.
988
+ *
989
+ * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
990
+ * can use the same scan for both gating and the actual backfill — avoiding
991
+ * a redundant fs walk when the orchestrator's signals already triggered.
992
+ */
993
+ async function backfillNativeDroppedFiles(
994
+ ctx: PipelineContext,
995
+ gap: DroppedLanguageGap,
996
+ ): Promise<void> {
997
+ const { missingRel, missingAbs, staleRel } = gap;
998
+ if (missingAbs.length === 0 && staleRel.length === 0) return;
999
+
1000
+ // Now that we know there's work to do, hand off to better-sqlite3 (needed
1001
+ // for the INSERT path below).
1002
+ if (ctx.nativeFirstProxy) {
1003
+ closeNativeDb(ctx, 'pre-parity-backfill');
1004
+ ctx.db = openDb(ctx.dbPath);
1005
+ ctx.nativeFirstProxy = false;
1006
+ }
1007
+
1008
+ const dbConn = ctx.db as unknown as BetterSqlite3Database;
1009
+
1010
+ // Purge WASM-only files that were deleted from disk (#1073). Rust's
1011
+ // detect_removed_files skips them and the insert path below never visits
1012
+ // them, so without this their rows would persist across rebuilds until the
1013
+ // next full rebuild reset the DB.
1014
+ if (staleRel.length > 0) {
1015
+ // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
1016
+ // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
1017
+ // always bucket 100% into `unsupported-by-native`. Build the extension
1018
+ // summary directly to avoid a redundant classification pass.
1019
+ const staleByExt = groupByExtension(staleRel);
1020
+ info(
1021
+ `Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`,
1022
+ );
1023
+ purgeFilesData(dbConn, staleRel);
1024
+ }
1025
+
779
1026
  if (missingAbs.length === 0) return;
780
1027
 
781
1028
  // Classify drops so users see per-extension reasons instead of just a count
@@ -793,7 +1040,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
793
1040
  `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
794
1041
  );
795
1042
  }
796
- const wasmResults = await parseFilesAuto(missingAbs, ctx.rootDir, { engine: 'wasm' });
1043
+ const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
797
1044
 
798
1045
  const rows: unknown[][] = [];
799
1046
  const exportKeys: unknown[][] = [];
@@ -826,7 +1073,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
826
1073
  exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
827
1074
  }
828
1075
  }
829
- const db = ctx.db as unknown as BetterSqlite3Database;
1076
+ const db = dbConn;
830
1077
  batchInsertNodes(db, rows);
831
1078
 
832
1079
  // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
@@ -853,6 +1100,68 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
853
1100
  updateStmt.run(...vals);
854
1101
  }
855
1102
  }
1103
+
1104
+ // Persist file_hashes rows for every backfilled file. The Rust orchestrator
1105
+ // only hashes files it parsed itself, so without this step files in
1106
+ // optional-language extensions (e.g. .clj when no Rust extractor exists)
1107
+ // would be missing from `file_hashes` — permanently breaking the JS-side
1108
+ // fast-skip pre-flight (#1054), which rejects on `collected file missing
1109
+ // from file_hashes` and forces every no-op rebuild back through the full
1110
+ // ~2s native pipeline (#1068).
1111
+ //
1112
+ // Iterates `missingRel` (every collected file the Rust orchestrator
1113
+ // dropped), not `wasmResults`, so files that produced zero symbols still
1114
+ // get a row.
1115
+ try {
1116
+ const upsertHash = db.prepare(
1117
+ 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
1118
+ );
1119
+ const writeHashes = db.transaction(() => {
1120
+ for (let i = 0; i < missingRel.length; i++) {
1121
+ const relPath = missingRel[i];
1122
+ const absPath = missingAbs[i];
1123
+ if (!relPath || !absPath) continue;
1124
+ let code: string | null;
1125
+ try {
1126
+ code = readFileSafe(absPath);
1127
+ } catch (e) {
1128
+ debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
1129
+ continue;
1130
+ }
1131
+ if (code === null) continue;
1132
+ const stat = fileStat(absPath);
1133
+ const mtime = stat ? stat.mtime : 0;
1134
+ const size = stat ? stat.size : 0;
1135
+ upsertHash.run(relPath, fileHash(code), mtime, size);
1136
+ }
1137
+ });
1138
+ writeHashes();
1139
+ } catch (e) {
1140
+ debug(
1141
+ `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
1142
+ );
1143
+ }
1144
+
1145
+ // Free WASM parse trees from the inline backfill path (#1058).
1146
+ // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
1147
+ // backed by WASM linear memory) on every result, but these symbols are
1148
+ // consumed locally for DB row construction and never added to
1149
+ // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
1150
+ // sees them. Without this, trees leak WASM memory until process exit —
1151
+ // bounded per run but cumulative across in-process integration tests.
1152
+ // Mirrors the cleanup discipline established for #931.
1153
+ for (const [, symbols] of wasmResults) {
1154
+ const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
1155
+ if (tree && typeof tree.delete === 'function') {
1156
+ try {
1157
+ tree.delete();
1158
+ } catch {
1159
+ /* ignore cleanup errors */
1160
+ }
1161
+ }
1162
+ (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
1163
+ (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
1164
+ }
856
1165
  }
857
1166
 
858
1167
  // ── Pipeline stages execution ───────────────────────────────────────────
@@ -979,6 +1288,60 @@ export async function buildGraph(
979
1288
  try {
980
1289
  setupPipeline(ctx);
981
1290
 
1291
+ // ── JS-side fast-skip for native incremental (#1054) ──────────────
1292
+ // The Rust orchestrator's internal early-exit fires reliably locally
1293
+ // but not in CI, where every no-op rebuild was paying the full ~2s
1294
+ // pipeline cost. A read-only mtime+size check here matches WASM's
1295
+ // ~20ms early-exit and skips the orchestrator entirely when no
1296
+ // source files have changed. Tier-2 hashing is left to the native
1297
+ // side: any mismatch falls through and lets Rust's detect_changes
1298
+ // remain the source of truth.
1299
+ //
1300
+ // Diagnostic logging gated by CODEGRAPH_FAST_SKIP_DIAG (#1066) — when
1301
+ // any of the call-site guards short-circuit (forceFullRebuild,
1302
+ // engineName, scope, etc.) we log the reason so the bench gate run
1303
+ // produces observable output even if `detectNoChanges` is never
1304
+ // entered.
1305
+ const fastSkipDiag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
1306
+ if (fastSkipDiag) {
1307
+ const reasons: string[] = [];
1308
+ if (!ctx.nativeAvailable) reasons.push('nativeAvailable=false');
1309
+ if (ctx.engineName !== 'native') reasons.push(`engineName=${ctx.engineName}`);
1310
+ if (!ctx.incremental) reasons.push('incremental=false');
1311
+ if (ctx.forceFullRebuild) reasons.push('forceFullRebuild=true');
1312
+ if ((ctx.opts as Record<string, unknown>).scope) reasons.push('scope=set');
1313
+ if (reasons.length > 0) {
1314
+ info(`[fast-skip] false: pre-flight gate skipped — ${reasons.join(', ')}`);
1315
+ }
1316
+ }
1317
+ if (
1318
+ ctx.nativeAvailable &&
1319
+ ctx.engineName === 'native' &&
1320
+ ctx.incremental &&
1321
+ !ctx.forceFullRebuild &&
1322
+ !(ctx.opts as Record<string, unknown>).scope
1323
+ ) {
1324
+ try {
1325
+ await collectFiles(ctx);
1326
+ if (
1327
+ detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record<string, unknown>)
1328
+ ) {
1329
+ info('No changes detected. Graph is up to date.');
1330
+ writeJournalHeader(ctx.rootDir, Date.now());
1331
+ closeDb(ctx.db);
1332
+ return;
1333
+ }
1334
+ } catch (err) {
1335
+ // Pre-flight is best-effort — any failure falls through to the
1336
+ // orchestrator, which performs its own complete detection.
1337
+ // Reset ctx.allFiles so runPipelineStages re-collects under its own
1338
+ // engine state if we ended up partially populated before throwing.
1339
+ ctx.allFiles = undefined as unknown as string[];
1340
+ ctx.discoveredDirs = undefined as unknown as Set<string>;
1341
+ debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`);
1342
+ }
1343
+ }
1344
+
982
1345
  // ── Rust orchestrator fast path (#695) ────────────────────────────
983
1346
  // When available, run the entire build pipeline in Rust with zero
984
1347
  // napi crossings (eliminates WAL dual-connection dance). Falls back
@@ -770,9 +770,11 @@ function reconnectReverseDepEdges(ctx: PipelineContext): void {
770
770
  * their import targets. Falls back to loading ALL nodes for full builds or
771
771
  * larger incremental changes.
772
772
  */
773
+ const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
774
+
773
775
  function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } {
774
776
  const { db, fileSymbols, isFullBuild, batchResolved } = ctx;
775
- const nodeKindFilter = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
777
+ const nodeKindFilter = NODE_KIND_FILTER_SQL;
776
778
 
777
779
  // Gate: only scope for small incremental on large codebases
778
780
  if (!isFullBuild && fileSymbols.size <= ctx.config.build.smallFilesThreshold) {
@@ -816,8 +818,13 @@ function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolea
816
818
  function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void {
817
819
  if (!scopedLoad) return;
818
820
  const { db } = ctx;
821
+ // Match the upfront kind filter exactly. Using `kind != 'file'` here lets
822
+ // parameters, properties, and other non-definition kinds leak into call
823
+ // resolution, producing bogus call edges like `parser.ts → <a parameter
824
+ // with the same name>` (#1174 follow-up). Calls only ever target the
825
+ // definition kinds, so the fallback's filter must agree with `loadNodes`.
819
826
  const fallbackStmt = db.prepare(
820
- `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND kind != 'file'`,
827
+ `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND ${NODE_KIND_FILTER_SQL}`,
821
828
  );
822
829
  const originalGet = ctx.nodesByName.get.bind(ctx.nodesByName);
823
830
  ctx.nodesByName.get = (name: string) => {
@@ -100,6 +100,15 @@ function tryFastCollect(
100
100
  export async function collectFiles(ctx: PipelineContext): Promise<void> {
101
101
  const { rootDir, config, opts } = ctx;
102
102
 
103
+ // Skip when the JS-side fast-skip pre-flight (#1054) already populated the
104
+ // file list and changes were detected, causing fallthrough to the native
105
+ // orchestrator and then to runPipelineStages. Avoids redoing the filesystem
106
+ // walk on the non-skip path (~8ms on 473 files). On pre-flight failure the
107
+ // caller resets ctx.allFiles so this guard correctly falls through.
108
+ if (!opts.scope && ctx.allFiles?.length && ctx.discoveredDirs?.size) {
109
+ return;
110
+ }
111
+
103
112
  if (opts.scope) {
104
113
  // Scoped rebuild: rebuild only specified files.
105
114
  //