@optave/codegraph 3.9.6 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/README.md +30 -16
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +1 -1
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  6. package/dist/ast-analysis/visitors/ast-store-visitor.js +50 -8
  7. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  8. package/dist/domain/graph/builder/context.d.ts +10 -0
  9. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  10. package/dist/domain/graph/builder/context.js +10 -0
  11. package/dist/domain/graph/builder/context.js.map +1 -1
  12. package/dist/domain/graph/builder/helpers.d.ts +7 -2
  13. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  14. package/dist/domain/graph/builder/helpers.js +7 -2
  15. package/dist/domain/graph/builder/helpers.js.map +1 -1
  16. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  17. package/dist/domain/graph/builder/pipeline.js +197 -33
  18. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  19. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  20. package/dist/domain/graph/builder/stages/collect-files.js +8 -0
  21. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  22. package/dist/domain/graph/builder/stages/detect-changes.d.ts +24 -0
  23. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  24. package/dist/domain/graph/builder/stages/detect-changes.js +117 -3
  25. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  26. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  27. package/dist/domain/graph/builder/stages/finalize.js +9 -6
  28. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  29. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +30 -0
  30. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  31. package/dist/domain/graph/builder/stages/insert-nodes.js +36 -13
  32. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  33. package/dist/domain/parser.d.ts +14 -1
  34. package/dist/domain/parser.d.ts.map +1 -1
  35. package/dist/domain/parser.js +77 -10
  36. package/dist/domain/parser.js.map +1 -1
  37. package/dist/domain/search/models.js +1 -1
  38. package/dist/domain/wasm-worker-entry.js +12 -12
  39. package/dist/domain/wasm-worker-entry.js.map +1 -1
  40. package/dist/infrastructure/config.d.ts +1 -0
  41. package/dist/infrastructure/config.d.ts.map +1 -1
  42. package/dist/infrastructure/config.js +1 -0
  43. package/dist/infrastructure/config.js.map +1 -1
  44. package/dist/mcp/server.d.ts.map +1 -1
  45. package/dist/mcp/server.js +14 -8
  46. package/dist/mcp/server.js.map +1 -1
  47. package/dist/mcp/tool-registry.d.ts +1 -1
  48. package/dist/mcp/tool-registry.d.ts.map +1 -1
  49. package/dist/mcp/tool-registry.js +19 -5
  50. package/dist/mcp/tool-registry.js.map +1 -1
  51. package/dist/types.d.ts +1 -0
  52. package/dist/types.d.ts.map +1 -1
  53. package/grammars/tree-sitter-erlang.wasm +0 -0
  54. package/package.json +8 -7
  55. package/src/ast-analysis/engine.ts +3 -1
  56. package/src/ast-analysis/visitors/ast-store-visitor.ts +45 -9
  57. package/src/domain/graph/builder/context.ts +10 -0
  58. package/src/domain/graph/builder/helpers.ts +8 -3
  59. package/src/domain/graph/builder/pipeline.ts +211 -33
  60. package/src/domain/graph/builder/stages/collect-files.ts +9 -0
  61. package/src/domain/graph/builder/stages/detect-changes.ts +130 -4
  62. package/src/domain/graph/builder/stages/finalize.ts +9 -6
  63. package/src/domain/graph/builder/stages/insert-nodes.ts +38 -14
  64. package/src/domain/parser.ts +83 -9
  65. package/src/domain/search/models.ts +1 -1
  66. package/src/domain/wasm-worker-entry.ts +12 -12
  67. package/src/infrastructure/config.ts +1 -0
  68. package/src/mcp/server.ts +16 -9
  69. package/src/mcp/tool-registry.ts +23 -5
  70. package/src/types.ts +1 -0
@@ -222,12 +222,17 @@ export function fileHash(content: string): string {
222
222
  }
223
223
 
224
224
  /**
225
- * Stat a file, returning { mtimeMs, size } or null on error.
225
+ * Stat a file, returning { mtime, size } or null on error.
226
+ *
227
+ * `mtime` is `Math.floor(stat.mtimeMs)` so it matches the integer column
228
+ * stored in the DB. Floor-once-here keeps every consumer honest: storing or
229
+ * comparing a non-floored `mtimeMs` against the integer DB column would cause
230
+ * spurious fast-skip misses on the next build.
226
231
  */
227
- export function fileStat(filePath: string): { mtimeMs: number; size: number } | null {
232
+ export function fileStat(filePath: string): { mtime: number; size: number } | null {
228
233
  try {
229
234
  const s = fs.statSync(filePath);
230
- return { mtimeMs: s.mtimeMs, size: s.size };
235
+ return { mtime: Math.floor(s.mtimeMs), size: s.size };
231
236
  } catch {
232
237
  return null;
233
238
  }
@@ -9,6 +9,7 @@ import path from 'node:path';
9
9
  import { performance } from 'node:perf_hooks';
10
10
  import {
11
11
  acquireAdvisoryLock,
12
+ closeDb,
12
13
  closeDbPair,
13
14
  getBuildMeta,
14
15
  initSchema,
@@ -37,17 +38,25 @@ import {
37
38
  formatDropExtensionSummary,
38
39
  getActiveEngine,
39
40
  getInstalledWasmExtensions,
40
- parseFilesAuto,
41
+ parseFilesWasmForBackfill,
41
42
  } from '../../parser.js';
43
+ import { writeJournalHeader } from '../journal.js';
42
44
  import { setWorkspaces } from '../resolve.js';
43
45
  import { PipelineContext } from './context.js';
44
- import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
46
+ import {
47
+ batchInsertNodes,
48
+ collectFiles as collectFilesUtil,
49
+ fileHash,
50
+ fileStat,
51
+ loadPathAliases,
52
+ readFileSafe,
53
+ } from './helpers.js';
45
54
  import { NativeDbProxy } from './native-db-proxy.js';
46
55
  import { buildEdges } from './stages/build-edges.js';
47
56
  import { buildStructure } from './stages/build-structure.js';
48
57
  // Pipeline stages
49
58
  import { collectFiles } from './stages/collect-files.js';
50
- import { detectChanges } from './stages/detect-changes.js';
59
+ import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
51
60
  import { finalize } from './stages/finalize.js';
52
61
  import { insertNodes } from './stages/insert-nodes.js';
53
62
  import { parseFiles } from './stages/parse-files.js';
@@ -67,9 +76,14 @@ function initializeEngine(ctx: PipelineContext): void {
67
76
  suspendJsDb: undefined,
68
77
  resumeJsDb: undefined,
69
78
  };
70
- const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts);
79
+ const {
80
+ name: engineName,
81
+ version: engineVersion,
82
+ binaryVersion: nativeBinaryVersion,
83
+ } = getActiveEngine(ctx.engineOpts);
71
84
  ctx.engineName = engineName as 'native' | 'wasm';
72
85
  ctx.engineVersion = engineVersion;
86
+ ctx.nativeBinaryVersion = nativeBinaryVersion;
73
87
  info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
74
88
  }
75
89
 
@@ -96,13 +110,15 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
96
110
  );
97
111
  ctx.forceFullRebuild = true;
98
112
  }
99
- // When the native engine is active, the Rust addon's version (ctx.engineVersion)
100
- // is written into codegraph_version by setBuildMeta after a native orchestrator
101
- // build. The check must compare against the same version, otherwise JS and Rust
102
- // fight over which version to record — causing every incremental build to be
103
- // promoted to a full rebuild when npm and crate versions diverge.
113
+ // When the native engine is active, the Rust orchestrator writes
114
+ // build_meta.codegraph_version = CARGO_PKG_VERSION (the binary's own value).
115
+ // Compare against the same value here so a CI hot-swap that leaves the
116
+ // platform package.json behind doesn't trigger a perpetual full-rebuild
117
+ // loop on every incremental (#1066).
104
118
  const effectiveVersion =
105
- ctx.engineName === 'native' && ctx.engineVersion ? ctx.engineVersion : CODEGRAPH_VERSION;
119
+ ctx.engineName === 'native' && ctx.nativeBinaryVersion
120
+ ? ctx.nativeBinaryVersion
121
+ : CODEGRAPH_VERSION;
106
122
  const prevVersion = meta('codegraph_version');
107
123
  if (prevVersion && prevVersion !== effectiveVersion) {
108
124
  info(
@@ -656,16 +672,24 @@ async function tryNativeOrchestrator(
656
672
  const p = result.phases;
657
673
 
658
674
  // Sync build_meta so JS-side version/engine checks work on next build.
659
- // Use the Rust addon version as codegraph_version when the native
660
- // orchestrator performed the build — the Rust side's check_version_mismatch
661
- // compares this value against CARGO_PKG_VERSION. Writing the JS
662
- // CODEGRAPH_VERSION here would create a permanent mismatch whenever the
663
- // npm package version diverges from the Rust crate version, forcing every
664
- // subsequent native build to be a full rebuild (no incremental).
675
+ // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
676
+ // platform package.json version (ctx.engineVersion). The Rust side's
677
+ // check_version_mismatch compares against CARGO_PKG_VERSION; writing
678
+ // the package.json value would create a permanent mismatch whenever
679
+ // the binary and platform package.json diverge — e.g., CI hot-swap
680
+ // via ci-install-native.mjs (#1066) — forcing every subsequent build
681
+ // to be a full rebuild.
682
+ //
683
+ // When the native addon doesn't expose engineVersion() (older addon),
684
+ // fall back to CODEGRAPH_VERSION — same fallback used by both
685
+ // checkEngineSchemaMismatch (read path) and persistBuildMetadata
686
+ // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
687
+ // here would re-introduce the asymmetry this PR fixes for that case.
688
+ const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
665
689
  setBuildMeta(ctx.db, {
666
690
  engine: ctx.engineName,
667
- engine_version: ctx.engineVersion || '',
668
- codegraph_version: ctx.engineVersion || CODEGRAPH_VERSION,
691
+ engine_version: nativeVersionForMeta,
692
+ codegraph_version: nativeVersionForMeta,
669
693
  schema_version: String(ctx.schemaVersion),
670
694
  built_at: new Date().toISOString(),
671
695
  });
@@ -729,10 +753,20 @@ async function tryNativeOrchestrator(
729
753
  // stale native binaries). WASM handles those — backfill via WASM so both
730
754
  // engines process the same file set (#967).
731
755
  //
732
- // Only runs on full builds: incremental builds only touch changed files,
733
- // which are parsed through parseFilesAuto (which has its own per-file
734
- // backfill), so a full filesystem scan here would be wasted work.
735
- if (result.isFullBuild) {
756
+ // Runs on full builds and on incrementals when the orchestrator reports
757
+ // any file activity (removals or changes). The orchestrator's
758
+ // `detect_removed_files` filter (#1070) skips files outside its narrower
759
+ // file_collector, so on a current binary a no-op rebuild reports
760
+ // `removedCount=0` and `changedCount=0`, making the backfill call pure
761
+ // overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
762
+ // binaries lacking the filter still report `removedCount>0` and get the
763
+ // gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
764
+ // narrows (but does not fully close) the gap where a brand-new
765
+ // unsupported-extension file is added on an otherwise-quiet incremental
766
+ // — see #1091 for the residual gap.
767
+ const removedCount = result.removedCount ?? 0;
768
+ const changedCount = result.changedCount ?? 0;
769
+ if (result.isFullBuild || removedCount > 0 || changedCount > 0) {
736
770
  await backfillNativeDroppedFiles(ctx);
737
771
  }
738
772
 
@@ -745,22 +779,40 @@ async function tryNativeOrchestrator(
745
779
  * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
746
780
  */
747
781
  async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
748
- // Needs a real better-sqlite3 connection for INSERT.
749
- if (ctx.nativeFirstProxy) {
750
- closeNativeDb(ctx, 'pre-parity-backfill');
751
- ctx.db = openDb(ctx.dbPath);
752
- ctx.nativeFirstProxy = false;
753
- }
754
-
782
+ // Compute the missing-file set FIRST, before any expensive DB handoff.
783
+ // NativeDbProxy supports .prepare().all(), so the upfront query works
784
+ // whether ctx.db is a proxy or a real better-sqlite3 connection. On
785
+ // incremental no-op rebuilds nothing is missing, so we want to early-return
786
+ // without paying the close-native / reopen-better-sqlite3 cost.
755
787
  const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
756
788
  const expected = new Set(
757
789
  collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
758
790
  );
759
791
 
760
- const existingRows = ctx.db
792
+ const existingNodeRows = ctx.db
761
793
  .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
762
794
  .all() as Array<{ file: string }>;
763
- const existing = new Set(existingRows.map((r) => r.file));
795
+ const existingNodes = new Set(existingNodeRows.map((r) => r.file));
796
+
797
+ // Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
798
+ // (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
799
+ // (e.g. a DB written by old code where `nodes` was populated but
800
+ // `file_hashes` was not). Treating "in nodes but not in file_hashes" as
801
+ // missing closes the gap so the backfill repairs the file_hashes row even
802
+ // when the node row already exists.
803
+ let existingHashes = new Set<string>();
804
+ try {
805
+ const existingHashRows = ctx.db
806
+ .prepare('SELECT DISTINCT file FROM file_hashes')
807
+ .all() as Array<{ file: string }>;
808
+ existingHashes = new Set(existingHashRows.map((r) => r.file));
809
+ } catch (e) {
810
+ // file_hashes table may not exist on legacy DBs; treat as fully missing
811
+ // so the backfill writes rows on the upsert path below.
812
+ debug(
813
+ `backfillNativeDroppedFiles: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
814
+ );
815
+ }
764
816
 
765
817
  // Restrict backfill to files with an installed WASM grammar. Extensions in
766
818
  // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
@@ -770,7 +822,9 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
770
822
  const missingRel: string[] = [];
771
823
  const missingAbs: string[] = [];
772
824
  for (const rel of expected) {
773
- if (existing.has(rel)) continue;
825
+ // A file is "missing" if it's absent from EITHER nodes OR file_hashes.
826
+ // Both must be present for fast-skip to work correctly.
827
+ if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
774
828
  const ext = path.extname(rel).toLowerCase();
775
829
  if (!installedExts.has(ext)) continue;
776
830
  missingRel.push(rel);
@@ -778,6 +832,14 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
778
832
  }
779
833
  if (missingAbs.length === 0) return;
780
834
 
835
+ // Now that we know there's work to do, hand off to better-sqlite3 (needed
836
+ // for the INSERT path below).
837
+ if (ctx.nativeFirstProxy) {
838
+ closeNativeDb(ctx, 'pre-parity-backfill');
839
+ ctx.db = openDb(ctx.dbPath);
840
+ ctx.nativeFirstProxy = false;
841
+ }
842
+
781
843
  // Classify drops so users see per-extension reasons instead of just a count
782
844
  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
783
845
  // extractor); `native-extractor-failure` indicates a real native bug since
@@ -793,7 +855,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
793
855
  `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
794
856
  );
795
857
  }
796
- const wasmResults = await parseFilesAuto(missingAbs, ctx.rootDir, { engine: 'wasm' });
858
+ const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
797
859
 
798
860
  const rows: unknown[][] = [];
799
861
  const exportKeys: unknown[][] = [];
@@ -853,6 +915,68 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
853
915
  updateStmt.run(...vals);
854
916
  }
855
917
  }
918
+
919
+ // Persist file_hashes rows for every backfilled file. The Rust orchestrator
920
+ // only hashes files it parsed itself, so without this step files in
921
+ // optional-language extensions (e.g. .clj when no Rust extractor exists)
922
+ // would be missing from `file_hashes` — permanently breaking the JS-side
923
+ // fast-skip pre-flight (#1054), which rejects on `collected file missing
924
+ // from file_hashes` and forces every no-op rebuild back through the full
925
+ // ~2s native pipeline (#1068).
926
+ //
927
+ // Iterates `missingRel` (every collected file the Rust orchestrator
928
+ // dropped), not `wasmResults`, so files that produced zero symbols still
929
+ // get a row.
930
+ try {
931
+ const upsertHash = db.prepare(
932
+ 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
933
+ );
934
+ const writeHashes = db.transaction(() => {
935
+ for (let i = 0; i < missingRel.length; i++) {
936
+ const relPath = missingRel[i];
937
+ const absPath = missingAbs[i];
938
+ if (!relPath || !absPath) continue;
939
+ let code: string | null;
940
+ try {
941
+ code = readFileSafe(absPath);
942
+ } catch (e) {
943
+ debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
944
+ continue;
945
+ }
946
+ if (code === null) continue;
947
+ const stat = fileStat(absPath);
948
+ const mtime = stat ? stat.mtime : 0;
949
+ const size = stat ? stat.size : 0;
950
+ upsertHash.run(relPath, fileHash(code), mtime, size);
951
+ }
952
+ });
953
+ writeHashes();
954
+ } catch (e) {
955
+ debug(
956
+ `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
957
+ );
958
+ }
959
+
960
+ // Free WASM parse trees from the inline backfill path (#1058).
961
+ // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
962
+ // backed by WASM linear memory) on every result, but these symbols are
963
+ // consumed locally for DB row construction and never added to
964
+ // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
965
+ // sees them. Without this, trees leak WASM memory until process exit —
966
+ // bounded per run but cumulative across in-process integration tests.
967
+ // Mirrors the cleanup discipline established for #931.
968
+ for (const [, symbols] of wasmResults) {
969
+ const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
970
+ if (tree && typeof tree.delete === 'function') {
971
+ try {
972
+ tree.delete();
973
+ } catch {
974
+ /* ignore cleanup errors */
975
+ }
976
+ }
977
+ (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
978
+ (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
979
+ }
856
980
  }
857
981
 
858
982
  // ── Pipeline stages execution ───────────────────────────────────────────
@@ -979,6 +1103,60 @@ export async function buildGraph(
979
1103
  try {
980
1104
  setupPipeline(ctx);
981
1105
 
1106
+ // ── JS-side fast-skip for native incremental (#1054) ──────────────
1107
+ // The Rust orchestrator's internal early-exit fires reliably locally
1108
+ // but not in CI, where every no-op rebuild was paying the full ~2s
1109
+ // pipeline cost. A read-only mtime+size check here matches WASM's
1110
+ // ~20ms early-exit and skips the orchestrator entirely when no
1111
+ // source files have changed. Tier-2 hashing is left to the native
1112
+ // side: any mismatch falls through and lets Rust's detect_changes
1113
+ // remain the source of truth.
1114
+ //
1115
+ // Diagnostic logging gated by CODEGRAPH_FAST_SKIP_DIAG (#1066) — when
1116
+ // any of the call-site guards short-circuit (forceFullRebuild,
1117
+ // engineName, scope, etc.) we log the reason so the bench gate run
1118
+ // produces observable output even if `detectNoChanges` is never
1119
+ // entered.
1120
+ const fastSkipDiag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
1121
+ if (fastSkipDiag) {
1122
+ const reasons: string[] = [];
1123
+ if (!ctx.nativeAvailable) reasons.push('nativeAvailable=false');
1124
+ if (ctx.engineName !== 'native') reasons.push(`engineName=${ctx.engineName}`);
1125
+ if (!ctx.incremental) reasons.push('incremental=false');
1126
+ if (ctx.forceFullRebuild) reasons.push('forceFullRebuild=true');
1127
+ if ((ctx.opts as Record<string, unknown>).scope) reasons.push('scope=set');
1128
+ if (reasons.length > 0) {
1129
+ info(`[fast-skip] false: pre-flight gate skipped — ${reasons.join(', ')}`);
1130
+ }
1131
+ }
1132
+ if (
1133
+ ctx.nativeAvailable &&
1134
+ ctx.engineName === 'native' &&
1135
+ ctx.incremental &&
1136
+ !ctx.forceFullRebuild &&
1137
+ !(ctx.opts as Record<string, unknown>).scope
1138
+ ) {
1139
+ try {
1140
+ await collectFiles(ctx);
1141
+ if (
1142
+ detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record<string, unknown>)
1143
+ ) {
1144
+ info('No changes detected. Graph is up to date.');
1145
+ writeJournalHeader(ctx.rootDir, Date.now());
1146
+ closeDb(ctx.db);
1147
+ return;
1148
+ }
1149
+ } catch (err) {
1150
+ // Pre-flight is best-effort — any failure falls through to the
1151
+ // orchestrator, which performs its own complete detection.
1152
+ // Reset ctx.allFiles so runPipelineStages re-collects under its own
1153
+ // engine state if we ended up partially populated before throwing.
1154
+ ctx.allFiles = undefined as unknown as string[];
1155
+ ctx.discoveredDirs = undefined as unknown as Set<string>;
1156
+ debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`);
1157
+ }
1158
+ }
1159
+
982
1160
  // ── Rust orchestrator fast path (#695) ────────────────────────────
983
1161
  // When available, run the entire build pipeline in Rust with zero
984
1162
  // napi crossings (eliminates WAL dual-connection dance). Falls back
@@ -100,6 +100,15 @@ function tryFastCollect(
100
100
  export async function collectFiles(ctx: PipelineContext): Promise<void> {
101
101
  const { rootDir, config, opts } = ctx;
102
102
 
103
+ // Skip when the JS-side fast-skip pre-flight (#1054) already populated the
104
+ // file list and changes were detected, causing fallthrough to the native
105
+ // orchestrator and then to runPipelineStages. Avoids redoing the filesystem
106
+ // walk on the non-skip path (~8ms on 473 files). On pre-flight failure the
107
+ // caller resets ctx.allFiles so this guard correctly falls through.
108
+ if (!opts.scope && ctx.allFiles?.length && ctx.discoveredDirs?.size) {
109
+ return;
110
+ }
111
+
103
112
  if (opts.scope) {
104
113
  // Scoped rebuild: rebuild only specified files.
105
114
  //
@@ -27,7 +27,7 @@ interface FileHashRow {
27
27
  }
28
28
 
29
29
  interface FileStat {
30
- mtimeMs: number;
30
+ mtime: number;
31
31
  size: number;
32
32
  }
33
33
 
@@ -182,7 +182,7 @@ function mtimeAndHashTiers(
182
182
  if (!stat) continue;
183
183
  const storedMtime = record.mtime || 0;
184
184
  const storedSize = record.size || 0;
185
- if (storedSize > 0 && Math.floor(stat.mtimeMs) === storedMtime && stat.size === storedSize) {
185
+ if (storedSize > 0 && stat.mtime === storedMtime && stat.size === storedSize) {
186
186
  skipped.push(relPath);
187
187
  continue;
188
188
  }
@@ -512,6 +512,132 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
512
512
  purgeAndAddReverseDeps(ctx, changePaths, reverseDeps);
513
513
  }
514
514
 
515
+ /**
516
+ * Read-only pre-flight check for the native orchestrator.
517
+ *
518
+ * Returns true iff every collected source file has matching mtime+size in
519
+ * `file_hashes` and no DB-tracked file has been removed. When true, the
520
+ * caller can short-circuit before invoking the native orchestrator —
521
+ * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
522
+ * per-call native rebuild overhead seen in CI (#1054).
523
+ *
524
+ * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
525
+ * hashing is left to the native side: when this returns false the caller
526
+ * falls through to the orchestrator, which performs its own complete
527
+ * detection and is the source of truth.
528
+ *
529
+ * Conservatively returns false when CFG or dataflow analysis is enabled
530
+ * but the corresponding tables are empty — otherwise the fast-skip would
531
+ * silently suppress the pending-analysis pass that the JS path runs via
532
+ * `runPendingAnalysis`, and CFG/dataflow data would never populate on
533
+ * repos where source files don't change between builds.
534
+ *
535
+ * Pure read of `db` and the filesystem — never mutates either.
536
+ */
537
+ export function detectNoChanges(
538
+ db: BetterSqlite3Database,
539
+ allFiles: string[],
540
+ rootDir: string,
541
+ opts?: Record<string, unknown>,
542
+ ): boolean {
543
+ // Diagnostic logging gated by env var — used by the bench gate to surface
544
+ // why the fast-skip is not firing on CI runners (#1066). Off by default to
545
+ // avoid noise on every regular incremental build.
546
+ const diag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
547
+ const log = (reason: string): void => {
548
+ if (diag) info(`[fast-skip] ${reason}`);
549
+ };
550
+
551
+ let hasTable = false;
552
+ try {
553
+ db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
554
+ hasTable = true;
555
+ } catch {
556
+ /* table missing — first build */
557
+ }
558
+ if (!hasTable) {
559
+ log('false: file_hashes table missing');
560
+ return false;
561
+ }
562
+
563
+ const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
564
+ if (rows.length === 0) {
565
+ log('false: file_hashes table empty');
566
+ return false;
567
+ }
568
+ const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
569
+
570
+ const currentFiles = new Set<string>();
571
+ for (const file of allFiles) {
572
+ currentFiles.add(normalizePath(path.relative(rootDir, file)));
573
+ }
574
+ for (const existingFile of existing.keys()) {
575
+ if (!currentFiles.has(existingFile)) {
576
+ log(`false: tracked file no longer collected: ${existingFile}`);
577
+ return false;
578
+ }
579
+ }
580
+
581
+ for (const file of allFiles) {
582
+ const relPath = normalizePath(path.relative(rootDir, file));
583
+ const record = existing.get(relPath);
584
+ if (!record) {
585
+ log(`false: collected file missing from file_hashes: ${relPath}`);
586
+ return false;
587
+ }
588
+ const stat = fileStat(file) as FileStat | undefined;
589
+ if (!stat) {
590
+ log(`false: stat failed for ${relPath}`);
591
+ return false;
592
+ }
593
+ const storedMtime = record.mtime || 0;
594
+ const storedSize = record.size || 0;
595
+ if (storedSize <= 0) {
596
+ log(`false: stored size <= 0 for ${relPath} (stored=${record.size})`);
597
+ return false;
598
+ }
599
+ if (stat.mtime !== storedMtime || stat.size !== storedSize) {
600
+ log(
601
+ `false: mtime/size diff for ${relPath}: stat=${stat.mtime}/${stat.size} stored=${storedMtime}/${storedSize}`,
602
+ );
603
+ return false;
604
+ }
605
+ }
606
+
607
+ // Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
608
+ // table is empty (analysis newly enabled, or tables wiped between builds),
609
+ // fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
610
+ // Mirrors the check at the top of runPendingAnalysis (see line ~244).
611
+ if (opts) {
612
+ if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
613
+ log('false: pending-analysis guard — cfg_blocks is empty');
614
+ return false;
615
+ }
616
+ if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
617
+ log('false: pending-analysis guard — dataflow is empty');
618
+ return false;
619
+ }
620
+ }
621
+
622
+ log(`true: all checks passed (${allFiles.length} files)`);
623
+ return true;
624
+ }
625
+
626
+ /**
627
+ * Returns true if `table` exists and has zero rows, matching the empty-table
628
+ * semantics of `runPendingAnalysis`. A missing table is treated as empty
629
+ * (the conservative outcome), so the caller falls through to the orchestrator
630
+ * which will create the schema and populate it.
631
+ */
632
+ function hasEmptyAnalysisTable(db: BetterSqlite3Database, table: string): boolean {
633
+ try {
634
+ const row = db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get() as { c: number } | undefined;
635
+ return (row?.c ?? 0) === 0;
636
+ } catch {
637
+ return true;
638
+ }
639
+ }
640
+
515
641
  export async function detectChanges(ctx: PipelineContext): Promise<void> {
516
642
  const start = performance.now();
517
643
  try {
@@ -537,7 +663,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
537
663
  relPath: c.relPath,
538
664
  content: c.content,
539
665
  hash: c.hash,
540
- stat: c.stat ? { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size } : undefined,
666
+ stat: c.stat ? { mtime: c.stat.mtime, size: c.stat.size } : undefined,
541
667
  _reverseDepOnly: c._reverseDepOnly,
542
668
  }));
543
669
  ctx.metadataUpdates = increResult.changed
@@ -548,7 +674,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
548
674
  .map((c) => ({
549
675
  relPath: c.relPath,
550
676
  hash: c.hash,
551
- stat: { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size },
677
+ stat: { mtime: c.stat.mtime, size: c.stat.size },
552
678
  }));
553
679
  if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) {
554
680
  const ranAnalysis = await runPendingAnalysis(ctx);
@@ -82,13 +82,16 @@ function persistBuildMetadata(
82
82
  ): void {
83
83
  const useNativeDb = ctx.engineName === 'native' && !!ctx.nativeDb;
84
84
  if (!ctx.isFullBuild && ctx.allSymbols.size <= 3) return;
85
- // When the native engine is active, persist the Rust addon version so that
86
- // checkEngineSchemaMismatch compares against the same value on the next build.
87
- // Writing CODEGRAPH_VERSION (the npm package version) here would create a
88
- // permanent mismatch whenever npm and crate versions diverge, forcing every
89
- // subsequent build to be a full rebuild.
85
+ // When the native engine is active, persist the binary's CARGO_PKG_VERSION
86
+ // (ctx.nativeBinaryVersion). The Rust orchestrator's check_version_mismatch
87
+ // compares against that exact value, so writing the platform package.json
88
+ // version (ctx.engineVersion) — which can drift from the binary in CI
89
+ // hot-swap flows (#1066) — would force every subsequent native build to
90
+ // be a full rebuild.
90
91
  const codeVersionToWrite =
91
- ctx.engineName === 'native' && ctx.engineVersion ? ctx.engineVersion : CODEGRAPH_VERSION;
92
+ ctx.engineName === 'native' && ctx.nativeBinaryVersion
93
+ ? ctx.nativeBinaryVersion
94
+ : CODEGRAPH_VERSION;
92
95
  // Persist the repo root so downstream commands (e.g. `codegraph embed`)
93
96
  // can resolve relative file paths regardless of the invoking cwd.
94
97
  // Use realpathSync (symlink-resolving) to match the Rust engine's