@optave/codegraph 3.9.6 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -16
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +1 -1
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +50 -8
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +10 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js +10 -0
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +7 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +7 -2
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +197 -33
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +8 -0
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts +24 -0
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +117 -3
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +9 -6
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts +30 -0
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +36 -13
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/parser.d.ts +14 -1
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +77 -10
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/models.js +1 -1
- package/dist/domain/wasm-worker-entry.js +12 -12
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/infrastructure/config.d.ts +1 -0
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +1 -0
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/mcp/server.js +14 -8
- package/dist/mcp/server.js.map +1 -1
- package/dist/mcp/tool-registry.d.ts +1 -1
- package/dist/mcp/tool-registry.d.ts.map +1 -1
- package/dist/mcp/tool-registry.js +19 -5
- package/dist/mcp/tool-registry.js.map +1 -1
- package/dist/types.d.ts +1 -0
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/package.json +8 -7
- package/src/ast-analysis/engine.ts +3 -1
- package/src/ast-analysis/visitors/ast-store-visitor.ts +45 -9
- package/src/domain/graph/builder/context.ts +10 -0
- package/src/domain/graph/builder/helpers.ts +8 -3
- package/src/domain/graph/builder/pipeline.ts +211 -33
- package/src/domain/graph/builder/stages/collect-files.ts +9 -0
- package/src/domain/graph/builder/stages/detect-changes.ts +130 -4
- package/src/domain/graph/builder/stages/finalize.ts +9 -6
- package/src/domain/graph/builder/stages/insert-nodes.ts +38 -14
- package/src/domain/parser.ts +83 -9
- package/src/domain/search/models.ts +1 -1
- package/src/domain/wasm-worker-entry.ts +12 -12
- package/src/infrastructure/config.ts +1 -0
- package/src/mcp/server.ts +16 -9
- package/src/mcp/tool-registry.ts +23 -5
- package/src/types.ts +1 -0
|
@@ -222,12 +222,17 @@ export function fileHash(content: string): string {
|
|
|
222
222
|
}
|
|
223
223
|
|
|
224
224
|
/**
|
|
225
|
-
* Stat a file, returning {
|
|
225
|
+
* Stat a file, returning { mtime, size } or null on error.
|
|
226
|
+
*
|
|
227
|
+
* `mtime` is `Math.floor(stat.mtimeMs)` so it matches the integer column
|
|
228
|
+
* stored in the DB. Floor-once-here keeps every consumer honest: storing or
|
|
229
|
+
* comparing a non-floored `mtimeMs` against the integer DB column would cause
|
|
230
|
+
* spurious fast-skip misses on the next build.
|
|
226
231
|
*/
|
|
227
|
-
export function fileStat(filePath: string): {
|
|
232
|
+
export function fileStat(filePath: string): { mtime: number; size: number } | null {
|
|
228
233
|
try {
|
|
229
234
|
const s = fs.statSync(filePath);
|
|
230
|
-
return {
|
|
235
|
+
return { mtime: Math.floor(s.mtimeMs), size: s.size };
|
|
231
236
|
} catch {
|
|
232
237
|
return null;
|
|
233
238
|
}
|
|
@@ -9,6 +9,7 @@ import path from 'node:path';
|
|
|
9
9
|
import { performance } from 'node:perf_hooks';
|
|
10
10
|
import {
|
|
11
11
|
acquireAdvisoryLock,
|
|
12
|
+
closeDb,
|
|
12
13
|
closeDbPair,
|
|
13
14
|
getBuildMeta,
|
|
14
15
|
initSchema,
|
|
@@ -37,17 +38,25 @@ import {
|
|
|
37
38
|
formatDropExtensionSummary,
|
|
38
39
|
getActiveEngine,
|
|
39
40
|
getInstalledWasmExtensions,
|
|
40
|
-
|
|
41
|
+
parseFilesWasmForBackfill,
|
|
41
42
|
} from '../../parser.js';
|
|
43
|
+
import { writeJournalHeader } from '../journal.js';
|
|
42
44
|
import { setWorkspaces } from '../resolve.js';
|
|
43
45
|
import { PipelineContext } from './context.js';
|
|
44
|
-
import {
|
|
46
|
+
import {
|
|
47
|
+
batchInsertNodes,
|
|
48
|
+
collectFiles as collectFilesUtil,
|
|
49
|
+
fileHash,
|
|
50
|
+
fileStat,
|
|
51
|
+
loadPathAliases,
|
|
52
|
+
readFileSafe,
|
|
53
|
+
} from './helpers.js';
|
|
45
54
|
import { NativeDbProxy } from './native-db-proxy.js';
|
|
46
55
|
import { buildEdges } from './stages/build-edges.js';
|
|
47
56
|
import { buildStructure } from './stages/build-structure.js';
|
|
48
57
|
// Pipeline stages
|
|
49
58
|
import { collectFiles } from './stages/collect-files.js';
|
|
50
|
-
import { detectChanges } from './stages/detect-changes.js';
|
|
59
|
+
import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
|
|
51
60
|
import { finalize } from './stages/finalize.js';
|
|
52
61
|
import { insertNodes } from './stages/insert-nodes.js';
|
|
53
62
|
import { parseFiles } from './stages/parse-files.js';
|
|
@@ -67,9 +76,14 @@ function initializeEngine(ctx: PipelineContext): void {
|
|
|
67
76
|
suspendJsDb: undefined,
|
|
68
77
|
resumeJsDb: undefined,
|
|
69
78
|
};
|
|
70
|
-
const {
|
|
79
|
+
const {
|
|
80
|
+
name: engineName,
|
|
81
|
+
version: engineVersion,
|
|
82
|
+
binaryVersion: nativeBinaryVersion,
|
|
83
|
+
} = getActiveEngine(ctx.engineOpts);
|
|
71
84
|
ctx.engineName = engineName as 'native' | 'wasm';
|
|
72
85
|
ctx.engineVersion = engineVersion;
|
|
86
|
+
ctx.nativeBinaryVersion = nativeBinaryVersion;
|
|
73
87
|
info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
|
|
74
88
|
}
|
|
75
89
|
|
|
@@ -96,13 +110,15 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
|
|
|
96
110
|
);
|
|
97
111
|
ctx.forceFullRebuild = true;
|
|
98
112
|
}
|
|
99
|
-
// When the native engine is active, the Rust
|
|
100
|
-
//
|
|
101
|
-
//
|
|
102
|
-
//
|
|
103
|
-
//
|
|
113
|
+
// When the native engine is active, the Rust orchestrator writes
|
|
114
|
+
// build_meta.codegraph_version = CARGO_PKG_VERSION (the binary's own value).
|
|
115
|
+
// Compare against the same value here so a CI hot-swap that leaves the
|
|
116
|
+
// platform package.json behind doesn't trigger a perpetual full-rebuild
|
|
117
|
+
// loop on every incremental (#1066).
|
|
104
118
|
const effectiveVersion =
|
|
105
|
-
ctx.engineName === 'native' && ctx.
|
|
119
|
+
ctx.engineName === 'native' && ctx.nativeBinaryVersion
|
|
120
|
+
? ctx.nativeBinaryVersion
|
|
121
|
+
: CODEGRAPH_VERSION;
|
|
106
122
|
const prevVersion = meta('codegraph_version');
|
|
107
123
|
if (prevVersion && prevVersion !== effectiveVersion) {
|
|
108
124
|
info(
|
|
@@ -656,16 +672,24 @@ async function tryNativeOrchestrator(
|
|
|
656
672
|
const p = result.phases;
|
|
657
673
|
|
|
658
674
|
// Sync build_meta so JS-side version/engine checks work on next build.
|
|
659
|
-
// Use the
|
|
660
|
-
//
|
|
661
|
-
// compares
|
|
662
|
-
//
|
|
663
|
-
//
|
|
664
|
-
//
|
|
675
|
+
// Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
|
|
676
|
+
// platform package.json version (ctx.engineVersion). The Rust side's
|
|
677
|
+
// check_version_mismatch compares against CARGO_PKG_VERSION; writing
|
|
678
|
+
// the package.json value would create a permanent mismatch whenever
|
|
679
|
+
// the binary and platform package.json diverge — e.g., CI hot-swap
|
|
680
|
+
// via ci-install-native.mjs (#1066) — forcing every subsequent build
|
|
681
|
+
// to be a full rebuild.
|
|
682
|
+
//
|
|
683
|
+
// When the native addon doesn't expose engineVersion() (older addon),
|
|
684
|
+
// fall back to CODEGRAPH_VERSION — same fallback used by both
|
|
685
|
+
// checkEngineSchemaMismatch (read path) and persistBuildMetadata
|
|
686
|
+
// (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
|
|
687
|
+
// here would re-introduce the asymmetry this PR fixes for that case.
|
|
688
|
+
const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
|
|
665
689
|
setBuildMeta(ctx.db, {
|
|
666
690
|
engine: ctx.engineName,
|
|
667
|
-
engine_version:
|
|
668
|
-
codegraph_version:
|
|
691
|
+
engine_version: nativeVersionForMeta,
|
|
692
|
+
codegraph_version: nativeVersionForMeta,
|
|
669
693
|
schema_version: String(ctx.schemaVersion),
|
|
670
694
|
built_at: new Date().toISOString(),
|
|
671
695
|
});
|
|
@@ -729,10 +753,20 @@ async function tryNativeOrchestrator(
|
|
|
729
753
|
// stale native binaries). WASM handles those — backfill via WASM so both
|
|
730
754
|
// engines process the same file set (#967).
|
|
731
755
|
//
|
|
732
|
-
//
|
|
733
|
-
//
|
|
734
|
-
//
|
|
735
|
-
|
|
756
|
+
// Runs on full builds and on incrementals when the orchestrator reports
|
|
757
|
+
// any file activity (removals or changes). The orchestrator's
|
|
758
|
+
// `detect_removed_files` filter (#1070) skips files outside its narrower
|
|
759
|
+
// file_collector, so on a current binary a no-op rebuild reports
|
|
760
|
+
// `removedCount=0` and `changedCount=0`, making the backfill call pure
|
|
761
|
+
// overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
|
|
762
|
+
// binaries lacking the filter still report `removedCount>0` and get the
|
|
763
|
+
// gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
|
|
764
|
+
// narrows (but does not fully close) the gap where a brand-new
|
|
765
|
+
// unsupported-extension file is added on an otherwise-quiet incremental
|
|
766
|
+
// — see #1091 for the residual gap.
|
|
767
|
+
const removedCount = result.removedCount ?? 0;
|
|
768
|
+
const changedCount = result.changedCount ?? 0;
|
|
769
|
+
if (result.isFullBuild || removedCount > 0 || changedCount > 0) {
|
|
736
770
|
await backfillNativeDroppedFiles(ctx);
|
|
737
771
|
}
|
|
738
772
|
|
|
@@ -745,22 +779,40 @@ async function tryNativeOrchestrator(
|
|
|
745
779
|
* Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
|
|
746
780
|
*/
|
|
747
781
|
async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
748
|
-
//
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
}
|
|
754
|
-
|
|
782
|
+
// Compute the missing-file set FIRST, before any expensive DB handoff.
|
|
783
|
+
// NativeDbProxy supports .prepare().all(), so the upfront query works
|
|
784
|
+
// whether ctx.db is a proxy or a real better-sqlite3 connection. On
|
|
785
|
+
// incremental no-op rebuilds nothing is missing, so we want to early-return
|
|
786
|
+
// without paying the close-native / reopen-better-sqlite3 cost.
|
|
755
787
|
const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
|
|
756
788
|
const expected = new Set(
|
|
757
789
|
collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
|
|
758
790
|
);
|
|
759
791
|
|
|
760
|
-
const
|
|
792
|
+
const existingNodeRows = ctx.db
|
|
761
793
|
.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
|
|
762
794
|
.all() as Array<{ file: string }>;
|
|
763
|
-
const
|
|
795
|
+
const existingNodes = new Set(existingNodeRows.map((r) => r.file));
|
|
796
|
+
|
|
797
|
+
// Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
|
|
798
|
+
// (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
|
|
799
|
+
// (e.g. a DB written by old code where `nodes` was populated but
|
|
800
|
+
// `file_hashes` was not). Treating "in nodes but not in file_hashes" as
|
|
801
|
+
// missing closes the gap so the backfill repairs the file_hashes row even
|
|
802
|
+
// when the node row already exists.
|
|
803
|
+
let existingHashes = new Set<string>();
|
|
804
|
+
try {
|
|
805
|
+
const existingHashRows = ctx.db
|
|
806
|
+
.prepare('SELECT DISTINCT file FROM file_hashes')
|
|
807
|
+
.all() as Array<{ file: string }>;
|
|
808
|
+
existingHashes = new Set(existingHashRows.map((r) => r.file));
|
|
809
|
+
} catch (e) {
|
|
810
|
+
// file_hashes table may not exist on legacy DBs; treat as fully missing
|
|
811
|
+
// so the backfill writes rows on the upsert path below.
|
|
812
|
+
debug(
|
|
813
|
+
`backfillNativeDroppedFiles: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
|
|
814
|
+
);
|
|
815
|
+
}
|
|
764
816
|
|
|
765
817
|
// Restrict backfill to files with an installed WASM grammar. Extensions in
|
|
766
818
|
// LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
|
|
@@ -770,7 +822,9 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
770
822
|
const missingRel: string[] = [];
|
|
771
823
|
const missingAbs: string[] = [];
|
|
772
824
|
for (const rel of expected) {
|
|
773
|
-
if
|
|
825
|
+
// A file is "missing" if it's absent from EITHER nodes OR file_hashes.
|
|
826
|
+
// Both must be present for fast-skip to work correctly.
|
|
827
|
+
if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
|
|
774
828
|
const ext = path.extname(rel).toLowerCase();
|
|
775
829
|
if (!installedExts.has(ext)) continue;
|
|
776
830
|
missingRel.push(rel);
|
|
@@ -778,6 +832,14 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
778
832
|
}
|
|
779
833
|
if (missingAbs.length === 0) return;
|
|
780
834
|
|
|
835
|
+
// Now that we know there's work to do, hand off to better-sqlite3 (needed
|
|
836
|
+
// for the INSERT path below).
|
|
837
|
+
if (ctx.nativeFirstProxy) {
|
|
838
|
+
closeNativeDb(ctx, 'pre-parity-backfill');
|
|
839
|
+
ctx.db = openDb(ctx.dbPath);
|
|
840
|
+
ctx.nativeFirstProxy = false;
|
|
841
|
+
}
|
|
842
|
+
|
|
781
843
|
// Classify drops so users see per-extension reasons instead of just a count
|
|
782
844
|
// (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
|
|
783
845
|
// extractor); `native-extractor-failure` indicates a real native bug since
|
|
@@ -793,7 +855,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
793
855
|
`Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
|
|
794
856
|
);
|
|
795
857
|
}
|
|
796
|
-
const wasmResults = await
|
|
858
|
+
const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
|
|
797
859
|
|
|
798
860
|
const rows: unknown[][] = [];
|
|
799
861
|
const exportKeys: unknown[][] = [];
|
|
@@ -853,6 +915,68 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
853
915
|
updateStmt.run(...vals);
|
|
854
916
|
}
|
|
855
917
|
}
|
|
918
|
+
|
|
919
|
+
// Persist file_hashes rows for every backfilled file. The Rust orchestrator
|
|
920
|
+
// only hashes files it parsed itself, so without this step files in
|
|
921
|
+
// optional-language extensions (e.g. .clj when no Rust extractor exists)
|
|
922
|
+
// would be missing from `file_hashes` — permanently breaking the JS-side
|
|
923
|
+
// fast-skip pre-flight (#1054), which rejects on `collected file missing
|
|
924
|
+
// from file_hashes` and forces every no-op rebuild back through the full
|
|
925
|
+
// ~2s native pipeline (#1068).
|
|
926
|
+
//
|
|
927
|
+
// Iterates `missingRel` (every collected file the Rust orchestrator
|
|
928
|
+
// dropped), not `wasmResults`, so files that produced zero symbols still
|
|
929
|
+
// get a row.
|
|
930
|
+
try {
|
|
931
|
+
const upsertHash = db.prepare(
|
|
932
|
+
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
|
|
933
|
+
);
|
|
934
|
+
const writeHashes = db.transaction(() => {
|
|
935
|
+
for (let i = 0; i < missingRel.length; i++) {
|
|
936
|
+
const relPath = missingRel[i];
|
|
937
|
+
const absPath = missingAbs[i];
|
|
938
|
+
if (!relPath || !absPath) continue;
|
|
939
|
+
let code: string | null;
|
|
940
|
+
try {
|
|
941
|
+
code = readFileSafe(absPath);
|
|
942
|
+
} catch (e) {
|
|
943
|
+
debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
|
|
944
|
+
continue;
|
|
945
|
+
}
|
|
946
|
+
if (code === null) continue;
|
|
947
|
+
const stat = fileStat(absPath);
|
|
948
|
+
const mtime = stat ? stat.mtime : 0;
|
|
949
|
+
const size = stat ? stat.size : 0;
|
|
950
|
+
upsertHash.run(relPath, fileHash(code), mtime, size);
|
|
951
|
+
}
|
|
952
|
+
});
|
|
953
|
+
writeHashes();
|
|
954
|
+
} catch (e) {
|
|
955
|
+
debug(
|
|
956
|
+
`backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
|
|
957
|
+
);
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
// Free WASM parse trees from the inline backfill path (#1058).
|
|
961
|
+
// `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
|
|
962
|
+
// backed by WASM linear memory) on every result, but these symbols are
|
|
963
|
+
// consumed locally for DB row construction and never added to
|
|
964
|
+
// `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
|
|
965
|
+
// sees them. Without this, trees leak WASM memory until process exit —
|
|
966
|
+
// bounded per run but cumulative across in-process integration tests.
|
|
967
|
+
// Mirrors the cleanup discipline established for #931.
|
|
968
|
+
for (const [, symbols] of wasmResults) {
|
|
969
|
+
const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
|
|
970
|
+
if (tree && typeof tree.delete === 'function') {
|
|
971
|
+
try {
|
|
972
|
+
tree.delete();
|
|
973
|
+
} catch {
|
|
974
|
+
/* ignore cleanup errors */
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
(symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
|
|
978
|
+
(symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
|
|
979
|
+
}
|
|
856
980
|
}
|
|
857
981
|
|
|
858
982
|
// ── Pipeline stages execution ───────────────────────────────────────────
|
|
@@ -979,6 +1103,60 @@ export async function buildGraph(
|
|
|
979
1103
|
try {
|
|
980
1104
|
setupPipeline(ctx);
|
|
981
1105
|
|
|
1106
|
+
// ── JS-side fast-skip for native incremental (#1054) ──────────────
|
|
1107
|
+
// The Rust orchestrator's internal early-exit fires reliably locally
|
|
1108
|
+
// but not in CI, where every no-op rebuild was paying the full ~2s
|
|
1109
|
+
// pipeline cost. A read-only mtime+size check here matches WASM's
|
|
1110
|
+
// ~20ms early-exit and skips the orchestrator entirely when no
|
|
1111
|
+
// source files have changed. Tier-2 hashing is left to the native
|
|
1112
|
+
// side: any mismatch falls through and lets Rust's detect_changes
|
|
1113
|
+
// remain the source of truth.
|
|
1114
|
+
//
|
|
1115
|
+
// Diagnostic logging gated by CODEGRAPH_FAST_SKIP_DIAG (#1066) — when
|
|
1116
|
+
// any of the call-site guards short-circuit (forceFullRebuild,
|
|
1117
|
+
// engineName, scope, etc.) we log the reason so the bench gate run
|
|
1118
|
+
// produces observable output even if `detectNoChanges` is never
|
|
1119
|
+
// entered.
|
|
1120
|
+
const fastSkipDiag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
|
|
1121
|
+
if (fastSkipDiag) {
|
|
1122
|
+
const reasons: string[] = [];
|
|
1123
|
+
if (!ctx.nativeAvailable) reasons.push('nativeAvailable=false');
|
|
1124
|
+
if (ctx.engineName !== 'native') reasons.push(`engineName=${ctx.engineName}`);
|
|
1125
|
+
if (!ctx.incremental) reasons.push('incremental=false');
|
|
1126
|
+
if (ctx.forceFullRebuild) reasons.push('forceFullRebuild=true');
|
|
1127
|
+
if ((ctx.opts as Record<string, unknown>).scope) reasons.push('scope=set');
|
|
1128
|
+
if (reasons.length > 0) {
|
|
1129
|
+
info(`[fast-skip] false: pre-flight gate skipped — ${reasons.join(', ')}`);
|
|
1130
|
+
}
|
|
1131
|
+
}
|
|
1132
|
+
if (
|
|
1133
|
+
ctx.nativeAvailable &&
|
|
1134
|
+
ctx.engineName === 'native' &&
|
|
1135
|
+
ctx.incremental &&
|
|
1136
|
+
!ctx.forceFullRebuild &&
|
|
1137
|
+
!(ctx.opts as Record<string, unknown>).scope
|
|
1138
|
+
) {
|
|
1139
|
+
try {
|
|
1140
|
+
await collectFiles(ctx);
|
|
1141
|
+
if (
|
|
1142
|
+
detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record<string, unknown>)
|
|
1143
|
+
) {
|
|
1144
|
+
info('No changes detected. Graph is up to date.');
|
|
1145
|
+
writeJournalHeader(ctx.rootDir, Date.now());
|
|
1146
|
+
closeDb(ctx.db);
|
|
1147
|
+
return;
|
|
1148
|
+
}
|
|
1149
|
+
} catch (err) {
|
|
1150
|
+
// Pre-flight is best-effort — any failure falls through to the
|
|
1151
|
+
// orchestrator, which performs its own complete detection.
|
|
1152
|
+
// Reset ctx.allFiles so runPipelineStages re-collects under its own
|
|
1153
|
+
// engine state if we ended up partially populated before throwing.
|
|
1154
|
+
ctx.allFiles = undefined as unknown as string[];
|
|
1155
|
+
ctx.discoveredDirs = undefined as unknown as Set<string>;
|
|
1156
|
+
debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`);
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1159
|
+
|
|
982
1160
|
// ── Rust orchestrator fast path (#695) ────────────────────────────
|
|
983
1161
|
// When available, run the entire build pipeline in Rust with zero
|
|
984
1162
|
// napi crossings (eliminates WAL dual-connection dance). Falls back
|
|
@@ -100,6 +100,15 @@ function tryFastCollect(
|
|
|
100
100
|
export async function collectFiles(ctx: PipelineContext): Promise<void> {
|
|
101
101
|
const { rootDir, config, opts } = ctx;
|
|
102
102
|
|
|
103
|
+
// Skip when the JS-side fast-skip pre-flight (#1054) already populated the
|
|
104
|
+
// file list and changes were detected, causing fallthrough to the native
|
|
105
|
+
// orchestrator and then to runPipelineStages. Avoids redoing the filesystem
|
|
106
|
+
// walk on the non-skip path (~8ms on 473 files). On pre-flight failure the
|
|
107
|
+
// caller resets ctx.allFiles so this guard correctly falls through.
|
|
108
|
+
if (!opts.scope && ctx.allFiles?.length && ctx.discoveredDirs?.size) {
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
|
|
103
112
|
if (opts.scope) {
|
|
104
113
|
// Scoped rebuild: rebuild only specified files.
|
|
105
114
|
//
|
|
@@ -27,7 +27,7 @@ interface FileHashRow {
|
|
|
27
27
|
}
|
|
28
28
|
|
|
29
29
|
interface FileStat {
|
|
30
|
-
|
|
30
|
+
mtime: number;
|
|
31
31
|
size: number;
|
|
32
32
|
}
|
|
33
33
|
|
|
@@ -182,7 +182,7 @@ function mtimeAndHashTiers(
|
|
|
182
182
|
if (!stat) continue;
|
|
183
183
|
const storedMtime = record.mtime || 0;
|
|
184
184
|
const storedSize = record.size || 0;
|
|
185
|
-
if (storedSize > 0 &&
|
|
185
|
+
if (storedSize > 0 && stat.mtime === storedMtime && stat.size === storedSize) {
|
|
186
186
|
skipped.push(relPath);
|
|
187
187
|
continue;
|
|
188
188
|
}
|
|
@@ -512,6 +512,132 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
|
|
|
512
512
|
purgeAndAddReverseDeps(ctx, changePaths, reverseDeps);
|
|
513
513
|
}
|
|
514
514
|
|
|
515
|
+
/**
|
|
516
|
+
* Read-only pre-flight check for the native orchestrator.
|
|
517
|
+
*
|
|
518
|
+
* Returns true iff every collected source file has matching mtime+size in
|
|
519
|
+
* `file_hashes` and no DB-tracked file has been removed. When true, the
|
|
520
|
+
* caller can short-circuit before invoking the native orchestrator —
|
|
521
|
+
* matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
|
|
522
|
+
* per-call native rebuild overhead seen in CI (#1054).
|
|
523
|
+
*
|
|
524
|
+
* Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
|
|
525
|
+
* hashing is left to the native side: when this returns false the caller
|
|
526
|
+
* falls through to the orchestrator, which performs its own complete
|
|
527
|
+
* detection and is the source of truth.
|
|
528
|
+
*
|
|
529
|
+
* Conservatively returns false when CFG or dataflow analysis is enabled
|
|
530
|
+
* but the corresponding tables are empty — otherwise the fast-skip would
|
|
531
|
+
* silently suppress the pending-analysis pass that the JS path runs via
|
|
532
|
+
* `runPendingAnalysis`, and CFG/dataflow data would never populate on
|
|
533
|
+
* repos where source files don't change between builds.
|
|
534
|
+
*
|
|
535
|
+
* Pure read of `db` and the filesystem — never mutates either.
|
|
536
|
+
*/
|
|
537
|
+
export function detectNoChanges(
|
|
538
|
+
db: BetterSqlite3Database,
|
|
539
|
+
allFiles: string[],
|
|
540
|
+
rootDir: string,
|
|
541
|
+
opts?: Record<string, unknown>,
|
|
542
|
+
): boolean {
|
|
543
|
+
// Diagnostic logging gated by env var — used by the bench gate to surface
|
|
544
|
+
// why the fast-skip is not firing on CI runners (#1066). Off by default to
|
|
545
|
+
// avoid noise on every regular incremental build.
|
|
546
|
+
const diag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
|
|
547
|
+
const log = (reason: string): void => {
|
|
548
|
+
if (diag) info(`[fast-skip] ${reason}`);
|
|
549
|
+
};
|
|
550
|
+
|
|
551
|
+
let hasTable = false;
|
|
552
|
+
try {
|
|
553
|
+
db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
|
|
554
|
+
hasTable = true;
|
|
555
|
+
} catch {
|
|
556
|
+
/* table missing — first build */
|
|
557
|
+
}
|
|
558
|
+
if (!hasTable) {
|
|
559
|
+
log('false: file_hashes table missing');
|
|
560
|
+
return false;
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
|
|
564
|
+
if (rows.length === 0) {
|
|
565
|
+
log('false: file_hashes table empty');
|
|
566
|
+
return false;
|
|
567
|
+
}
|
|
568
|
+
const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
|
|
569
|
+
|
|
570
|
+
const currentFiles = new Set<string>();
|
|
571
|
+
for (const file of allFiles) {
|
|
572
|
+
currentFiles.add(normalizePath(path.relative(rootDir, file)));
|
|
573
|
+
}
|
|
574
|
+
for (const existingFile of existing.keys()) {
|
|
575
|
+
if (!currentFiles.has(existingFile)) {
|
|
576
|
+
log(`false: tracked file no longer collected: ${existingFile}`);
|
|
577
|
+
return false;
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
for (const file of allFiles) {
|
|
582
|
+
const relPath = normalizePath(path.relative(rootDir, file));
|
|
583
|
+
const record = existing.get(relPath);
|
|
584
|
+
if (!record) {
|
|
585
|
+
log(`false: collected file missing from file_hashes: ${relPath}`);
|
|
586
|
+
return false;
|
|
587
|
+
}
|
|
588
|
+
const stat = fileStat(file) as FileStat | undefined;
|
|
589
|
+
if (!stat) {
|
|
590
|
+
log(`false: stat failed for ${relPath}`);
|
|
591
|
+
return false;
|
|
592
|
+
}
|
|
593
|
+
const storedMtime = record.mtime || 0;
|
|
594
|
+
const storedSize = record.size || 0;
|
|
595
|
+
if (storedSize <= 0) {
|
|
596
|
+
log(`false: stored size <= 0 for ${relPath} (stored=${record.size})`);
|
|
597
|
+
return false;
|
|
598
|
+
}
|
|
599
|
+
if (stat.mtime !== storedMtime || stat.size !== storedSize) {
|
|
600
|
+
log(
|
|
601
|
+
`false: mtime/size diff for ${relPath}: stat=${stat.mtime}/${stat.size} stored=${storedMtime}/${storedSize}`,
|
|
602
|
+
);
|
|
603
|
+
return false;
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
// Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
|
|
608
|
+
// table is empty (analysis newly enabled, or tables wiped between builds),
|
|
609
|
+
// fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
|
|
610
|
+
// Mirrors the check at the top of runPendingAnalysis (see line ~244).
|
|
611
|
+
if (opts) {
|
|
612
|
+
if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
|
|
613
|
+
log('false: pending-analysis guard — cfg_blocks is empty');
|
|
614
|
+
return false;
|
|
615
|
+
}
|
|
616
|
+
if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
|
|
617
|
+
log('false: pending-analysis guard — dataflow is empty');
|
|
618
|
+
return false;
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
log(`true: all checks passed (${allFiles.length} files)`);
|
|
623
|
+
return true;
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
/**
|
|
627
|
+
* Returns true if `table` exists and has zero rows, matching the empty-table
|
|
628
|
+
* semantics of `runPendingAnalysis`. A missing table is treated as empty
|
|
629
|
+
* (the conservative outcome), so the caller falls through to the orchestrator
|
|
630
|
+
* which will create the schema and populate it.
|
|
631
|
+
*/
|
|
632
|
+
function hasEmptyAnalysisTable(db: BetterSqlite3Database, table: string): boolean {
|
|
633
|
+
try {
|
|
634
|
+
const row = db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get() as { c: number } | undefined;
|
|
635
|
+
return (row?.c ?? 0) === 0;
|
|
636
|
+
} catch {
|
|
637
|
+
return true;
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
|
|
515
641
|
export async function detectChanges(ctx: PipelineContext): Promise<void> {
|
|
516
642
|
const start = performance.now();
|
|
517
643
|
try {
|
|
@@ -537,7 +663,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
|
|
|
537
663
|
relPath: c.relPath,
|
|
538
664
|
content: c.content,
|
|
539
665
|
hash: c.hash,
|
|
540
|
-
stat: c.stat ? { mtime:
|
|
666
|
+
stat: c.stat ? { mtime: c.stat.mtime, size: c.stat.size } : undefined,
|
|
541
667
|
_reverseDepOnly: c._reverseDepOnly,
|
|
542
668
|
}));
|
|
543
669
|
ctx.metadataUpdates = increResult.changed
|
|
@@ -548,7 +674,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
|
|
|
548
674
|
.map((c) => ({
|
|
549
675
|
relPath: c.relPath,
|
|
550
676
|
hash: c.hash,
|
|
551
|
-
stat: { mtime:
|
|
677
|
+
stat: { mtime: c.stat.mtime, size: c.stat.size },
|
|
552
678
|
}));
|
|
553
679
|
if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) {
|
|
554
680
|
const ranAnalysis = await runPendingAnalysis(ctx);
|
|
@@ -82,13 +82,16 @@ function persistBuildMetadata(
|
|
|
82
82
|
): void {
|
|
83
83
|
const useNativeDb = ctx.engineName === 'native' && !!ctx.nativeDb;
|
|
84
84
|
if (!ctx.isFullBuild && ctx.allSymbols.size <= 3) return;
|
|
85
|
-
// When the native engine is active, persist the
|
|
86
|
-
//
|
|
87
|
-
//
|
|
88
|
-
//
|
|
89
|
-
//
|
|
85
|
+
// When the native engine is active, persist the binary's CARGO_PKG_VERSION
|
|
86
|
+
// (ctx.nativeBinaryVersion). The Rust orchestrator's check_version_mismatch
|
|
87
|
+
// compares against that exact value, so writing the platform package.json
|
|
88
|
+
// version (ctx.engineVersion) — which can drift from the binary in CI
|
|
89
|
+
// hot-swap flows (#1066) — would force every subsequent native build to
|
|
90
|
+
// be a full rebuild.
|
|
90
91
|
const codeVersionToWrite =
|
|
91
|
-
ctx.engineName === 'native' && ctx.
|
|
92
|
+
ctx.engineName === 'native' && ctx.nativeBinaryVersion
|
|
93
|
+
? ctx.nativeBinaryVersion
|
|
94
|
+
: CODEGRAPH_VERSION;
|
|
92
95
|
// Persist the repo root so downstream commands (e.g. `codegraph embed`)
|
|
93
96
|
// can resolve relative file paths regardless of the invoking cwd.
|
|
94
97
|
// Use realpathSync (symlink-resolving) to match the Rust engine's
|