@optave/codegraph 3.9.5 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -16
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +4 -3
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
- package/dist/ast-analysis/rules/csharp.js +8 -1
- package/dist/ast-analysis/rules/csharp.js.map +1 -1
- package/dist/ast-analysis/rules/go.d.ts.map +1 -1
- package/dist/ast-analysis/rules/go.js +4 -1
- package/dist/ast-analysis/rules/go.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts +6 -0
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +151 -4
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/rules/java.d.ts.map +1 -1
- package/dist/ast-analysis/rules/java.js +5 -1
- package/dist/ast-analysis/rules/java.js.map +1 -1
- package/dist/ast-analysis/rules/php.d.ts.map +1 -1
- package/dist/ast-analysis/rules/php.js +6 -1
- package/dist/ast-analysis/rules/php.js.map +1 -1
- package/dist/ast-analysis/rules/python.d.ts.map +1 -1
- package/dist/ast-analysis/rules/python.js +5 -1
- package/dist/ast-analysis/rules/python.js.map +1 -1
- package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
- package/dist/ast-analysis/rules/ruby.js +4 -1
- package/dist/ast-analysis/rules/ruby.js.map +1 -1
- package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
- package/dist/ast-analysis/rules/rust.js +5 -1
- package/dist/ast-analysis/rules/rust.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +171 -37
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +10 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js +10 -0
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +7 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +7 -2
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +210 -34
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +8 -0
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts +24 -0
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +117 -3
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +9 -6
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts +30 -0
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +36 -13
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/parser.d.ts +54 -1
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +181 -10
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/models.js +2 -2
- package/dist/domain/wasm-worker-entry.js +15 -14
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/features/ast.d.ts.map +1 -1
- package/dist/features/ast.js +11 -9
- package/dist/features/ast.js.map +1 -1
- package/dist/infrastructure/config.d.ts +1 -0
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +1 -0
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/mcp/server.js +14 -8
- package/dist/mcp/server.js.map +1 -1
- package/dist/mcp/tool-registry.d.ts +1 -1
- package/dist/mcp/tool-registry.d.ts.map +1 -1
- package/dist/mcp/tool-registry.js +19 -5
- package/dist/mcp/tool-registry.js.map +1 -1
- package/dist/types.d.ts +1 -0
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/package.json +8 -7
- package/src/ast-analysis/engine.ts +14 -2
- package/src/ast-analysis/rules/csharp.ts +8 -1
- package/src/ast-analysis/rules/go.ts +4 -1
- package/src/ast-analysis/rules/index.ts +181 -4
- package/src/ast-analysis/rules/java.ts +5 -1
- package/src/ast-analysis/rules/php.ts +6 -1
- package/src/ast-analysis/rules/python.ts +5 -1
- package/src/ast-analysis/rules/ruby.ts +4 -1
- package/src/ast-analysis/rules/rust.ts +5 -1
- package/src/ast-analysis/visitors/ast-store-visitor.ts +165 -34
- package/src/domain/graph/builder/context.ts +10 -0
- package/src/domain/graph/builder/helpers.ts +8 -3
- package/src/domain/graph/builder/pipeline.ts +234 -36
- package/src/domain/graph/builder/stages/collect-files.ts +9 -0
- package/src/domain/graph/builder/stages/detect-changes.ts +130 -4
- package/src/domain/graph/builder/stages/finalize.ts +9 -6
- package/src/domain/graph/builder/stages/insert-nodes.ts +38 -14
- package/src/domain/parser.ts +205 -9
- package/src/domain/search/models.ts +2 -2
- package/src/domain/wasm-worker-entry.ts +23 -13
- package/src/features/ast.ts +22 -9
- package/src/infrastructure/config.ts +1 -0
- package/src/mcp/server.ts +16 -9
- package/src/mcp/tool-registry.ts +23 -5
- package/src/types.ts +1 -0
|
@@ -9,6 +9,7 @@ import path from 'node:path';
|
|
|
9
9
|
import { performance } from 'node:perf_hooks';
|
|
10
10
|
import {
|
|
11
11
|
acquireAdvisoryLock,
|
|
12
|
+
closeDb,
|
|
12
13
|
closeDbPair,
|
|
13
14
|
getBuildMeta,
|
|
14
15
|
initSchema,
|
|
@@ -32,16 +33,30 @@ import type {
|
|
|
32
33
|
ExtractorOutput,
|
|
33
34
|
SqliteStatement,
|
|
34
35
|
} from '../../../types.js';
|
|
35
|
-
import {
|
|
36
|
+
import {
|
|
37
|
+
classifyNativeDrops,
|
|
38
|
+
formatDropExtensionSummary,
|
|
39
|
+
getActiveEngine,
|
|
40
|
+
getInstalledWasmExtensions,
|
|
41
|
+
parseFilesWasmForBackfill,
|
|
42
|
+
} from '../../parser.js';
|
|
43
|
+
import { writeJournalHeader } from '../journal.js';
|
|
36
44
|
import { setWorkspaces } from '../resolve.js';
|
|
37
45
|
import { PipelineContext } from './context.js';
|
|
38
|
-
import {
|
|
46
|
+
import {
|
|
47
|
+
batchInsertNodes,
|
|
48
|
+
collectFiles as collectFilesUtil,
|
|
49
|
+
fileHash,
|
|
50
|
+
fileStat,
|
|
51
|
+
loadPathAliases,
|
|
52
|
+
readFileSafe,
|
|
53
|
+
} from './helpers.js';
|
|
39
54
|
import { NativeDbProxy } from './native-db-proxy.js';
|
|
40
55
|
import { buildEdges } from './stages/build-edges.js';
|
|
41
56
|
import { buildStructure } from './stages/build-structure.js';
|
|
42
57
|
// Pipeline stages
|
|
43
58
|
import { collectFiles } from './stages/collect-files.js';
|
|
44
|
-
import { detectChanges } from './stages/detect-changes.js';
|
|
59
|
+
import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
|
|
45
60
|
import { finalize } from './stages/finalize.js';
|
|
46
61
|
import { insertNodes } from './stages/insert-nodes.js';
|
|
47
62
|
import { parseFiles } from './stages/parse-files.js';
|
|
@@ -61,9 +76,14 @@ function initializeEngine(ctx: PipelineContext): void {
|
|
|
61
76
|
suspendJsDb: undefined,
|
|
62
77
|
resumeJsDb: undefined,
|
|
63
78
|
};
|
|
64
|
-
const {
|
|
79
|
+
const {
|
|
80
|
+
name: engineName,
|
|
81
|
+
version: engineVersion,
|
|
82
|
+
binaryVersion: nativeBinaryVersion,
|
|
83
|
+
} = getActiveEngine(ctx.engineOpts);
|
|
65
84
|
ctx.engineName = engineName as 'native' | 'wasm';
|
|
66
85
|
ctx.engineVersion = engineVersion;
|
|
86
|
+
ctx.nativeBinaryVersion = nativeBinaryVersion;
|
|
67
87
|
info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
|
|
68
88
|
}
|
|
69
89
|
|
|
@@ -90,13 +110,15 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
|
|
|
90
110
|
);
|
|
91
111
|
ctx.forceFullRebuild = true;
|
|
92
112
|
}
|
|
93
|
-
// When the native engine is active, the Rust
|
|
94
|
-
//
|
|
95
|
-
//
|
|
96
|
-
//
|
|
97
|
-
//
|
|
113
|
+
// When the native engine is active, the Rust orchestrator writes
|
|
114
|
+
// build_meta.codegraph_version = CARGO_PKG_VERSION (the binary's own value).
|
|
115
|
+
// Compare against the same value here so a CI hot-swap that leaves the
|
|
116
|
+
// platform package.json behind doesn't trigger a perpetual full-rebuild
|
|
117
|
+
// loop on every incremental (#1066).
|
|
98
118
|
const effectiveVersion =
|
|
99
|
-
ctx.engineName === 'native' && ctx.
|
|
119
|
+
ctx.engineName === 'native' && ctx.nativeBinaryVersion
|
|
120
|
+
? ctx.nativeBinaryVersion
|
|
121
|
+
: CODEGRAPH_VERSION;
|
|
100
122
|
const prevVersion = meta('codegraph_version');
|
|
101
123
|
if (prevVersion && prevVersion !== effectiveVersion) {
|
|
102
124
|
info(
|
|
@@ -650,16 +672,24 @@ async function tryNativeOrchestrator(
|
|
|
650
672
|
const p = result.phases;
|
|
651
673
|
|
|
652
674
|
// Sync build_meta so JS-side version/engine checks work on next build.
|
|
653
|
-
// Use the
|
|
654
|
-
//
|
|
655
|
-
// compares
|
|
656
|
-
//
|
|
657
|
-
//
|
|
658
|
-
//
|
|
675
|
+
// Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
|
|
676
|
+
// platform package.json version (ctx.engineVersion). The Rust side's
|
|
677
|
+
// check_version_mismatch compares against CARGO_PKG_VERSION; writing
|
|
678
|
+
// the package.json value would create a permanent mismatch whenever
|
|
679
|
+
// the binary and platform package.json diverge — e.g., CI hot-swap
|
|
680
|
+
// via ci-install-native.mjs (#1066) — forcing every subsequent build
|
|
681
|
+
// to be a full rebuild.
|
|
682
|
+
//
|
|
683
|
+
// When the native addon doesn't expose engineVersion() (older addon),
|
|
684
|
+
// fall back to CODEGRAPH_VERSION — same fallback used by both
|
|
685
|
+
// checkEngineSchemaMismatch (read path) and persistBuildMetadata
|
|
686
|
+
// (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
|
|
687
|
+
// here would re-introduce the asymmetry this PR fixes for that case.
|
|
688
|
+
const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
|
|
659
689
|
setBuildMeta(ctx.db, {
|
|
660
690
|
engine: ctx.engineName,
|
|
661
|
-
engine_version:
|
|
662
|
-
codegraph_version:
|
|
691
|
+
engine_version: nativeVersionForMeta,
|
|
692
|
+
codegraph_version: nativeVersionForMeta,
|
|
663
693
|
schema_version: String(ctx.schemaVersion),
|
|
664
694
|
built_at: new Date().toISOString(),
|
|
665
695
|
});
|
|
@@ -723,10 +753,20 @@ async function tryNativeOrchestrator(
|
|
|
723
753
|
// stale native binaries). WASM handles those — backfill via WASM so both
|
|
724
754
|
// engines process the same file set (#967).
|
|
725
755
|
//
|
|
726
|
-
//
|
|
727
|
-
//
|
|
728
|
-
//
|
|
729
|
-
|
|
756
|
+
// Runs on full builds and on incrementals when the orchestrator reports
|
|
757
|
+
// any file activity (removals or changes). The orchestrator's
|
|
758
|
+
// `detect_removed_files` filter (#1070) skips files outside its narrower
|
|
759
|
+
// file_collector, so on a current binary a no-op rebuild reports
|
|
760
|
+
// `removedCount=0` and `changedCount=0`, making the backfill call pure
|
|
761
|
+
// overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
|
|
762
|
+
// binaries lacking the filter still report `removedCount>0` and get the
|
|
763
|
+
// gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
|
|
764
|
+
// narrows (but does not fully close) the gap where a brand-new
|
|
765
|
+
// unsupported-extension file is added on an otherwise-quiet incremental
|
|
766
|
+
// — see #1091 for the residual gap.
|
|
767
|
+
const removedCount = result.removedCount ?? 0;
|
|
768
|
+
const changedCount = result.changedCount ?? 0;
|
|
769
|
+
if (result.isFullBuild || removedCount > 0 || changedCount > 0) {
|
|
730
770
|
await backfillNativeDroppedFiles(ctx);
|
|
731
771
|
}
|
|
732
772
|
|
|
@@ -739,41 +779,83 @@ async function tryNativeOrchestrator(
|
|
|
739
779
|
* Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
|
|
740
780
|
*/
|
|
741
781
|
async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
742
|
-
//
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
}
|
|
748
|
-
|
|
782
|
+
// Compute the missing-file set FIRST, before any expensive DB handoff.
|
|
783
|
+
// NativeDbProxy supports .prepare().all(), so the upfront query works
|
|
784
|
+
// whether ctx.db is a proxy or a real better-sqlite3 connection. On
|
|
785
|
+
// incremental no-op rebuilds nothing is missing, so we want to early-return
|
|
786
|
+
// without paying the close-native / reopen-better-sqlite3 cost.
|
|
749
787
|
const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
|
|
750
788
|
const expected = new Set(
|
|
751
789
|
collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
|
|
752
790
|
);
|
|
753
791
|
|
|
754
|
-
const
|
|
792
|
+
const existingNodeRows = ctx.db
|
|
755
793
|
.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
|
|
756
794
|
.all() as Array<{ file: string }>;
|
|
757
|
-
const
|
|
795
|
+
const existingNodes = new Set(existingNodeRows.map((r) => r.file));
|
|
796
|
+
|
|
797
|
+
// Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
|
|
798
|
+
// (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
|
|
799
|
+
// (e.g. a DB written by old code where `nodes` was populated but
|
|
800
|
+
// `file_hashes` was not). Treating "in nodes but not in file_hashes" as
|
|
801
|
+
// missing closes the gap so the backfill repairs the file_hashes row even
|
|
802
|
+
// when the node row already exists.
|
|
803
|
+
let existingHashes = new Set<string>();
|
|
804
|
+
try {
|
|
805
|
+
const existingHashRows = ctx.db
|
|
806
|
+
.prepare('SELECT DISTINCT file FROM file_hashes')
|
|
807
|
+
.all() as Array<{ file: string }>;
|
|
808
|
+
existingHashes = new Set(existingHashRows.map((r) => r.file));
|
|
809
|
+
} catch (e) {
|
|
810
|
+
// file_hashes table may not exist on legacy DBs; treat as fully missing
|
|
811
|
+
// so the backfill writes rows on the upsert path below.
|
|
812
|
+
debug(
|
|
813
|
+
`backfillNativeDroppedFiles: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
|
|
814
|
+
);
|
|
815
|
+
}
|
|
758
816
|
|
|
759
817
|
// Restrict backfill to files with an installed WASM grammar. Extensions in
|
|
760
818
|
// LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
|
|
761
819
|
// minimal installs) can't be parsed by either engine, so they're not a
|
|
762
820
|
// native regression — excluding them keeps the warn count meaningful.
|
|
763
821
|
const installedExts = getInstalledWasmExtensions();
|
|
822
|
+
const missingRel: string[] = [];
|
|
764
823
|
const missingAbs: string[] = [];
|
|
765
824
|
for (const rel of expected) {
|
|
766
|
-
if
|
|
825
|
+
// A file is "missing" if it's absent from EITHER nodes OR file_hashes.
|
|
826
|
+
// Both must be present for fast-skip to work correctly.
|
|
827
|
+
if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
|
|
767
828
|
const ext = path.extname(rel).toLowerCase();
|
|
768
829
|
if (!installedExts.has(ext)) continue;
|
|
830
|
+
missingRel.push(rel);
|
|
769
831
|
missingAbs.push(path.join(ctx.rootDir, rel));
|
|
770
832
|
}
|
|
771
833
|
if (missingAbs.length === 0) return;
|
|
772
834
|
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
)
|
|
776
|
-
|
|
835
|
+
// Now that we know there's work to do, hand off to better-sqlite3 (needed
|
|
836
|
+
// for the INSERT path below).
|
|
837
|
+
if (ctx.nativeFirstProxy) {
|
|
838
|
+
closeNativeDb(ctx, 'pre-parity-backfill');
|
|
839
|
+
ctx.db = openDb(ctx.dbPath);
|
|
840
|
+
ctx.nativeFirstProxy = false;
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
// Classify drops so users see per-extension reasons instead of just a count
|
|
844
|
+
// (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
|
|
845
|
+
// extractor); `native-extractor-failure` indicates a real native bug since
|
|
846
|
+
// the language IS supported by the addon yet the file was dropped anyway.
|
|
847
|
+
const { byReason, totals } = classifyNativeDrops(missingRel);
|
|
848
|
+
if (totals['unsupported-by-native'] > 0) {
|
|
849
|
+
info(
|
|
850
|
+
`Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`,
|
|
851
|
+
);
|
|
852
|
+
}
|
|
853
|
+
if (totals['native-extractor-failure'] > 0) {
|
|
854
|
+
warn(
|
|
855
|
+
`Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
|
|
856
|
+
);
|
|
857
|
+
}
|
|
858
|
+
const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
|
|
777
859
|
|
|
778
860
|
const rows: unknown[][] = [];
|
|
779
861
|
const exportKeys: unknown[][] = [];
|
|
@@ -833,6 +915,68 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
833
915
|
updateStmt.run(...vals);
|
|
834
916
|
}
|
|
835
917
|
}
|
|
918
|
+
|
|
919
|
+
// Persist file_hashes rows for every backfilled file. The Rust orchestrator
|
|
920
|
+
// only hashes files it parsed itself, so without this step files in
|
|
921
|
+
// optional-language extensions (e.g. .clj when no Rust extractor exists)
|
|
922
|
+
// would be missing from `file_hashes` — permanently breaking the JS-side
|
|
923
|
+
// fast-skip pre-flight (#1054), which rejects on `collected file missing
|
|
924
|
+
// from file_hashes` and forces every no-op rebuild back through the full
|
|
925
|
+
// ~2s native pipeline (#1068).
|
|
926
|
+
//
|
|
927
|
+
// Iterates `missingRel` (every collected file the Rust orchestrator
|
|
928
|
+
// dropped), not `wasmResults`, so files that produced zero symbols still
|
|
929
|
+
// get a row.
|
|
930
|
+
try {
|
|
931
|
+
const upsertHash = db.prepare(
|
|
932
|
+
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
|
|
933
|
+
);
|
|
934
|
+
const writeHashes = db.transaction(() => {
|
|
935
|
+
for (let i = 0; i < missingRel.length; i++) {
|
|
936
|
+
const relPath = missingRel[i];
|
|
937
|
+
const absPath = missingAbs[i];
|
|
938
|
+
if (!relPath || !absPath) continue;
|
|
939
|
+
let code: string | null;
|
|
940
|
+
try {
|
|
941
|
+
code = readFileSafe(absPath);
|
|
942
|
+
} catch (e) {
|
|
943
|
+
debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
|
|
944
|
+
continue;
|
|
945
|
+
}
|
|
946
|
+
if (code === null) continue;
|
|
947
|
+
const stat = fileStat(absPath);
|
|
948
|
+
const mtime = stat ? stat.mtime : 0;
|
|
949
|
+
const size = stat ? stat.size : 0;
|
|
950
|
+
upsertHash.run(relPath, fileHash(code), mtime, size);
|
|
951
|
+
}
|
|
952
|
+
});
|
|
953
|
+
writeHashes();
|
|
954
|
+
} catch (e) {
|
|
955
|
+
debug(
|
|
956
|
+
`backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
|
|
957
|
+
);
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
// Free WASM parse trees from the inline backfill path (#1058).
|
|
961
|
+
// `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
|
|
962
|
+
// backed by WASM linear memory) on every result, but these symbols are
|
|
963
|
+
// consumed locally for DB row construction and never added to
|
|
964
|
+
// `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
|
|
965
|
+
// sees them. Without this, trees leak WASM memory until process exit —
|
|
966
|
+
// bounded per run but cumulative across in-process integration tests.
|
|
967
|
+
// Mirrors the cleanup discipline established for #931.
|
|
968
|
+
for (const [, symbols] of wasmResults) {
|
|
969
|
+
const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
|
|
970
|
+
if (tree && typeof tree.delete === 'function') {
|
|
971
|
+
try {
|
|
972
|
+
tree.delete();
|
|
973
|
+
} catch {
|
|
974
|
+
/* ignore cleanup errors */
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
(symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
|
|
978
|
+
(symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
|
|
979
|
+
}
|
|
836
980
|
}
|
|
837
981
|
|
|
838
982
|
// ── Pipeline stages execution ───────────────────────────────────────────
|
|
@@ -959,6 +1103,60 @@ export async function buildGraph(
|
|
|
959
1103
|
try {
|
|
960
1104
|
setupPipeline(ctx);
|
|
961
1105
|
|
|
1106
|
+
// ── JS-side fast-skip for native incremental (#1054) ──────────────
|
|
1107
|
+
// The Rust orchestrator's internal early-exit fires reliably locally
|
|
1108
|
+
// but not in CI, where every no-op rebuild was paying the full ~2s
|
|
1109
|
+
// pipeline cost. A read-only mtime+size check here matches WASM's
|
|
1110
|
+
// ~20ms early-exit and skips the orchestrator entirely when no
|
|
1111
|
+
// source files have changed. Tier-2 hashing is left to the native
|
|
1112
|
+
// side: any mismatch falls through and lets Rust's detect_changes
|
|
1113
|
+
// remain the source of truth.
|
|
1114
|
+
//
|
|
1115
|
+
// Diagnostic logging gated by CODEGRAPH_FAST_SKIP_DIAG (#1066) — when
|
|
1116
|
+
// any of the call-site guards short-circuit (forceFullRebuild,
|
|
1117
|
+
// engineName, scope, etc.) we log the reason so the bench gate run
|
|
1118
|
+
// produces observable output even if `detectNoChanges` is never
|
|
1119
|
+
// entered.
|
|
1120
|
+
const fastSkipDiag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
|
|
1121
|
+
if (fastSkipDiag) {
|
|
1122
|
+
const reasons: string[] = [];
|
|
1123
|
+
if (!ctx.nativeAvailable) reasons.push('nativeAvailable=false');
|
|
1124
|
+
if (ctx.engineName !== 'native') reasons.push(`engineName=${ctx.engineName}`);
|
|
1125
|
+
if (!ctx.incremental) reasons.push('incremental=false');
|
|
1126
|
+
if (ctx.forceFullRebuild) reasons.push('forceFullRebuild=true');
|
|
1127
|
+
if ((ctx.opts as Record<string, unknown>).scope) reasons.push('scope=set');
|
|
1128
|
+
if (reasons.length > 0) {
|
|
1129
|
+
info(`[fast-skip] false: pre-flight gate skipped — ${reasons.join(', ')}`);
|
|
1130
|
+
}
|
|
1131
|
+
}
|
|
1132
|
+
if (
|
|
1133
|
+
ctx.nativeAvailable &&
|
|
1134
|
+
ctx.engineName === 'native' &&
|
|
1135
|
+
ctx.incremental &&
|
|
1136
|
+
!ctx.forceFullRebuild &&
|
|
1137
|
+
!(ctx.opts as Record<string, unknown>).scope
|
|
1138
|
+
) {
|
|
1139
|
+
try {
|
|
1140
|
+
await collectFiles(ctx);
|
|
1141
|
+
if (
|
|
1142
|
+
detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record<string, unknown>)
|
|
1143
|
+
) {
|
|
1144
|
+
info('No changes detected. Graph is up to date.');
|
|
1145
|
+
writeJournalHeader(ctx.rootDir, Date.now());
|
|
1146
|
+
closeDb(ctx.db);
|
|
1147
|
+
return;
|
|
1148
|
+
}
|
|
1149
|
+
} catch (err) {
|
|
1150
|
+
// Pre-flight is best-effort — any failure falls through to the
|
|
1151
|
+
// orchestrator, which performs its own complete detection.
|
|
1152
|
+
// Reset ctx.allFiles so runPipelineStages re-collects under its own
|
|
1153
|
+
// engine state if we ended up partially populated before throwing.
|
|
1154
|
+
ctx.allFiles = undefined as unknown as string[];
|
|
1155
|
+
ctx.discoveredDirs = undefined as unknown as Set<string>;
|
|
1156
|
+
debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`);
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1159
|
+
|
|
962
1160
|
// ── Rust orchestrator fast path (#695) ────────────────────────────
|
|
963
1161
|
// When available, run the entire build pipeline in Rust with zero
|
|
964
1162
|
// napi crossings (eliminates WAL dual-connection dance). Falls back
|
|
@@ -100,6 +100,15 @@ function tryFastCollect(
|
|
|
100
100
|
export async function collectFiles(ctx: PipelineContext): Promise<void> {
|
|
101
101
|
const { rootDir, config, opts } = ctx;
|
|
102
102
|
|
|
103
|
+
// Skip when the JS-side fast-skip pre-flight (#1054) already populated the
|
|
104
|
+
// file list and changes were detected, causing fallthrough to the native
|
|
105
|
+
// orchestrator and then to runPipelineStages. Avoids redoing the filesystem
|
|
106
|
+
// walk on the non-skip path (~8ms on 473 files). On pre-flight failure the
|
|
107
|
+
// caller resets ctx.allFiles so this guard correctly falls through.
|
|
108
|
+
if (!opts.scope && ctx.allFiles?.length && ctx.discoveredDirs?.size) {
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
|
|
103
112
|
if (opts.scope) {
|
|
104
113
|
// Scoped rebuild: rebuild only specified files.
|
|
105
114
|
//
|
|
@@ -27,7 +27,7 @@ interface FileHashRow {
|
|
|
27
27
|
}
|
|
28
28
|
|
|
29
29
|
interface FileStat {
|
|
30
|
-
|
|
30
|
+
mtime: number;
|
|
31
31
|
size: number;
|
|
32
32
|
}
|
|
33
33
|
|
|
@@ -182,7 +182,7 @@ function mtimeAndHashTiers(
|
|
|
182
182
|
if (!stat) continue;
|
|
183
183
|
const storedMtime = record.mtime || 0;
|
|
184
184
|
const storedSize = record.size || 0;
|
|
185
|
-
if (storedSize > 0 &&
|
|
185
|
+
if (storedSize > 0 && stat.mtime === storedMtime && stat.size === storedSize) {
|
|
186
186
|
skipped.push(relPath);
|
|
187
187
|
continue;
|
|
188
188
|
}
|
|
@@ -512,6 +512,132 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
|
|
|
512
512
|
purgeAndAddReverseDeps(ctx, changePaths, reverseDeps);
|
|
513
513
|
}
|
|
514
514
|
|
|
515
|
+
/**
 * Read-only pre-flight check that lets the caller skip the native
 * orchestrator when nothing has changed since the last build (#1054).
 *
 * Returns `true` only when ALL of the following hold:
 *  - the `file_hashes` table exists and is non-empty;
 *  - every path tracked in `file_hashes` is still present in `allFiles`;
 *  - every file in `allFiles` has a `file_hashes` row with a positive stored
 *    size whose mtime and size equal the current `fileStat` result;
 *  - for each of CFG / dataflow that is not explicitly disabled
 *    (`opts.cfg === false` / `opts.dataflow === false`), the corresponding
 *    table (`cfg_blocks` / `dataflow`) already has at least one row.
 *
 * Only mtime and size are compared — no content hashing and no journal
 * lookup happens here. Any mismatch returns `false` so the caller falls
 * through to the orchestrator, which performs its own complete detection.
 *
 * The analysis-table guard also applies when `opts` is omitted: analysis is
 * treated as enabled unless the flag is exactly `false`, so a missing options
 * bag must not make the fast-skip less conservative.
 *
 * Issues only `SELECT` statements against `db`. When the environment variable
 * `CODEGRAPH_FAST_SKIP_DIAG` is `'1'`, the reason for each outcome is logged
 * via `info` with a `[fast-skip]` prefix (#1066).
 *
 * @param db - Database handle holding `file_hashes` and the analysis tables.
 * @param allFiles - Collected source file paths; each is made relative to
 *   `rootDir` and normalized before being matched against `file_hashes.file`.
 * @param rootDir - Root directory the stored relative paths are based on.
 * @param opts - Build options; only `cfg` and `dataflow` are read.
 * @returns `true` if the graph can be considered up to date, else `false`.
 */
export function detectNoChanges(
  db: BetterSqlite3Database,
  allFiles: string[],
  rootDir: string,
  opts?: Record<string, unknown>,
): boolean {
  // Diagnostic logging is opt-in so regular incremental builds stay quiet.
  const diagEnabled = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
  const log = (reason: string): void => {
    if (diagEnabled) info(`[fast-skip] ${reason}`);
  };

  // Probe for the table first: on a first build it does not exist yet and
  // the full SELECT below would throw.
  let hasTable = false;
  try {
    db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
    hasTable = true;
  } catch {
    /* table missing — first build */
  }
  if (!hasTable) {
    log('false: file_hashes table missing');
    return false;
  }

  const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
  if (rows.length === 0) {
    log('false: file_hashes table empty');
    return false;
  }
  const tracked = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));

  // Resolve each collected file's root-relative path once; both the removal
  // check and the per-file comparison below key on it.
  const collected = allFiles.map((file) => ({
    file,
    relPath: normalizePath(path.relative(rootDir, file)),
  }));
  const currentFiles = new Set<string>(collected.map((entry) => entry.relPath));

  // Removal check: a tracked path that is no longer collected means the
  // graph holds stale data for it.
  for (const trackedFile of tracked.keys()) {
    if (!currentFiles.has(trackedFile)) {
      log(`false: tracked file no longer collected: ${trackedFile}`);
      return false;
    }
  }

  // Per-file mtime/size comparison.
  for (const { file, relPath } of collected) {
    const record = tracked.get(relPath);
    if (!record) {
      log(`false: collected file missing from file_hashes: ${relPath}`);
      return false;
    }
    const stat = fileStat(file) as FileStat | undefined;
    if (!stat) {
      log(`false: stat failed for ${relPath}`);
      return false;
    }
    const storedMtime = record.mtime || 0;
    const storedSize = record.size || 0;
    // A non-positive stored size cannot be trusted for comparison, so it
    // never qualifies for the fast-skip.
    if (storedSize <= 0) {
      log(`false: stored size <= 0 for ${relPath} (stored=${record.size})`);
      return false;
    }
    if (stat.mtime !== storedMtime || stat.size !== storedSize) {
      log(
        `false: mtime/size diff for ${relPath}: stat=${stat.mtime}/${stat.size} stored=${storedMtime}/${storedSize}`,
      );
      return false;
    }
  }

  // Pending-analysis guard: if CFG/dataflow is enabled but its table is empty
  // (analysis newly enabled, or tables wiped between builds), fall through so
  // the orchestrator / JS pipeline can populate it. Optional chaining keeps
  // the guard active when `opts` is undefined.
  if (opts?.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
    log('false: pending-analysis guard — cfg_blocks is empty');
    return false;
  }
  if (opts?.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
    log('false: pending-analysis guard — dataflow is empty');
    return false;
  }

  log(`true: all checks passed (${allFiles.length} files)`);
  return true;
}
|
|
625
|
+
|
|
626
|
+
/**
 * Reports whether an analysis table currently holds no rows.
 *
 * Returns `true` when `table` has zero rows OR when the query throws (for
 * example because the table does not exist). Treating a failed query as
 * "empty" is the conservative outcome: the caller then declines the
 * fast-skip and falls through to the regular build path.
 *
 * Uses a `LIMIT 1` existence probe rather than `COUNT(*)` — only emptiness
 * matters here, so there is no need to count every row.
 *
 * @param db - Database handle to query.
 * @param table - Table name. It is interpolated into the SQL text because
 *   identifiers cannot be bound as parameters; the caller visible in this
 *   file passes only the fixed literals `'cfg_blocks'` and `'dataflow'`.
 *   Never pass untrusted input.
 * @returns `true` if the table is empty or could not be queried.
 */
function hasEmptyAnalysisTable(db: BetterSqlite3Database, table: string): boolean {
  try {
    const firstRow: unknown = db.prepare(`SELECT 1 FROM ${table} LIMIT 1`).get();
    // `.get()` yields no row object when the result set is empty; accept both
    // `undefined` and `null` as "no row".
    return firstRow == null;
  } catch {
    return true;
  }
}
|
|
640
|
+
|
|
515
641
|
export async function detectChanges(ctx: PipelineContext): Promise<void> {
|
|
516
642
|
const start = performance.now();
|
|
517
643
|
try {
|
|
@@ -537,7 +663,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
|
|
|
537
663
|
relPath: c.relPath,
|
|
538
664
|
content: c.content,
|
|
539
665
|
hash: c.hash,
|
|
540
|
-
stat: c.stat ? { mtime:
|
|
666
|
+
stat: c.stat ? { mtime: c.stat.mtime, size: c.stat.size } : undefined,
|
|
541
667
|
_reverseDepOnly: c._reverseDepOnly,
|
|
542
668
|
}));
|
|
543
669
|
ctx.metadataUpdates = increResult.changed
|
|
@@ -548,7 +674,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
|
|
|
548
674
|
.map((c) => ({
|
|
549
675
|
relPath: c.relPath,
|
|
550
676
|
hash: c.hash,
|
|
551
|
-
stat: { mtime:
|
|
677
|
+
stat: { mtime: c.stat.mtime, size: c.stat.size },
|
|
552
678
|
}));
|
|
553
679
|
if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) {
|
|
554
680
|
const ranAnalysis = await runPendingAnalysis(ctx);
|
|
@@ -82,13 +82,16 @@ function persistBuildMetadata(
|
|
|
82
82
|
): void {
|
|
83
83
|
const useNativeDb = ctx.engineName === 'native' && !!ctx.nativeDb;
|
|
84
84
|
if (!ctx.isFullBuild && ctx.allSymbols.size <= 3) return;
|
|
85
|
-
// When the native engine is active, persist the
|
|
86
|
-
//
|
|
87
|
-
//
|
|
88
|
-
//
|
|
89
|
-
//
|
|
85
|
+
// When the native engine is active, persist the binary's CARGO_PKG_VERSION
|
|
86
|
+
// (ctx.nativeBinaryVersion). The Rust orchestrator's check_version_mismatch
|
|
87
|
+
// compares against that exact value, so writing the platform package.json
|
|
88
|
+
// version (ctx.engineVersion) — which can drift from the binary in CI
|
|
89
|
+
// hot-swap flows (#1066) — would force every subsequent native build to
|
|
90
|
+
// be a full rebuild.
|
|
90
91
|
const codeVersionToWrite =
|
|
91
|
-
ctx.engineName === 'native' && ctx.
|
|
92
|
+
ctx.engineName === 'native' && ctx.nativeBinaryVersion
|
|
93
|
+
? ctx.nativeBinaryVersion
|
|
94
|
+
: CODEGRAPH_VERSION;
|
|
92
95
|
// Persist the repo root so downstream commands (e.g. `codegraph embed`)
|
|
93
96
|
// can resolve relative file paths regardless of the invoking cwd.
|
|
94
97
|
// Use realpathSync (symlink-resolving) to match the Rust engine's
|