@optave/codegraph 3.9.6 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -12
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +1 -1
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +77 -0
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +50 -8
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/cli/commands/audit.js +1 -1
- package/dist/cli/commands/audit.js.map +1 -1
- package/dist/cli/commands/build.d.ts.map +1 -1
- package/dist/cli/commands/build.js +2 -0
- package/dist/cli/commands/build.js.map +1 -1
- package/dist/cli/commands/check.js +1 -1
- package/dist/cli/commands/check.js.map +1 -1
- package/dist/cli/commands/children.js +1 -1
- package/dist/cli/commands/children.js.map +1 -1
- package/dist/cli/commands/diff-impact.js +1 -1
- package/dist/cli/commands/diff-impact.js.map +1 -1
- package/dist/cli/commands/roles.js +1 -1
- package/dist/cli/commands/roles.js.map +1 -1
- package/dist/cli/commands/structure.js +1 -1
- package/dist/cli/commands/structure.js.map +1 -1
- package/dist/cli/shared/options.js +1 -1
- package/dist/cli/shared/options.js.map +1 -1
- package/dist/db/connection.d.ts.map +1 -1
- package/dist/db/connection.js +8 -0
- package/dist/db/connection.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +10 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js +10 -0
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +7 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +7 -2
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/incremental.d.ts +0 -6
- package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
- package/dist/domain/graph/builder/incremental.js +6 -23
- package/dist/domain/graph/builder/incremental.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts +44 -0
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +348 -42
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.js +8 -2
- package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +8 -0
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts +24 -0
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +117 -3
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +9 -6
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts +30 -0
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +36 -13
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
- package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +23 -18
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +14 -1
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +104 -11
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/models.d.ts +16 -0
- package/dist/domain/search/models.d.ts.map +1 -1
- package/dist/domain/search/models.js +36 -2
- package/dist/domain/search/models.js.map +1 -1
- package/dist/domain/wasm-worker-entry.js +20 -13
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/extractors/c.js +25 -6
- package/dist/extractors/c.js.map +1 -1
- package/dist/extractors/cpp.js +47 -6
- package/dist/extractors/cpp.js.map +1 -1
- package/dist/extractors/cuda.js +90 -14
- package/dist/extractors/cuda.js.map +1 -1
- package/dist/extractors/elixir.js +83 -3
- package/dist/extractors/elixir.js.map +1 -1
- package/dist/extractors/erlang.js +56 -20
- package/dist/extractors/erlang.js.map +1 -1
- package/dist/extractors/fsharp.d.ts +7 -0
- package/dist/extractors/fsharp.d.ts.map +1 -1
- package/dist/extractors/fsharp.js +94 -0
- package/dist/extractors/fsharp.js.map +1 -1
- package/dist/extractors/gleam.js +6 -2
- package/dist/extractors/gleam.js.map +1 -1
- package/dist/extractors/groovy.js +41 -1
- package/dist/extractors/groovy.js.map +1 -1
- package/dist/extractors/haskell.js +48 -4
- package/dist/extractors/haskell.js.map +1 -1
- package/dist/extractors/julia.js +172 -41
- package/dist/extractors/julia.js.map +1 -1
- package/dist/extractors/kotlin.js +4 -0
- package/dist/extractors/kotlin.js.map +1 -1
- package/dist/extractors/objc.js +184 -47
- package/dist/extractors/objc.js.map +1 -1
- package/dist/extractors/python.js +7 -4
- package/dist/extractors/python.js.map +1 -1
- package/dist/extractors/r.js +93 -52
- package/dist/extractors/r.js.map +1 -1
- package/dist/extractors/scala.d.ts.map +1 -1
- package/dist/extractors/scala.js +18 -32
- package/dist/extractors/scala.js.map +1 -1
- package/dist/extractors/solidity.js +18 -9
- package/dist/extractors/solidity.js.map +1 -1
- package/dist/extractors/verilog.js +80 -15
- package/dist/extractors/verilog.js.map +1 -1
- package/dist/infrastructure/config.d.ts +1 -0
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +1 -0
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/mcp/server.js +14 -8
- package/dist/mcp/server.js.map +1 -1
- package/dist/mcp/tool-registry.d.ts +1 -1
- package/dist/mcp/tool-registry.d.ts.map +1 -1
- package/dist/mcp/tool-registry.js +23 -5
- package/dist/mcp/tool-registry.js.map +1 -1
- package/dist/mcp/tools/semantic-search.d.ts +1 -0
- package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
- package/dist/mcp/tools/semantic-search.js +1 -0
- package/dist/mcp/tools/semantic-search.js.map +1 -1
- package/dist/types.d.ts +16 -1
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/grammars/tree-sitter-fsharp.wasm +0 -0
- package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
- package/grammars/tree-sitter-gleam.wasm +0 -0
- package/package.json +11 -10
- package/src/ast-analysis/engine.ts +3 -1
- package/src/ast-analysis/rules/index.ts +87 -0
- package/src/ast-analysis/visitors/ast-store-visitor.ts +45 -9
- package/src/cli/commands/audit.ts +1 -1
- package/src/cli/commands/build.ts +2 -0
- package/src/cli/commands/check.ts +1 -1
- package/src/cli/commands/children.ts +1 -1
- package/src/cli/commands/diff-impact.ts +1 -1
- package/src/cli/commands/roles.ts +1 -1
- package/src/cli/commands/structure.ts +1 -1
- package/src/cli/shared/options.ts +1 -1
- package/src/db/connection.ts +8 -0
- package/src/domain/graph/builder/context.ts +10 -0
- package/src/domain/graph/builder/helpers.ts +8 -3
- package/src/domain/graph/builder/incremental.ts +6 -41
- package/src/domain/graph/builder/pipeline.ts +404 -41
- package/src/domain/graph/builder/stages/build-edges.ts +9 -2
- package/src/domain/graph/builder/stages/collect-files.ts +9 -0
- package/src/domain/graph/builder/stages/detect-changes.ts +130 -4
- package/src/domain/graph/builder/stages/finalize.ts +9 -6
- package/src/domain/graph/builder/stages/insert-nodes.ts +38 -14
- package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
- package/src/domain/graph/watcher.ts +21 -23
- package/src/domain/parser.ts +110 -10
- package/src/domain/search/models.ts +37 -2
- package/src/domain/wasm-worker-entry.ts +20 -13
- package/src/extractors/c.ts +27 -8
- package/src/extractors/cpp.ts +50 -8
- package/src/extractors/cuda.ts +90 -16
- package/src/extractors/elixir.ts +75 -3
- package/src/extractors/erlang.ts +63 -20
- package/src/extractors/fsharp.ts +104 -0
- package/src/extractors/gleam.ts +7 -2
- package/src/extractors/groovy.ts +45 -1
- package/src/extractors/haskell.ts +45 -4
- package/src/extractors/julia.ts +164 -43
- package/src/extractors/kotlin.ts +4 -0
- package/src/extractors/objc.ts +171 -47
- package/src/extractors/python.ts +5 -3
- package/src/extractors/r.ts +88 -48
- package/src/extractors/scala.ts +24 -36
- package/src/extractors/solidity.ts +17 -8
- package/src/extractors/verilog.ts +83 -15
- package/src/infrastructure/config.ts +1 -0
- package/src/mcp/server.ts +16 -9
- package/src/mcp/tool-registry.ts +28 -5
- package/src/mcp/tools/semantic-search.ts +2 -0
- package/src/types.ts +16 -0
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
import fs from 'node:fs';
|
|
8
8
|
import path from 'node:path';
|
|
9
9
|
import { performance } from 'node:perf_hooks';
|
|
10
|
-
import { acquireAdvisoryLock, closeDbPair, getBuildMeta, initSchema, MIGRATIONS, openDb, releaseAdvisoryLock, setBuildMeta, } from '../../../db/index.js';
|
|
10
|
+
import { acquireAdvisoryLock, closeDb, closeDbPair, getBuildMeta, initSchema, MIGRATIONS, openDb, purgeFilesData, releaseAdvisoryLock, setBuildMeta, } from '../../../db/index.js';
|
|
11
11
|
import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js';
|
|
12
12
|
import { debug, info, warn } from '../../../infrastructure/logger.js';
|
|
13
13
|
import { loadNative } from '../../../infrastructure/native.js';
|
|
@@ -15,16 +15,17 @@ import { semverCompare } from '../../../infrastructure/update-check.js';
|
|
|
15
15
|
import { normalizePath } from '../../../shared/constants.js';
|
|
16
16
|
import { toErrorMessage } from '../../../shared/errors.js';
|
|
17
17
|
import { CODEGRAPH_VERSION } from '../../../shared/version.js';
|
|
18
|
-
import { classifyNativeDrops, formatDropExtensionSummary, getActiveEngine, getInstalledWasmExtensions,
|
|
18
|
+
import { classifyNativeDrops, formatDropExtensionSummary, getActiveEngine, getInstalledWasmExtensions, NATIVE_SUPPORTED_EXTENSIONS, parseFilesWasmForBackfill, } from '../../parser.js';
|
|
19
|
+
import { writeJournalHeader } from '../journal.js';
|
|
19
20
|
import { setWorkspaces } from '../resolve.js';
|
|
20
21
|
import { PipelineContext } from './context.js';
|
|
21
|
-
import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
|
|
22
|
+
import { batchInsertNodes, collectFiles as collectFilesUtil, fileHash, fileStat, loadPathAliases, readFileSafe, } from './helpers.js';
|
|
22
23
|
import { NativeDbProxy } from './native-db-proxy.js';
|
|
23
24
|
import { buildEdges } from './stages/build-edges.js';
|
|
24
25
|
import { buildStructure } from './stages/build-structure.js';
|
|
25
26
|
// Pipeline stages
|
|
26
27
|
import { collectFiles } from './stages/collect-files.js';
|
|
27
|
-
import { detectChanges } from './stages/detect-changes.js';
|
|
28
|
+
import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
|
|
28
29
|
import { finalize } from './stages/finalize.js';
|
|
29
30
|
import { insertNodes } from './stages/insert-nodes.js';
|
|
30
31
|
import { parseFiles } from './stages/parse-files.js';
|
|
@@ -42,9 +43,10 @@ function initializeEngine(ctx) {
|
|
|
42
43
|
suspendJsDb: undefined,
|
|
43
44
|
resumeJsDb: undefined,
|
|
44
45
|
};
|
|
45
|
-
const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts);
|
|
46
|
+
const { name: engineName, version: engineVersion, binaryVersion: nativeBinaryVersion, } = getActiveEngine(ctx.engineOpts);
|
|
46
47
|
ctx.engineName = engineName;
|
|
47
48
|
ctx.engineVersion = engineVersion;
|
|
49
|
+
ctx.nativeBinaryVersion = nativeBinaryVersion;
|
|
48
50
|
info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
|
|
49
51
|
}
|
|
50
52
|
function checkEngineSchemaMismatch(ctx) {
|
|
@@ -67,12 +69,14 @@ function checkEngineSchemaMismatch(ctx) {
|
|
|
67
69
|
info(`Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`);
|
|
68
70
|
ctx.forceFullRebuild = true;
|
|
69
71
|
}
|
|
70
|
-
// When the native engine is active, the Rust
|
|
71
|
-
//
|
|
72
|
-
//
|
|
73
|
-
//
|
|
74
|
-
//
|
|
75
|
-
const effectiveVersion = ctx.engineName === 'native' && ctx.
|
|
72
|
+
// When the native engine is active, the Rust orchestrator writes
|
|
73
|
+
// build_meta.codegraph_version = CARGO_PKG_VERSION (the binary's own value).
|
|
74
|
+
// Compare against the same value here so a CI hot-swap that leaves the
|
|
75
|
+
// platform package.json behind doesn't trigger a perpetual full-rebuild
|
|
76
|
+
// loop on every incremental (#1066).
|
|
77
|
+
const effectiveVersion = ctx.engineName === 'native' && ctx.nativeBinaryVersion
|
|
78
|
+
? ctx.nativeBinaryVersion
|
|
79
|
+
: CODEGRAPH_VERSION;
|
|
76
80
|
const prevVersion = meta('codegraph_version');
|
|
77
81
|
if (prevVersion && prevVersion !== effectiveVersion) {
|
|
78
82
|
info(`Codegraph version changed (${prevVersion} → ${effectiveVersion}), promoting to full rebuild.`);
|
|
@@ -113,7 +117,9 @@ function loadAliases(ctx) {
|
|
|
113
117
|
}
|
|
114
118
|
function setupPipeline(ctx) {
|
|
115
119
|
ctx.rootDir = path.resolve(ctx.rootDir);
|
|
116
|
-
ctx.dbPath =
|
|
120
|
+
ctx.dbPath = ctx.opts.dbPath
|
|
121
|
+
? path.resolve(ctx.opts.dbPath)
|
|
122
|
+
: path.join(ctx.rootDir, '.codegraph', 'graph.db');
|
|
117
123
|
// Detect whether native engine is available.
|
|
118
124
|
const enginePref = ctx.opts.engine || 'auto';
|
|
119
125
|
const native = enginePref !== 'wasm' ? loadNative() : null;
|
|
@@ -129,6 +135,16 @@ function setupPipeline(ctx) {
|
|
|
129
135
|
ctx.db = openDb(ctx.dbPath);
|
|
130
136
|
initSchema(ctx.db);
|
|
131
137
|
ctx.config = loadConfig(ctx.rootDir);
|
|
138
|
+
// Merge caller-supplied excludes on top of the file-config excludes so
|
|
139
|
+
// programmatic callers (e.g. benchmark scripts) can extend exclusion
|
|
140
|
+
// without mutating .codegraphrc.json. Native orchestrator picks this up
|
|
141
|
+
// automatically — it reads exclude off the serialized ctx.config below.
|
|
142
|
+
if (ctx.opts.exclude?.length) {
|
|
143
|
+
ctx.config = {
|
|
144
|
+
...ctx.config,
|
|
145
|
+
exclude: [...(ctx.config.exclude ?? []), ...ctx.opts.exclude],
|
|
146
|
+
};
|
|
147
|
+
}
|
|
132
148
|
ctx.incremental =
|
|
133
149
|
ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false;
|
|
134
150
|
initializeEngine(ctx);
|
|
@@ -508,6 +524,15 @@ async function tryNativeOrchestrator(ctx) {
|
|
|
508
524
|
const result = JSON.parse(resultJson);
|
|
509
525
|
if (result.earlyExit) {
|
|
510
526
|
info('No changes detected');
|
|
527
|
+
// Even on no-op rebuilds, dropped-language files added since the last
|
|
528
|
+
// full build are still missing from `nodes`/`file_hashes` (#1083), and
|
|
529
|
+
// WASM-only files deleted from disk leave stale rows behind (#1073).
|
|
530
|
+
// The orchestrator's file_collector skipped them, so its earlyExit
|
|
531
|
+
// doesn't imply DB consistency. Run the gap repair before returning.
|
|
532
|
+
const gap = detectDroppedLanguageGap(ctx);
|
|
533
|
+
if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
|
|
534
|
+
await backfillNativeDroppedFiles(ctx, gap);
|
|
535
|
+
}
|
|
511
536
|
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
|
|
512
537
|
return 'early-exit';
|
|
513
538
|
}
|
|
@@ -519,16 +544,24 @@ async function tryNativeOrchestrator(ctx) {
|
|
|
519
544
|
}
|
|
520
545
|
const p = result.phases;
|
|
521
546
|
// Sync build_meta so JS-side version/engine checks work on next build.
|
|
522
|
-
// Use the
|
|
523
|
-
//
|
|
524
|
-
// compares
|
|
525
|
-
//
|
|
526
|
-
//
|
|
527
|
-
//
|
|
547
|
+
// Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
|
|
548
|
+
// platform package.json version (ctx.engineVersion). The Rust side's
|
|
549
|
+
// check_version_mismatch compares against CARGO_PKG_VERSION; writing
|
|
550
|
+
// the package.json value would create a permanent mismatch whenever
|
|
551
|
+
// the binary and platform package.json diverge — e.g., CI hot-swap
|
|
552
|
+
// via ci-install-native.mjs (#1066) — forcing every subsequent build
|
|
553
|
+
// to be a full rebuild.
|
|
554
|
+
//
|
|
555
|
+
// When the native addon doesn't expose engineVersion() (older addon),
|
|
556
|
+
// fall back to CODEGRAPH_VERSION — same fallback used by both
|
|
557
|
+
// checkEngineSchemaMismatch (read path) and persistBuildMetadata
|
|
558
|
+
// (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
|
|
559
|
+
// here would re-introduce the asymmetry this PR fixes for that case.
|
|
560
|
+
const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
|
|
528
561
|
setBuildMeta(ctx.db, {
|
|
529
562
|
engine: ctx.engineName,
|
|
530
|
-
engine_version:
|
|
531
|
-
codegraph_version:
|
|
563
|
+
engine_version: nativeVersionForMeta,
|
|
564
|
+
codegraph_version: nativeVersionForMeta,
|
|
532
565
|
schema_version: String(ctx.schemaVersion),
|
|
533
566
|
built_at: new Date().toISOString(),
|
|
534
567
|
});
|
|
@@ -578,41 +611,153 @@ async function tryNativeOrchestrator(ctx) {
|
|
|
578
611
|
// stale native binaries). WASM handles those — backfill via WASM so both
|
|
579
612
|
// engines process the same file set (#967).
|
|
580
613
|
//
|
|
581
|
-
//
|
|
582
|
-
//
|
|
583
|
-
//
|
|
584
|
-
|
|
585
|
-
|
|
614
|
+
// Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
|
|
615
|
+
// both gating and the backfill itself. On dirty incrementals/full builds
|
|
616
|
+
// the orchestrator signals trigger backfill, so the walk happens once
|
|
617
|
+
// (instead of redundantly inside backfill). On quiet incrementals we
|
|
618
|
+
// still pay the walk so we can detect brand-new files in dropped-language
|
|
619
|
+
// extensions — a gap that the orchestrator's `detect_removed_files`
|
|
620
|
+
// filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
|
|
621
|
+
// because the expensive part (WASM re-parse of the missing set) is
|
|
622
|
+
// gated below.
|
|
623
|
+
const removedCount = result.removedCount ?? 0;
|
|
624
|
+
const changedCount = result.changedCount ?? 0;
|
|
625
|
+
const gap = detectDroppedLanguageGap(ctx);
|
|
626
|
+
if (result.isFullBuild ||
|
|
627
|
+
removedCount > 0 ||
|
|
628
|
+
changedCount > 0 ||
|
|
629
|
+
gap.missingAbs.length > 0 ||
|
|
630
|
+
gap.staleRel.length > 0) {
|
|
631
|
+
await backfillNativeDroppedFiles(ctx, gap);
|
|
586
632
|
}
|
|
587
633
|
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
|
|
588
634
|
return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
|
|
589
635
|
}
|
|
590
636
|
/**
|
|
591
|
-
*
|
|
592
|
-
*
|
|
637
|
+
* Compute the WASM-only files present in the DB but missing from disk (#1073).
|
|
638
|
+
*
|
|
639
|
+
* Returns relative paths that:
|
|
640
|
+
* - appear in `existingNodes` or `existingHashes` (in DB),
|
|
641
|
+
* - are absent from `expected` (not on disk),
|
|
642
|
+
* - have an extension installed for WASM, AND
|
|
643
|
+
* - have an extension NOT covered by `nativeSupported` — Rust's
|
|
644
|
+
* `purge_changed_files` handles deletion for natively-supported extensions
|
|
645
|
+
* via its own `detect_removed_files`, so the caller must not double-purge.
|
|
646
|
+
*
|
|
647
|
+
* Extensions are lowercased before lookup to match the registry and Rust's
|
|
648
|
+
* `LanguageKind::from_extension` (which normalises case for the languages
|
|
649
|
+
* where both cases are conventional, e.g. R's `.r` / `.R`).
|
|
650
|
+
*
|
|
651
|
+
* DB paths are forced to forward slashes before comparison with `expected`
|
|
652
|
+
* (which is always normalised). The on-disk invariant is that DB rows are
|
|
653
|
+
* written with forward slashes, but a stale row written by older code on
|
|
654
|
+
* Windows could carry back-slashes — normalising here makes the comparison
|
|
655
|
+
* platform-safe and prevents false-positive purges of live rows. We replace
|
|
656
|
+
* `\\` explicitly (rather than calling `normalizePath`, which only touches
|
|
657
|
+
* `path.sep`) so the defence works when running on POSIX against a DB that
|
|
658
|
+
* was migrated from Windows.
|
|
659
|
+
*
|
|
660
|
+
* Exported for unit testing.
|
|
593
661
|
*/
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
662
|
+
/**
 * Collect DB-recorded file paths that are no longer on disk and belong to a
 * language only the WASM engine handles.
 *
 * A path is reported when its forward-slash form is absent from `expected`,
 * its lowercased extension is in `installedExts`, and that extension is not
 * in `nativeSupported`.
 *
 * @param {object} input
 * @param {Iterable<string>} input.existingNodes - File paths recorded for file nodes.
 * @param {Iterable<string>} input.existingHashes - File paths recorded in the hash table.
 * @param {Set<string>} input.expected - Relative paths currently on disk; looked up
 *   with the forward-slash form of each DB path.
 * @param {Set<string>} input.installedExts - Extensions (looked up lowercased) that
 *   have an installed WASM grammar.
 * @param {Set<string>} input.nativeSupported - Extensions (looked up lowercased) the
 *   native engine covers; these are never reported here.
 * @returns {string[]} Stale paths in first-seen order, spelled exactly as they were
 *   supplied, with one entry per normalised path.
 */
export function computeWasmOnlyStaleFiles(input) {
    const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
    const stale = [];
    const seen = new Set();
    const consider = (rawRel) => {
        // A NULL or otherwise non-string `file` value cannot name a path. Skip
        // it instead of letting `.replace` throw and abort the whole scan.
        if (typeof rawRel !== 'string')
            return;
        // Compare using forward slashes so a row stored with back-slashes is
        // not mistaken for a deleted file.
        const rel = rawRel.replace(/\\/g, '/');
        if (expected.has(rel) || seen.has(rel))
            return;
        const ext = path.extname(rel).toLowerCase();
        if (nativeSupported.has(ext))
            return;
        if (!installedExts.has(ext))
            return;
        seen.add(rel);
        // Report the ORIGINAL spelling so an exact-match delete on the stored
        // `file` value hits the row. Dedup is on the normalised form.
        // NOTE(review): if the same file is stored under both separator
        // spellings, only the first spelling encountered is returned.
        stale.push(rawRel);
    };
    for (const rel of existingNodes)
        consider(rel);
    for (const rel of existingHashes)
        consider(rel);
    return stale;
}
|
|
690
|
+
/**
|
|
691
|
+
* Group relative paths by their lowercased extension. Shape matches the bucket
|
|
692
|
+
* type that `formatDropExtensionSummary` consumes, so callers can render a
|
|
693
|
+
* log-friendly per-extension summary without going through `classifyNativeDrops`
|
|
694
|
+
* when the reason is already known (e.g. the stale-purge path where every path
|
|
695
|
+
* is guaranteed `unsupported-by-native`).
|
|
696
|
+
*/
|
|
697
|
+
/**
 * Bucket relative paths under their lowercased file extension.
 *
 * The resulting Map is handed to `formatDropExtensionSummary` by the caller
 * to render a per-extension summary in log output.
 *
 * @param {Iterable<string>} relPaths - Relative file paths to group.
 * @returns {Map<string, string[]>} Extension (including the leading dot, or
 *   `''` when there is none) mapped to the paths that carry it, in input order.
 */
function groupByExtension(relPaths) {
    const buckets = new Map();
    for (const relPath of relPaths) {
        const key = path.extname(relPath).toLowerCase();
        const bucket = buckets.get(key);
        if (bucket) {
            bucket.push(relPath);
        }
        else {
            buckets.set(key, [relPath]);
        }
    }
    return buckets;
}
|
|
710
|
+
/**
|
|
711
|
+
* Detect files the native orchestrator silently dropped.
|
|
712
|
+
*
|
|
713
|
+
* Walks the filesystem and compares against `nodes` + `file_hashes`. A file
|
|
714
|
+
* is "missing" if it's absent from EITHER table — both must be present for
|
|
715
|
+
* the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
|
|
716
|
+
* legacy DBs where `nodes` was populated but `file_hashes` was not).
|
|
717
|
+
*
|
|
718
|
+
* Restricted to files with an installed WASM grammar; extensions in
|
|
719
|
+
* `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
|
|
720
|
+
* installs) can't be parsed by either engine, so they're not a native
|
|
721
|
+
* regression — excluding them keeps the warn count in
|
|
722
|
+
* `backfillNativeDroppedFiles` meaningful.
|
|
723
|
+
*
|
|
724
|
+
* Also detects WASM-only files deleted from disk (#1073). Rust's
|
|
725
|
+
* `detect_removed_files` filter (#1070) skips files outside its supported
|
|
726
|
+
* extensions, so deletions of WASM-only languages don't reach the native
|
|
727
|
+
* purge path; the rest of the backfill only inserts rows, so without this
|
|
728
|
+
* step stale `nodes`/`file_hashes` rows would linger across incremental
|
|
729
|
+
* rebuilds until the next full rebuild.
|
|
730
|
+
*
|
|
731
|
+
* Cheap (no DB handoff, no parsing): used both to gate the backfill call
|
|
732
|
+
* and as its working set. NativeDbProxy supports `.prepare().all()`, so
|
|
733
|
+
* this works whether `ctx.db` is a proxy or a real better-sqlite3
|
|
734
|
+
* connection — letting us skip the close-native / reopen-better-sqlite3
|
|
735
|
+
* cost when there's nothing to backfill.
|
|
736
|
+
*/
|
|
737
|
+
function detectDroppedLanguageGap(ctx) {
|
|
601
738
|
const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set());
|
|
602
739
|
const expected = new Set(collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))));
|
|
603
|
-
const
|
|
740
|
+
const existingNodeRows = ctx.db
|
|
604
741
|
.prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
|
|
605
742
|
.all();
|
|
606
|
-
const
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
743
|
+
const existingNodes = new Set(existingNodeRows.map((r) => r.file));
|
|
744
|
+
let existingHashes = new Set();
|
|
745
|
+
try {
|
|
746
|
+
const existingHashRows = ctx.db
|
|
747
|
+
.prepare('SELECT DISTINCT file FROM file_hashes')
|
|
748
|
+
.all();
|
|
749
|
+
existingHashes = new Set(existingHashRows.map((r) => r.file));
|
|
750
|
+
}
|
|
751
|
+
catch (e) {
|
|
752
|
+
// file_hashes table may not exist on legacy DBs; treat as fully missing
|
|
753
|
+
// so the backfill writes rows on the upsert path below.
|
|
754
|
+
debug(`detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`);
|
|
755
|
+
}
|
|
611
756
|
const installedExts = getInstalledWasmExtensions();
|
|
612
757
|
const missingRel = [];
|
|
613
758
|
const missingAbs = [];
|
|
614
759
|
for (const rel of expected) {
|
|
615
|
-
if (
|
|
760
|
+
if (existingNodes.has(rel) && existingHashes.has(rel))
|
|
616
761
|
continue;
|
|
617
762
|
const ext = path.extname(rel).toLowerCase();
|
|
618
763
|
if (!installedExts.has(ext))
|
|
@@ -620,6 +765,51 @@ async function backfillNativeDroppedFiles(ctx) {
|
|
|
620
765
|
missingRel.push(rel);
|
|
621
766
|
missingAbs.push(path.join(ctx.rootDir, rel));
|
|
622
767
|
}
|
|
768
|
+
const staleRel = computeWasmOnlyStaleFiles({
|
|
769
|
+
existingNodes,
|
|
770
|
+
existingHashes,
|
|
771
|
+
expected,
|
|
772
|
+
installedExts,
|
|
773
|
+
nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
|
|
774
|
+
});
|
|
775
|
+
return { missingRel, missingAbs, staleRel };
|
|
776
|
+
}
|
|
777
|
+
/**
|
|
778
|
+
* Backfill files that the native orchestrator silently dropped during parse.
|
|
779
|
+
* Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
|
|
780
|
+
*
|
|
781
|
+
* Also purges stale rows for WASM-only files deleted from disk (#1073), which
|
|
782
|
+
* Rust's `detect_removed_files` filter (#1070) skips.
|
|
783
|
+
*
|
|
784
|
+
* Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
|
|
785
|
+
* can use the same scan for both gating and the actual backfill — avoiding
|
|
786
|
+
* a redundant fs walk when the orchestrator's signals already triggered.
|
|
787
|
+
*/
|
|
788
|
+
async function backfillNativeDroppedFiles(ctx, gap) {
|
|
789
|
+
const { missingRel, missingAbs, staleRel } = gap;
|
|
790
|
+
if (missingAbs.length === 0 && staleRel.length === 0)
|
|
791
|
+
return;
|
|
792
|
+
// Now that we know there's work to do, hand off to better-sqlite3 (needed
|
|
793
|
+
// for the INSERT path below).
|
|
794
|
+
if (ctx.nativeFirstProxy) {
|
|
795
|
+
closeNativeDb(ctx, 'pre-parity-backfill');
|
|
796
|
+
ctx.db = openDb(ctx.dbPath);
|
|
797
|
+
ctx.nativeFirstProxy = false;
|
|
798
|
+
}
|
|
799
|
+
const dbConn = ctx.db;
|
|
800
|
+
// Purge WASM-only files that were deleted from disk (#1073). Rust's
|
|
801
|
+
// detect_removed_files skips them and the insert path below never visits
|
|
802
|
+
// them, so without this their rows would persist across rebuilds until the
|
|
803
|
+
// next full rebuild reset the DB.
|
|
804
|
+
if (staleRel.length > 0) {
|
|
805
|
+
// `computeWasmOnlyStaleFiles` guarantees every path here has an extension
|
|
806
|
+
// outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
|
|
807
|
+
// always bucket 100% into `unsupported-by-native`. Build the extension
|
|
808
|
+
// summary directly to avoid a redundant classification pass.
|
|
809
|
+
const staleByExt = groupByExtension(staleRel);
|
|
810
|
+
info(`Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`);
|
|
811
|
+
purgeFilesData(dbConn, staleRel);
|
|
812
|
+
}
|
|
623
813
|
if (missingAbs.length === 0)
|
|
624
814
|
return;
|
|
625
815
|
// Classify drops so users see per-extension reasons instead of just a count
|
|
@@ -633,7 +823,7 @@ async function backfillNativeDroppedFiles(ctx) {
|
|
|
633
823
|
if (totals['native-extractor-failure'] > 0) {
|
|
634
824
|
warn(`Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`);
|
|
635
825
|
}
|
|
636
|
-
const wasmResults = await
|
|
826
|
+
const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
|
|
637
827
|
const rows = [];
|
|
638
828
|
const exportKeys = [];
|
|
639
829
|
for (const [relPath, symbols] of wasmResults) {
|
|
@@ -665,7 +855,7 @@ async function backfillNativeDroppedFiles(ctx) {
|
|
|
665
855
|
exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
|
|
666
856
|
}
|
|
667
857
|
}
|
|
668
|
-
const db =
|
|
858
|
+
const db = dbConn;
|
|
669
859
|
batchInsertNodes(db, rows);
|
|
670
860
|
// Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
|
|
671
861
|
if (exportKeys.length > 0) {
|
|
@@ -688,6 +878,67 @@ async function backfillNativeDroppedFiles(ctx) {
|
|
|
688
878
|
updateStmt.run(...vals);
|
|
689
879
|
}
|
|
690
880
|
}
|
|
881
|
+
// Persist file_hashes rows for every backfilled file. The Rust orchestrator
|
|
882
|
+
// only hashes files it parsed itself, so without this step files in
|
|
883
|
+
// optional-language extensions (e.g. .clj when no Rust extractor exists)
|
|
884
|
+
// would be missing from `file_hashes` — permanently breaking the JS-side
|
|
885
|
+
// fast-skip pre-flight (#1054), which rejects on `collected file missing
|
|
886
|
+
// from file_hashes` and forces every no-op rebuild back through the full
|
|
887
|
+
// ~2s native pipeline (#1068).
|
|
888
|
+
//
|
|
889
|
+
// Iterates `missingRel` (every collected file the Rust orchestrator
|
|
890
|
+
// dropped), not `wasmResults`, so files that produced zero symbols still
|
|
891
|
+
// get a row.
|
|
892
|
+
try {
|
|
893
|
+
const upsertHash = db.prepare('INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)');
|
|
894
|
+
const writeHashes = db.transaction(() => {
|
|
895
|
+
for (let i = 0; i < missingRel.length; i++) {
|
|
896
|
+
const relPath = missingRel[i];
|
|
897
|
+
const absPath = missingAbs[i];
|
|
898
|
+
if (!relPath || !absPath)
|
|
899
|
+
continue;
|
|
900
|
+
let code;
|
|
901
|
+
try {
|
|
902
|
+
code = readFileSafe(absPath);
|
|
903
|
+
}
|
|
904
|
+
catch (e) {
|
|
905
|
+
debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
|
|
906
|
+
continue;
|
|
907
|
+
}
|
|
908
|
+
if (code === null)
|
|
909
|
+
continue;
|
|
910
|
+
const stat = fileStat(absPath);
|
|
911
|
+
const mtime = stat ? stat.mtime : 0;
|
|
912
|
+
const size = stat ? stat.size : 0;
|
|
913
|
+
upsertHash.run(relPath, fileHash(code), mtime, size);
|
|
914
|
+
}
|
|
915
|
+
});
|
|
916
|
+
writeHashes();
|
|
917
|
+
}
|
|
918
|
+
catch (e) {
|
|
919
|
+
debug(`backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`);
|
|
920
|
+
}
|
|
921
|
+
// Free WASM parse trees from the inline backfill path (#1058).
|
|
922
|
+
// `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
|
|
923
|
+
// backed by WASM linear memory) on every result, but these symbols are
|
|
924
|
+
// consumed locally for DB row construction and never added to
|
|
925
|
+
// `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
|
|
926
|
+
// sees them. Without this, trees leak WASM memory until process exit —
|
|
927
|
+
// bounded per run but cumulative across in-process integration tests.
|
|
928
|
+
// Mirrors the cleanup discipline established for #931.
|
|
929
|
+
for (const [, symbols] of wasmResults) {
|
|
930
|
+
const tree = symbols._tree;
|
|
931
|
+
if (tree && typeof tree.delete === 'function') {
|
|
932
|
+
try {
|
|
933
|
+
tree.delete();
|
|
934
|
+
}
|
|
935
|
+
catch {
|
|
936
|
+
/* ignore cleanup errors */
|
|
937
|
+
}
|
|
938
|
+
}
|
|
939
|
+
symbols._tree = undefined;
|
|
940
|
+
symbols._langId = undefined;
|
|
941
|
+
}
|
|
691
942
|
}
|
|
692
943
|
// ── Pipeline stages execution ───────────────────────────────────────────
|
|
693
944
|
async function runPipelineStages(ctx) {
|
|
@@ -795,6 +1046,61 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
795
1046
|
ctx.rootDir = rootDir;
|
|
796
1047
|
try {
|
|
797
1048
|
setupPipeline(ctx);
|
|
1049
|
+
// ── JS-side fast-skip for native incremental (#1054) ──────────────
|
|
1050
|
+
// The Rust orchestrator's internal early-exit fires reliably locally
|
|
1051
|
+
// but not in CI, where every no-op rebuild was paying the full ~2s
|
|
1052
|
+
// pipeline cost. A read-only mtime+size check here matches WASM's
|
|
1053
|
+
// ~20ms early-exit and skips the orchestrator entirely when no
|
|
1054
|
+
// source files have changed. Tier-2 hashing is left to the native
|
|
1055
|
+
// side: any mismatch falls through and lets Rust's detect_changes
|
|
1056
|
+
// remain the source of truth.
|
|
1057
|
+
//
|
|
1058
|
+
// Diagnostic logging gated by CODEGRAPH_FAST_SKIP_DIAG (#1066) — when
|
|
1059
|
+
// any of the call-site guards short-circuit (forceFullRebuild,
|
|
1060
|
+
// engineName, scope, etc.) we log the reason so the bench gate run
|
|
1061
|
+
// produces observable output even if `detectNoChanges` is never
|
|
1062
|
+
// entered.
|
|
1063
|
+
const fastSkipDiag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
|
|
1064
|
+
if (fastSkipDiag) {
|
|
1065
|
+
const reasons = [];
|
|
1066
|
+
if (!ctx.nativeAvailable)
|
|
1067
|
+
reasons.push('nativeAvailable=false');
|
|
1068
|
+
if (ctx.engineName !== 'native')
|
|
1069
|
+
reasons.push(`engineName=${ctx.engineName}`);
|
|
1070
|
+
if (!ctx.incremental)
|
|
1071
|
+
reasons.push('incremental=false');
|
|
1072
|
+
if (ctx.forceFullRebuild)
|
|
1073
|
+
reasons.push('forceFullRebuild=true');
|
|
1074
|
+
if (ctx.opts.scope)
|
|
1075
|
+
reasons.push('scope=set');
|
|
1076
|
+
if (reasons.length > 0) {
|
|
1077
|
+
info(`[fast-skip] false: pre-flight gate skipped — ${reasons.join(', ')}`);
|
|
1078
|
+
}
|
|
1079
|
+
}
|
|
1080
|
+
if (ctx.nativeAvailable &&
|
|
1081
|
+
ctx.engineName === 'native' &&
|
|
1082
|
+
ctx.incremental &&
|
|
1083
|
+
!ctx.forceFullRebuild &&
|
|
1084
|
+
!ctx.opts.scope) {
|
|
1085
|
+
try {
|
|
1086
|
+
await collectFiles(ctx);
|
|
1087
|
+
if (detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts)) {
|
|
1088
|
+
info('No changes detected. Graph is up to date.');
|
|
1089
|
+
writeJournalHeader(ctx.rootDir, Date.now());
|
|
1090
|
+
closeDb(ctx.db);
|
|
1091
|
+
return;
|
|
1092
|
+
}
|
|
1093
|
+
}
|
|
1094
|
+
catch (err) {
|
|
1095
|
+
// Pre-flight is best-effort — any failure falls through to the
|
|
1096
|
+
// orchestrator, which performs its own complete detection.
|
|
1097
|
+
// Reset ctx.allFiles so runPipelineStages re-collects under its own
|
|
1098
|
+
// engine state if we ended up partially populated before throwing.
|
|
1099
|
+
ctx.allFiles = undefined;
|
|
1100
|
+
ctx.discoveredDirs = undefined;
|
|
1101
|
+
debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`);
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
798
1104
|
// ── Rust orchestrator fast path (#695) ────────────────────────────
|
|
799
1105
|
// When available, run the entire build pipeline in Rust with zero
|
|
800
1106
|
// napi crossings (eliminates WAL dual-connection dance). Falls back
|