@optave/codegraph 3.10.0 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -13
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +77 -0
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/cli/commands/audit.js +1 -1
- package/dist/cli/commands/audit.js.map +1 -1
- package/dist/cli/commands/build.d.ts.map +1 -1
- package/dist/cli/commands/build.js +2 -0
- package/dist/cli/commands/build.js.map +1 -1
- package/dist/cli/commands/check.js +1 -1
- package/dist/cli/commands/check.js.map +1 -1
- package/dist/cli/commands/children.js +1 -1
- package/dist/cli/commands/children.js.map +1 -1
- package/dist/cli/commands/diff-impact.js +1 -1
- package/dist/cli/commands/diff-impact.js.map +1 -1
- package/dist/cli/commands/roles.js +1 -1
- package/dist/cli/commands/roles.js.map +1 -1
- package/dist/cli/commands/structure.js +1 -1
- package/dist/cli/commands/structure.js.map +1 -1
- package/dist/cli/shared/options.js +1 -1
- package/dist/cli/shared/options.js.map +1 -1
- package/dist/db/connection.d.ts.map +1 -1
- package/dist/db/connection.js +8 -0
- package/dist/db/connection.js.map +1 -1
- package/dist/domain/graph/builder/incremental.d.ts +0 -6
- package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
- package/dist/domain/graph/builder/incremental.js +6 -23
- package/dist/domain/graph/builder/incremental.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts +44 -0
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +181 -39
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.js +8 -2
- package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
- package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
- package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +23 -18
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +27 -1
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/models.d.ts +16 -0
- package/dist/domain/search/models.d.ts.map +1 -1
- package/dist/domain/search/models.js +35 -1
- package/dist/domain/search/models.js.map +1 -1
- package/dist/domain/wasm-worker-entry.js +8 -1
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/extractors/c.js +25 -6
- package/dist/extractors/c.js.map +1 -1
- package/dist/extractors/cpp.js +47 -6
- package/dist/extractors/cpp.js.map +1 -1
- package/dist/extractors/cuda.js +90 -14
- package/dist/extractors/cuda.js.map +1 -1
- package/dist/extractors/elixir.js +83 -3
- package/dist/extractors/elixir.js.map +1 -1
- package/dist/extractors/erlang.js +56 -20
- package/dist/extractors/erlang.js.map +1 -1
- package/dist/extractors/fsharp.d.ts +7 -0
- package/dist/extractors/fsharp.d.ts.map +1 -1
- package/dist/extractors/fsharp.js +94 -0
- package/dist/extractors/fsharp.js.map +1 -1
- package/dist/extractors/gleam.js +6 -2
- package/dist/extractors/gleam.js.map +1 -1
- package/dist/extractors/groovy.js +41 -1
- package/dist/extractors/groovy.js.map +1 -1
- package/dist/extractors/haskell.js +48 -4
- package/dist/extractors/haskell.js.map +1 -1
- package/dist/extractors/julia.js +172 -41
- package/dist/extractors/julia.js.map +1 -1
- package/dist/extractors/kotlin.js +4 -0
- package/dist/extractors/kotlin.js.map +1 -1
- package/dist/extractors/objc.js +184 -47
- package/dist/extractors/objc.js.map +1 -1
- package/dist/extractors/python.js +7 -4
- package/dist/extractors/python.js.map +1 -1
- package/dist/extractors/r.js +93 -52
- package/dist/extractors/r.js.map +1 -1
- package/dist/extractors/scala.d.ts.map +1 -1
- package/dist/extractors/scala.js +18 -32
- package/dist/extractors/scala.js.map +1 -1
- package/dist/extractors/solidity.js +18 -9
- package/dist/extractors/solidity.js.map +1 -1
- package/dist/extractors/verilog.js +80 -15
- package/dist/extractors/verilog.js.map +1 -1
- package/dist/mcp/tool-registry.d.ts.map +1 -1
- package/dist/mcp/tool-registry.js +4 -0
- package/dist/mcp/tool-registry.js.map +1 -1
- package/dist/mcp/tools/semantic-search.d.ts +1 -0
- package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
- package/dist/mcp/tools/semantic-search.js +1 -0
- package/dist/mcp/tools/semantic-search.js.map +1 -1
- package/dist/types.d.ts +15 -1
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/grammars/tree-sitter-fsharp.wasm +0 -0
- package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
- package/grammars/tree-sitter-gleam.wasm +0 -0
- package/package.json +10 -10
- package/src/ast-analysis/rules/index.ts +87 -0
- package/src/cli/commands/audit.ts +1 -1
- package/src/cli/commands/build.ts +2 -0
- package/src/cli/commands/check.ts +1 -1
- package/src/cli/commands/children.ts +1 -1
- package/src/cli/commands/diff-impact.ts +1 -1
- package/src/cli/commands/roles.ts +1 -1
- package/src/cli/commands/structure.ts +1 -1
- package/src/cli/shared/options.ts +1 -1
- package/src/db/connection.ts +8 -0
- package/src/domain/graph/builder/incremental.ts +6 -41
- package/src/domain/graph/builder/pipeline.ts +222 -37
- package/src/domain/graph/builder/stages/build-edges.ts +9 -2
- package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
- package/src/domain/graph/watcher.ts +21 -23
- package/src/domain/parser.ts +27 -1
- package/src/domain/search/models.ts +36 -1
- package/src/domain/wasm-worker-entry.ts +8 -1
- package/src/extractors/c.ts +27 -8
- package/src/extractors/cpp.ts +50 -8
- package/src/extractors/cuda.ts +90 -16
- package/src/extractors/elixir.ts +75 -3
- package/src/extractors/erlang.ts +63 -20
- package/src/extractors/fsharp.ts +104 -0
- package/src/extractors/gleam.ts +7 -2
- package/src/extractors/groovy.ts +45 -1
- package/src/extractors/haskell.ts +45 -4
- package/src/extractors/julia.ts +164 -43
- package/src/extractors/kotlin.ts +4 -0
- package/src/extractors/objc.ts +171 -47
- package/src/extractors/python.ts +5 -3
- package/src/extractors/r.ts +88 -48
- package/src/extractors/scala.ts +24 -36
- package/src/extractors/solidity.ts +17 -8
- package/src/extractors/verilog.ts +83 -15
- package/src/mcp/tool-registry.ts +5 -0
- package/src/mcp/tools/semantic-search.ts +2 -0
- package/src/types.ts +15 -0
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
*/
|
|
10
10
|
import fs from 'node:fs';
|
|
11
11
|
import path from 'node:path';
|
|
12
|
-
import { bulkNodeIdsByFile } from '../../../db/index.js';
|
|
12
|
+
import { bulkNodeIdsByFile, purgeFileData } from '../../../db/index.js';
|
|
13
13
|
import { debug, warn } from '../../../infrastructure/logger.js';
|
|
14
14
|
import { normalizePath } from '../../../shared/constants.js';
|
|
15
15
|
import type {
|
|
@@ -29,8 +29,6 @@ export interface IncrementalStmts {
|
|
|
29
29
|
insertNode: { run: (...params: unknown[]) => unknown };
|
|
30
30
|
insertEdge: { run: (...params: unknown[]) => unknown };
|
|
31
31
|
getNodeId: { get: (...params: unknown[]) => { id: number } | undefined };
|
|
32
|
-
deleteEdgesForFile: { run: (...params: unknown[]) => unknown };
|
|
33
|
-
deleteNodes: { run: (...params: unknown[]) => unknown };
|
|
34
32
|
countNodes: { get: (...params: unknown[]) => { c: number } | undefined };
|
|
35
33
|
listSymbols: { all: (...params: unknown[]) => unknown[] };
|
|
36
34
|
findNodeInFile: { all: (...params: unknown[]) => unknown[] };
|
|
@@ -208,40 +206,6 @@ function rebuildDirContainment(
|
|
|
208
206
|
return 0;
|
|
209
207
|
}
|
|
210
208
|
|
|
211
|
-
// ── Ancillary table cleanup ────────────────────────────────────────────
|
|
212
|
-
|
|
213
|
-
function purgeAncillaryData(db: BetterSqlite3Database, relPath: string): void {
|
|
214
|
-
const tryExec = (sql: string, ...args: string[]): void => {
|
|
215
|
-
try {
|
|
216
|
-
db.prepare(sql).run(...args);
|
|
217
|
-
} catch (err: unknown) {
|
|
218
|
-
if (!(err as Error | undefined)?.message?.includes('no such table')) throw err;
|
|
219
|
-
}
|
|
220
|
-
};
|
|
221
|
-
tryExec(
|
|
222
|
-
'DELETE FROM function_complexity WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
223
|
-
relPath,
|
|
224
|
-
);
|
|
225
|
-
tryExec(
|
|
226
|
-
'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
227
|
-
relPath,
|
|
228
|
-
);
|
|
229
|
-
tryExec(
|
|
230
|
-
'DELETE FROM cfg_edges WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
231
|
-
relPath,
|
|
232
|
-
);
|
|
233
|
-
tryExec(
|
|
234
|
-
'DELETE FROM cfg_blocks WHERE function_node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
235
|
-
relPath,
|
|
236
|
-
);
|
|
237
|
-
tryExec(
|
|
238
|
-
'DELETE FROM dataflow WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) OR target_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
239
|
-
relPath,
|
|
240
|
-
relPath,
|
|
241
|
-
);
|
|
242
|
-
tryExec('DELETE FROM ast_nodes WHERE file = ?', relPath);
|
|
243
|
-
}
|
|
244
|
-
|
|
245
209
|
// ── Import edge building ────────────────────────────────────────────────
|
|
246
210
|
|
|
247
211
|
// Lazily-cached prepared statements for barrel resolution (avoid re-preparing in hot loops)
|
|
@@ -547,10 +511,11 @@ export async function rebuildFile(
|
|
|
547
511
|
// Find reverse-deps BEFORE purging (edges still reference the old nodes)
|
|
548
512
|
const reverseDeps = findReverseDeps(db, relPath);
|
|
549
513
|
|
|
550
|
-
// Purge ancillary tables,
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
514
|
+
// Purge ancillary tables (incl. embeddings), edges, and nodes in one pass.
|
|
515
|
+
// Embeddings must be purged before nodes — better-sqlite3 enforces foreign
|
|
516
|
+
// keys by default, and `embeddings.node_id` references `nodes.id`. Issue #1176.
|
|
517
|
+
// `purgeHashes: false` preserves file_hashes for the next incremental build.
|
|
518
|
+
purgeFileData(db, relPath, { purgeHashes: false });
|
|
554
519
|
|
|
555
520
|
if (!fs.existsSync(filePath)) {
|
|
556
521
|
if (cache) (cache as { remove(p: string): void }).remove(filePath);
|
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
initSchema,
|
|
16
16
|
MIGRATIONS,
|
|
17
17
|
openDb,
|
|
18
|
+
purgeFilesData,
|
|
18
19
|
releaseAdvisoryLock,
|
|
19
20
|
setBuildMeta,
|
|
20
21
|
} from '../../../db/index.js';
|
|
@@ -38,6 +39,7 @@ import {
|
|
|
38
39
|
formatDropExtensionSummary,
|
|
39
40
|
getActiveEngine,
|
|
40
41
|
getInstalledWasmExtensions,
|
|
42
|
+
NATIVE_SUPPORTED_EXTENSIONS,
|
|
41
43
|
parseFilesWasmForBackfill,
|
|
42
44
|
} from '../../parser.js';
|
|
43
45
|
import { writeJournalHeader } from '../journal.js';
|
|
@@ -165,7 +167,9 @@ function loadAliases(ctx: PipelineContext): void {
|
|
|
165
167
|
|
|
166
168
|
function setupPipeline(ctx: PipelineContext): void {
|
|
167
169
|
ctx.rootDir = path.resolve(ctx.rootDir);
|
|
168
|
-
ctx.dbPath =
|
|
170
|
+
ctx.dbPath = ctx.opts.dbPath
|
|
171
|
+
? path.resolve(ctx.opts.dbPath)
|
|
172
|
+
: path.join(ctx.rootDir, '.codegraph', 'graph.db');
|
|
169
173
|
|
|
170
174
|
// Detect whether native engine is available.
|
|
171
175
|
const enginePref = ctx.opts.engine || 'auto';
|
|
@@ -183,6 +187,16 @@ function setupPipeline(ctx: PipelineContext): void {
|
|
|
183
187
|
initSchema(ctx.db);
|
|
184
188
|
|
|
185
189
|
ctx.config = loadConfig(ctx.rootDir);
|
|
190
|
+
// Merge caller-supplied excludes on top of the file-config excludes so
|
|
191
|
+
// programmatic callers (e.g. benchmark scripts) can extend exclusion
|
|
192
|
+
// without mutating .codegraphrc.json. Native orchestrator picks this up
|
|
193
|
+
// automatically — it reads exclude off the serialized ctx.config below.
|
|
194
|
+
if (ctx.opts.exclude?.length) {
|
|
195
|
+
ctx.config = {
|
|
196
|
+
...ctx.config,
|
|
197
|
+
exclude: [...(ctx.config.exclude ?? []), ...ctx.opts.exclude],
|
|
198
|
+
};
|
|
199
|
+
}
|
|
186
200
|
ctx.incremental =
|
|
187
201
|
ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false;
|
|
188
202
|
|
|
@@ -658,6 +672,15 @@ async function tryNativeOrchestrator(
|
|
|
658
672
|
|
|
659
673
|
if (result.earlyExit) {
|
|
660
674
|
info('No changes detected');
|
|
675
|
+
// Even on no-op rebuilds, dropped-language files added since the last
|
|
676
|
+
// full build are still missing from `nodes`/`file_hashes` (#1083), and
|
|
677
|
+
// WASM-only files deleted from disk leave stale rows behind (#1073).
|
|
678
|
+
// The orchestrator's file_collector skipped them, so its earlyExit
|
|
679
|
+
// doesn't imply DB consistency. Run the gap repair before returning.
|
|
680
|
+
const gap = detectDroppedLanguageGap(ctx);
|
|
681
|
+
if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
|
|
682
|
+
await backfillNativeDroppedFiles(ctx, gap);
|
|
683
|
+
}
|
|
661
684
|
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
|
|
662
685
|
return 'early-exit';
|
|
663
686
|
}
|
|
@@ -753,37 +776,163 @@ async function tryNativeOrchestrator(
|
|
|
753
776
|
// stale native binaries). WASM handles those — backfill via WASM so both
|
|
754
777
|
// engines process the same file set (#967).
|
|
755
778
|
//
|
|
756
|
-
//
|
|
757
|
-
//
|
|
758
|
-
//
|
|
759
|
-
//
|
|
760
|
-
//
|
|
761
|
-
//
|
|
762
|
-
//
|
|
763
|
-
//
|
|
764
|
-
//
|
|
765
|
-
// unsupported-extension file is added on an otherwise-quiet incremental
|
|
766
|
-
// — see #1091 for the residual gap.
|
|
779
|
+
// Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
|
|
780
|
+
// both gating and the backfill itself. On dirty incrementals/full builds
|
|
781
|
+
// the orchestrator signals trigger backfill, so the walk happens once
|
|
782
|
+
// (instead of redundantly inside backfill). On quiet incrementals we
|
|
783
|
+
// still pay the walk so we can detect brand-new files in dropped-language
|
|
784
|
+
// extensions — a gap that the orchestrator's `detect_removed_files`
|
|
785
|
+
// filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
|
|
786
|
+
// because the expensive part (WASM re-parse of the missing set) is
|
|
787
|
+
// gated below.
|
|
767
788
|
const removedCount = result.removedCount ?? 0;
|
|
768
789
|
const changedCount = result.changedCount ?? 0;
|
|
769
|
-
|
|
770
|
-
|
|
790
|
+
const gap = detectDroppedLanguageGap(ctx);
|
|
791
|
+
if (
|
|
792
|
+
result.isFullBuild ||
|
|
793
|
+
removedCount > 0 ||
|
|
794
|
+
changedCount > 0 ||
|
|
795
|
+
gap.missingAbs.length > 0 ||
|
|
796
|
+
gap.staleRel.length > 0
|
|
797
|
+
) {
|
|
798
|
+
await backfillNativeDroppedFiles(ctx, gap);
|
|
771
799
|
}
|
|
772
800
|
|
|
773
801
|
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
|
|
774
802
|
return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
|
|
775
803
|
}
|
|
776
804
|
|
|
805
|
+
/** Files the native orchestrator silently dropped — the working set for backfill. */
|
|
806
|
+
interface DroppedLanguageGap {
|
|
807
|
+
/** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
|
|
808
|
+
missingRel: string[];
|
|
809
|
+
/** Absolute paths, aligned by index with `missingRel`. */
|
|
810
|
+
missingAbs: string[];
|
|
811
|
+
/**
|
|
812
|
+
* Relative paths of WASM-only files present in DB but absent from disk (#1073).
|
|
813
|
+
* Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
|
|
814
|
+
* backfill must purge them. Always disjoint from `missingRel`.
|
|
815
|
+
*/
|
|
816
|
+
staleRel: string[];
|
|
817
|
+
}
|
|
818
|
+
|
|
777
819
|
/**
|
|
778
|
-
*
|
|
779
|
-
*
|
|
820
|
+
* Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
|
|
821
|
+
* is pure and unit-testable independently of `getInstalledWasmExtensions` and
|
|
822
|
+
* the `NATIVE_SUPPORTED_EXTENSIONS` global state.
|
|
823
|
+
*/
|
|
824
|
+
export interface WasmOnlyStaleFilesInput {
|
|
825
|
+
/** Distinct `file` values from the `nodes` table. */
|
|
826
|
+
existingNodes: ReadonlySet<string>;
|
|
827
|
+
/** Distinct `file` values from the `file_hashes` table. */
|
|
828
|
+
existingHashes: ReadonlySet<string>;
|
|
829
|
+
/** Relative paths currently on disk (from `collectFilesUtil`). */
|
|
830
|
+
expected: ReadonlySet<string>;
|
|
831
|
+
/** Lowercased extensions whose WASM grammar is installed. */
|
|
832
|
+
installedExts: ReadonlySet<string>;
|
|
833
|
+
/** Extensions covered by the Rust addon — Rust owns deletion for these. */
|
|
834
|
+
nativeSupported: ReadonlySet<string>;
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
/**
|
|
838
|
+
* Compute the WASM-only files present in the DB but missing from disk (#1073).
|
|
839
|
+
*
|
|
840
|
+
* Returns relative paths that:
|
|
841
|
+
* - appear in `existingNodes` or `existingHashes` (in DB),
|
|
842
|
+
* - are absent from `expected` (not on disk),
|
|
843
|
+
* - have an extension installed for WASM, AND
|
|
844
|
+
* - have an extension NOT covered by `nativeSupported` — Rust's
|
|
845
|
+
* `purge_changed_files` handles deletion for natively-supported extensions
|
|
846
|
+
* via its own `detect_removed_files`, so the caller must not double-purge.
|
|
847
|
+
*
|
|
848
|
+
* Extensions are lowercased before lookup to match the registry and Rust's
|
|
849
|
+
* `LanguageKind::from_extension` (which normalises case for the languages
|
|
850
|
+
* where both cases are conventional, e.g. R's `.r` / `.R`).
|
|
851
|
+
*
|
|
852
|
+
* DB paths are forced to forward slashes before comparison with `expected`
|
|
853
|
+
* (which is always normalised). The on-disk invariant is that DB rows are
|
|
854
|
+
* written with forward slashes, but a stale row written by older code on
|
|
855
|
+
* Windows could carry back-slashes — normalising here makes the comparison
|
|
856
|
+
* platform-safe and prevents false-positive purges of live rows. We replace
|
|
857
|
+
* `\\` explicitly (rather than calling `normalizePath`, which only touches
|
|
858
|
+
* `path.sep`) so the defence works when running on POSIX against a DB that
|
|
859
|
+
* was migrated from Windows.
|
|
860
|
+
*
|
|
861
|
+
* Exported for unit testing.
|
|
780
862
|
*/
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
863
|
+
export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
|
|
864
|
+
const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
|
|
865
|
+
const stale: string[] = [];
|
|
866
|
+
const seen = new Set<string>();
|
|
867
|
+
const consider = (rawRel: string): void => {
|
|
868
|
+
const rel = rawRel.replace(/\\/g, '/');
|
|
869
|
+
if (expected.has(rel) || seen.has(rel)) return;
|
|
870
|
+
const ext = path.extname(rel).toLowerCase();
|
|
871
|
+
if (nativeSupported.has(ext)) return;
|
|
872
|
+
if (!installedExts.has(ext)) return;
|
|
873
|
+
seen.add(rel);
|
|
874
|
+
// Push the ORIGINAL raw path (not the normalised form) so the eventual
|
|
875
|
+
// `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
|
|
876
|
+
// matches the actual stored row. The dedup `seen` set keeps the
|
|
877
|
+
// normalised form so a file written once with `\` and once with `/`
|
|
878
|
+
// is still treated as one entry — but the value the SQL sees has to
|
|
879
|
+
// be byte-identical to what's on disk in the DB.
|
|
880
|
+
stale.push(rawRel);
|
|
881
|
+
};
|
|
882
|
+
for (const rel of existingNodes) consider(rel);
|
|
883
|
+
for (const rel of existingHashes) consider(rel);
|
|
884
|
+
return stale;
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
/**
|
|
888
|
+
* Group relative paths by their lowercased extension. Shape matches the bucket
|
|
889
|
+
* type that `formatDropExtensionSummary` consumes, so callers can render a
|
|
890
|
+
* log-friendly per-extension summary without going through `classifyNativeDrops`
|
|
891
|
+
* when the reason is already known (e.g. the stale-purge path where every path
|
|
892
|
+
* is guaranteed `unsupported-by-native`).
|
|
893
|
+
*/
|
|
894
|
+
function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
|
|
895
|
+
const buckets = new Map<string, string[]>();
|
|
896
|
+
for (const rel of relPaths) {
|
|
897
|
+
const ext = path.extname(rel).toLowerCase();
|
|
898
|
+
let list = buckets.get(ext);
|
|
899
|
+
if (!list) {
|
|
900
|
+
list = [];
|
|
901
|
+
buckets.set(ext, list);
|
|
902
|
+
}
|
|
903
|
+
list.push(rel);
|
|
904
|
+
}
|
|
905
|
+
return buckets;
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
/**
|
|
909
|
+
* Detect files the native orchestrator silently dropped.
|
|
910
|
+
*
|
|
911
|
+
* Walks the filesystem and compares against `nodes` + `file_hashes`. A file
|
|
912
|
+
* is "missing" if it's absent from EITHER table — both must be present for
|
|
913
|
+
* the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
|
|
914
|
+
* legacy DBs where `nodes` was populated but `file_hashes` was not).
|
|
915
|
+
*
|
|
916
|
+
* Restricted to files with an installed WASM grammar; extensions in
|
|
917
|
+
* `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
|
|
918
|
+
* installs) can't be parsed by either engine, so they're not a native
|
|
919
|
+
* regression — excluding them keeps the warn count in
|
|
920
|
+
* `backfillNativeDroppedFiles` meaningful.
|
|
921
|
+
*
|
|
922
|
+
* Also detects WASM-only files deleted from disk (#1073). Rust's
|
|
923
|
+
* `detect_removed_files` filter (#1070) skips files outside its supported
|
|
924
|
+
* extensions, so deletions of WASM-only languages don't reach the native
|
|
925
|
+
* purge path; the rest of the backfill only inserts rows, so without this
|
|
926
|
+
* step stale `nodes`/`file_hashes` rows would linger across incremental
|
|
927
|
+
* rebuilds until the next full rebuild.
|
|
928
|
+
*
|
|
929
|
+
* Cheap (no DB handoff, no parsing): used both to gate the backfill call
|
|
930
|
+
* and as its working set. NativeDbProxy supports `.prepare().all()`, so
|
|
931
|
+
* this works whether `ctx.db` is a proxy or a real better-sqlite3
|
|
932
|
+
* connection — letting us skip the close-native / reopen-better-sqlite3
|
|
933
|
+
* cost when there's nothing to backfill.
|
|
934
|
+
*/
|
|
935
|
+
function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
|
|
787
936
|
const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
|
|
788
937
|
const expected = new Set(
|
|
789
938
|
collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
|
|
@@ -794,12 +943,6 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
794
943
|
.all() as Array<{ file: string }>;
|
|
795
944
|
const existingNodes = new Set(existingNodeRows.map((r) => r.file));
|
|
796
945
|
|
|
797
|
-
// Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
|
|
798
|
-
// (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
|
|
799
|
-
// (e.g. a DB written by old code where `nodes` was populated but
|
|
800
|
-
// `file_hashes` was not). Treating "in nodes but not in file_hashes" as
|
|
801
|
-
// missing closes the gap so the backfill repairs the file_hashes row even
|
|
802
|
-
// when the node row already exists.
|
|
803
946
|
let existingHashes = new Set<string>();
|
|
804
947
|
try {
|
|
805
948
|
const existingHashRows = ctx.db
|
|
@@ -810,27 +953,49 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
810
953
|
// file_hashes table may not exist on legacy DBs; treat as fully missing
|
|
811
954
|
// so the backfill writes rows on the upsert path below.
|
|
812
955
|
debug(
|
|
813
|
-
`
|
|
956
|
+
`detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
|
|
814
957
|
);
|
|
815
958
|
}
|
|
816
959
|
|
|
817
|
-
// Restrict backfill to files with an installed WASM grammar. Extensions in
|
|
818
|
-
// LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
|
|
819
|
-
// minimal installs) can't be parsed by either engine, so they're not a
|
|
820
|
-
// native regression — excluding them keeps the warn count meaningful.
|
|
821
960
|
const installedExts = getInstalledWasmExtensions();
|
|
822
961
|
const missingRel: string[] = [];
|
|
823
962
|
const missingAbs: string[] = [];
|
|
824
963
|
for (const rel of expected) {
|
|
825
|
-
// A file is "missing" if it's absent from EITHER nodes OR file_hashes.
|
|
826
|
-
// Both must be present for fast-skip to work correctly.
|
|
827
964
|
if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
|
|
828
965
|
const ext = path.extname(rel).toLowerCase();
|
|
829
966
|
if (!installedExts.has(ext)) continue;
|
|
830
967
|
missingRel.push(rel);
|
|
831
968
|
missingAbs.push(path.join(ctx.rootDir, rel));
|
|
832
969
|
}
|
|
833
|
-
|
|
970
|
+
|
|
971
|
+
const staleRel = computeWasmOnlyStaleFiles({
|
|
972
|
+
existingNodes,
|
|
973
|
+
existingHashes,
|
|
974
|
+
expected,
|
|
975
|
+
installedExts,
|
|
976
|
+
nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
|
|
977
|
+
});
|
|
978
|
+
|
|
979
|
+
return { missingRel, missingAbs, staleRel };
|
|
980
|
+
}
|
|
981
|
+
|
|
982
|
+
/**
|
|
983
|
+
* Backfill files that the native orchestrator silently dropped during parse.
|
|
984
|
+
* Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
|
|
985
|
+
*
|
|
986
|
+
* Also purges stale rows for WASM-only files deleted from disk (#1073), which
|
|
987
|
+
* Rust's `detect_removed_files` filter (#1070) skips.
|
|
988
|
+
*
|
|
989
|
+
* Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
|
|
990
|
+
* can use the same scan for both gating and the actual backfill — avoiding
|
|
991
|
+
* a redundant fs walk when the orchestrator's signals already triggered.
|
|
992
|
+
*/
|
|
993
|
+
async function backfillNativeDroppedFiles(
|
|
994
|
+
ctx: PipelineContext,
|
|
995
|
+
gap: DroppedLanguageGap,
|
|
996
|
+
): Promise<void> {
|
|
997
|
+
const { missingRel, missingAbs, staleRel } = gap;
|
|
998
|
+
if (missingAbs.length === 0 && staleRel.length === 0) return;
|
|
834
999
|
|
|
835
1000
|
// Now that we know there's work to do, hand off to better-sqlite3 (needed
|
|
836
1001
|
// for the INSERT path below).
|
|
@@ -840,6 +1005,26 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
840
1005
|
ctx.nativeFirstProxy = false;
|
|
841
1006
|
}
|
|
842
1007
|
|
|
1008
|
+
const dbConn = ctx.db as unknown as BetterSqlite3Database;
|
|
1009
|
+
|
|
1010
|
+
// Purge WASM-only files that were deleted from disk (#1073). Rust's
|
|
1011
|
+
// detect_removed_files skips them and the insert path below never visits
|
|
1012
|
+
// them, so without this their rows would persist across rebuilds until the
|
|
1013
|
+
// next full rebuild reset the DB.
|
|
1014
|
+
if (staleRel.length > 0) {
|
|
1015
|
+
// `computeWasmOnlyStaleFiles` guarantees every path here has an extension
|
|
1016
|
+
// outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
|
|
1017
|
+
// always bucket 100% into `unsupported-by-native`. Build the extension
|
|
1018
|
+
// summary directly to avoid a redundant classification pass.
|
|
1019
|
+
const staleByExt = groupByExtension(staleRel);
|
|
1020
|
+
info(
|
|
1021
|
+
`Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`,
|
|
1022
|
+
);
|
|
1023
|
+
purgeFilesData(dbConn, staleRel);
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
if (missingAbs.length === 0) return;
|
|
1027
|
+
|
|
843
1028
|
// Classify drops so users see per-extension reasons instead of just a count
|
|
844
1029
|
// (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
|
|
845
1030
|
// extractor); `native-extractor-failure` indicates a real native bug since
|
|
@@ -888,7 +1073,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
|
|
|
888
1073
|
exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
|
|
889
1074
|
}
|
|
890
1075
|
}
|
|
891
|
-
const db =
|
|
1076
|
+
const db = dbConn;
|
|
892
1077
|
batchInsertNodes(db, rows);
|
|
893
1078
|
|
|
894
1079
|
// Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
|
|
@@ -770,9 +770,11 @@ function reconnectReverseDepEdges(ctx: PipelineContext): void {
|
|
|
770
770
|
* their import targets. Falls back to loading ALL nodes for full builds or
|
|
771
771
|
* larger incremental changes.
|
|
772
772
|
*/
|
|
773
|
+
const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
|
|
774
|
+
|
|
773
775
|
function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } {
|
|
774
776
|
const { db, fileSymbols, isFullBuild, batchResolved } = ctx;
|
|
775
|
-
const nodeKindFilter =
|
|
777
|
+
const nodeKindFilter = NODE_KIND_FILTER_SQL;
|
|
776
778
|
|
|
777
779
|
// Gate: only scope for small incremental on large codebases
|
|
778
780
|
if (!isFullBuild && fileSymbols.size <= ctx.config.build.smallFilesThreshold) {
|
|
@@ -816,8 +818,13 @@ function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolea
|
|
|
816
818
|
function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void {
|
|
817
819
|
if (!scopedLoad) return;
|
|
818
820
|
const { db } = ctx;
|
|
821
|
+
// Match the upfront kind filter exactly. Using `kind != 'file'` here lets
|
|
822
|
+
// parameters, properties, and other non-definition kinds leak into call
|
|
823
|
+
// resolution, producing bogus call edges like `parser.ts → <a parameter
|
|
824
|
+
// with the same name>` (#1174 follow-up). Calls only ever target the
|
|
825
|
+
// definition kinds, so the fallback's filter must agree with `loadNodes`.
|
|
819
826
|
const fallbackStmt = db.prepare(
|
|
820
|
-
`SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND
|
|
827
|
+
`SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND ${NODE_KIND_FILTER_SQL}`,
|
|
821
828
|
);
|
|
822
829
|
const originalGet = ctx.nodesByName.get.bind(ctx.nodesByName);
|
|
823
830
|
ctx.nodesByName.get = (name: string) => {
|
|
@@ -33,15 +33,23 @@ function buildReexportMap(ctx: PipelineContext): void {
|
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
/**
|
|
36
|
-
* Find barrel files related to
|
|
37
|
-
* For small
|
|
38
|
-
* or are imported by
|
|
36
|
+
* Find barrel files related to `fromRelPaths` for scoped re-parsing.
|
|
37
|
+
* For small frontiers (<=smallFilesThreshold files), only barrels that re-export from
|
|
38
|
+
* or are imported by `fromRelPaths`. For larger frontiers, all barrels.
|
|
39
|
+
*
|
|
40
|
+
* `firstPass` gates the reexport-from DB scan: re-parsed barrels haven't
|
|
41
|
+
* changed content, so subsequent passes can't surface new reexport-from
|
|
42
|
+
* candidates and only need to follow imports of newly-merged barrels
|
|
43
|
+
* (mirrors the Rust orchestrator's seed-only `collect_reexport_from_barrels`).
|
|
39
44
|
*/
|
|
40
|
-
function findBarrelCandidates(
|
|
45
|
+
function findBarrelCandidates(
|
|
46
|
+
ctx: PipelineContext,
|
|
47
|
+
fromRelPaths: readonly string[],
|
|
48
|
+
firstPass: boolean,
|
|
49
|
+
): Array<{ file: string }> {
|
|
41
50
|
const { db, fileSymbols, rootDir, aliases } = ctx;
|
|
42
|
-
const changedRelPaths = new Set<string>(fileSymbols.keys());
|
|
43
51
|
|
|
44
|
-
if (
|
|
52
|
+
if (fromRelPaths.length <= ctx.config.build.smallFilesThreshold) {
|
|
45
53
|
const allBarrelFiles = new Set(
|
|
46
54
|
(
|
|
47
55
|
db
|
|
@@ -56,9 +64,9 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> {
|
|
|
56
64
|
|
|
57
65
|
const barrels = new Set<string>();
|
|
58
66
|
|
|
59
|
-
// Find barrels imported by
|
|
67
|
+
// Find barrels imported by `fromRelPaths` using parsed import data
|
|
60
68
|
// (can't query DB edges -- they were purged for the changed files).
|
|
61
|
-
for (const relPath of
|
|
69
|
+
for (const relPath of fromRelPaths) {
|
|
62
70
|
const symbols = fileSymbols.get(relPath);
|
|
63
71
|
if (!symbols) continue;
|
|
64
72
|
for (const imp of symbols.imports) {
|
|
@@ -71,16 +79,17 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> {
|
|
|
71
79
|
}
|
|
72
80
|
}
|
|
73
81
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
if (firstPass) {
|
|
83
|
+
const reexportSourceStmt = db.prepare(
|
|
84
|
+
`SELECT DISTINCT n1.file FROM edges e
|
|
85
|
+
JOIN nodes n1 ON e.source_id = n1.id
|
|
86
|
+
JOIN nodes n2 ON e.target_id = n2.id
|
|
87
|
+
WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?`,
|
|
88
|
+
);
|
|
89
|
+
for (const relPath of fromRelPaths) {
|
|
90
|
+
for (const row of reexportSourceStmt.all(relPath) as Array<{ file: string }>) {
|
|
91
|
+
barrels.add(row.file);
|
|
92
|
+
}
|
|
84
93
|
}
|
|
85
94
|
}
|
|
86
95
|
return [...barrels].map((file) => ({ file }));
|
|
@@ -95,11 +104,22 @@ function findBarrelCandidates(ctx: PipelineContext): Array<{ file: string }> {
|
|
|
95
104
|
.all() as Array<{ file: string }>;
|
|
96
105
|
}
|
|
97
106
|
|
|
98
|
-
/**
|
|
107
|
+
/**
|
|
108
|
+
* Re-parse barrel files and update fileSymbols/reexportMap with fresh data.
|
|
109
|
+
* Returns the relative paths of newly-merged files so the caller can scan
|
|
110
|
+
* them for the next level of barrel candidates.
|
|
111
|
+
*
|
|
112
|
+
* A re-parsed file is marked `barrel-only` only when it really is one (the
|
|
113
|
+
* `isBarrelFile` check — reexports >= ownDefs). The previous unconditional
|
|
114
|
+
* `.add(relPath)` caused hybrid barrels with many local defs (e.g. a file
|
|
115
|
+
* with one `export type ... from` and dozens of internal functions) to drop
|
|
116
|
+
* all their non-reexport imports in build-edges, since the barrel-only branch
|
|
117
|
+
* skips them (#1174).
|
|
118
|
+
*/
|
|
99
119
|
async function reparseBarrelFiles(
|
|
100
120
|
ctx: PipelineContext,
|
|
101
121
|
barrelCandidates: Array<{ file: string }>,
|
|
102
|
-
): Promise<
|
|
122
|
+
): Promise<string[]> {
|
|
103
123
|
const { db, fileSymbols, rootDir, engineOpts } = ctx;
|
|
104
124
|
|
|
105
125
|
const barrelPaths: string[] = [];
|
|
@@ -109,18 +129,27 @@ async function reparseBarrelFiles(
|
|
|
109
129
|
}
|
|
110
130
|
}
|
|
111
131
|
|
|
112
|
-
if (barrelPaths.length === 0) return;
|
|
132
|
+
if (barrelPaths.length === 0) return [];
|
|
113
133
|
|
|
134
|
+
// Preserve `contains` and `parameter_of` — those are emitted by insertNodes,
|
|
135
|
+
// which only runs on the original (changed + reverse-dep) fileSymbols. Barrel
|
|
136
|
+
// candidates are merged here *after* insertNodes, so wiping those kinds
|
|
137
|
+
// would permanently drop them (mirrors the Rust orchestrator's Stage 6b
|
|
138
|
+
// delete in build_pipeline.rs).
|
|
114
139
|
const deleteOutgoingEdges = db.prepare(
|
|
115
|
-
|
|
140
|
+
`DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)
|
|
141
|
+
AND kind NOT IN ('contains', 'parameter_of')`,
|
|
116
142
|
);
|
|
117
143
|
|
|
144
|
+
const added: string[] = [];
|
|
118
145
|
try {
|
|
119
146
|
const barrelSymbols = await parseFilesAuto(barrelPaths, rootDir, engineOpts);
|
|
120
147
|
for (const [relPath, fileSym] of barrelSymbols) {
|
|
121
148
|
deleteOutgoingEdges.run(relPath);
|
|
122
149
|
fileSymbols.set(relPath, fileSym);
|
|
123
|
-
ctx
|
|
150
|
+
if (isBarrelFile(ctx, relPath)) {
|
|
151
|
+
ctx.barrelOnlyFiles.add(relPath);
|
|
152
|
+
}
|
|
124
153
|
const reexports = fileSym.imports.filter((imp: Import) => imp.reexport);
|
|
125
154
|
if (reexports.length > 0) {
|
|
126
155
|
ctx.reexportMap.set(
|
|
@@ -132,10 +161,12 @@ async function reparseBarrelFiles(
|
|
|
132
161
|
})),
|
|
133
162
|
);
|
|
134
163
|
}
|
|
164
|
+
added.push(relPath);
|
|
135
165
|
}
|
|
136
166
|
} catch (e: unknown) {
|
|
137
167
|
debug(`Barrel re-parse failed (non-fatal): ${(e as Error).message}`);
|
|
138
168
|
}
|
|
169
|
+
return added;
|
|
139
170
|
}
|
|
140
171
|
|
|
141
172
|
export async function resolveImports(ctx: PipelineContext): Promise<void> {
|
|
@@ -156,8 +187,31 @@ export async function resolveImports(ctx: PipelineContext): Promise<void> {
|
|
|
156
187
|
|
|
157
188
|
ctx.barrelOnlyFiles = new Set<string>();
|
|
158
189
|
if (!isFullBuild) {
|
|
159
|
-
|
|
160
|
-
|
|
190
|
+
// Iteratively discover and re-parse barrel chains. A barrel that imports
|
|
191
|
+
// another barrel (e.g. `parser.ts → extractors/index.ts → extractors/<lang>.ts`)
|
|
192
|
+
// needs both loaded so build-edges can emit the barrel-through edges from
|
|
193
|
+
// the first barrel to the leaf targets. Without iteration, only the first
|
|
194
|
+
// level of barrels gets merged into fileSymbols; the deeper chain has no
|
|
195
|
+
// entry in reexportMap and the resolver silently drops the affected edges
|
|
196
|
+
// on every incremental rebuild (#1174).
|
|
197
|
+
//
|
|
198
|
+
// Convergence is guaranteed because fileSymbols grows monotonically and
|
|
199
|
+
// is bounded by the set of barrel files in the project — each iteration
|
|
200
|
+
// either adds a previously-unseen barrel or terminates.
|
|
201
|
+
//
|
|
202
|
+
// Subsequent passes only walk newly-merged barrels' imports (`frontier`
|
|
203
|
+
// = paths returned by reparseBarrelFiles), matching the Rust
|
|
204
|
+
// orchestrator's `&newly_added` slice. Without this, every pass would
|
|
205
|
+
// re-query the DB for every key in `fileSymbols`.
|
|
206
|
+
let frontier: readonly string[] = [...fileSymbols.keys()];
|
|
207
|
+
let firstPass = true;
|
|
208
|
+
while (frontier.length > 0) {
|
|
209
|
+
const barrelCandidates = findBarrelCandidates(ctx, frontier, firstPass);
|
|
210
|
+
const added = await reparseBarrelFiles(ctx, barrelCandidates);
|
|
211
|
+
if (added.length === 0) break;
|
|
212
|
+
frontier = added;
|
|
213
|
+
firstPass = false;
|
|
214
|
+
}
|
|
161
215
|
}
|
|
162
216
|
}
|
|
163
217
|
|