@optave/codegraph 3.9.3 → 3.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/README.md +10 -10
  2. package/dist/ast-analysis/visitor.d.ts.map +1 -1
  3. package/dist/ast-analysis/visitor.js +14 -0
  4. package/dist/ast-analysis/visitor.js.map +1 -1
  5. package/dist/cli/commands/watch.d.ts.map +1 -1
  6. package/dist/cli/commands/watch.js +2 -0
  7. package/dist/cli/commands/watch.js.map +1 -1
  8. package/dist/cli.js +24 -1
  9. package/dist/cli.js.map +1 -1
  10. package/dist/domain/graph/builder/context.d.ts +17 -0
  11. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  12. package/dist/domain/graph/builder/context.js +7 -0
  13. package/dist/domain/graph/builder/context.js.map +1 -1
  14. package/dist/domain/graph/builder/helpers.d.ts +13 -2
  15. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  16. package/dist/domain/graph/builder/helpers.js +30 -4
  17. package/dist/domain/graph/builder/helpers.js.map +1 -1
  18. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  19. package/dist/domain/graph/builder/pipeline.js +221 -51
  20. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  21. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  22. package/dist/domain/graph/builder/stages/build-edges.js +67 -6
  23. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  24. package/dist/domain/graph/builder/stages/build-structure.js +2 -2
  25. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  26. package/dist/domain/graph/builder/stages/collect-files.js +58 -26
  27. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  28. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  29. package/dist/domain/graph/builder/stages/detect-changes.js +105 -55
  30. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  31. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  32. package/dist/domain/graph/builder/stages/finalize.js +27 -4
  33. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  34. package/dist/domain/graph/builder/stages/run-analyses.d.ts.map +1 -1
  35. package/dist/domain/graph/builder/stages/run-analyses.js +5 -20
  36. package/dist/domain/graph/builder/stages/run-analyses.js.map +1 -1
  37. package/dist/domain/graph/journal.d.ts +15 -0
  38. package/dist/domain/graph/journal.d.ts.map +1 -1
  39. package/dist/domain/graph/journal.js +283 -28
  40. package/dist/domain/graph/journal.js.map +1 -1
  41. package/dist/domain/graph/watcher.d.ts +17 -0
  42. package/dist/domain/graph/watcher.d.ts.map +1 -1
  43. package/dist/domain/graph/watcher.js +23 -7
  44. package/dist/domain/graph/watcher.js.map +1 -1
  45. package/dist/domain/parser.d.ts +13 -4
  46. package/dist/domain/parser.d.ts.map +1 -1
  47. package/dist/domain/parser.js +174 -80
  48. package/dist/domain/parser.js.map +1 -1
  49. package/dist/domain/search/generator.d.ts.map +1 -1
  50. package/dist/domain/search/generator.js +28 -2
  51. package/dist/domain/search/generator.js.map +1 -1
  52. package/dist/domain/wasm-worker-entry.d.ts +24 -0
  53. package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
  54. package/dist/domain/wasm-worker-entry.js +643 -0
  55. package/dist/domain/wasm-worker-entry.js.map +1 -0
  56. package/dist/domain/wasm-worker-pool.d.ts +59 -0
  57. package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
  58. package/dist/domain/wasm-worker-pool.js +312 -0
  59. package/dist/domain/wasm-worker-pool.js.map +1 -0
  60. package/dist/domain/wasm-worker-protocol.d.ts +65 -0
  61. package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
  62. package/dist/domain/wasm-worker-protocol.js +13 -0
  63. package/dist/domain/wasm-worker-protocol.js.map +1 -0
  64. package/dist/extractors/javascript.js +265 -1
  65. package/dist/extractors/javascript.js.map +1 -1
  66. package/dist/features/boundaries.d.ts +2 -2
  67. package/dist/features/boundaries.d.ts.map +1 -1
  68. package/dist/features/boundaries.js +2 -31
  69. package/dist/features/boundaries.js.map +1 -1
  70. package/dist/features/snapshot.d.ts.map +1 -1
  71. package/dist/features/snapshot.js +99 -13
  72. package/dist/features/snapshot.js.map +1 -1
  73. package/dist/features/structure.d.ts.map +1 -1
  74. package/dist/features/structure.js +14 -1
  75. package/dist/features/structure.js.map +1 -1
  76. package/dist/graph/algorithms/louvain.d.ts.map +1 -1
  77. package/dist/graph/algorithms/louvain.js +2 -4
  78. package/dist/graph/algorithms/louvain.js.map +1 -1
  79. package/dist/infrastructure/config.d.ts.map +1 -1
  80. package/dist/infrastructure/config.js +12 -2
  81. package/dist/infrastructure/config.js.map +1 -1
  82. package/dist/shared/globs.d.ts +40 -0
  83. package/dist/shared/globs.d.ts.map +1 -0
  84. package/dist/shared/globs.js +126 -0
  85. package/dist/shared/globs.js.map +1 -0
  86. package/dist/types.d.ts +26 -1
  87. package/dist/types.d.ts.map +1 -1
  88. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  89. package/package.json +7 -7
  90. package/src/ast-analysis/visitor.ts +15 -0
  91. package/src/cli/commands/watch.ts +2 -0
  92. package/src/cli.ts +31 -8
  93. package/src/domain/graph/builder/context.ts +19 -0
  94. package/src/domain/graph/builder/helpers.ts +53 -3
  95. package/src/domain/graph/builder/pipeline.ts +235 -49
  96. package/src/domain/graph/builder/stages/build-edges.ts +80 -6
  97. package/src/domain/graph/builder/stages/build-structure.ts +2 -2
  98. package/src/domain/graph/builder/stages/collect-files.ts +56 -26
  99. package/src/domain/graph/builder/stages/detect-changes.ts +118 -61
  100. package/src/domain/graph/builder/stages/finalize.ts +27 -4
  101. package/src/domain/graph/builder/stages/run-analyses.ts +5 -26
  102. package/src/domain/graph/journal.ts +284 -27
  103. package/src/domain/graph/watcher.ts +29 -9
  104. package/src/domain/parser.ts +166 -73
  105. package/src/domain/search/generator.ts +34 -2
  106. package/src/domain/wasm-worker-entry.ts +788 -0
  107. package/src/domain/wasm-worker-pool.ts +330 -0
  108. package/src/domain/wasm-worker-protocol.ts +81 -0
  109. package/src/extractors/javascript.ts +290 -1
  110. package/src/features/boundaries.ts +2 -27
  111. package/src/features/snapshot.ts +93 -14
  112. package/src/features/structure.ts +17 -1
  113. package/src/graph/algorithms/louvain.ts +2 -4
  114. package/src/infrastructure/config.ts +12 -2
  115. package/src/shared/globs.ts +121 -0
  116. package/src/types.ts +26 -1
@@ -8,7 +8,8 @@ import fs from 'node:fs';
8
8
  import path from 'node:path';
9
9
  import { purgeFilesData } from '../../../db/index.js';
10
10
  import { warn } from '../../../infrastructure/logger.js';
11
- import { EXTENSIONS, IGNORE_DIRS } from '../../../shared/constants.js';
11
+ import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../../shared/constants.js';
12
+ import { compileGlobs, matchesAny } from '../../../shared/globs.js';
12
13
  import type {
13
14
  BetterSqlite3Database,
14
15
  CodegraphConfig,
@@ -58,9 +59,29 @@ function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set<string> | null): boo
58
59
  return false;
59
60
  }
60
61
 
62
+ /**
63
+ * Check whether a source file passes the configured include/exclude globs.
64
+ *
65
+ * Patterns are matched against the path relative to the project root,
66
+ * normalized to forward slashes (e.g. `src/foo/bar.ts`). When both lists
67
+ * are set, a file must match at least one include and no exclude.
68
+ */
69
+ export function passesIncludeExclude(
70
+ relPath: string,
71
+ includeRegexes: readonly RegExp[],
72
+ excludeRegexes: readonly RegExp[],
73
+ ): boolean {
74
+ if (includeRegexes.length > 0 && !matchesAny(includeRegexes, relPath)) return false;
75
+ if (excludeRegexes.length > 0 && matchesAny(excludeRegexes, relPath)) return false;
76
+ return true;
77
+ }
78
+
61
79
  /**
62
80
  * Recursively collect all source files under `dir`.
63
81
  * When `directories` is a Set, also tracks which directories contain files.
82
+ *
83
+ * The first invocation establishes `dir` as the project root against which
84
+ * `config.include` / `config.exclude` globs are matched.
64
85
  */
65
86
  export function collectFiles(
66
87
  dir: string,
@@ -68,6 +89,9 @@ export function collectFiles(
68
89
  config: Partial<CodegraphConfig>,
69
90
  directories: Set<string>,
70
91
  _visited?: Set<string>,
92
+ _rootDir?: string,
93
+ _includeRegexes?: readonly RegExp[],
94
+ _excludeRegexes?: readonly RegExp[],
71
95
  ): { files: string[]; directories: Set<string> };
72
96
  export function collectFiles(
73
97
  dir: string,
@@ -75,6 +99,9 @@ export function collectFiles(
75
99
  config?: Partial<CodegraphConfig>,
76
100
  directories?: null,
77
101
  _visited?: Set<string>,
102
+ _rootDir?: string,
103
+ _includeRegexes?: readonly RegExp[],
104
+ _excludeRegexes?: readonly RegExp[],
78
105
  ): string[];
79
106
  export function collectFiles(
80
107
  dir: string,
@@ -82,10 +109,20 @@ export function collectFiles(
82
109
  config: Partial<CodegraphConfig> = {},
83
110
  directories: Set<string> | null = null,
84
111
  _visited: Set<string> = new Set(),
112
+ _rootDir?: string,
113
+ _includeRegexes?: readonly RegExp[],
114
+ _excludeRegexes?: readonly RegExp[],
85
115
  ): string[] | { files: string[]; directories: Set<string> } {
86
116
  const trackDirs = directories instanceof Set;
87
117
  let hasFiles = false;
88
118
 
119
+ // First call: compute root and compile include/exclude patterns once,
120
+ // then pass them down recursive calls so we don't recompile per directory.
121
+ const rootDir = _rootDir ?? dir;
122
+ const includeRegexes = _includeRegexes ?? compileGlobs(config.include);
123
+ const excludeRegexes = _excludeRegexes ?? compileGlobs(config.exclude);
124
+ const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
125
+
89
126
  // Merge config ignoreDirs with defaults
90
127
  const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
91
128
 
@@ -116,11 +153,24 @@ export function collectFiles(
116
153
  const full = path.join(dir, entry.name);
117
154
  if (entry.isDirectory()) {
118
155
  if (trackDirs) {
119
- collectFiles(full, files, config, directories as Set<string>, _visited);
156
+ collectFiles(
157
+ full,
158
+ files,
159
+ config,
160
+ directories as Set<string>,
161
+ _visited,
162
+ rootDir,
163
+ includeRegexes,
164
+ excludeRegexes,
165
+ );
120
166
  } else {
121
- collectFiles(full, files, config, null, _visited);
167
+ collectFiles(full, files, config, null, _visited, rootDir, includeRegexes, excludeRegexes);
122
168
  }
123
169
  } else if (EXTENSIONS.has(path.extname(entry.name))) {
170
+ if (hasGlobFilters) {
171
+ const rel = normalizePath(path.relative(rootDir, full));
172
+ if (!passesIncludeExclude(rel, includeRegexes, excludeRegexes)) continue;
173
+ }
124
174
  files.push(full);
125
175
  hasFiles = true;
126
176
  }
@@ -21,6 +21,7 @@ import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js'
21
21
  import { debug, info, warn } from '../../../infrastructure/logger.js';
22
22
  import { loadNative } from '../../../infrastructure/native.js';
23
23
  import { semverCompare } from '../../../infrastructure/update-check.js';
24
+ import { normalizePath } from '../../../shared/constants.js';
24
25
  import { toErrorMessage } from '../../../shared/errors.js';
25
26
  import { CODEGRAPH_VERSION } from '../../../shared/version.js';
26
27
  import type {
@@ -29,11 +30,12 @@ import type {
29
30
  BuildResult,
30
31
  Definition,
31
32
  ExtractorOutput,
33
+ SqliteStatement,
32
34
  } from '../../../types.js';
33
- import { getActiveEngine } from '../../parser.js';
35
+ import { getActiveEngine, getInstalledWasmExtensions, parseFilesAuto } from '../../parser.js';
34
36
  import { setWorkspaces } from '../resolve.js';
35
37
  import { PipelineContext } from './context.js';
36
- import { loadPathAliases } from './helpers.js';
38
+ import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
37
39
  import { NativeDbProxy } from './native-db-proxy.js';
38
40
  import { buildEdges } from './stages/build-edges.js';
39
41
  import { buildStructure } from './stages/build-structure.js';
@@ -88,15 +90,37 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
88
90
  );
89
91
  ctx.forceFullRebuild = true;
90
92
  }
93
+ // When the native engine is active, the Rust addon's version (ctx.engineVersion)
94
+ // is written into codegraph_version by setBuildMeta after a native orchestrator
95
+ // build. The check must compare against the same version, otherwise JS and Rust
96
+ // fight over which version to record — causing every incremental build to be
97
+ // promoted to a full rebuild when npm and crate versions diverge.
98
+ const effectiveVersion =
99
+ ctx.engineName === 'native' && ctx.engineVersion ? ctx.engineVersion : CODEGRAPH_VERSION;
91
100
  const prevVersion = meta('codegraph_version');
92
- if (prevVersion && prevVersion !== CODEGRAPH_VERSION) {
101
+ if (prevVersion && prevVersion !== effectiveVersion) {
93
102
  info(
94
- `Codegraph version changed (${prevVersion} → ${CODEGRAPH_VERSION}), promoting to full rebuild.`,
103
+ `Codegraph version changed (${prevVersion} → ${effectiveVersion}), promoting to full rebuild.`,
95
104
  );
96
105
  ctx.forceFullRebuild = true;
97
106
  }
98
107
  }
99
108
 
109
+ function warnOnEmbeddingsWipe(ctx: PipelineContext): void {
110
+ const willBeFullBuild = !ctx.incremental || ctx.forceFullRebuild;
111
+ if (!willBeFullBuild) return;
112
+ let count = 0;
113
+ try {
114
+ count = (ctx.db.prepare('SELECT COUNT(*) AS c FROM embeddings').get() as { c: number }).c;
115
+ } catch {
116
+ return; // embeddings table missing — nothing to warn about
117
+ }
118
+ if (count === 0) return;
119
+ warn(
120
+ `Full rebuild will discard ${count} embedding${count === 1 ? '' : 's'}; re-run \`codegraph embed\` after the build.`,
121
+ );
122
+ }
123
+
100
124
  function loadAliases(ctx: PipelineContext): void {
101
125
  ctx.aliases = loadPathAliases(ctx.rootDir);
102
126
  if (ctx.config.aliases) {
@@ -126,38 +150,15 @@ function setupPipeline(ctx: PipelineContext): void {
126
150
  const native = enginePref !== 'wasm' ? loadNative() : null;
127
151
  ctx.nativeAvailable = !!native?.NativeDatabase;
128
152
 
129
- // When native is available, use a NativeDbProxy backed by a single rusqlite
130
- // connection. This eliminates the dual-connection WAL corruption problem.
131
- // The Rust orchestrator handles the full pipeline; the proxy is used for any
132
- // JS post-processing (e.g. structure fallback on large builds).
133
- if (ctx.nativeAvailable && native?.NativeDatabase) {
134
- try {
135
- const dir = path.dirname(ctx.dbPath);
136
- if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
137
- acquireAdvisoryLock(ctx.dbPath);
138
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
139
- ctx.nativeDb.initSchema();
140
- const proxy = new NativeDbProxy(ctx.nativeDb);
141
- proxy.__lockPath = `${ctx.dbPath}.lock`;
142
- ctx.db = proxy as unknown as typeof ctx.db;
143
- ctx.nativeFirstProxy = true;
144
- } catch (err) {
145
- warn(`NativeDatabase setup failed, falling back to better-sqlite3: ${toErrorMessage(err)}`);
146
- try {
147
- ctx.nativeDb?.close();
148
- } catch {
149
- /* ignore */
150
- }
151
- ctx.nativeDb = undefined;
152
- ctx.nativeFirstProxy = false;
153
- releaseAdvisoryLock(`${ctx.dbPath}.lock`);
154
- ctx.db = openDb(ctx.dbPath);
155
- initSchema(ctx.db);
156
- }
157
- } else {
158
- ctx.db = openDb(ctx.dbPath);
159
- initSchema(ctx.db);
160
- }
153
+ // Always use better-sqlite3 for setup — it's cheap (~4ms) and only needed
154
+ // for metadata reads (schema mismatch check). NativeDatabase.openReadWrite
155
+ // is deferred to tryNativeOrchestrator, saving ~60ms on incremental builds
156
+ // where the Rust orchestrator handles the full pipeline, and avoiding the
157
+ // cost entirely on no-op builds that exit before reaching the orchestrator.
158
+ const dir = path.dirname(ctx.dbPath);
159
+ if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
160
+ ctx.db = openDb(ctx.dbPath);
161
+ initSchema(ctx.db);
161
162
 
162
163
  ctx.config = loadConfig(ctx.rootDir);
163
164
  ctx.incremental =
@@ -165,6 +166,7 @@ function setupPipeline(ctx: PipelineContext): void {
165
166
 
166
167
  initializeEngine(ctx);
167
168
  checkEngineSchemaMismatch(ctx);
169
+ warnOnEmbeddingsWipe(ctx);
168
170
  loadAliases(ctx);
169
171
 
170
172
  // Workspace packages (monorepo)
@@ -182,6 +184,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
182
184
  return {
183
185
  phases: {
184
186
  setupMs: +(t.setupMs ?? 0).toFixed(1),
187
+ collectMs: +(t.collectMs ?? 0).toFixed(1),
188
+ detectMs: +(t.detectMs ?? 0).toFixed(1),
185
189
  parseMs: +(t.parseMs ?? 0).toFixed(1),
186
190
  insertMs: +(t.insertMs ?? 0).toFixed(1),
187
191
  resolveMs: +(t.resolveMs ?? 0).toFixed(1),
@@ -262,8 +266,6 @@ interface NativeOrchestratorResult {
262
266
  changedCount?: number;
263
267
  removedCount?: number;
264
268
  isFullBuild?: boolean;
265
- /** Full changed files including reverse-dep files — used by JS structure fallback. */
266
- structureScope?: string[];
267
269
  /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
268
270
  structureHandled?: boolean;
269
271
  /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
@@ -558,7 +560,9 @@ function formatNativeTimingResult(
558
560
  ): BuildResult {
559
561
  return {
560
562
  phases: {
561
- setupMs: +((p.setupMs ?? 0) + (p.collectMs ?? 0) + (p.detectMs ?? 0)).toFixed(1),
563
+ setupMs: +(p.setupMs ?? 0).toFixed(1),
564
+ collectMs: +(p.collectMs ?? 0).toFixed(1),
565
+ detectMs: +(p.detectMs ?? 0).toFixed(1),
562
566
  parseMs: +(p.parseMs ?? 0).toFixed(1),
563
567
  insertMs: +(p.insertMs ?? 0).toFixed(1),
564
568
  resolveMs: +(p.resolveMs ?? 0).toFixed(1),
@@ -584,15 +588,26 @@ async function tryNativeOrchestrator(
584
588
  return undefined;
585
589
  }
586
590
 
587
- // In native-first mode, nativeDb is already open from setupPipeline.
588
- // Otherwise, open it on demand (deferred to skip overhead on no-op rebuilds).
591
+ // Open NativeDatabase on demand — deferred from setupPipeline to skip the
592
+ // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
593
+ // first to avoid dual-connection WAL corruption.
589
594
  if (!ctx.nativeDb && ctx.nativeAvailable) {
590
595
  const native = loadNative();
591
596
  if (native?.NativeDatabase) {
592
597
  try {
598
+ // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
599
+ // Uses raw close() instead of closeDb() intentionally — the advisory lock
600
+ // is kept and transferred to the NativeDbProxy below, not released here.
601
+ ctx.db.close();
602
+ acquireAdvisoryLock(ctx.dbPath);
593
603
  ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
594
604
  ctx.nativeDb.initSchema();
595
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
605
+ // Replace ctx.db with a NativeDbProxy so post-native JS fallback
606
+ // (structure, analysis) can use it without reopening better-sqlite3.
607
+ const proxy = new NativeDbProxy(ctx.nativeDb);
608
+ proxy.__lockPath = `${ctx.dbPath}.lock`;
609
+ ctx.db = proxy as unknown as typeof ctx.db;
610
+ ctx.nativeFirstProxy = true;
596
611
  } catch (err) {
597
612
  warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
598
613
  try {
@@ -601,6 +616,10 @@ async function tryNativeOrchestrator(
601
616
  debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
602
617
  }
603
618
  ctx.nativeDb = undefined;
619
+ ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
620
+ releaseAdvisoryLock(`${ctx.dbPath}.lock`);
621
+ // Reopen better-sqlite3 for JS pipeline fallback
622
+ ctx.db = openDb(ctx.dbPath);
604
623
  }
605
624
  }
606
625
  }
@@ -631,14 +650,18 @@ async function tryNativeOrchestrator(
631
650
  const p = result.phases;
632
651
 
633
652
  // Sync build_meta so JS-side version/engine checks work on next build.
653
+ // Use the Rust addon version as codegraph_version when the native
654
+ // orchestrator performed the build — the Rust side's check_version_mismatch
655
+ // compares this value against CARGO_PKG_VERSION. Writing the JS
656
+ // CODEGRAPH_VERSION here would create a permanent mismatch whenever the
657
+ // npm package version diverges from the Rust crate version, forcing every
658
+ // subsequent native build to be a full rebuild (no incremental).
634
659
  setBuildMeta(ctx.db, {
635
660
  engine: ctx.engineName,
636
661
  engine_version: ctx.engineVersion || '',
637
- codegraph_version: CODEGRAPH_VERSION,
662
+ codegraph_version: ctx.engineVersion || CODEGRAPH_VERSION,
638
663
  schema_version: String(ctx.schemaVersion),
639
664
  built_at: new Date().toISOString(),
640
- node_count: String(result.nodeCount ?? 0),
641
- edge_count: String(result.edgeCount ?? 0),
642
665
  });
643
666
 
644
667
  info(
@@ -686,7 +709,7 @@ async function tryNativeOrchestrator(
686
709
  ctx,
687
710
  fileSymbols,
688
711
  !!result.isFullBuild,
689
- result.structureScope ?? result.changedFiles,
712
+ result.changedFiles,
690
713
  );
691
714
  }
692
715
 
@@ -695,10 +718,123 @@ async function tryNativeOrchestrator(
695
718
  }
696
719
  }
697
720
 
721
+ // Engine parity: the native orchestrator silently drops files whose
722
+ // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
723
+ // stale native binaries). WASM handles those — backfill via WASM so both
724
+ // engines process the same file set (#967).
725
+ //
726
+ // Only runs on full builds: incremental builds only touch changed files,
727
+ // which are parsed through parseFilesAuto (which has its own per-file
728
+ // backfill), so a full filesystem scan here would be wasted work.
729
+ if (result.isFullBuild) {
730
+ await backfillNativeDroppedFiles(ctx);
731
+ }
732
+
698
733
  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
699
734
  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
700
735
  }
701
736
 
737
+ /**
738
+ * Backfill files that the native orchestrator silently dropped during parse.
739
+ * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
740
+ */
741
+ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
742
+ // Needs a real better-sqlite3 connection for INSERT.
743
+ if (ctx.nativeFirstProxy) {
744
+ closeNativeDb(ctx, 'pre-parity-backfill');
745
+ ctx.db = openDb(ctx.dbPath);
746
+ ctx.nativeFirstProxy = false;
747
+ }
748
+
749
+ const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
750
+ const expected = new Set(
751
+ collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
752
+ );
753
+
754
+ const existingRows = ctx.db
755
+ .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
756
+ .all() as Array<{ file: string }>;
757
+ const existing = new Set(existingRows.map((r) => r.file));
758
+
759
+ // Restrict backfill to files with an installed WASM grammar. Extensions in
760
+ // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
761
+ // minimal installs) can't be parsed by either engine, so they're not a
762
+ // native regression — excluding them keeps the warn count meaningful.
763
+ const installedExts = getInstalledWasmExtensions();
764
+ const missingAbs: string[] = [];
765
+ for (const rel of expected) {
766
+ if (existing.has(rel)) continue;
767
+ const ext = path.extname(rel).toLowerCase();
768
+ if (!installedExts.has(ext)) continue;
769
+ missingAbs.push(path.join(ctx.rootDir, rel));
770
+ }
771
+ if (missingAbs.length === 0) return;
772
+
773
+ warn(
774
+ `Native orchestrator dropped ${missingAbs.length} file(s); backfilling via WASM for engine parity`,
775
+ );
776
+ const wasmResults = await parseFilesAuto(missingAbs, ctx.rootDir, { engine: 'wasm' });
777
+
778
+ const rows: unknown[][] = [];
779
+ const exportKeys: unknown[][] = [];
780
+ for (const [relPath, symbols] of wasmResults) {
781
+ // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
782
+ rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
783
+ for (const def of symbols.definitions ?? []) {
784
+ // Populate qualified_name/scope the same way the JS fallback does so
785
+ // downstream queries (cross-file references, "go to definition") find
786
+ // these symbols.
787
+ const dotIdx = def.name.lastIndexOf('.');
788
+ const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
789
+ rows.push([
790
+ def.name,
791
+ def.kind,
792
+ relPath,
793
+ def.line,
794
+ def.endLine ?? null,
795
+ null,
796
+ def.name,
797
+ scope,
798
+ def.visibility ?? null,
799
+ ]);
800
+ }
801
+ // Exports: insert the row (INSERT OR IGNORE — a matching definition row
802
+ // is a no-op) and queue a key for the second-pass exported=1 update, so
803
+ // queries filtering on exported=1 find backfilled symbols (#970).
804
+ for (const exp of symbols.exports ?? []) {
805
+ rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
806
+ exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
807
+ }
808
+ }
809
+ const db = ctx.db as unknown as BetterSqlite3Database;
810
+ batchInsertNodes(db, rows);
811
+
812
+ // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
813
+ if (exportKeys.length > 0) {
814
+ const EXPORT_CHUNK = 500;
815
+ const exportStmtCache = new Map<number, SqliteStatement>();
816
+ for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
817
+ const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
818
+ const chunkSize = end - i;
819
+ let updateStmt = exportStmtCache.get(chunkSize);
820
+ if (!updateStmt) {
821
+ const conditions = Array.from(
822
+ { length: chunkSize },
823
+ () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
824
+ ).join(' OR ');
825
+ updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
826
+ exportStmtCache.set(chunkSize, updateStmt);
827
+ }
828
+ const vals: unknown[] = [];
829
+ for (let j = i; j < end; j++) {
830
+ const k = exportKeys[j] as unknown[];
831
+ vals.push(k[0], k[1], k[2], k[3]);
832
+ }
833
+ updateStmt.run(...vals);
834
+ }
835
+ }
836
+ }
837
+
702
838
  // ── Pipeline stages execution ───────────────────────────────────────────
703
839
 
704
840
  async function runPipelineStages(ctx: PipelineContext): Promise<void> {
@@ -772,6 +908,23 @@ async function runPipelineStages(ctx: PipelineContext): Promise<void> {
772
908
 
773
909
  await runAnalyses(ctx);
774
910
 
911
+ // Release WASM trees deterministically on the success path — same cleanup
912
+ // as the error-path catch block. Without this, trees stay allocated until
913
+ // GC collects ctx, holding WASM memory for the rest of the build (#931).
914
+ if (ctx.allSymbols?.size > 0) {
915
+ for (const [, symbols] of ctx.allSymbols) {
916
+ const tree = symbols._tree as { delete?: () => void } | undefined;
917
+ if (tree && typeof tree.delete === 'function') {
918
+ try {
919
+ tree.delete();
920
+ } catch {
921
+ /* ignore cleanup errors */
922
+ }
923
+ }
924
+ symbols._tree = undefined;
925
+ }
926
+ }
927
+
775
928
  // Flush Rust WAL writes (AST, complexity, CFG, dataflow) so the JS
776
929
  // connection and any post-build readers can see them. One TRUNCATE
777
930
  // here replaces the N per-feature resumeJsDb checkpoints (#checkpoint-opt).
@@ -816,13 +969,46 @@ export async function buildGraph(
816
969
  if (nativeResult) return nativeResult;
817
970
  } catch (err) {
818
971
  warn(`Native build orchestrator failed, falling back to JS pipeline: ${toErrorMessage(err)}`);
819
- // Fall through to JS pipeline
972
+ // The version gate in checkEngineSchemaMismatch was skipped because
973
+ // nativeAvailable was true. Now that we're falling back to the JS
974
+ // pipeline, perform the codegraph_version check here so a version
975
+ // bump still promotes to a full rebuild (#928).
976
+ if (ctx.incremental && !ctx.forceFullRebuild) {
977
+ const prevVersion = getBuildMeta(ctx.db, 'codegraph_version');
978
+ if (prevVersion && prevVersion !== CODEGRAPH_VERSION) {
979
+ info(
980
+ `Codegraph version changed (${prevVersion} → ${CODEGRAPH_VERSION}), promoting to full rebuild.`,
981
+ );
982
+ ctx.forceFullRebuild = true;
983
+ // Re-check embeddings: the initial warnOnEmbeddingsWipe ran before
984
+ // forceFullRebuild was set here, so the silent-data-loss guard
985
+ // would otherwise miss this late-promotion path (#986 follow-up).
986
+ warnOnEmbeddingsWipe(ctx);
987
+ }
988
+ }
820
989
  }
821
990
 
822
991
  await runPipelineStages(ctx);
823
992
  } catch (err) {
824
- if (!ctx.earlyExit && ctx.db) {
825
- closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
993
+ if (!ctx.earlyExit) {
994
+ // Release WASM trees before closing DB to prevent V8 crash during
995
+ // GC cleanup of orphaned WASM objects (#931).
996
+ if (ctx.allSymbols?.size > 0) {
997
+ for (const [, symbols] of ctx.allSymbols) {
998
+ const tree = symbols._tree as { delete?: () => void } | undefined;
999
+ if (tree && typeof tree.delete === 'function') {
1000
+ try {
1001
+ tree.delete();
1002
+ } catch {
1003
+ /* ignore cleanup errors */
1004
+ }
1005
+ }
1006
+ symbols._tree = undefined;
1007
+ }
1008
+ }
1009
+ if (ctx.db) {
1010
+ closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
1011
+ }
826
1012
  }
827
1013
  throw err;
828
1014
  }
@@ -699,6 +699,69 @@ function buildClassHierarchyEdges(
699
699
  }
700
700
  }
701
701
 
702
+ // ── Reverse-dep edge reconnection (#932, #933) ─────────────────────────
703
+
704
+ /**
705
+ * Reconnect edges that were saved before changed-file purge.
706
+ *
707
+ * Each saved edge records: sourceId (still valid — reverse-dep nodes were not
708
+ * purged) and target attributes (name, kind, file, line). The target node was
709
+ * deleted and re-inserted with a new ID by insertNodes. We look up the new ID
710
+ * by (name, kind, file) and re-create the edge.
711
+ */
712
+ function reconnectReverseDepEdges(ctx: PipelineContext): void {
713
+ const { db } = ctx;
714
+ const findNodeStmt = db.prepare(
715
+ 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? ORDER BY ABS(line - ?) LIMIT 1',
716
+ );
717
+ const reconnectedRows: EdgeRowTuple[] = [];
718
+ let dropped = 0;
719
+
720
+ for (const saved of ctx.savedReverseDepEdges) {
721
+ const newTarget = findNodeStmt.get(
722
+ saved.tgtName,
723
+ saved.tgtKind,
724
+ saved.tgtFile,
725
+ saved.tgtLine,
726
+ ) as { id: number } | undefined;
727
+ if (newTarget) {
728
+ reconnectedRows.push([
729
+ saved.sourceId,
730
+ newTarget.id,
731
+ saved.edgeKind,
732
+ saved.confidence,
733
+ saved.dynamic,
734
+ ]);
735
+ } else {
736
+ // Target was removed or renamed in the changed file — edge is stale
737
+ dropped++;
738
+ }
739
+ }
740
+
741
+ if (reconnectedRows.length > 0) {
742
+ if (ctx.nativeDb?.bulkInsertEdges) {
743
+ const nativeEdges = reconnectedRows.map((r) => ({
744
+ sourceId: r[0],
745
+ targetId: r[1],
746
+ kind: r[2],
747
+ confidence: r[3],
748
+ dynamic: r[4],
749
+ }));
750
+ const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges);
751
+ if (!ok) {
752
+ batchInsertEdges(db, reconnectedRows);
753
+ }
754
+ } else {
755
+ batchInsertEdges(db, reconnectedRows);
756
+ }
757
+ }
758
+
759
+ debug(
760
+ `Reconnected ${reconnectedRows.length} reverse-dep edges` +
761
+ (dropped > 0 ? ` (${dropped} dropped — targets removed/renamed)` : ''),
762
+ );
763
+ }
764
+
702
765
  // ── Main entry point ────────────────────────────────────────────────────
703
766
 
704
767
  /**
@@ -798,10 +861,11 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
798
861
  }
799
862
  }
800
863
 
801
- // Skip native import-edge path for small incremental builds (≤3 files):
802
- // napi-rs marshaling overhead exceeds computation savings.
864
+ // Skip native import-edge path for small incremental builds: napi-rs
865
+ // marshaling overhead (~13ms) exceeds Rust computation savings at this scale.
803
866
  const useNativeImportEdges =
804
- native?.buildImportEdges && (ctx.isFullBuild || ctx.fileSymbols.size > 3);
867
+ native?.buildImportEdges &&
868
+ (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold);
805
869
  if (useNativeImportEdges) {
806
870
  const beforeLen = allEdgeRows.length;
807
871
  buildImportEdgesNative(ctx, getNodeIdStmt, allEdgeRows, native!);
@@ -821,10 +885,11 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
821
885
  buildImportEdges(ctx, getNodeIdStmt, allEdgeRows);
822
886
  }
823
887
 
824
- // Skip native call-edge path for small incremental builds (≤3 files):
825
- // napi-rs marshaling overhead for allNodes exceeds computation savings.
888
+ // Skip native call-edge path for small incremental builds: napi-rs
889
+ // marshaling overhead for allNodes exceeds Rust computation savings.
826
890
  const useNativeCallEdges =
827
- native?.buildCallEdges && (ctx.isFullBuild || ctx.fileSymbols.size > 3);
891
+ native?.buildCallEdges &&
892
+ (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold);
828
893
  if (useNativeCallEdges) {
829
894
  buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!);
830
895
  } else {
@@ -858,5 +923,14 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
858
923
  }
859
924
  }
860
925
 
926
+ // Phase 3: Reconnect saved reverse-dep edges (#932, #933).
927
+ // When the WASM/JS path purged changed files, edges FROM reverse-dep files TO
928
+ // those files were deleted (target-side). The reverse-dep files were NOT
929
+ // reparsed — instead we saved the edge topology before purge and now reconnect
930
+ // each edge to the new node IDs created by insertNodes.
931
+ if (ctx.savedReverseDepEdges.length > 0) {
932
+ reconnectReverseDepEdges(ctx);
933
+ }
934
+
861
935
  ctx.timing.edgesMs = performance.now() - t0;
862
936
  }
@@ -174,13 +174,13 @@ function updateChangedFileMetrics(ctx: PipelineContext, changedFiles: string[]):
174
174
  SELECT COUNT(DISTINCT n_src.file) AS cnt FROM edges e
175
175
  JOIN nodes n_src ON e.source_id = n_src.id
176
176
  JOIN nodes n_tgt ON e.target_id = n_tgt.id
177
- WHERE e.kind = 'imports' AND n_tgt.file = ? AND n_src.file != n_tgt.file
177
+ WHERE e.kind IN ('imports', 'imports-type') AND n_tgt.file = ? AND n_src.file != n_tgt.file
178
178
  `);
179
179
  const getFanOut = db.prepare(`
180
180
  SELECT COUNT(DISTINCT n_tgt.file) AS cnt FROM edges e
181
181
  JOIN nodes n_src ON e.source_id = n_src.id
182
182
  JOIN nodes n_tgt ON e.target_id = n_tgt.id
183
- WHERE e.kind = 'imports' AND n_src.file = ? AND n_src.file != n_tgt.file
183
+ WHERE e.kind IN ('imports', 'imports-type') AND n_src.file = ? AND n_src.file != n_tgt.file
184
184
  `);
185
185
  const upsertMetric = db.prepare(`
186
186
  INSERT OR REPLACE INTO node_metrics