@optave/codegraph 3.11.0 → 3.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/README.md +38 -31
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  6. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  7. package/dist/ast-analysis/visitor-utils.js +83 -49
  8. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  9. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  11. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  12. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  14. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  15. package/dist/cli/commands/embed.d.ts.map +1 -1
  16. package/dist/cli/commands/embed.js +49 -4
  17. package/dist/cli/commands/embed.js.map +1 -1
  18. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  19. package/dist/domain/analysis/dependencies.js +106 -80
  20. package/dist/domain/analysis/dependencies.js.map +1 -1
  21. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  22. package/dist/domain/analysis/fn-impact.js +77 -52
  23. package/dist/domain/analysis/fn-impact.js.map +1 -1
  24. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  25. package/dist/domain/analysis/module-map.js +132 -121
  26. package/dist/domain/analysis/module-map.js.map +1 -1
  27. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  28. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  29. package/dist/domain/graph/builder/helpers.js +47 -33
  30. package/dist/domain/graph/builder/helpers.js.map +1 -1
  31. package/dist/domain/graph/builder/incremental.d.ts +6 -0
  32. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  33. package/dist/domain/graph/builder/incremental.js +142 -76
  34. package/dist/domain/graph/builder/incremental.js.map +1 -1
  35. package/dist/domain/graph/builder/pipeline.d.ts +1 -44
  36. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  37. package/dist/domain/graph/builder/pipeline.js +10 -766
  38. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  39. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  40. package/dist/domain/graph/builder/stages/build-edges.js +133 -96
  41. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  42. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  43. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  44. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  45. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  46. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  47. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  48. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  49. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  50. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  51. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  52. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  54. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  56. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  57. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  58. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  59. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  60. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  61. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  62. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  63. package/dist/domain/graph/cycles.d.ts +6 -4
  64. package/dist/domain/graph/cycles.d.ts.map +1 -1
  65. package/dist/domain/graph/cycles.js +50 -55
  66. package/dist/domain/graph/cycles.js.map +1 -1
  67. package/dist/domain/graph/journal.d.ts.map +1 -1
  68. package/dist/domain/graph/journal.js +89 -70
  69. package/dist/domain/graph/journal.js.map +1 -1
  70. package/dist/domain/graph/watcher.d.ts.map +1 -1
  71. package/dist/domain/graph/watcher.js +5 -2
  72. package/dist/domain/graph/watcher.js.map +1 -1
  73. package/dist/domain/parser.d.ts +12 -23
  74. package/dist/domain/parser.d.ts.map +1 -1
  75. package/dist/domain/parser.js +126 -79
  76. package/dist/domain/parser.js.map +1 -1
  77. package/dist/domain/search/generator.d.ts +3 -1
  78. package/dist/domain/search/generator.d.ts.map +1 -1
  79. package/dist/domain/search/generator.js +68 -45
  80. package/dist/domain/search/generator.js.map +1 -1
  81. package/dist/domain/search/models.d.ts +2 -0
  82. package/dist/domain/search/models.d.ts.map +1 -1
  83. package/dist/domain/search/models.js +37 -3
  84. package/dist/domain/search/models.js.map +1 -1
  85. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  86. package/dist/domain/search/search/hybrid.js +49 -40
  87. package/dist/domain/search/search/hybrid.js.map +1 -1
  88. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  89. package/dist/domain/search/search/semantic.js +69 -49
  90. package/dist/domain/search/search/semantic.js.map +1 -1
  91. package/dist/domain/wasm-worker-entry.js +201 -136
  92. package/dist/domain/wasm-worker-entry.js.map +1 -1
  93. package/dist/extractors/elixir.js +95 -71
  94. package/dist/extractors/elixir.js.map +1 -1
  95. package/dist/extractors/gleam.d.ts.map +1 -1
  96. package/dist/extractors/gleam.js +23 -31
  97. package/dist/extractors/gleam.js.map +1 -1
  98. package/dist/extractors/helpers.d.ts +79 -1
  99. package/dist/extractors/helpers.d.ts.map +1 -1
  100. package/dist/extractors/helpers.js +137 -0
  101. package/dist/extractors/helpers.js.map +1 -1
  102. package/dist/extractors/java.d.ts.map +1 -1
  103. package/dist/extractors/java.js +37 -49
  104. package/dist/extractors/java.js.map +1 -1
  105. package/dist/extractors/javascript.d.ts.map +1 -1
  106. package/dist/extractors/javascript.js +44 -44
  107. package/dist/extractors/javascript.js.map +1 -1
  108. package/dist/extractors/julia.js +27 -34
  109. package/dist/extractors/julia.js.map +1 -1
  110. package/dist/extractors/r.d.ts.map +1 -1
  111. package/dist/extractors/r.js +33 -58
  112. package/dist/extractors/r.js.map +1 -1
  113. package/dist/extractors/solidity.d.ts.map +1 -1
  114. package/dist/extractors/solidity.js +38 -61
  115. package/dist/extractors/solidity.js.map +1 -1
  116. package/dist/features/boundaries.d.ts.map +1 -1
  117. package/dist/features/boundaries.js +49 -39
  118. package/dist/features/boundaries.js.map +1 -1
  119. package/dist/features/cfg.d.ts.map +1 -1
  120. package/dist/features/cfg.js +90 -63
  121. package/dist/features/cfg.js.map +1 -1
  122. package/dist/features/check.d.ts.map +1 -1
  123. package/dist/features/check.js +43 -34
  124. package/dist/features/check.js.map +1 -1
  125. package/dist/features/cochange.d.ts.map +1 -1
  126. package/dist/features/cochange.js +68 -56
  127. package/dist/features/cochange.js.map +1 -1
  128. package/dist/features/complexity.d.ts.map +1 -1
  129. package/dist/features/complexity.js +105 -75
  130. package/dist/features/complexity.js.map +1 -1
  131. package/dist/features/dataflow.d.ts.map +1 -1
  132. package/dist/features/dataflow.js +37 -29
  133. package/dist/features/dataflow.js.map +1 -1
  134. package/dist/features/flow.d.ts.map +1 -1
  135. package/dist/features/flow.js +31 -22
  136. package/dist/features/flow.js.map +1 -1
  137. package/dist/features/graph-enrichment.d.ts.map +1 -1
  138. package/dist/features/graph-enrichment.js +77 -70
  139. package/dist/features/graph-enrichment.js.map +1 -1
  140. package/dist/features/owners.d.ts +17 -26
  141. package/dist/features/owners.d.ts.map +1 -1
  142. package/dist/features/owners.js +120 -109
  143. package/dist/features/owners.js.map +1 -1
  144. package/dist/features/sequence.d.ts.map +1 -1
  145. package/dist/features/sequence.js +59 -54
  146. package/dist/features/sequence.js.map +1 -1
  147. package/dist/features/structure-query.d.ts.map +1 -1
  148. package/dist/features/structure-query.js +60 -60
  149. package/dist/features/structure-query.js.map +1 -1
  150. package/dist/features/structure.js +28 -36
  151. package/dist/features/structure.js.map +1 -1
  152. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  153. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  154. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  155. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  156. package/dist/graph/classifiers/roles.js +63 -59
  157. package/dist/graph/classifiers/roles.js.map +1 -1
  158. package/dist/infrastructure/config.d.ts +1 -1
  159. package/dist/infrastructure/config.d.ts.map +1 -1
  160. package/dist/infrastructure/config.js +1 -1
  161. package/dist/infrastructure/config.js.map +1 -1
  162. package/dist/presentation/cfg.d.ts.map +1 -1
  163. package/dist/presentation/cfg.js +44 -29
  164. package/dist/presentation/cfg.js.map +1 -1
  165. package/dist/presentation/flow.d.ts.map +1 -1
  166. package/dist/presentation/flow.js +58 -38
  167. package/dist/presentation/flow.js.map +1 -1
  168. package/dist/types.d.ts +1 -1
  169. package/dist/types.d.ts.map +1 -1
  170. package/package.json +7 -7
  171. package/src/ast-analysis/engine.ts +145 -61
  172. package/src/ast-analysis/visitor-utils.ts +86 -46
  173. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  174. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  175. package/src/cli/commands/embed.ts +54 -4
  176. package/src/domain/analysis/dependencies.ts +166 -85
  177. package/src/domain/analysis/fn-impact.ts +120 -50
  178. package/src/domain/analysis/module-map.ts +175 -140
  179. package/src/domain/graph/builder/helpers.ts +85 -76
  180. package/src/domain/graph/builder/incremental.ts +217 -90
  181. package/src/domain/graph/builder/pipeline.ts +19 -957
  182. package/src/domain/graph/builder/stages/build-edges.ts +198 -140
  183. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  184. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  185. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  186. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  187. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  188. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  189. package/src/domain/graph/cycles.ts +51 -49
  190. package/src/domain/graph/journal.ts +84 -69
  191. package/src/domain/graph/watcher.ts +8 -2
  192. package/src/domain/parser.ts +143 -66
  193. package/src/domain/search/generator.ts +132 -74
  194. package/src/domain/search/models.ts +39 -3
  195. package/src/domain/search/search/hybrid.ts +53 -42
  196. package/src/domain/search/search/semantic.ts +105 -65
  197. package/src/domain/wasm-worker-entry.ts +235 -152
  198. package/src/extractors/elixir.ts +91 -64
  199. package/src/extractors/gleam.ts +33 -37
  200. package/src/extractors/helpers.ts +205 -1
  201. package/src/extractors/java.ts +42 -45
  202. package/src/extractors/javascript.ts +44 -43
  203. package/src/extractors/julia.ts +28 -35
  204. package/src/extractors/r.ts +38 -56
  205. package/src/extractors/solidity.ts +43 -71
  206. package/src/features/boundaries.ts +64 -46
  207. package/src/features/cfg.ts +145 -74
  208. package/src/features/check.ts +60 -43
  209. package/src/features/cochange.ts +95 -72
  210. package/src/features/complexity.ts +134 -79
  211. package/src/features/dataflow.ts +57 -34
  212. package/src/features/flow.ts +48 -24
  213. package/src/features/graph-enrichment.ts +105 -70
  214. package/src/features/owners.ts +186 -146
  215. package/src/features/sequence.ts +99 -69
  216. package/src/features/structure-query.ts +94 -79
  217. package/src/features/structure.ts +56 -56
  218. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  219. package/src/graph/classifiers/roles.ts +64 -54
  220. package/src/infrastructure/config.ts +1 -1
  221. package/src/presentation/cfg.ts +48 -32
  222. package/src/presentation/flow.ts +100 -52
  223. package/src/types.ts +1 -1
@@ -0,0 +1,942 @@
1
+ /**
2
+ * Native build orchestrator stage — runs the full Rust pipeline when available,
3
+ * with WASM fallback for files the native engine drops.
4
+ *
5
+ * Extracted from `pipeline.ts` to break the name-collision cycle between
6
+ * `buildGraph()` (this module's caller) and `ctx.nativeDb.buildGraph()` (the
7
+ * Rust orchestrator entry point invoked here). Codegraph's name-based call
8
+ * resolver previously conflated the two and reported a false-positive
9
+ * function-level cycle (`buildGraph ↔ tryNativeOrchestrator`).
10
+ *
11
+ * The orchestrator-selection strategy lives here so `pipeline.ts` stays a thin
12
+ * top-level controller: detect changes, try native, fall back to JS stages.
13
+ */
14
+ import path from 'node:path';
15
+ import { performance } from 'node:perf_hooks';
16
+ import {
17
+ acquireAdvisoryLock,
18
+ closeDbPair,
19
+ openDb,
20
+ purgeFilesData,
21
+ releaseAdvisoryLock,
22
+ setBuildMeta,
23
+ } from '../../../../db/index.js';
24
+ import { debug, info, warn } from '../../../../infrastructure/logger.js';
25
+ import { loadNative } from '../../../../infrastructure/native.js';
26
+ import { semverCompare } from '../../../../infrastructure/update-check.js';
27
+ import { normalizePath } from '../../../../shared/constants.js';
28
+ import { toErrorMessage } from '../../../../shared/errors.js';
29
+ import { CODEGRAPH_VERSION } from '../../../../shared/version.js';
30
+ import type {
31
+ BetterSqlite3Database,
32
+ BuildResult,
33
+ Definition,
34
+ ExtractorOutput,
35
+ SqliteStatement,
36
+ } from '../../../../types.js';
37
+ import {
38
+ classifyNativeDrops,
39
+ formatDropExtensionSummary,
40
+ getInstalledWasmExtensions,
41
+ NATIVE_SUPPORTED_EXTENSIONS,
42
+ parseFilesWasmForBackfill,
43
+ } from '../../../parser.js';
44
+ import type { PipelineContext } from '../context.js';
45
+ import {
46
+ batchInsertNodes,
47
+ collectFiles as collectFilesUtil,
48
+ fileHash,
49
+ fileStat,
50
+ readFileSafe,
51
+ } from '../helpers.js';
52
+ import { NativeDbProxy } from '../native-db-proxy.js';
53
+ import { closeNativeDb } from './native-db-lifecycle.js';
54
+
55
+ // ── Native orchestrator types ──────────────────────────────────────────
56
+
57
+ interface NativeOrchestratorResult { // NOTE(review): presumably the parsed result of the Rust orchestrator call — the call site is outside this view, confirm there.
58
+ phases: Record<string, number>; // Named numeric values; presumably the per-phase millisecond timings consumed as `p` by formatNativeTimingResult — TODO confirm.
59
+ earlyExit?: boolean; // NOTE(review): semantics not visible here — presumably set when the native pipeline stopped before doing work; confirm.
60
+ nodeCount?: number; // Optional counters — presumably totals reported by the native pipeline; confirm.
61
+ edgeCount?: number;
62
+ fileCount?: number;
63
+ changedFiles?: string[]; // NOTE(review): path format (relative vs. absolute) is not established in this view — verify against the consumer.
64
+ changedCount?: number;
65
+ removedCount?: number;
66
+ isFullBuild?: boolean; // NOTE(review): presumably distinguishes full from incremental native builds — confirm.
67
+ /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
68
+ structureHandled?: boolean;
69
+ /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
70
+ analysisComplete?: boolean;
71
+ }
72
+
73
+ /** Files the native orchestrator silently dropped — the working set for backfill. */
74
+ interface DroppedLanguageGap { // Return type of detectDroppedLanguageGap.
75
+ /** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
76
+ missingRel: string[];
77
+ /** Absolute paths, aligned by index with `missingRel`. */
78
+ missingAbs: string[];
79
+ /**
80
+ * Relative paths of WASM-only files present in DB but absent from disk (#1073).
81
+ * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
82
+ * backfill must purge them. Always disjoint from `missingRel`.
83
+ */
84
+ staleRel: string[]; // NOTE(review): same shape as the computeWasmOnlyStaleFiles return value — presumably populated from it; confirm in detectDroppedLanguageGap.
85
+ }
86
+
87
+ /**
88
+ * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
89
+ * is pure and unit-testable independently of `getInstalledWasmExtensions` and
90
+ * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
91
+ */
92
+ export interface WasmOnlyStaleFilesInput {
93
+ /** Distinct `file` values from the `nodes` table. */
94
+ existingNodes: ReadonlySet<string>; // Scanned first; back-slashes are replaced with `/` before the `expected` lookup.
95
+ /** Distinct `file` values from the `file_hashes` table. */
96
+ existingHashes: ReadonlySet<string>; // Scanned after `existingNodes`, with the same normalisation.
97
+ /** Relative paths currently on disk (from `collectFilesUtil`). */
98
+ expected: ReadonlySet<string>; // A DB path whose normalised form is in this set is never reported as stale.
99
+ /** Lowercased extensions whose WASM grammar is installed. */
100
+ installedExts: ReadonlySet<string>; // Paths whose lowercased extension is absent from this set are skipped.
101
+ /** Extensions covered by the Rust addon — Rust owns deletion for these. */
102
+ nativeSupported: ReadonlySet<string>; // Paths whose lowercased extension is in this set are skipped.
103
+ }
104
+
105
+ // ── Native orchestrator helpers ───────────────────────────────────────
106
+
107
/**
 * Decide whether the native orchestrator must be bypassed for this build.
 *
 * @param ctx - Pipeline context; only `forceFullRebuild`, `engineVersion` and
 *   `engineName` are read.
 * @returns A short reason string when the orchestrator should be skipped, or
 *   `null` when it should run.
 */
function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null {
  if (ctx.forceFullRebuild) {
    return 'forceFullRebuild';
  }

  // The v3.9.0 addon shipped a buggy incremental purge (wrong SQL on analysis
  // tables, scoped removal over-detection), fixed in v3.9.1 by PR #865. Only
  // addons older than 3.9.1 are gated; an unknown/empty version is not.
  const addonVersion = ctx.engineVersion;
  if (addonVersion && semverCompare(addonVersion, '3.9.1') < 0) {
    return `buggy addon ${addonVersion}`;
  }

  return ctx.engineName === 'native' ? null : `engine=${ctx.engineName}`;
}
118
+
119
/**
 * Hand the database back from the native connection to better-sqlite3 after a
 * native build.
 *
 * Steps, in order:
 *  1. `closeNativeDb(ctx, 'post-native-build')` — closes the native handle
 *     (per the original note this is also where the WAL is checkpointed
 *     through rusqlite; that logic lives in `native-db-lifecycle`).
 *  2. Close the current JS handle. Best-effort: a failure is logged at debug
 *     level and does not abort the handoff.
 *  3. Reopen `ctx.dbPath` and store the fresh handle on `ctx.db`.
 *
 * @param ctx - Pipeline context; `ctx.db` is replaced on success.
 * @returns `true` when the reopen succeeded. `false` when it failed — `ctx.db`
 *   then still references the previous handle, on which `close()` has already
 *   been attempted, so the caller should return a partial result.
 */
function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean {
  closeNativeDb(ctx, 'post-native-build');
  try {
    ctx.db.close();
  } catch (closeErr) {
    debug(`handoffWal JS db close failed: ${toErrorMessage(closeErr)}`);
  }
  try {
    ctx.db = openDb(ctx.dbPath);
    return true;
  } catch (reopenErr) {
    // Use toErrorMessage (as the close path above does) instead of an
    // `as Error` cast, so a non-Error throwable does not log "undefined".
    warn(`Failed to reopen DB after native build: ${toErrorMessage(reopenErr)}`);
    return false;
  }
}
136
+
137
/**
 * Rebuild a `file -> ExtractorOutput` map from the `nodes` and `edges` tables
 * after a native orchestrator build.
 *
 * Only `definitions` carry real data. `imports` and `exports` are set to
 * arrays of the right *length* with no elements, because the extractor arrays
 * aren't persisted to the DB and only their counts are derived from edges
 * (#804). `calls`, `classes` and `typeMap` stay empty.
 *
 * @param ctx - Pipeline context; only `ctx.db` is used.
 * @param scopeFiles - When non-empty, restricts the definition query to these
 *   files (analysis-only use). When omitted or empty, every file is loaded
 *   (needed for structure rebuilds).
 * @returns Map keyed by the `file` column, in `ORDER BY file, line` order.
 */
function reconstructFileSymbolsFromDb(
  ctx: PipelineContext,
  scopeFiles?: string[],
): Map<string, ExtractorOutput> {
  type DefinitionRow = {
    file: string;
    name: string;
    kind: string;
    line: number;
    endLine: number | null;
  };
  type CountRow = { file: string; cnt: number };

  // One `?` placeholder per scoped file; values are bound, never interpolated.
  const boundFiles: string[] = scopeFiles && scopeFiles.length > 0 ? [...scopeFiles] : [];
  const scopeClause =
    boundFiles.length > 0 ? ` AND file IN (${boundFiles.map(() => '?').join(',')})` : '';
  const definitionSql = `SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL${scopeClause} ORDER BY file, line`;
  const definitionRows = ctx.db.prepare(definitionSql).all(...boundFiles) as DefinitionRow[];

  const symbolsByFile = new Map<string, ExtractorOutput>();
  for (const { file, name, kind, line, endLine } of definitionRows) {
    let output = symbolsByFile.get(file);
    if (output === undefined) {
      output = {
        definitions: [],
        calls: [],
        imports: [],
        classes: [],
        exports: [],
        typeMap: new Map(),
      };
      symbolsByFile.set(file, output);
    }
    output.definitions.push({
      name,
      kind: kind as Definition['kind'],
      line,
      endLine: endLine ?? undefined,
    });
  }

  // Import count: outgoing import-like edges, grouped by the source node's file.
  const importCounts = ctx.db
    .prepare(
      `SELECT n.file, COUNT(*) AS cnt
       FROM edges e JOIN nodes n ON e.source_id = n.id
       WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
       AND n.file IS NOT NULL
       GROUP BY n.file`,
    )
    .all() as CountRow[];
  for (const { file, cnt } of importCounts) {
    const output = symbolsByFile.get(file);
    if (output) output.imports = new Array(cnt) as ExtractorOutput['imports'];
  }

  // Export count: distinct target nodes reached by a cross-file import/reexport
  // edge, grouped by the target node's file.
  const exportCounts = ctx.db
    .prepare(
      `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
       FROM edges e
       JOIN nodes n_tgt ON e.target_id = n_tgt.id
       JOIN nodes n_src ON e.source_id = n_src.id
       WHERE e.kind IN ('imports', 'imports-type', 'reexports')
       AND n_tgt.file IS NOT NULL
       AND n_src.file != n_tgt.file
       GROUP BY n_tgt.file`,
    )
    .all() as CountRow[];
  for (const { file, cnt } of exportCounts) {
    const output = symbolsByFile.get(file);
    if (output) output.exports = new Array(cnt) as ExtractorOutput['exports'];
  }

  return symbolsByFile;
}
223
+
224
/**
 * Run the JS `buildStructure()` pass after the native orchestrator so that
 * directory nodes and contains edges get filled in.
 *
 * Any failure is downgraded to a warning; the function never throws.
 *
 * @param ctx - Pipeline context (`db` and `rootDir` are used).
 * @param allFileSymbols - Per-file symbols; the keys supply the directory set.
 * @param isFullBuild - When true, `buildStructure` receives `null` for the
 *   changed-file list (rebuild everything).
 * @param changedFiles - Files to scope an incremental update to. An empty or
 *   missing list is treated like a full rebuild.
 * @returns Elapsed wall-clock time of the pass, in milliseconds.
 */
async function runPostNativeStructure(
  ctx: PipelineContext,
  allFileSymbols: Map<string, ExtractorOutput>,
  isFullBuild: boolean,
  changedFiles: string[] | undefined,
): Promise<number> {
  type BuildStructureFn = (
    db: typeof ctx.db,
    fileSymbols: Map<string, ExtractorOutput>,
    rootDir: string,
    lineCountMap: Map<string, number>,
    directories: Set<string>,
    changedFiles: string[] | null,
  ) => void;

  const startedAt = performance.now();
  try {
    // Every ancestor directory of every file, shallowest first.
    const directories = new Set<string>();
    for (const relPath of allFileSymbols.keys()) {
      const segments = relPath.split('/');
      segments.pop(); // last segment is the file name, not a directory
      let prefix: string | undefined;
      for (const segment of segments) {
        prefix = prefix === undefined ? segment : `${prefix}/${segment}`;
        directories.add(prefix);
      }
    }

    // Line counts already stored for file nodes, keyed by the node's name.
    const lineCountRows = ctx.db
      .prepare(
        `SELECT n.name AS file, m.line_count
         FROM node_metrics m JOIN nodes n ON m.node_id = n.id
         WHERE n.kind = 'file'`,
      )
      .all() as Array<{ file: string; line_count: number }>;
    const lineCountMap = new Map<string, number>(
      lineCountRows.map((row): [string, number] => [row.file, row.line_count]),
    );

    // null => rebuild everything; a non-empty list scopes the update to it.
    const changedFilePaths =
      !isFullBuild && changedFiles && changedFiles.length > 0 ? changedFiles : null;

    const structureModule = (await import('../../../../features/structure.js')) as {
      buildStructure: BuildStructureFn;
    };
    structureModule.buildStructure(
      ctx.db,
      allFileSymbols,
      ctx.rootDir,
      lineCountMap,
      directories,
      changedFilePaths,
    );

    const scopeLabel = changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)';
    debug(`Structure phase completed after native orchestrator${scopeLabel}`);
  } catch (err) {
    warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
  }
  return performance.now() - startedAt;
}
289
+
290
/**
 * JS fallback for AST/complexity/CFG/dataflow analysis after the native
 * orchestrator. Used when the Rust addon doesn't include analysis persistence
 * (older addon version) or when analysis failed on the Rust side.
 *
 * Flow:
 *  1. Scope the symbol map to `changedFiles` when that list is non-empty.
 *  2. Try to reopen a native DB handle for the analysis features; if that
 *     fails the analyses run without one.
 *  3. Checkpoint the JS WAL once and install no-op suspend/resume callbacks.
 *  4. Run `runAnalyses`; a failure there is logged as a warning and the
 *     timings stay at 0.
 *  5. Always checkpoint + close the native handle and clear the callbacks,
 *     even if step 3 throws.
 *
 * @param ctx - Pipeline context; `nativeDb` and `engineOpts` are mutated while
 *   the analyses run and reset before returning.
 * @param allFileSymbols - Symbols for every file.
 * @param changedFiles - When non-empty, only these files are analysed (files
 *   absent from `allFileSymbols` are ignored).
 * @returns Per-phase timings in milliseconds (0 for phases that did not report).
 */
async function runPostNativeAnalysis(
  ctx: PipelineContext,
  allFileSymbols: Map<string, ExtractorOutput>,
  changedFiles: string[] | undefined,
): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> {
  const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };

  // Scope analysis fileSymbols to changed files only
  let analysisFileSymbols: Map<string, ExtractorOutput>;
  if (changedFiles && changedFiles.length > 0) {
    analysisFileSymbols = new Map();
    for (const f of changedFiles) {
      const entry = allFileSymbols.get(f);
      if (entry) analysisFileSymbols.set(f, entry);
    }
  } else {
    analysisFileSymbols = allFileSymbols;
  }

  // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
  const native = loadNative();
  if (native?.NativeDatabase) {
    try {
      ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
      if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb;
    } catch (openErr) {
      // Best-effort: continue without a native handle, but leave a trace.
      debug(`Post-native analysis: nativeDb reopen failed: ${toErrorMessage(openErr)}`);
      ctx.nativeDb = undefined;
      if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined;
    }
  }

  try {
    // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
    // Previously each feature called wal_checkpoint(TRUNCATE) individually
    // (~68ms each × 3-4 features). One FULL checkpoint suffices.
    // This sits inside the try/finally so a failing checkpoint cannot leak the
    // native handle opened above.
    if (ctx.nativeDb && ctx.engineOpts) {
      ctx.db.pragma('wal_checkpoint(FULL)');
      ctx.engineOpts.suspendJsDb = () => {};
      ctx.engineOpts.resumeJsDb = () => {};
    }

    try {
      const { runAnalyses: runAnalysesFn } = (await import(
        '../../../../ast-analysis/engine.js'
      )) as {
        runAnalyses: (
          db: BetterSqlite3Database,
          fileSymbols: Map<string, ExtractorOutput>,
          rootDir: string,
          opts: Record<string, unknown>,
          engineOpts?: Record<string, unknown>,
        ) => Promise<{
          astMs?: number;
          complexityMs?: number;
          cfgMs?: number;
          dataflowMs?: number;
        }>;
      };
      const result = await runAnalysesFn(
        ctx.db,
        analysisFileSymbols,
        ctx.rootDir,
        ctx.opts as Record<string, unknown>,
        ctx.engineOpts as unknown as Record<string, unknown> | undefined,
      );
      timing.astMs = result.astMs ?? 0;
      timing.complexityMs = result.complexityMs ?? 0;
      timing.cfgMs = result.cfgMs ?? 0;
      timing.dataflowMs = result.dataflowMs ?? 0;
    } catch (err) {
      warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
    }
  } finally {
    // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
    // WAL writes so JS and external readers can see them. Runs once after
    // all analysis features complete (not per-feature).
    if (ctx.nativeDb) {
      try {
        ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
      } catch (checkpointErr) {
        // Non-fatal: the handle is closed below regardless.
        debug(`Post-native analysis: WAL checkpoint failed: ${toErrorMessage(checkpointErr)}`);
      }
      try {
        ctx.nativeDb.close();
      } catch (closeErr) {
        // Non-fatal: the reference is dropped below regardless.
        debug(`Post-native analysis: nativeDb close failed: ${toErrorMessage(closeErr)}`);
      }
      ctx.nativeDb = undefined;
      if (ctx.engineOpts) {
        ctx.engineOpts.nativeDb = undefined;
        ctx.engineOpts.suspendJsDb = undefined;
        ctx.engineOpts.resumeJsDb = undefined;
      }
    }
  }

  return timing;
}
384
+
385
/**
 * Assemble the `BuildResult` timing report from the native orchestrator's
 * phase timings plus the JS post-processing timings.
 *
 * Every value is rounded to one decimal place; a phase missing from `p` is
 * reported as 0.
 *
 * @param p - Phase timings keyed by name (`setupMs`, `collectMs`, ...).
 * @param structurePatchMs - Time spent in the JS structure pass; added to
 *   `p.structureMs` before rounding.
 * @param analysisTiming - Timings of the JS analysis phases.
 * @returns A `BuildResult` containing only the `phases` record.
 */
function formatNativeTimingResult(
  p: Record<string, number>,
  structurePatchMs: number,
  analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
): BuildResult {
  // Missing -> 0, then round to tenths via toFixed and convert back to a number.
  const toTenths = (ms: number | undefined): number => Number((ms ?? 0).toFixed(1));
  const structureTotalMs = (p.structureMs ?? 0) + structurePatchMs;

  const phases = {
    setupMs: toTenths(p.setupMs),
    collectMs: toTenths(p.collectMs),
    detectMs: toTenths(p.detectMs),
    parseMs: toTenths(p.parseMs),
    insertMs: toTenths(p.insertMs),
    resolveMs: toTenths(p.resolveMs),
    edgesMs: toTenths(p.edgesMs),
    structureMs: toTenths(structureTotalMs),
    rolesMs: toTenths(p.rolesMs),
    astMs: toTenths(analysisTiming.astMs),
    complexityMs: toTenths(analysisTiming.complexityMs),
    cfgMs: toTenths(analysisTiming.cfgMs),
    dataflowMs: toTenths(analysisTiming.dataflowMs),
    finalizeMs: toTenths(p.finalizeMs),
  };
  return { phases };
}
410
+
411
/**
 * Compute the WASM-only files present in the DB but missing from disk (#1073).
 *
 * Returns the stored (raw) relative paths that:
 *   - appear in `existingNodes` or `existingHashes` (in DB),
 *   - are absent from `expected` (not on disk),
 *   - have an extension installed for WASM, AND
 *   - have an extension NOT covered by `nativeSupported` — Rust's
 *     `purge_changed_files` handles deletion for natively-supported extensions
 *     via its own `detect_removed_files`, so the caller must not double-purge.
 *
 * Extensions are lowercased before lookup to match the registry and Rust's
 * `LanguageKind::from_extension` (which normalises case for the languages
 * where both cases are conventional, e.g. R's `.r` / `.R`).
 *
 * DB paths are forced to forward slashes before comparison with `expected`
 * (which is always normalised). DB rows are expected to use forward slashes,
 * but a stale row written by older code on Windows could carry back-slashes —
 * normalising here keeps the comparison platform-safe and prevents
 * false-positive purges of live rows. `\` is replaced explicitly (rather than
 * via `normalizePath`, which only touches `path.sep`) so the defence also
 * works on POSIX against a DB that was migrated from Windows.
 *
 * De-duplication is keyed on the RAW stored path. The purge is described as a
 * byte-exact `DELETE ... WHERE file = ?`, so when one table holds `a\b.x` and
 * the other holds `a/b.x`, both spellings must be returned; otherwise the
 * second spelling's rows survive until a later build.
 *
 * Exported for unit testing.
 *
 * @param input - The DB path sets, the on-disk path set and the two extension sets.
 * @returns Raw stored paths to purge, in first-seen order (`existingNodes`
 *   first, then `existingHashes`), without exact duplicates.
 */
export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
  const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
  const stale: string[] = [];
  const seenRaw = new Set<string>();
  const consider = (rawRel: string): void => {
    if (seenRaw.has(rawRel)) return;
    // The normalised form is used only for the liveness and extension checks.
    const rel = rawRel.replace(/\\/g, '/');
    if (expected.has(rel)) return;
    const ext = path.extname(rel).toLowerCase();
    if (nativeSupported.has(ext)) return;
    if (!installedExts.has(ext)) return;
    seenRaw.add(rawRel);
    // Push the ORIGINAL raw path so the eventual `WHERE file = ?` predicate
    // matches the stored row byte-for-byte.
    stale.push(rawRel);
  };
  for (const rel of existingNodes) consider(rel);
  for (const rel of existingHashes) consider(rel);
  return stale;
}
460
+
461
/**
 * Bucket relative paths by lowercased file extension.
 *
 * The result has the bucket shape that `formatDropExtensionSummary` consumes,
 * so a caller that already knows the drop reason (e.g. the stale-purge path,
 * where every path is `unsupported-by-native`) can render a per-extension
 * summary without going through `classifyNativeDrops`.
 *
 * @param relPaths - Paths to group; each is kept unchanged in its bucket.
 * @returns Map from extension (as `path.extname` reports it, lowercased; `''`
 *   when there is none) to the paths carrying it, in input order.
 */
function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
  const byExtension = new Map<string, string[]>();
  for (const relPath of relPaths) {
    const key = path.extname(relPath).toLowerCase();
    const bucket = byExtension.get(key);
    if (bucket === undefined) {
      byExtension.set(key, [relPath]);
    } else {
      bucket.push(relPath);
    }
  }
  return byExtension;
}
481
+
482
/**
 * Work out which files the native orchestrator silently left out of the DB,
 * and which WASM-only rows no longer have a file on disk.
 *
 * One filesystem walk is compared against the `file` rows of `nodes` and
 * against `file_hashes`. A collected file counts as "missing" as soon as it
 * is absent from EITHER table: the fast-skip pre-flight (#1054) needs both,
 * and the two can diverge (e.g. legacy DBs where `nodes` was populated but
 * `file_hashes` was not).
 *
 * Only extensions with an installed WASM grammar are reported. Extensions in
 * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
 * installs) cannot be parsed by either engine, so they are not a native
 * regression; leaving them out keeps the warn count in
 * `backfillNativeDroppedFiles` meaningful.
 *
 * Stale detection (#1073): Rust's `detect_removed_files` filter (#1070)
 * skips files outside its supported extensions, so deletions of WASM-only
 * languages never reach the native purge path, and the backfill itself only
 * inserts rows. Without `staleRel`, such rows would linger across
 * incremental rebuilds until the next full rebuild.
 *
 * No DB handoff and no parsing happen here, so the result serves both as the
 * gate for the backfill and as its working set. Only `.prepare().all()` is
 * used, which NativeDbProxy supports, so this works whether `ctx.db` is a
 * proxy or a real better-sqlite3 connection.
 *
 * @param ctx Pipeline context; `rootDir`, `config` and `db` are read.
 * @returns Missing files (relative and absolute, index-aligned) and stale relative paths.
 */
function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
  // Relative, normalised paths of every file the collector finds on disk.
  const onDisk = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
  const relOnDisk = onDisk.files.map((abs) => normalizePath(path.relative(ctx.rootDir, abs)));
  const expected = new Set(relOnDisk);

  // Run a single-column `file` query and collect the values into a Set.
  const readFileColumn = (sql: string): Set<string> => {
    const rows = ctx.db.prepare(sql).all() as Array<{ file: string }>;
    return new Set(rows.map((row) => row.file));
  };

  const existingNodes = readFileColumn("SELECT DISTINCT file FROM nodes WHERE kind = 'file'");

  let existingHashes: Set<string>;
  try {
    existingHashes = readFileColumn('SELECT DISTINCT file FROM file_hashes');
  } catch (e) {
    // file_hashes table may not exist on legacy DBs; treat every file as
    // unhashed so the backfill writes rows on its upsert path.
    debug(
      `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
    );
    existingHashes = new Set<string>();
  }

  const installedExts = getInstalledWasmExtensions();

  // Missing = collected on disk, parseable by an installed WASM grammar, and
  // absent from at least one of the two tables.
  const missingRel = [...expected].filter((rel) => {
    const fullyTracked = existingNodes.has(rel) && existingHashes.has(rel);
    if (fullyTracked) return false;
    return installedExts.has(path.extname(rel).toLowerCase());
  });
  const missingAbs = missingRel.map((rel) => path.join(ctx.rootDir, rel));

  return {
    missingRel,
    missingAbs,
    staleRel: computeWasmOnlyStaleFiles({
      existingNodes,
      existingHashes,
      expected,
      installedExts,
      nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
    }),
  };
}
555
+
556
/**
 * Backfill files that the native orchestrator silently dropped during parse.
 * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
 *
 * Also purges stale rows for WASM-only files deleted from disk (#1073), which
 * Rust's `detect_removed_files` filter (#1070) skips.
 *
 * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
 * can use the same scan for both gating and the actual backfill — avoiding
 * a redundant fs walk when the orchestrator's signals already triggered.
 *
 * Side effects: when `ctx.nativeFirstProxy` is set, the native DB is closed
 * and `ctx.db` is replaced with a connection from `openDb` before any write.
 *
 * The WASM parse trees created for the backfill are released in a `finally`
 * block, so they are freed even when one of the DB writes throws.
 *
 * @param ctx Pipeline context; `db`, `dbPath`, `rootDir` and `nativeFirstProxy`
 *   are read, and `db` / `nativeFirstProxy` may be reassigned.
 * @param gap Missing (`missingRel` / `missingAbs`, index-aligned) and stale
 *   (`staleRel`) path lists to repair.
 * @returns Resolves once the purge, node inserts and hash upserts were attempted.
 */
async function backfillNativeDroppedFiles(
  ctx: PipelineContext,
  gap: DroppedLanguageGap,
): Promise<void> {
  const { missingRel, missingAbs, staleRel } = gap;
  if (missingAbs.length === 0 && staleRel.length === 0) return;

  // Now that we know there's work to do, hand off to better-sqlite3 (needed
  // for the INSERT path below).
  if (ctx.nativeFirstProxy) {
    closeNativeDb(ctx, 'pre-parity-backfill');
    ctx.db = openDb(ctx.dbPath);
    ctx.nativeFirstProxy = false;
  }

  const db = ctx.db as unknown as BetterSqlite3Database;

  // Purge WASM-only files that were deleted from disk (#1073). Rust's
  // detect_removed_files skips them and the insert path below never visits
  // them, so without this their rows would persist across rebuilds until the
  // next full rebuild reset the DB.
  if (staleRel.length > 0) {
    // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
    // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
    // always bucket 100% into `unsupported-by-native`. Build the extension
    // summary directly to avoid a redundant classification pass.
    const staleByExt = groupByExtension(staleRel);
    info(
      `Detected ${staleRel.length} deleted WASM-only file(s) across ${staleByExt.size} extension(s) the native orchestrator skipped; purging stale rows:${formatDropExtensionSummary(staleByExt)}`,
    );
    purgeFilesData(db, staleRel);
  }

  if (missingAbs.length === 0) return;

  // Classify drops so users see per-extension reasons instead of just a count
  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
  // extractor); `native-extractor-failure` indicates a real native bug since
  // the language IS supported by the addon yet the file was dropped anyway.
  const { byReason, totals } = classifyNativeDrops(missingRel);
  if (totals['unsupported-by-native'] > 0) {
    const buckets = byReason['unsupported-by-native'];
    info(
      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) across ${buckets.size} extension(s) in languages without a Rust extractor; backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
    );
  }
  if (totals['native-extractor-failure'] > 0) {
    const buckets = byReason['native-extractor-failure'];
    warn(
      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) across ${buckets.size} extension(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
    );
  }
  const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);

  try {
    const rows: unknown[][] = [];
    const exportKeys: unknown[][] = [];
    for (const [relPath, symbols] of wasmResults) {
      // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
      rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
      for (const def of symbols.definitions ?? []) {
        // Populate qualified_name/scope the same way the JS fallback does so
        // downstream queries (cross-file references, "go to definition") find
        // these symbols.
        const dotIdx = def.name.lastIndexOf('.');
        const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
        rows.push([
          def.name,
          def.kind,
          relPath,
          def.line,
          def.endLine ?? null,
          null,
          def.name,
          scope,
          def.visibility ?? null,
        ]);
      }
      // Exports: insert the row (INSERT OR IGNORE — a matching definition row
      // is a no-op) and queue a key for the second-pass exported=1 update, so
      // queries filtering on exported=1 find backfilled symbols (#970).
      for (const exp of symbols.exports ?? []) {
        rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
        exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
      }
    }
    batchInsertNodes(db, rows);

    // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
    if (exportKeys.length > 0) {
      const EXPORT_CHUNK = 500;
      // One prepared statement per distinct chunk size (the full-size chunk
      // plus, at most, one shorter trailing chunk).
      const exportStmtCache = new Map<number, SqliteStatement>();
      for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
        const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
        const chunkSize = end - i;
        let updateStmt = exportStmtCache.get(chunkSize);
        if (!updateStmt) {
          const conditions = Array.from(
            { length: chunkSize },
            () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
          ).join(' OR ');
          updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
          exportStmtCache.set(chunkSize, updateStmt);
        }
        const vals: unknown[] = [];
        for (let j = i; j < end; j++) {
          const k = exportKeys[j] as unknown[];
          vals.push(k[0], k[1], k[2], k[3]);
        }
        updateStmt.run(...vals);
      }
    }

    // Persist file_hashes rows for every backfilled file. The Rust orchestrator
    // only hashes files it parsed itself, so without this step files in
    // optional-language extensions (e.g. .clj when no Rust extractor exists)
    // would be missing from `file_hashes` — permanently breaking the JS-side
    // fast-skip pre-flight (#1054), which rejects on `collected file missing
    // from file_hashes` and forces every no-op rebuild back through the full
    // ~2s native pipeline (#1068).
    //
    // Iterates `missingRel` (every collected file the Rust orchestrator
    // dropped), not `wasmResults`, so files that produced zero symbols still
    // get a row.
    try {
      const upsertHash = db.prepare(
        'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
      );
      const writeHashes = db.transaction(() => {
        for (let i = 0; i < missingRel.length; i++) {
          const relPath = missingRel[i];
          const absPath = missingAbs[i];
          if (!relPath || !absPath) continue;
          let code: string | null;
          try {
            code = readFileSafe(absPath);
          } catch (e) {
            debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
            continue;
          }
          if (code === null) continue;
          const stat = fileStat(absPath);
          const mtime = stat ? stat.mtime : 0;
          const size = stat ? stat.size : 0;
          upsertHash.run(relPath, fileHash(code), mtime, size);
        }
      });
      writeHashes();
    } catch (e) {
      debug(
        `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
      );
    }
  } finally {
    // Free WASM parse trees from the inline backfill path (#1058).
    // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
    // backed by WASM linear memory) on every result, but these symbols are
    // consumed locally for DB row construction and never added to
    // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
    // sees them. Without this, trees leak WASM memory until process exit —
    // bounded per run but cumulative across in-process integration tests.
    // Mirrors the cleanup discipline established for #931.
    //
    // Runs in `finally` so a throwing insert/update above cannot skip it.
    for (const [, symbols] of wasmResults) {
      const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
      if (tree && typeof tree.delete === 'function') {
        try {
          tree.delete();
        } catch {
          /* ignore cleanup errors */
        }
      }
      (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
      (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
    }
  }
}
742
+
743
+ /**
744
+ * Try the native build orchestrator.
745
+ *
746
+ * Returns:
747
+ * - `BuildResult` on success (caller should return it directly).
748
+ * - `'early-exit'` when the orchestrator detected no changes (caller should return undefined).
749
+ * - `undefined` when native is unavailable or skipped (caller should fall through to the JS pipeline).
750
+ *
751
+ * Encapsulates the orchestrator-selection strategy: open `NativeDatabase`,
752
+ * invoke `nativeDb.buildGraph()` (the Rust pipeline), and run post-native
753
+ * structure + analysis fallbacks. Lives in its own file to keep the Rust
754
+ * orchestrator entry point separated from the JS-side `buildGraph()` driver
755
+ * in `pipeline.ts`.
+ *
+ * Side effects visible in this function: `ctx.db`, `ctx.nativeDb` and
+ * `ctx.nativeFirstProxy` are reassigned during NativeDatabase setup and the
+ * fallback handoffs, `build_meta` is written via `setBuildMeta` on the
+ * non-early-exit path, and `closeDbPair` is called before the `'early-exit'`
+ * return and before the final success return.
+ *
+ * @param ctx Pipeline context for the current build.
+ * @returns The formatted timing result, `'early-exit'`, or `undefined`, as described above.
756
+ */
757
+ export async function tryNativeOrchestrator(
758
+ ctx: PipelineContext,
759
+ ): Promise<BuildResult | undefined | 'early-exit'> {
760
+ const skipReason = shouldSkipNativeOrchestrator(ctx);
761
+ if (skipReason) {
762
+ debug(`Skipping native orchestrator: ${skipReason}`);
763
+ return undefined;
764
+ }
765
+
766
+ // Open NativeDatabase on demand — deferred from setupPipeline to skip the
767
+ // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
768
+ // first to avoid dual-connection WAL corruption.
769
+ if (!ctx.nativeDb && ctx.nativeAvailable) {
770
+ const native = loadNative();
771
+ if (native?.NativeDatabase) {
772
+ try {
773
+ // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
774
+ // Uses raw close() instead of closeDb() intentionally — the advisory lock
775
+ // is kept and transferred to the NativeDbProxy below, not released here.
776
+ ctx.db.close();
777
+ acquireAdvisoryLock(ctx.dbPath);
778
+ ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
779
+ ctx.nativeDb.initSchema();
780
+ // Replace ctx.db with a NativeDbProxy so post-native JS fallback
781
+ // (structure, analysis) can use it without reopening better-sqlite3.
782
+ const proxy = new NativeDbProxy(ctx.nativeDb);
783
+ proxy.__lockPath = `${ctx.dbPath}.lock`;
784
+ ctx.db = proxy as unknown as typeof ctx.db;
785
+ ctx.nativeFirstProxy = true;
786
+ } catch (err) {
787
+ warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
788
+ try {
789
+ ctx.nativeDb?.close();
790
+ } catch (e) {
791
+ debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
792
+ }
793
+ ctx.nativeDb = undefined;
794
+ ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
795
+ releaseAdvisoryLock(`${ctx.dbPath}.lock`);
796
+ // Reopen better-sqlite3 for JS pipeline fallback
797
+ ctx.db = openDb(ctx.dbPath);
798
+ }
799
+ }
800
+ }
801
+
802
+ if (!ctx.nativeDb?.buildGraph) return undefined; // no native DB (or no buildGraph on it): caller falls through to the JS pipeline
803
+
804
+ const resultJson = ctx.nativeDb.buildGraph(
805
+ ctx.rootDir,
806
+ JSON.stringify(ctx.config),
807
+ JSON.stringify(ctx.aliases),
808
+ JSON.stringify(ctx.opts),
809
+ );
810
+ const result = JSON.parse(resultJson) as NativeOrchestratorResult;
811
+
812
+ if (result.earlyExit) {
813
+ info('No changes detected');
814
+ // Even on no-op rebuilds, dropped-language files added since the last
815
+ // full build are still missing from `nodes`/`file_hashes` (#1083), and
816
+ // WASM-only files deleted from disk leave stale rows behind (#1073).
817
+ // The orchestrator's file_collector skipped them, so its earlyExit
818
+ // doesn't imply DB consistency. Run the gap repair before returning.
819
+ const gap = detectDroppedLanguageGap(ctx);
820
+ if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
821
+ await backfillNativeDroppedFiles(ctx, gap);
822
+ }
823
+ closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
824
+ return 'early-exit';
825
+ }
826
+
827
+ // Log incremental status to match JS pipeline output
828
+ const changed = result.changedCount ?? 0;
829
+ const removed = result.removedCount ?? 0;
830
+ if (!result.isFullBuild && (changed > 0 || removed > 0)) {
831
+ info(`Incremental: ${changed} changed, ${removed} removed`);
832
+ }
833
+
834
+ const p = result.phases;
835
+
836
+ // Sync build_meta so JS-side version/engine checks work on next build.
837
+ // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
838
+ // platform package.json version (ctx.engineVersion). The Rust side's
839
+ // check_version_mismatch compares against CARGO_PKG_VERSION; writing
840
+ // the package.json value would create a permanent mismatch whenever
841
+ // the binary and platform package.json diverge — e.g., CI hot-swap
842
+ // via ci-install-native.mjs (#1066) — forcing every subsequent build
843
+ // to be a full rebuild.
844
+ //
845
+ // When the native addon doesn't expose engineVersion() (older addon),
846
+ // fall back to CODEGRAPH_VERSION — same fallback used by both
847
+ // checkEngineSchemaMismatch (read path) and persistBuildMetadata
848
+ // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
849
+ // here would re-introduce the asymmetry this PR fixes for that case.
850
+ const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
851
+ setBuildMeta(ctx.db, {
852
+ engine: ctx.engineName,
853
+ engine_version: nativeVersionForMeta,
854
+ codegraph_version: nativeVersionForMeta,
855
+ schema_version: String(ctx.schemaVersion),
856
+ built_at: new Date().toISOString(),
857
+ });
858
+
859
+ info(
860
+ `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`,
861
+ );
862
+
863
+ // ── Post-native structure + analysis ──────────────────────────────
864
+ let analysisTiming = { // seeded from the native phase timings (coerced to numbers); replaced if the JS analysis fallback runs
865
+ astMs: +(p.astMs ?? 0),
866
+ complexityMs: +(p.complexityMs ?? 0),
867
+ cfgMs: +(p.cfgMs ?? 0),
868
+ dataflowMs: +(p.dataflowMs ?? 0),
869
+ };
870
+ let structurePatchMs = 0;
871
+ // Skip JS structure when the Rust pipeline's small-incremental fast path
872
+ // already handled it. For full builds and large incrementals where Rust
873
+ // skipped structure, we must run the JS fallback.
874
+ const needsStructure = !result.structureHandled;
875
+ // When the Rust addon doesn't include analysis persistence (older addon
876
+ // version or analysis failed), fall back to JS-side analysis.
877
+ const needsAnalysisFallback =
878
+ !result.analysisComplete &&
879
+ (ctx.opts.ast !== false ||
880
+ ctx.opts.complexity !== false ||
881
+ ctx.opts.cfg !== false ||
882
+ ctx.opts.dataflow !== false);
883
+
884
+ if (needsStructure || needsAnalysisFallback) {
885
+ // When analysis fallback is needed, handoff to better-sqlite3 — the
886
+ // analysis engine uses the suspend/resume WAL pattern that requires a
887
+ // real better-sqlite3 connection, not the NativeDbProxy.
888
+ if (needsAnalysisFallback && ctx.nativeFirstProxy) {
889
+ closeNativeDb(ctx, 'pre-analysis-fallback');
890
+ ctx.db = openDb(ctx.dbPath);
891
+ ctx.nativeFirstProxy = false;
892
+ } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
893
+ // DB reopen failed — return partial result (structure patch time is
+ // reported as 0, and `closeDbPair` is not called on this path)
894
+ return formatNativeTimingResult(p, 0, analysisTiming);
895
+ }
896
+
897
+ const fileSymbols = reconstructFileSymbolsFromDb(ctx);
898
+
899
+ if (needsStructure) {
900
+ structurePatchMs = await runPostNativeStructure(
901
+ ctx,
902
+ fileSymbols,
903
+ !!result.isFullBuild,
904
+ result.changedFiles,
905
+ );
906
+ }
907
+
908
+ if (needsAnalysisFallback) {
909
+ analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
910
+ }
911
+ }
912
+
913
+ // Engine parity: the native orchestrator silently drops files whose
914
+ // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
915
+ // stale native binaries). WASM handles those — backfill via WASM so both
916
+ // engines process the same file set (#967).
917
+ //
918
+ // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
919
+ // both gating and the backfill itself. On dirty incrementals/full builds
920
+ // the orchestrator signals trigger backfill, so the walk happens once
921
+ // (instead of redundantly inside backfill). On quiet incrementals we
922
+ // still pay the walk so we can detect brand-new files in dropped-language
923
+ // extensions — a gap that the orchestrator's `detect_removed_files`
924
+ // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
925
+ // because the expensive part (WASM re-parse of the missing set) is
926
+ // gated below.
+ // Note: `backfillNativeDroppedFiles` itself returns immediately when both
+ // `missingAbs` and `staleRel` are empty, so calling it with an empty gap
+ // is a no-op.
927
+ const removedCount = result.removedCount ?? 0;
928
+ const changedCount = result.changedCount ?? 0;
929
+ const gap = detectDroppedLanguageGap(ctx);
930
+ if (
931
+ result.isFullBuild ||
932
+ removedCount > 0 ||
933
+ changedCount > 0 ||
934
+ gap.missingAbs.length > 0 ||
935
+ gap.staleRel.length > 0
936
+ ) {
937
+ await backfillNativeDroppedFiles(ctx, gap);
938
+ }
939
+
940
+ closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
941
+ return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
942
+ }