@optave/codegraph 3.11.0 → 3.11.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/README.md +38 -31
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  6. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  7. package/dist/ast-analysis/visitor-utils.js +83 -49
  8. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  9. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  11. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  12. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  14. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  15. package/dist/cli/commands/embed.d.ts.map +1 -1
  16. package/dist/cli/commands/embed.js +49 -4
  17. package/dist/cli/commands/embed.js.map +1 -1
  18. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  19. package/dist/domain/analysis/dependencies.js +106 -80
  20. package/dist/domain/analysis/dependencies.js.map +1 -1
  21. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  22. package/dist/domain/analysis/fn-impact.js +77 -52
  23. package/dist/domain/analysis/fn-impact.js.map +1 -1
  24. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  25. package/dist/domain/analysis/module-map.js +132 -121
  26. package/dist/domain/analysis/module-map.js.map +1 -1
  27. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  28. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  29. package/dist/domain/graph/builder/helpers.js +47 -33
  30. package/dist/domain/graph/builder/helpers.js.map +1 -1
  31. package/dist/domain/graph/builder/incremental.d.ts +6 -0
  32. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  33. package/dist/domain/graph/builder/incremental.js +142 -76
  34. package/dist/domain/graph/builder/incremental.js.map +1 -1
  35. package/dist/domain/graph/builder/pipeline.d.ts +1 -44
  36. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  37. package/dist/domain/graph/builder/pipeline.js +10 -766
  38. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  39. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  40. package/dist/domain/graph/builder/stages/build-edges.js +133 -96
  41. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  42. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  43. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  44. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  45. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  46. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  47. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  48. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  49. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  50. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  51. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  52. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  54. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  56. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  57. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  58. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  59. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  60. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  61. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  62. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  63. package/dist/domain/graph/cycles.d.ts +6 -4
  64. package/dist/domain/graph/cycles.d.ts.map +1 -1
  65. package/dist/domain/graph/cycles.js +50 -55
  66. package/dist/domain/graph/cycles.js.map +1 -1
  67. package/dist/domain/graph/journal.d.ts.map +1 -1
  68. package/dist/domain/graph/journal.js +89 -70
  69. package/dist/domain/graph/journal.js.map +1 -1
  70. package/dist/domain/graph/watcher.d.ts.map +1 -1
  71. package/dist/domain/graph/watcher.js +5 -2
  72. package/dist/domain/graph/watcher.js.map +1 -1
  73. package/dist/domain/parser.d.ts +12 -23
  74. package/dist/domain/parser.d.ts.map +1 -1
  75. package/dist/domain/parser.js +126 -79
  76. package/dist/domain/parser.js.map +1 -1
  77. package/dist/domain/search/generator.d.ts +3 -1
  78. package/dist/domain/search/generator.d.ts.map +1 -1
  79. package/dist/domain/search/generator.js +68 -45
  80. package/dist/domain/search/generator.js.map +1 -1
  81. package/dist/domain/search/models.d.ts +2 -0
  82. package/dist/domain/search/models.d.ts.map +1 -1
  83. package/dist/domain/search/models.js +37 -3
  84. package/dist/domain/search/models.js.map +1 -1
  85. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  86. package/dist/domain/search/search/hybrid.js +49 -40
  87. package/dist/domain/search/search/hybrid.js.map +1 -1
  88. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  89. package/dist/domain/search/search/semantic.js +69 -49
  90. package/dist/domain/search/search/semantic.js.map +1 -1
  91. package/dist/domain/wasm-worker-entry.js +201 -136
  92. package/dist/domain/wasm-worker-entry.js.map +1 -1
  93. package/dist/extractors/elixir.js +95 -71
  94. package/dist/extractors/elixir.js.map +1 -1
  95. package/dist/extractors/gleam.d.ts.map +1 -1
  96. package/dist/extractors/gleam.js +23 -31
  97. package/dist/extractors/gleam.js.map +1 -1
  98. package/dist/extractors/helpers.d.ts +79 -1
  99. package/dist/extractors/helpers.d.ts.map +1 -1
  100. package/dist/extractors/helpers.js +137 -0
  101. package/dist/extractors/helpers.js.map +1 -1
  102. package/dist/extractors/java.d.ts.map +1 -1
  103. package/dist/extractors/java.js +37 -49
  104. package/dist/extractors/java.js.map +1 -1
  105. package/dist/extractors/javascript.d.ts.map +1 -1
  106. package/dist/extractors/javascript.js +44 -44
  107. package/dist/extractors/javascript.js.map +1 -1
  108. package/dist/extractors/julia.js +27 -34
  109. package/dist/extractors/julia.js.map +1 -1
  110. package/dist/extractors/r.d.ts.map +1 -1
  111. package/dist/extractors/r.js +33 -58
  112. package/dist/extractors/r.js.map +1 -1
  113. package/dist/extractors/solidity.d.ts.map +1 -1
  114. package/dist/extractors/solidity.js +38 -61
  115. package/dist/extractors/solidity.js.map +1 -1
  116. package/dist/features/boundaries.d.ts.map +1 -1
  117. package/dist/features/boundaries.js +49 -39
  118. package/dist/features/boundaries.js.map +1 -1
  119. package/dist/features/cfg.d.ts.map +1 -1
  120. package/dist/features/cfg.js +90 -63
  121. package/dist/features/cfg.js.map +1 -1
  122. package/dist/features/check.d.ts.map +1 -1
  123. package/dist/features/check.js +43 -34
  124. package/dist/features/check.js.map +1 -1
  125. package/dist/features/cochange.d.ts.map +1 -1
  126. package/dist/features/cochange.js +68 -56
  127. package/dist/features/cochange.js.map +1 -1
  128. package/dist/features/complexity.d.ts.map +1 -1
  129. package/dist/features/complexity.js +105 -75
  130. package/dist/features/complexity.js.map +1 -1
  131. package/dist/features/dataflow.d.ts.map +1 -1
  132. package/dist/features/dataflow.js +37 -29
  133. package/dist/features/dataflow.js.map +1 -1
  134. package/dist/features/flow.d.ts.map +1 -1
  135. package/dist/features/flow.js +31 -22
  136. package/dist/features/flow.js.map +1 -1
  137. package/dist/features/graph-enrichment.d.ts.map +1 -1
  138. package/dist/features/graph-enrichment.js +77 -70
  139. package/dist/features/graph-enrichment.js.map +1 -1
  140. package/dist/features/owners.d.ts +17 -26
  141. package/dist/features/owners.d.ts.map +1 -1
  142. package/dist/features/owners.js +120 -109
  143. package/dist/features/owners.js.map +1 -1
  144. package/dist/features/sequence.d.ts.map +1 -1
  145. package/dist/features/sequence.js +59 -54
  146. package/dist/features/sequence.js.map +1 -1
  147. package/dist/features/structure-query.d.ts.map +1 -1
  148. package/dist/features/structure-query.js +60 -60
  149. package/dist/features/structure-query.js.map +1 -1
  150. package/dist/features/structure.js +28 -36
  151. package/dist/features/structure.js.map +1 -1
  152. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  153. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  154. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  155. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  156. package/dist/graph/classifiers/roles.js +63 -59
  157. package/dist/graph/classifiers/roles.js.map +1 -1
  158. package/dist/infrastructure/config.d.ts +1 -1
  159. package/dist/infrastructure/config.d.ts.map +1 -1
  160. package/dist/infrastructure/config.js +1 -1
  161. package/dist/infrastructure/config.js.map +1 -1
  162. package/dist/presentation/cfg.d.ts.map +1 -1
  163. package/dist/presentation/cfg.js +44 -29
  164. package/dist/presentation/cfg.js.map +1 -1
  165. package/dist/presentation/flow.d.ts.map +1 -1
  166. package/dist/presentation/flow.js +58 -38
  167. package/dist/presentation/flow.js.map +1 -1
  168. package/dist/types.d.ts +1 -1
  169. package/dist/types.d.ts.map +1 -1
  170. package/package.json +7 -7
  171. package/src/ast-analysis/engine.ts +145 -61
  172. package/src/ast-analysis/visitor-utils.ts +86 -46
  173. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  174. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  175. package/src/cli/commands/embed.ts +54 -4
  176. package/src/domain/analysis/dependencies.ts +166 -85
  177. package/src/domain/analysis/fn-impact.ts +120 -50
  178. package/src/domain/analysis/module-map.ts +175 -140
  179. package/src/domain/graph/builder/helpers.ts +85 -76
  180. package/src/domain/graph/builder/incremental.ts +217 -90
  181. package/src/domain/graph/builder/pipeline.ts +19 -957
  182. package/src/domain/graph/builder/stages/build-edges.ts +198 -140
  183. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  184. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  185. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  186. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  187. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  188. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  189. package/src/domain/graph/cycles.ts +51 -49
  190. package/src/domain/graph/journal.ts +84 -69
  191. package/src/domain/graph/watcher.ts +8 -2
  192. package/src/domain/parser.ts +143 -66
  193. package/src/domain/search/generator.ts +132 -74
  194. package/src/domain/search/models.ts +39 -3
  195. package/src/domain/search/search/hybrid.ts +53 -42
  196. package/src/domain/search/search/semantic.ts +105 -65
  197. package/src/domain/wasm-worker-entry.ts +235 -152
  198. package/src/extractors/elixir.ts +91 -64
  199. package/src/extractors/gleam.ts +33 -37
  200. package/src/extractors/helpers.ts +205 -1
  201. package/src/extractors/java.ts +42 -45
  202. package/src/extractors/javascript.ts +44 -43
  203. package/src/extractors/julia.ts +28 -35
  204. package/src/extractors/r.ts +38 -56
  205. package/src/extractors/solidity.ts +43 -71
  206. package/src/features/boundaries.ts +64 -46
  207. package/src/features/cfg.ts +145 -74
  208. package/src/features/check.ts +60 -43
  209. package/src/features/cochange.ts +95 -72
  210. package/src/features/complexity.ts +134 -79
  211. package/src/features/dataflow.ts +57 -34
  212. package/src/features/flow.ts +48 -24
  213. package/src/features/graph-enrichment.ts +105 -70
  214. package/src/features/owners.ts +186 -146
  215. package/src/features/sequence.ts +99 -69
  216. package/src/features/structure-query.ts +94 -79
  217. package/src/features/structure.ts +56 -56
  218. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  219. package/src/graph/classifiers/roles.ts +64 -54
  220. package/src/infrastructure/config.ts +1 -1
  221. package/src/presentation/cfg.ts +48 -32
  222. package/src/presentation/flow.ts +100 -52
  223. package/src/types.ts +1 -1
@@ -8,52 +8,24 @@ import fs from 'node:fs';
8
8
  import path from 'node:path';
9
9
  import { performance } from 'node:perf_hooks';
10
10
  import {
11
- acquireAdvisoryLock,
12
11
  closeDb,
13
12
  closeDbPair,
14
13
  getBuildMeta,
15
14
  initSchema,
16
15
  MIGRATIONS,
17
16
  openDb,
18
- purgeFilesData,
19
- releaseAdvisoryLock,
20
- setBuildMeta,
21
17
  } from '../../../db/index.js';
22
18
  import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js';
23
19
  import { debug, info, warn } from '../../../infrastructure/logger.js';
24
20
  import { loadNative } from '../../../infrastructure/native.js';
25
- import { semverCompare } from '../../../infrastructure/update-check.js';
26
- import { normalizePath } from '../../../shared/constants.js';
27
21
  import { toErrorMessage } from '../../../shared/errors.js';
28
22
  import { CODEGRAPH_VERSION } from '../../../shared/version.js';
29
- import type {
30
- BetterSqlite3Database,
31
- BuildGraphOpts,
32
- BuildResult,
33
- Definition,
34
- ExtractorOutput,
35
- SqliteStatement,
36
- } from '../../../types.js';
37
- import {
38
- classifyNativeDrops,
39
- formatDropExtensionSummary,
40
- getActiveEngine,
41
- getInstalledWasmExtensions,
42
- NATIVE_SUPPORTED_EXTENSIONS,
43
- parseFilesWasmForBackfill,
44
- } from '../../parser.js';
23
+ import type { BuildGraphOpts, BuildResult } from '../../../types.js';
24
+ import { getActiveEngine } from '../../parser.js';
45
25
  import { writeJournalHeader } from '../journal.js';
46
26
  import { setWorkspaces } from '../resolve.js';
47
27
  import { PipelineContext } from './context.js';
48
- import {
49
- batchInsertNodes,
50
- collectFiles as collectFilesUtil,
51
- fileHash,
52
- fileStat,
53
- loadPathAliases,
54
- readFileSafe,
55
- } from './helpers.js';
56
- import { NativeDbProxy } from './native-db-proxy.js';
28
+ import { loadPathAliases } from './helpers.js';
57
29
  import { buildEdges } from './stages/build-edges.js';
58
30
  import { buildStructure } from './stages/build-structure.js';
59
31
  // Pipeline stages
@@ -61,10 +33,24 @@ import { collectFiles } from './stages/collect-files.js';
61
33
  import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
62
34
  import { finalize } from './stages/finalize.js';
63
35
  import { insertNodes } from './stages/insert-nodes.js';
36
+ import {
37
+ closeNativeDb,
38
+ refreshJsDb,
39
+ reopenNativeDb,
40
+ suspendNativeDb,
41
+ } from './stages/native-db-lifecycle.js';
42
+ import { tryNativeOrchestrator } from './stages/native-orchestrator.js';
64
43
  import { parseFiles } from './stages/parse-files.js';
65
44
  import { resolveImports } from './stages/resolve-imports.js';
66
45
  import { runAnalyses } from './stages/run-analyses.js';
67
46
 
47
+ // Re-export computeWasmOnlyStaleFiles for backward compatibility with tests
48
+ // that import from this module path (#1073 unit tests).
49
+ export {
50
+ computeWasmOnlyStaleFiles,
51
+ type WasmOnlyStaleFilesInput,
52
+ } from './stages/native-orchestrator.js';
53
+
68
54
  // ── Setup helpers ───────────────────────────────────────────────────────
69
55
 
70
56
  function initializeEngine(ctx: PipelineContext): void {
@@ -237,932 +223,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
237
223
  };
238
224
  }
239
225
 
240
- // ── NativeDb lifecycle helpers ──────────────────────────────────────────
241
-
242
- /** Checkpoint WAL through rusqlite and close the native connection. */
243
- function closeNativeDb(ctx: PipelineContext, label: string): void {
244
- if (!ctx.nativeDb) return;
245
- try {
246
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
247
- } catch (e) {
248
- debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`);
249
- }
250
- try {
251
- ctx.nativeDb.close();
252
- } catch (e) {
253
- debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`);
254
- }
255
- ctx.nativeDb = undefined;
256
- }
257
-
258
- /** Try to reopen the native connection for a given pipeline phase. */
259
- function reopenNativeDb(ctx: PipelineContext, label: string): void {
260
- if ((ctx.opts.engine ?? 'auto') === 'wasm') return;
261
- const native = loadNative();
262
- if (!native?.NativeDatabase) return;
263
- try {
264
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
265
- } catch (e) {
266
- debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
267
- ctx.nativeDb = undefined;
268
- }
269
- }
270
-
271
- /** Close nativeDb and clear stale references in engineOpts. */
272
- function suspendNativeDb(ctx: PipelineContext, label: string): void {
273
- closeNativeDb(ctx, label);
274
- if (ctx.engineOpts?.nativeDb) {
275
- ctx.engineOpts.nativeDb = undefined;
276
- }
277
- }
278
-
279
- /**
280
- * After native writes, reopen the JS db connection to get a fresh page cache.
281
- * Rusqlite WAL truncation invalidates better-sqlite3's internal WAL index,
282
- * causing SQLITE_CORRUPT on the next read (#715, #736).
283
- */
284
- function refreshJsDb(ctx: PipelineContext): void {
285
- try {
286
- ctx.db.close();
287
- } catch (e) {
288
- debug(`refreshJsDb close failed: ${toErrorMessage(e)}`);
289
- }
290
- ctx.db = openDb(ctx.dbPath);
291
- }
292
-
293
- // ── Native orchestrator types ──────────────────────────────────────────
294
-
295
- interface NativeOrchestratorResult {
296
- phases: Record<string, number>;
297
- earlyExit?: boolean;
298
- nodeCount?: number;
299
- edgeCount?: number;
300
- fileCount?: number;
301
- changedFiles?: string[];
302
- changedCount?: number;
303
- removedCount?: number;
304
- isFullBuild?: boolean;
305
- /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
306
- structureHandled?: boolean;
307
- /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
308
- analysisComplete?: boolean;
309
- }
310
-
311
- // ── Native orchestrator helpers ───────────────────────────────────────
312
-
313
- /** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */
314
- function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null {
315
- if (ctx.forceFullRebuild) return 'forceFullRebuild';
316
- // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables,
317
- // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on
318
- // < 3.9.1 so v3.9.1+ uses the fast Rust orchestrator path.
319
- const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.9.1') < 0;
320
- if (orchestratorBuggy) return `buggy addon ${ctx.engineVersion}`;
321
- if (ctx.engineName !== 'native') return `engine=${ctx.engineName}`;
322
- return null;
323
- }
324
-
325
- /** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3.
326
- * Returns false if the DB reopen fails (caller should return partial result). */
327
- function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean {
328
- closeNativeDb(ctx, 'post-native-build');
329
- try {
330
- ctx.db.close();
331
- } catch (e) {
332
- debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`);
333
- }
334
- try {
335
- ctx.db = openDb(ctx.dbPath);
336
- return true;
337
- } catch (reopenErr) {
338
- warn(`Failed to reopen DB after native build: ${(reopenErr as Error).message}`);
339
- return false;
340
- }
341
- }
342
-
343
- /**
344
- * Reconstruct fileSymbols from the DB after a native orchestrator build.
345
- * When `scopeFiles` is provided, only loads those files (for analysis-only).
346
- * When omitted, loads all files (needed for structure rebuilds).
347
- */
348
- function reconstructFileSymbolsFromDb(
349
- ctx: PipelineContext,
350
- scopeFiles?: string[],
351
- ): Map<string, ExtractorOutput> {
352
- let query =
353
- 'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
354
- const params: string[] = [];
355
- if (scopeFiles && scopeFiles.length > 0) {
356
- const placeholders = scopeFiles.map(() => '?').join(',');
357
- query += ` AND file IN (${placeholders})`;
358
- params.push(...scopeFiles);
359
- }
360
- query += ' ORDER BY file, line';
361
-
362
- const rows = ctx.db.prepare(query).all(...params) as {
363
- file: string;
364
- name: string;
365
- kind: string;
366
- line: number;
367
- endLine: number | null;
368
- }[];
369
-
370
- const fileSymbols = new Map<string, ExtractorOutput>();
371
- for (const row of rows) {
372
- let entry = fileSymbols.get(row.file);
373
- if (!entry) {
374
- entry = {
375
- definitions: [],
376
- calls: [],
377
- imports: [],
378
- classes: [],
379
- exports: [],
380
- typeMap: new Map(),
381
- };
382
- fileSymbols.set(row.file, entry);
383
- }
384
- entry.definitions.push({
385
- name: row.name,
386
- kind: row.kind as Definition['kind'],
387
- line: row.line,
388
- endLine: row.endLine ?? undefined,
389
- });
390
- }
391
-
392
- // Populate import/export counts from DB edges so buildStructure
393
- // computes correct import_count/export_count in node_metrics.
394
- // The extractor arrays aren't persisted to the DB, so we derive
395
- // counts from edge data instead (#804).
396
- const importCountRows = ctx.db
397
- .prepare(
398
- `SELECT n.file, COUNT(*) AS cnt
399
- FROM edges e JOIN nodes n ON e.source_id = n.id
400
- WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
401
- AND n.file IS NOT NULL
402
- GROUP BY n.file`,
403
- )
404
- .all() as { file: string; cnt: number }[];
405
- for (const row of importCountRows) {
406
- const entry = fileSymbols.get(row.file);
407
- if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports'];
408
- }
409
-
410
- const exportCountRows = ctx.db
411
- .prepare(
412
- `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
413
- FROM edges e
414
- JOIN nodes n_tgt ON e.target_id = n_tgt.id
415
- JOIN nodes n_src ON e.source_id = n_src.id
416
- WHERE e.kind IN ('imports', 'imports-type', 'reexports')
417
- AND n_tgt.file IS NOT NULL
418
- AND n_src.file != n_tgt.file
419
- GROUP BY n_tgt.file`,
420
- )
421
- .all() as { file: string; cnt: number }[];
422
- for (const row of exportCountRows) {
423
- const entry = fileSymbols.get(row.file);
424
- if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports'];
425
- }
426
-
427
- return fileSymbols;
428
- }
429
-
430
- /**
431
- * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
432
- * For full builds, passes changedFiles=null (full rebuild).
433
- * For incremental builds, passes the changed file list to scope the update.
434
- */
435
- async function runPostNativeStructure(
436
- ctx: PipelineContext,
437
- allFileSymbols: Map<string, ExtractorOutput>,
438
- isFullBuild: boolean,
439
- changedFiles: string[] | undefined,
440
- ): Promise<number> {
441
- const structureStart = performance.now();
442
- try {
443
- const directories = new Set<string>();
444
- for (const relPath of allFileSymbols.keys()) {
445
- const parts = relPath.split('/');
446
- for (let i = 1; i < parts.length; i++) {
447
- directories.add(parts.slice(0, i).join('/'));
448
- }
449
- }
450
-
451
- const lineCountMap = new Map<string, number>();
452
- const cachedLineCounts = ctx.db
453
- .prepare(
454
- `SELECT n.name AS file, m.line_count
455
- FROM node_metrics m JOIN nodes n ON m.node_id = n.id
456
- WHERE n.kind = 'file'`,
457
- )
458
- .all() as Array<{ file: string; line_count: number }>;
459
- for (const row of cachedLineCounts) {
460
- lineCountMap.set(row.file, row.line_count);
461
- }
462
-
463
- // Full builds need null (rebuild everything). Incremental builds pass the
464
- // changed file list so buildStructure only updates those files' metrics
465
- // and contains edges — matching the JS pipeline's medium-incremental path.
466
- const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles;
467
- const { buildStructure: buildStructureFn } = (await import(
468
- '../../../features/structure.js'
469
- )) as {
470
- buildStructure: (
471
- db: typeof ctx.db,
472
- fileSymbols: Map<string, ExtractorOutput>,
473
- rootDir: string,
474
- lineCountMap: Map<string, number>,
475
- directories: Set<string>,
476
- changedFiles: string[] | null,
477
- ) => void;
478
- };
479
- buildStructureFn(
480
- ctx.db,
481
- allFileSymbols,
482
- ctx.rootDir,
483
- lineCountMap,
484
- directories,
485
- changedFilePaths,
486
- );
487
- debug(
488
- `Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`,
489
- );
490
- } catch (err) {
491
- warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
492
- }
493
- return performance.now() - structureStart;
494
- }
495
-
496
- /**
497
- * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator.
498
- * Used when the Rust addon doesn't include analysis persistence (older addon
499
- * version) or when analysis failed on the Rust side.
500
- */
501
- async function runPostNativeAnalysis(
502
- ctx: PipelineContext,
503
- allFileSymbols: Map<string, ExtractorOutput>,
504
- changedFiles: string[] | undefined,
505
- ): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> {
506
- const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
507
-
508
- // Scope analysis fileSymbols to changed files only
509
- let analysisFileSymbols: Map<string, ExtractorOutput>;
510
- if (changedFiles && changedFiles.length > 0) {
511
- analysisFileSymbols = new Map();
512
- for (const f of changedFiles) {
513
- const entry = allFileSymbols.get(f);
514
- if (entry) analysisFileSymbols.set(f, entry);
515
- }
516
- } else {
517
- analysisFileSymbols = allFileSymbols;
518
- }
519
-
520
- // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
521
- const native = loadNative();
522
- if (native?.NativeDatabase) {
523
- try {
524
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
525
- if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb;
526
- } catch {
527
- ctx.nativeDb = undefined;
528
- if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined;
529
- }
530
- }
531
-
532
- // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
533
- // Previously each feature called wal_checkpoint(TRUNCATE) individually
534
- // (~68ms each × 3-4 features). One FULL checkpoint suffices.
535
- if (ctx.nativeDb && ctx.engineOpts) {
536
- ctx.db.pragma('wal_checkpoint(FULL)');
537
- ctx.engineOpts.suspendJsDb = () => {};
538
- ctx.engineOpts.resumeJsDb = () => {};
539
- }
540
-
541
- try {
542
- const { runAnalyses: runAnalysesFn } = (await import('../../../ast-analysis/engine.js')) as {
543
- runAnalyses: (
544
- db: BetterSqlite3Database,
545
- fileSymbols: Map<string, ExtractorOutput>,
546
- rootDir: string,
547
- opts: Record<string, unknown>,
548
- engineOpts?: Record<string, unknown>,
549
- ) => Promise<{ astMs?: number; complexityMs?: number; cfgMs?: number; dataflowMs?: number }>;
550
- };
551
- const result = await runAnalysesFn(
552
- ctx.db,
553
- analysisFileSymbols,
554
- ctx.rootDir,
555
- ctx.opts as Record<string, unknown>,
556
- ctx.engineOpts as unknown as Record<string, unknown> | undefined,
557
- );
558
- timing.astMs = result.astMs ?? 0;
559
- timing.complexityMs = result.complexityMs ?? 0;
560
- timing.cfgMs = result.cfgMs ?? 0;
561
- timing.dataflowMs = result.dataflowMs ?? 0;
562
- } catch (err) {
563
- warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
564
- }
565
-
566
- // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
567
- // WAL writes so JS and external readers can see them. Runs once after
568
- // all analysis features complete (not per-feature).
569
- if (ctx.nativeDb) {
570
- try {
571
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
572
- } catch {
573
- /* ignore checkpoint errors */
574
- }
575
- try {
576
- ctx.nativeDb.close();
577
- } catch {
578
- /* ignore close errors */
579
- }
580
- ctx.nativeDb = undefined;
581
- if (ctx.engineOpts) {
582
- ctx.engineOpts.nativeDb = undefined;
583
- ctx.engineOpts.suspendJsDb = undefined;
584
- ctx.engineOpts.resumeJsDb = undefined;
585
- }
586
- }
587
-
588
- return timing;
589
- }
590
-
591
- /** Format timing result from native orchestrator phases + JS post-processing. */
592
- function formatNativeTimingResult(
593
- p: Record<string, number>,
594
- structurePatchMs: number,
595
- analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
596
- ): BuildResult {
597
- return {
598
- phases: {
599
- setupMs: +(p.setupMs ?? 0).toFixed(1),
600
- collectMs: +(p.collectMs ?? 0).toFixed(1),
601
- detectMs: +(p.detectMs ?? 0).toFixed(1),
602
- parseMs: +(p.parseMs ?? 0).toFixed(1),
603
- insertMs: +(p.insertMs ?? 0).toFixed(1),
604
- resolveMs: +(p.resolveMs ?? 0).toFixed(1),
605
- edgesMs: +(p.edgesMs ?? 0).toFixed(1),
606
- structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1),
607
- rolesMs: +(p.rolesMs ?? 0).toFixed(1),
608
- astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
609
- complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
610
- cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
611
- dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1),
612
- finalizeMs: +(p.finalizeMs ?? 0).toFixed(1),
613
- },
614
- };
615
- }
616
-
617
/**
 * Try the native build orchestrator.
 *
 * Outcomes, as visible in the body:
 *  - `undefined`    — the orchestrator was skipped or is unavailable; the
 *                     caller falls through to the JS pipeline.
 *  - `'early-exit'` — the native build reported `earlyExit` (no changes); the
 *                     dropped-language gap repair has run if needed and both
 *                     DB handles have been passed to `closeDbPair`.
 *  - otherwise      — the value produced by `formatNativeTimingResult`.
 *
 * Side effects: may reassign `ctx.db`, `ctx.nativeDb` and
 * `ctx.nativeFirstProxy` while swapping between better-sqlite3 and the native
 * connection, and writes build metadata via `setBuildMeta`.
 *
 * Exceptions from `buildGraph` / `JSON.parse` are not caught here.
 */
async function tryNativeOrchestrator(
  ctx: PipelineContext,
): Promise<BuildResult | undefined | 'early-exit'> {
  const skipReason = shouldSkipNativeOrchestrator(ctx);
  if (skipReason) {
    debug(`Skipping native orchestrator: ${skipReason}`);
    return undefined;
  }

  // Open NativeDatabase on demand — deferred from setupPipeline to skip the
  // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
  // first to avoid dual-connection WAL corruption.
  if (!ctx.nativeDb && ctx.nativeAvailable) {
    const native = loadNative();
    if (native?.NativeDatabase) {
      try {
        // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
        // Uses raw close() instead of closeDb() intentionally — the advisory lock
        // is kept and transferred to the NativeDbProxy below, not released here.
        ctx.db.close();
        acquireAdvisoryLock(ctx.dbPath);
        ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
        ctx.nativeDb.initSchema();
        // Replace ctx.db with a NativeDbProxy so post-native JS fallback
        // (structure, analysis) can use it without reopening better-sqlite3.
        const proxy = new NativeDbProxy(ctx.nativeDb);
        proxy.__lockPath = `${ctx.dbPath}.lock`;
        ctx.db = proxy as unknown as typeof ctx.db;
        ctx.nativeFirstProxy = true;
      } catch (err) {
        // Setup failed part-way: undo whatever was acquired above and restore
        // a plain better-sqlite3 connection so the JS pipeline can proceed.
        warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
        try {
          ctx.nativeDb?.close();
        } catch (e) {
          debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
        }
        ctx.nativeDb = undefined;
        ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
        releaseAdvisoryLock(`${ctx.dbPath}.lock`);
        // Reopen better-sqlite3 for JS pipeline fallback
        ctx.db = openDb(ctx.dbPath);
      }
    }
  }

  // No native connection, or one without `buildGraph`: let the JS pipeline run.
  if (!ctx.nativeDb?.buildGraph) return undefined;

  // The native side takes its configuration as JSON strings and answers with a
  // JSON string; the parsed shape is trusted (cast, not validated).
  const resultJson = ctx.nativeDb.buildGraph(
    ctx.rootDir,
    JSON.stringify(ctx.config),
    JSON.stringify(ctx.aliases),
    JSON.stringify(ctx.opts),
  );
  const result = JSON.parse(resultJson) as NativeOrchestratorResult;

  if (result.earlyExit) {
    info('No changes detected');
    // Even on no-op rebuilds, dropped-language files added since the last
    // full build are still missing from `nodes`/`file_hashes` (#1083), and
    // WASM-only files deleted from disk leave stale rows behind (#1073).
    // The orchestrator's file_collector skipped them, so its earlyExit
    // doesn't imply DB consistency. Run the gap repair before returning.
    const gap = detectDroppedLanguageGap(ctx);
    if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
      await backfillNativeDroppedFiles(ctx, gap);
    }
    closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
    return 'early-exit';
  }

  // Log incremental status to match JS pipeline output
  const changed = result.changedCount ?? 0;
  const removed = result.removedCount ?? 0;
  if (!result.isFullBuild && (changed > 0 || removed > 0)) {
    info(`Incremental: ${changed} changed, ${removed} removed`);
  }

  // Per-phase timings reported by the native build.
  const p = result.phases;

  // Sync build_meta so JS-side version/engine checks work on next build.
  // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
  // platform package.json version (ctx.engineVersion). The Rust side's
  // check_version_mismatch compares against CARGO_PKG_VERSION; writing
  // the package.json value would create a permanent mismatch whenever
  // the binary and platform package.json diverge — e.g., CI hot-swap
  // via ci-install-native.mjs (#1066) — forcing every subsequent build
  // to be a full rebuild.
  //
  // When the native addon doesn't expose engineVersion() (older addon),
  // fall back to CODEGRAPH_VERSION — same fallback used by both
  // checkEngineSchemaMismatch (read path) and persistBuildMetadata
  // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
  // here would re-introduce the asymmetry this PR fixes for that case.
  const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
  setBuildMeta(ctx.db, {
    engine: ctx.engineName,
    engine_version: nativeVersionForMeta,
    codegraph_version: nativeVersionForMeta,
    schema_version: String(ctx.schemaVersion),
    built_at: new Date().toISOString(),
  });

  info(
    `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`,
  );

  // ── Post-native structure + analysis ──────────────────────────────
  // Start from the native timings; replaced wholesale below if the JS
  // analysis fallback runs.
  let analysisTiming = {
    astMs: +(p.astMs ?? 0),
    complexityMs: +(p.complexityMs ?? 0),
    cfgMs: +(p.cfgMs ?? 0),
    dataflowMs: +(p.dataflowMs ?? 0),
  };
  let structurePatchMs = 0;
  // Skip JS structure when the Rust pipeline's small-incremental fast path
  // already handled it. For full builds and large incrementals where Rust
  // skipped structure, we must run the JS fallback.
  const needsStructure = !result.structureHandled;
  // When the Rust addon doesn't include analysis persistence (older addon
  // version or analysis failed), fall back to JS-side analysis — but only if
  // at least one analysis kind has not been explicitly disabled in `opts`.
  const needsAnalysisFallback =
    !result.analysisComplete &&
    (ctx.opts.ast !== false ||
      ctx.opts.complexity !== false ||
      ctx.opts.cfg !== false ||
      ctx.opts.dataflow !== false);

  if (needsStructure || needsAnalysisFallback) {
    // When analysis fallback is needed, handoff to better-sqlite3 — the
    // analysis engine uses the suspend/resume WAL pattern that requires a
    // real better-sqlite3 connection, not the NativeDbProxy.
    if (needsAnalysisFallback && ctx.nativeFirstProxy) {
      closeNativeDb(ctx, 'pre-analysis-fallback');
      ctx.db = openDb(ctx.dbPath);
      ctx.nativeFirstProxy = false;
    } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
      // DB reopen failed — return partial result
      // NOTE(review): unlike the other exits, this path does not call
      // closeDbPair — confirm handoffWalAfterNativeBuild leaves nothing open
      // when it reports failure.
      return formatNativeTimingResult(p, 0, analysisTiming);
    }

    // Shared input for both JS fallbacks below.
    const fileSymbols = reconstructFileSymbolsFromDb(ctx);

    if (needsStructure) {
      structurePatchMs = await runPostNativeStructure(
        ctx,
        fileSymbols,
        !!result.isFullBuild,
        result.changedFiles,
      );
    }

    if (needsAnalysisFallback) {
      analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
    }
  }

  // Engine parity: the native orchestrator silently drops files whose
  // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
  // stale native binaries). WASM handles those — backfill via WASM so both
  // engines process the same file set (#967).
  //
  // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
  // both gating and the backfill itself. On dirty incrementals/full builds
  // the orchestrator signals trigger backfill, so the walk happens once
  // (instead of redundantly inside backfill). On quiet incrementals we
  // still pay the walk so we can detect brand-new files in dropped-language
  // extensions — a gap that the orchestrator's `detect_removed_files`
  // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
  // because the expensive part (WASM re-parse of the missing set) is
  // gated below.
  const removedCount = result.removedCount ?? 0;
  const changedCount = result.changedCount ?? 0;
  const gap = detectDroppedLanguageGap(ctx);
  if (
    result.isFullBuild ||
    removedCount > 0 ||
    changedCount > 0 ||
    gap.missingAbs.length > 0 ||
    gap.staleRel.length > 0
  ) {
    await backfillNativeDroppedFiles(ctx, gap);
  }

  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
}
804
-
805
/**
 * Working set for the WASM backfill: everything the native orchestrator left
 * out of sync between the filesystem and the DB.
 */
interface DroppedLanguageGap {
  /** Normalized relative paths of files missing from `nodes` or `file_hashes`. */
  missingRel: string[];

  /** Absolute form of each `missingRel` entry; the two arrays share indices. */
  missingAbs: string[];

  /**
   * WASM-only files that still have DB rows but no longer exist on disk (#1073).
   * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
   * backfill has to purge them. Never overlaps with `missingRel`.
   */
  staleRel: string[];
}
818
-
819
/**
 * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
 * is pure and unit-testable independently of `getInstalledWasmExtensions` and
 * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
 */
export interface WasmOnlyStaleFilesInput {
  /** Distinct `file` values from the `nodes` table. */
  existingNodes: ReadonlySet<string>;
  /** Distinct `file` values from the `file_hashes` table. */
  existingHashes: ReadonlySet<string>;
  /** Relative paths currently on disk (from `collectFilesUtil`). */
  expected: ReadonlySet<string>;
  /** Lowercased extensions whose WASM grammar is installed. */
  installedExts: ReadonlySet<string>;
  /** Extensions covered by the Rust addon — Rust owns deletion for these. */
  nativeSupported: ReadonlySet<string>;
}

/**
 * Compute the WASM-only files present in the DB but missing from disk (#1073).
 *
 * Returns stored paths that:
 *   - appear in `existingNodes` or `existingHashes` (in DB),
 *   - are absent from `expected` (not on disk),
 *   - have an extension installed for WASM, AND
 *   - have an extension NOT covered by `nativeSupported` — Rust's
 *     `purge_changed_files` handles deletion for natively-supported extensions
 *     via its own `detect_removed_files`, so the caller must not double-purge.
 *
 * Extensions are lowercased before lookup to match the registry and Rust's
 * `LanguageKind::from_extension` (which normalises case for the languages
 * where both cases are conventional, e.g. R's `.r` / `.R`).
 *
 * DB paths are forced to forward slashes before comparison with `expected`
 * (which is always normalised). The on-disk invariant is that DB rows are
 * written with forward slashes, but a stale row written by older code on
 * Windows could carry back-slashes — normalising here makes the comparison
 * platform-safe and prevents false-positive purges of live rows. We replace
 * `\\` explicitly (rather than calling `normalizePath`, which only touches
 * `path.sep`) so the defence works when running on POSIX against a DB that
 * was migrated from Windows.
 *
 * The result holds the ORIGINAL stored spellings, because the eventual
 * `DELETE ... WHERE file = ?` must match rows byte-for-byte. For the same
 * reason de-duplication is keyed on the stored spelling: if the DB holds both
 * `a\b.x` and `a/b.x` for a deleted file, both are returned so both rows are
 * purged in one pass instead of one of them surviving until the next rebuild.
 *
 * Exported for unit testing.
 *
 * @param input - DB/disk snapshots and extension sets; see {@link WasmOnlyStaleFilesInput}.
 * @returns Stored paths to purge, in first-seen order (`existingNodes`, then `existingHashes`).
 */
export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
  const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
  const stale: string[] = [];
  // Keyed on the raw stored path so every distinct row key reaches the purge.
  const seenRaw = new Set<string>();
  const consider = (rawRel: string): void => {
    if (seenRaw.has(rawRel)) return;
    const rel = rawRel.replace(/\\/g, '/');
    // Still on disk (under its normalised spelling) — a live row, not stale.
    if (expected.has(rel)) return;
    const ext = path.extname(rel).toLowerCase();
    if (nativeSupported.has(ext)) return;
    if (!installedExts.has(ext)) return;
    seenRaw.add(rawRel);
    stale.push(rawRel);
  };
  for (const rel of existingNodes) consider(rel);
  for (const rel of existingHashes) consider(rel);
  return stale;
}
886
-
887
/**
 * Bucket relative paths under their lowercased file extension.
 *
 * The returned map has the bucket shape `formatDropExtensionSummary` consumes,
 * which lets callers print a per-extension summary without a
 * `classifyNativeDrops` pass when the reason is already known (e.g. the
 * stale-purge path, where every path is `unsupported-by-native`).
 *
 * @param relPaths - Paths to group; paths without an extension land under `''`.
 * @returns Extension → paths, with keys and paths in first-seen order.
 */
function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
  const byExt = new Map<string, string[]>();
  for (const relPath of relPaths) {
    const key = path.extname(relPath).toLowerCase();
    const bucket = byExt.get(key);
    if (bucket === undefined) {
      byExt.set(key, [relPath]);
    } else {
      bucket.push(relPath);
    }
  }
  return byExt;
}
907
-
908
/**
 * Find the files the native orchestrator left out of sync with the DB.
 *
 * Collects the on-disk file set and compares it with the `file` values in
 * `nodes` (rows of kind `file`) and `file_hashes`:
 *
 *  - "missing": on disk but absent from EITHER table. Both rows are needed
 *    for the fast-skip pre-flight (#1054), and the tables can diverge (e.g.
 *    legacy DBs where `nodes` was populated but `file_hashes` was not).
 *    Only extensions with an installed WASM grammar count — an extension in
 *    `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
 *    installs) cannot be parsed by either engine, so it is not a native
 *    regression, and leaving it out keeps the warn count in
 *    `backfillNativeDroppedFiles` meaningful.
 *  - "stale": WASM-only files with DB rows but no file on disk (#1073).
 *    Rust's `detect_removed_files` filter (#1070) ignores extensions it does
 *    not support, and the rest of the backfill only inserts, so without this
 *    the rows would linger until the next full rebuild.
 *
 * Only reads: no DB handoff and no parsing. NativeDbProxy supports
 * `.prepare().all()`, so this runs against either a proxy or a real
 * better-sqlite3 connection, and the result doubles as the gate for — and the
 * working set of — the backfill.
 *
 * @param ctx - Pipeline context; reads `rootDir`, `config` and `db`.
 * @returns Missing paths (relative and absolute, index-aligned) plus stale paths.
 */
function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
  const { files } = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
  const expected = new Set(files.map((abs) => normalizePath(path.relative(ctx.rootDir, abs))));

  const nodeRows = ctx.db
    .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
    .all() as Array<{ file: string }>;
  const existingNodes = new Set(nodeRows.map((row) => row.file));

  let existingHashes = new Set<string>();
  try {
    const hashRows = ctx.db
      .prepare('SELECT DISTINCT file FROM file_hashes')
      .all() as Array<{ file: string }>;
    existingHashes = new Set(hashRows.map((row) => row.file));
  } catch (e) {
    // Legacy DBs may lack the file_hashes table. Leaving the set empty marks
    // every file as missing, so the backfill's upsert path writes the rows.
    debug(
      `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
    );
  }

  const installedExts = getInstalledWasmExtensions();
  const missingRel: string[] = [];
  const missingAbs: string[] = [];
  for (const rel of expected) {
    const fullyTracked = existingNodes.has(rel) && existingHashes.has(rel);
    if (fullyTracked) continue;
    if (!installedExts.has(path.extname(rel).toLowerCase())) continue;
    missingRel.push(rel);
    missingAbs.push(path.join(ctx.rootDir, rel));
  }

  return {
    missingRel,
    missingAbs,
    staleRel: computeWasmOnlyStaleFiles({
      existingNodes,
      existingHashes,
      expected,
      installedExts,
      nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
    }),
  };
}
981
-
982
/**
 * Backfill files that the native orchestrator silently dropped during parse.
 * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
 *
 * Also purges stale rows for WASM-only files deleted from disk (#1073), which
 * Rust's `detect_removed_files` filter (#1070) skips.
 *
 * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
 * can use the same scan for both gating and the actual backfill — avoiding
 * a redundant fs walk when the orchestrator's signals already triggered.
 *
 * Steps, in order: hand off from the native proxy to better-sqlite3 (only
 * when there is work), purge stale rows, WASM-parse the missing files, insert
 * node rows, flag exported symbols, upsert `file_hashes`, and release the
 * WASM parse trees. Tree release runs in a `finally`, so a failing DB write
 * cannot leak WASM memory; the error itself still propagates.
 *
 * @param ctx - Pipeline context; `db` / `nativeFirstProxy` are reassigned on handoff.
 * @param gap - Missing and stale file sets to repair; a no-op when both are empty.
 */
async function backfillNativeDroppedFiles(
  ctx: PipelineContext,
  gap: DroppedLanguageGap,
): Promise<void> {
  const { missingRel, missingAbs, staleRel } = gap;
  if (missingAbs.length === 0 && staleRel.length === 0) return;

  // Now that we know there's work to do, hand off to better-sqlite3 (needed
  // for the INSERT path below).
  if (ctx.nativeFirstProxy) {
    closeNativeDb(ctx, 'pre-parity-backfill');
    ctx.db = openDb(ctx.dbPath);
    ctx.nativeFirstProxy = false;
  }

  const db = ctx.db as unknown as BetterSqlite3Database;

  // Purge WASM-only files that were deleted from disk (#1073). Rust's
  // detect_removed_files skips them and the insert path below never visits
  // them, so without this their rows would persist across rebuilds until the
  // next full rebuild reset the DB.
  if (staleRel.length > 0) {
    // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
    // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
    // always bucket 100% into `unsupported-by-native`. Build the extension
    // summary directly to avoid a redundant classification pass.
    const staleByExt = groupByExtension(staleRel);
    info(
      `Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`,
    );
    purgeFilesData(db, staleRel);
  }

  if (missingAbs.length === 0) return;

  // Classify drops so users see per-extension reasons instead of just a count
  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
  // extractor); `native-extractor-failure` indicates a real native bug since
  // the language IS supported by the addon yet the file was dropped anyway.
  const { byReason, totals } = classifyNativeDrops(missingRel);
  if (totals['unsupported-by-native'] > 0) {
    info(
      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`,
    );
  }
  if (totals['native-extractor-failure'] > 0) {
    warn(
      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
    );
  }
  const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);

  try {
    const rows: unknown[][] = [];
    const exportKeys: unknown[][] = [];
    for (const [relPath, symbols] of wasmResults) {
      // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
      rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
      for (const def of symbols.definitions ?? []) {
        // Populate qualified_name/scope the same way the JS fallback does so
        // downstream queries (cross-file references, "go to definition") find
        // these symbols.
        const dotIdx = def.name.lastIndexOf('.');
        const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
        rows.push([
          def.name,
          def.kind,
          relPath,
          def.line,
          def.endLine ?? null,
          null,
          def.name,
          scope,
          def.visibility ?? null,
        ]);
      }
      // Exports: insert the row (INSERT OR IGNORE — a matching definition row
      // is a no-op) and queue a key for the second-pass exported=1 update, so
      // queries filtering on exported=1 find backfilled symbols (#970).
      for (const exp of symbols.exports ?? []) {
        rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
        exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
      }
    }
    batchInsertNodes(db, rows);

    // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
    if (exportKeys.length > 0) {
      const EXPORT_CHUNK = 500;
      // One prepared statement per distinct chunk size (at most two: the full
      // chunk and the final remainder).
      const exportStmtCache = new Map<number, SqliteStatement>();
      for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
        const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
        const chunkSize = end - i;
        let updateStmt = exportStmtCache.get(chunkSize);
        if (!updateStmt) {
          const conditions = Array.from(
            { length: chunkSize },
            () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
          ).join(' OR ');
          updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
          exportStmtCache.set(chunkSize, updateStmt);
        }
        const vals: unknown[] = [];
        for (let j = i; j < end; j++) {
          const k = exportKeys[j] as unknown[];
          vals.push(k[0], k[1], k[2], k[3]);
        }
        updateStmt.run(...vals);
      }
    }

    // Persist file_hashes rows for every backfilled file. The Rust orchestrator
    // only hashes files it parsed itself, so without this step files in
    // optional-language extensions (e.g. .clj when no Rust extractor exists)
    // would be missing from `file_hashes` — permanently breaking the JS-side
    // fast-skip pre-flight (#1054), which rejects on `collected file missing
    // from file_hashes` and forces every no-op rebuild back through the full
    // ~2s native pipeline (#1068).
    //
    // Iterates `missingRel` (every collected file the Rust orchestrator
    // dropped), not `wasmResults`, so files that produced zero symbols still
    // get a row.
    try {
      const upsertHash = db.prepare(
        'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
      );
      const writeHashes = db.transaction(() => {
        for (let i = 0; i < missingRel.length; i++) {
          const relPath = missingRel[i];
          const absPath = missingAbs[i];
          if (!relPath || !absPath) continue;
          let code: string | null;
          try {
            code = readFileSafe(absPath);
          } catch (e) {
            debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
            continue;
          }
          if (code === null) continue;
          const stat = fileStat(absPath);
          const mtime = stat ? stat.mtime : 0;
          const size = stat ? stat.size : 0;
          upsertHash.run(relPath, fileHash(code), mtime, size);
        }
      });
      writeHashes();
    } catch (e) {
      // Best-effort: a missing table only costs the fast-skip optimisation.
      debug(
        `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
      );
    }
  } finally {
    // Free WASM parse trees from the inline backfill path (#1058).
    // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
    // backed by WASM linear memory) on every result, but these symbols are
    // consumed locally for DB row construction and never added to
    // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
    // sees them. Without this, trees leak WASM memory until process exit —
    // bounded per run but cumulative across in-process integration tests.
    // Mirrors the cleanup discipline established for #931. Running it in
    // `finally` covers the failure path as well as the success path.
    for (const [, symbols] of wasmResults) {
      const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
      if (tree && typeof tree.delete === 'function') {
        try {
          tree.delete();
        } catch {
          /* ignore cleanup errors */
        }
      }
      (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
      (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
    }
  }
}
226
+ // Native db lifecycle and orchestrator helpers live in dedicated stage
227
+ // modules — see `./stages/native-db-lifecycle.ts` and `./stages/native-orchestrator.ts`.
1166
228
 
1167
229
  // ── Pipeline stages execution ───────────────────────────────────────────
1168
230