@optave/codegraph 3.10.0 → 3.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312)
  1. package/README.md +40 -33
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/index.js +77 -0
  7. package/dist/ast-analysis/rules/index.js.map +1 -1
  8. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  9. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitor-utils.js +83 -49
  11. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  12. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  14. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  15. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  16. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  17. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  18. package/dist/cli/commands/audit.js +1 -1
  19. package/dist/cli/commands/audit.js.map +1 -1
  20. package/dist/cli/commands/build.d.ts.map +1 -1
  21. package/dist/cli/commands/build.js +2 -0
  22. package/dist/cli/commands/build.js.map +1 -1
  23. package/dist/cli/commands/check.js +1 -1
  24. package/dist/cli/commands/check.js.map +1 -1
  25. package/dist/cli/commands/children.js +1 -1
  26. package/dist/cli/commands/children.js.map +1 -1
  27. package/dist/cli/commands/diff-impact.js +1 -1
  28. package/dist/cli/commands/diff-impact.js.map +1 -1
  29. package/dist/cli/commands/embed.d.ts.map +1 -1
  30. package/dist/cli/commands/embed.js +49 -4
  31. package/dist/cli/commands/embed.js.map +1 -1
  32. package/dist/cli/commands/roles.js +1 -1
  33. package/dist/cli/commands/roles.js.map +1 -1
  34. package/dist/cli/commands/structure.js +1 -1
  35. package/dist/cli/commands/structure.js.map +1 -1
  36. package/dist/cli/shared/options.js +1 -1
  37. package/dist/cli/shared/options.js.map +1 -1
  38. package/dist/db/connection.d.ts.map +1 -1
  39. package/dist/db/connection.js +8 -0
  40. package/dist/db/connection.js.map +1 -1
  41. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  42. package/dist/domain/analysis/dependencies.js +106 -80
  43. package/dist/domain/analysis/dependencies.js.map +1 -1
  44. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  45. package/dist/domain/analysis/fn-impact.js +77 -52
  46. package/dist/domain/analysis/fn-impact.js.map +1 -1
  47. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  48. package/dist/domain/analysis/module-map.js +132 -121
  49. package/dist/domain/analysis/module-map.js.map +1 -1
  50. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  51. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  52. package/dist/domain/graph/builder/helpers.js +47 -33
  53. package/dist/domain/graph/builder/helpers.js.map +1 -1
  54. package/dist/domain/graph/builder/incremental.d.ts +6 -6
  55. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  56. package/dist/domain/graph/builder/incremental.js +148 -99
  57. package/dist/domain/graph/builder/incremental.js.map +1 -1
  58. package/dist/domain/graph/builder/pipeline.d.ts +1 -0
  59. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  60. package/dist/domain/graph/builder/pipeline.js +23 -637
  61. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  62. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  63. package/dist/domain/graph/builder/stages/build-edges.js +141 -98
  64. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  65. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  66. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  67. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  68. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  69. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  70. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  71. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  72. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  73. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  74. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  75. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  76. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  77. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  78. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  79. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  80. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  81. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  82. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  83. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  84. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  85. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  86. package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
  87. package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
  88. package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
  89. package/dist/domain/graph/cycles.d.ts +6 -4
  90. package/dist/domain/graph/cycles.d.ts.map +1 -1
  91. package/dist/domain/graph/cycles.js +50 -55
  92. package/dist/domain/graph/cycles.js.map +1 -1
  93. package/dist/domain/graph/journal.d.ts.map +1 -1
  94. package/dist/domain/graph/journal.js +89 -70
  95. package/dist/domain/graph/journal.js.map +1 -1
  96. package/dist/domain/graph/watcher.d.ts.map +1 -1
  97. package/dist/domain/graph/watcher.js +28 -20
  98. package/dist/domain/graph/watcher.js.map +1 -1
  99. package/dist/domain/parser.d.ts +12 -23
  100. package/dist/domain/parser.d.ts.map +1 -1
  101. package/dist/domain/parser.js +153 -80
  102. package/dist/domain/parser.js.map +1 -1
  103. package/dist/domain/search/generator.d.ts +3 -1
  104. package/dist/domain/search/generator.d.ts.map +1 -1
  105. package/dist/domain/search/generator.js +68 -45
  106. package/dist/domain/search/generator.js.map +1 -1
  107. package/dist/domain/search/models.d.ts +18 -0
  108. package/dist/domain/search/models.d.ts.map +1 -1
  109. package/dist/domain/search/models.js +72 -4
  110. package/dist/domain/search/models.js.map +1 -1
  111. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  112. package/dist/domain/search/search/hybrid.js +49 -40
  113. package/dist/domain/search/search/hybrid.js.map +1 -1
  114. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  115. package/dist/domain/search/search/semantic.js +69 -49
  116. package/dist/domain/search/search/semantic.js.map +1 -1
  117. package/dist/domain/wasm-worker-entry.js +209 -137
  118. package/dist/domain/wasm-worker-entry.js.map +1 -1
  119. package/dist/extractors/c.js +25 -6
  120. package/dist/extractors/c.js.map +1 -1
  121. package/dist/extractors/cpp.js +47 -6
  122. package/dist/extractors/cpp.js.map +1 -1
  123. package/dist/extractors/cuda.js +90 -14
  124. package/dist/extractors/cuda.js.map +1 -1
  125. package/dist/extractors/elixir.js +108 -4
  126. package/dist/extractors/elixir.js.map +1 -1
  127. package/dist/extractors/erlang.js +56 -20
  128. package/dist/extractors/erlang.js.map +1 -1
  129. package/dist/extractors/fsharp.d.ts +7 -0
  130. package/dist/extractors/fsharp.d.ts.map +1 -1
  131. package/dist/extractors/fsharp.js +94 -0
  132. package/dist/extractors/fsharp.js.map +1 -1
  133. package/dist/extractors/gleam.d.ts.map +1 -1
  134. package/dist/extractors/gleam.js +29 -33
  135. package/dist/extractors/gleam.js.map +1 -1
  136. package/dist/extractors/groovy.js +41 -1
  137. package/dist/extractors/groovy.js.map +1 -1
  138. package/dist/extractors/haskell.js +48 -4
  139. package/dist/extractors/haskell.js.map +1 -1
  140. package/dist/extractors/helpers.d.ts +79 -1
  141. package/dist/extractors/helpers.d.ts.map +1 -1
  142. package/dist/extractors/helpers.js +137 -0
  143. package/dist/extractors/helpers.js.map +1 -1
  144. package/dist/extractors/java.d.ts.map +1 -1
  145. package/dist/extractors/java.js +37 -49
  146. package/dist/extractors/java.js.map +1 -1
  147. package/dist/extractors/javascript.d.ts.map +1 -1
  148. package/dist/extractors/javascript.js +44 -44
  149. package/dist/extractors/javascript.js.map +1 -1
  150. package/dist/extractors/julia.js +198 -74
  151. package/dist/extractors/julia.js.map +1 -1
  152. package/dist/extractors/kotlin.js +4 -0
  153. package/dist/extractors/kotlin.js.map +1 -1
  154. package/dist/extractors/objc.js +184 -47
  155. package/dist/extractors/objc.js.map +1 -1
  156. package/dist/extractors/python.js +7 -4
  157. package/dist/extractors/python.js.map +1 -1
  158. package/dist/extractors/r.d.ts.map +1 -1
  159. package/dist/extractors/r.js +103 -87
  160. package/dist/extractors/r.js.map +1 -1
  161. package/dist/extractors/scala.d.ts.map +1 -1
  162. package/dist/extractors/scala.js +18 -32
  163. package/dist/extractors/scala.js.map +1 -1
  164. package/dist/extractors/solidity.d.ts.map +1 -1
  165. package/dist/extractors/solidity.js +55 -69
  166. package/dist/extractors/solidity.js.map +1 -1
  167. package/dist/extractors/verilog.js +80 -15
  168. package/dist/extractors/verilog.js.map +1 -1
  169. package/dist/features/boundaries.d.ts.map +1 -1
  170. package/dist/features/boundaries.js +49 -39
  171. package/dist/features/boundaries.js.map +1 -1
  172. package/dist/features/cfg.d.ts.map +1 -1
  173. package/dist/features/cfg.js +90 -63
  174. package/dist/features/cfg.js.map +1 -1
  175. package/dist/features/check.d.ts.map +1 -1
  176. package/dist/features/check.js +43 -34
  177. package/dist/features/check.js.map +1 -1
  178. package/dist/features/cochange.d.ts.map +1 -1
  179. package/dist/features/cochange.js +68 -56
  180. package/dist/features/cochange.js.map +1 -1
  181. package/dist/features/complexity.d.ts.map +1 -1
  182. package/dist/features/complexity.js +105 -75
  183. package/dist/features/complexity.js.map +1 -1
  184. package/dist/features/dataflow.d.ts.map +1 -1
  185. package/dist/features/dataflow.js +37 -29
  186. package/dist/features/dataflow.js.map +1 -1
  187. package/dist/features/flow.d.ts.map +1 -1
  188. package/dist/features/flow.js +31 -22
  189. package/dist/features/flow.js.map +1 -1
  190. package/dist/features/graph-enrichment.d.ts.map +1 -1
  191. package/dist/features/graph-enrichment.js +77 -70
  192. package/dist/features/graph-enrichment.js.map +1 -1
  193. package/dist/features/owners.d.ts +17 -26
  194. package/dist/features/owners.d.ts.map +1 -1
  195. package/dist/features/owners.js +120 -109
  196. package/dist/features/owners.js.map +1 -1
  197. package/dist/features/sequence.d.ts.map +1 -1
  198. package/dist/features/sequence.js +59 -54
  199. package/dist/features/sequence.js.map +1 -1
  200. package/dist/features/structure-query.d.ts.map +1 -1
  201. package/dist/features/structure-query.js +60 -60
  202. package/dist/features/structure-query.js.map +1 -1
  203. package/dist/features/structure.js +28 -36
  204. package/dist/features/structure.js.map +1 -1
  205. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  206. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  207. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  208. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  209. package/dist/graph/classifiers/roles.js +63 -59
  210. package/dist/graph/classifiers/roles.js.map +1 -1
  211. package/dist/infrastructure/config.d.ts +1 -1
  212. package/dist/infrastructure/config.d.ts.map +1 -1
  213. package/dist/infrastructure/config.js +1 -1
  214. package/dist/infrastructure/config.js.map +1 -1
  215. package/dist/mcp/tool-registry.d.ts.map +1 -1
  216. package/dist/mcp/tool-registry.js +4 -0
  217. package/dist/mcp/tool-registry.js.map +1 -1
  218. package/dist/mcp/tools/semantic-search.d.ts +1 -0
  219. package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
  220. package/dist/mcp/tools/semantic-search.js +1 -0
  221. package/dist/mcp/tools/semantic-search.js.map +1 -1
  222. package/dist/presentation/cfg.d.ts.map +1 -1
  223. package/dist/presentation/cfg.js +44 -29
  224. package/dist/presentation/cfg.js.map +1 -1
  225. package/dist/presentation/flow.d.ts.map +1 -1
  226. package/dist/presentation/flow.js +58 -38
  227. package/dist/presentation/flow.js.map +1 -1
  228. package/dist/types.d.ts +16 -2
  229. package/dist/types.d.ts.map +1 -1
  230. package/grammars/tree-sitter-erlang.wasm +0 -0
  231. package/grammars/tree-sitter-fsharp.wasm +0 -0
  232. package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
  233. package/grammars/tree-sitter-gleam.wasm +0 -0
  234. package/package.json +10 -10
  235. package/src/ast-analysis/engine.ts +145 -61
  236. package/src/ast-analysis/rules/index.ts +87 -0
  237. package/src/ast-analysis/visitor-utils.ts +86 -46
  238. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  239. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  240. package/src/cli/commands/audit.ts +1 -1
  241. package/src/cli/commands/build.ts +2 -0
  242. package/src/cli/commands/check.ts +1 -1
  243. package/src/cli/commands/children.ts +1 -1
  244. package/src/cli/commands/diff-impact.ts +1 -1
  245. package/src/cli/commands/embed.ts +54 -4
  246. package/src/cli/commands/roles.ts +1 -1
  247. package/src/cli/commands/structure.ts +1 -1
  248. package/src/cli/shared/options.ts +1 -1
  249. package/src/db/connection.ts +8 -0
  250. package/src/domain/analysis/dependencies.ts +166 -85
  251. package/src/domain/analysis/fn-impact.ts +120 -50
  252. package/src/domain/analysis/module-map.ts +175 -140
  253. package/src/domain/graph/builder/helpers.ts +85 -76
  254. package/src/domain/graph/builder/incremental.ts +223 -131
  255. package/src/domain/graph/builder/pipeline.ts +32 -785
  256. package/src/domain/graph/builder/stages/build-edges.ts +207 -142
  257. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  258. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  259. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  260. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  261. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  262. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  263. package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
  264. package/src/domain/graph/cycles.ts +51 -49
  265. package/src/domain/graph/journal.ts +84 -69
  266. package/src/domain/graph/watcher.ts +29 -25
  267. package/src/domain/parser.ts +170 -67
  268. package/src/domain/search/generator.ts +132 -74
  269. package/src/domain/search/models.ts +75 -4
  270. package/src/domain/search/search/hybrid.ts +53 -42
  271. package/src/domain/search/search/semantic.ts +105 -65
  272. package/src/domain/wasm-worker-entry.ts +243 -153
  273. package/src/extractors/c.ts +27 -8
  274. package/src/extractors/cpp.ts +50 -8
  275. package/src/extractors/cuda.ts +90 -16
  276. package/src/extractors/elixir.ts +103 -4
  277. package/src/extractors/erlang.ts +63 -20
  278. package/src/extractors/fsharp.ts +104 -0
  279. package/src/extractors/gleam.ts +40 -39
  280. package/src/extractors/groovy.ts +45 -1
  281. package/src/extractors/haskell.ts +45 -4
  282. package/src/extractors/helpers.ts +205 -1
  283. package/src/extractors/java.ts +42 -45
  284. package/src/extractors/javascript.ts +44 -43
  285. package/src/extractors/julia.ts +191 -77
  286. package/src/extractors/kotlin.ts +4 -0
  287. package/src/extractors/objc.ts +171 -47
  288. package/src/extractors/python.ts +5 -3
  289. package/src/extractors/r.ts +104 -82
  290. package/src/extractors/scala.ts +24 -36
  291. package/src/extractors/solidity.ts +59 -78
  292. package/src/extractors/verilog.ts +83 -15
  293. package/src/features/boundaries.ts +64 -46
  294. package/src/features/cfg.ts +145 -74
  295. package/src/features/check.ts +60 -43
  296. package/src/features/cochange.ts +95 -72
  297. package/src/features/complexity.ts +134 -79
  298. package/src/features/dataflow.ts +57 -34
  299. package/src/features/flow.ts +48 -24
  300. package/src/features/graph-enrichment.ts +105 -70
  301. package/src/features/owners.ts +186 -146
  302. package/src/features/sequence.ts +99 -69
  303. package/src/features/structure-query.ts +94 -79
  304. package/src/features/structure.ts +56 -56
  305. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  306. package/src/graph/classifiers/roles.ts +64 -54
  307. package/src/infrastructure/config.ts +1 -1
  308. package/src/mcp/tool-registry.ts +5 -0
  309. package/src/mcp/tools/semantic-search.ts +2 -0
  310. package/src/presentation/cfg.ts +48 -32
  311. package/src/presentation/flow.ts +100 -52
  312. package/src/types.ts +16 -1
@@ -8,50 +8,24 @@ import fs from 'node:fs';
8
8
  import path from 'node:path';
9
9
  import { performance } from 'node:perf_hooks';
10
10
  import {
11
- acquireAdvisoryLock,
12
11
  closeDb,
13
12
  closeDbPair,
14
13
  getBuildMeta,
15
14
  initSchema,
16
15
  MIGRATIONS,
17
16
  openDb,
18
- releaseAdvisoryLock,
19
- setBuildMeta,
20
17
  } from '../../../db/index.js';
21
18
  import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js';
22
19
  import { debug, info, warn } from '../../../infrastructure/logger.js';
23
20
  import { loadNative } from '../../../infrastructure/native.js';
24
- import { semverCompare } from '../../../infrastructure/update-check.js';
25
- import { normalizePath } from '../../../shared/constants.js';
26
21
  import { toErrorMessage } from '../../../shared/errors.js';
27
22
  import { CODEGRAPH_VERSION } from '../../../shared/version.js';
28
- import type {
29
- BetterSqlite3Database,
30
- BuildGraphOpts,
31
- BuildResult,
32
- Definition,
33
- ExtractorOutput,
34
- SqliteStatement,
35
- } from '../../../types.js';
36
- import {
37
- classifyNativeDrops,
38
- formatDropExtensionSummary,
39
- getActiveEngine,
40
- getInstalledWasmExtensions,
41
- parseFilesWasmForBackfill,
42
- } from '../../parser.js';
23
+ import type { BuildGraphOpts, BuildResult } from '../../../types.js';
24
+ import { getActiveEngine } from '../../parser.js';
43
25
  import { writeJournalHeader } from '../journal.js';
44
26
  import { setWorkspaces } from '../resolve.js';
45
27
  import { PipelineContext } from './context.js';
46
- import {
47
- batchInsertNodes,
48
- collectFiles as collectFilesUtil,
49
- fileHash,
50
- fileStat,
51
- loadPathAliases,
52
- readFileSafe,
53
- } from './helpers.js';
54
- import { NativeDbProxy } from './native-db-proxy.js';
28
+ import { loadPathAliases } from './helpers.js';
55
29
  import { buildEdges } from './stages/build-edges.js';
56
30
  import { buildStructure } from './stages/build-structure.js';
57
31
  // Pipeline stages
@@ -59,10 +33,24 @@ import { collectFiles } from './stages/collect-files.js';
59
33
  import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
60
34
  import { finalize } from './stages/finalize.js';
61
35
  import { insertNodes } from './stages/insert-nodes.js';
36
+ import {
37
+ closeNativeDb,
38
+ refreshJsDb,
39
+ reopenNativeDb,
40
+ suspendNativeDb,
41
+ } from './stages/native-db-lifecycle.js';
42
+ import { tryNativeOrchestrator } from './stages/native-orchestrator.js';
62
43
  import { parseFiles } from './stages/parse-files.js';
63
44
  import { resolveImports } from './stages/resolve-imports.js';
64
45
  import { runAnalyses } from './stages/run-analyses.js';
65
46
 
47
+ // Re-export computeWasmOnlyStaleFiles for backward compatibility with tests
48
+ // that import from this module path (#1073 unit tests).
49
+ export {
50
+ computeWasmOnlyStaleFiles,
51
+ type WasmOnlyStaleFilesInput,
52
+ } from './stages/native-orchestrator.js';
53
+
66
54
  // ── Setup helpers ───────────────────────────────────────────────────────
67
55
 
68
56
  function initializeEngine(ctx: PipelineContext): void {
@@ -165,7 +153,9 @@ function loadAliases(ctx: PipelineContext): void {
165
153
 
166
154
  function setupPipeline(ctx: PipelineContext): void {
167
155
  ctx.rootDir = path.resolve(ctx.rootDir);
168
- ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db');
156
+ ctx.dbPath = ctx.opts.dbPath
157
+ ? path.resolve(ctx.opts.dbPath)
158
+ : path.join(ctx.rootDir, '.codegraph', 'graph.db');
169
159
 
170
160
  // Detect whether native engine is available.
171
161
  const enginePref = ctx.opts.engine || 'auto';
@@ -183,6 +173,16 @@ function setupPipeline(ctx: PipelineContext): void {
183
173
  initSchema(ctx.db);
184
174
 
185
175
  ctx.config = loadConfig(ctx.rootDir);
176
+ // Merge caller-supplied excludes on top of the file-config excludes so
177
+ // programmatic callers (e.g. benchmark scripts) can extend exclusion
178
+ // without mutating .codegraphrc.json. Native orchestrator picks this up
179
+ // automatically — it reads exclude off the serialized ctx.config below.
180
+ if (ctx.opts.exclude?.length) {
181
+ ctx.config = {
182
+ ...ctx.config,
183
+ exclude: [...(ctx.config.exclude ?? []), ...ctx.opts.exclude],
184
+ };
185
+ }
186
186
  ctx.incremental =
187
187
  ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false;
188
188
 
@@ -223,761 +223,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
223
223
  };
224
224
  }
225
225
 
226
- // ── NativeDb lifecycle helpers ──────────────────────────────────────────
227
-
228
- /** Checkpoint WAL through rusqlite and close the native connection. */
229
- function closeNativeDb(ctx: PipelineContext, label: string): void {
230
- if (!ctx.nativeDb) return;
231
- try {
232
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
233
- } catch (e) {
234
- debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`);
235
- }
236
- try {
237
- ctx.nativeDb.close();
238
- } catch (e) {
239
- debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`);
240
- }
241
- ctx.nativeDb = undefined;
242
- }
243
-
244
- /** Try to reopen the native connection for a given pipeline phase. */
245
- function reopenNativeDb(ctx: PipelineContext, label: string): void {
246
- if ((ctx.opts.engine ?? 'auto') === 'wasm') return;
247
- const native = loadNative();
248
- if (!native?.NativeDatabase) return;
249
- try {
250
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
251
- } catch (e) {
252
- debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
253
- ctx.nativeDb = undefined;
254
- }
255
- }
256
-
257
- /** Close nativeDb and clear stale references in engineOpts. */
258
- function suspendNativeDb(ctx: PipelineContext, label: string): void {
259
- closeNativeDb(ctx, label);
260
- if (ctx.engineOpts?.nativeDb) {
261
- ctx.engineOpts.nativeDb = undefined;
262
- }
263
- }
264
-
265
- /**
266
- * After native writes, reopen the JS db connection to get a fresh page cache.
267
- * Rusqlite WAL truncation invalidates better-sqlite3's internal WAL index,
268
- * causing SQLITE_CORRUPT on the next read (#715, #736).
269
- */
270
- function refreshJsDb(ctx: PipelineContext): void {
271
- try {
272
- ctx.db.close();
273
- } catch (e) {
274
- debug(`refreshJsDb close failed: ${toErrorMessage(e)}`);
275
- }
276
- ctx.db = openDb(ctx.dbPath);
277
- }
278
-
279
- // ── Native orchestrator types ──────────────────────────────────────────
280
-
281
- interface NativeOrchestratorResult {
282
- phases: Record<string, number>;
283
- earlyExit?: boolean;
284
- nodeCount?: number;
285
- edgeCount?: number;
286
- fileCount?: number;
287
- changedFiles?: string[];
288
- changedCount?: number;
289
- removedCount?: number;
290
- isFullBuild?: boolean;
291
- /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
292
- structureHandled?: boolean;
293
- /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
294
- analysisComplete?: boolean;
295
- }
296
-
297
- // ── Native orchestrator helpers ───────────────────────────────────────
298
-
299
- /** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */
300
- function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null {
301
- if (ctx.forceFullRebuild) return 'forceFullRebuild';
302
- // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables,
303
- // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on
304
- // < 3.9.1 so v3.9.1+ uses the fast Rust orchestrator path.
305
- const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.9.1') < 0;
306
- if (orchestratorBuggy) return `buggy addon ${ctx.engineVersion}`;
307
- if (ctx.engineName !== 'native') return `engine=${ctx.engineName}`;
308
- return null;
309
- }
310
-
311
- /** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3.
312
- * Returns false if the DB reopen fails (caller should return partial result). */
313
- function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean {
314
- closeNativeDb(ctx, 'post-native-build');
315
- try {
316
- ctx.db.close();
317
- } catch (e) {
318
- debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`);
319
- }
320
- try {
321
- ctx.db = openDb(ctx.dbPath);
322
- return true;
323
- } catch (reopenErr) {
324
- warn(`Failed to reopen DB after native build: ${(reopenErr as Error).message}`);
325
- return false;
326
- }
327
- }
328
-
329
- /**
330
- * Reconstruct fileSymbols from the DB after a native orchestrator build.
331
- * When `scopeFiles` is provided, only loads those files (for analysis-only).
332
- * When omitted, loads all files (needed for structure rebuilds).
333
- */
334
- function reconstructFileSymbolsFromDb(
335
- ctx: PipelineContext,
336
- scopeFiles?: string[],
337
- ): Map<string, ExtractorOutput> {
338
- let query =
339
- 'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
340
- const params: string[] = [];
341
- if (scopeFiles && scopeFiles.length > 0) {
342
- const placeholders = scopeFiles.map(() => '?').join(',');
343
- query += ` AND file IN (${placeholders})`;
344
- params.push(...scopeFiles);
345
- }
346
- query += ' ORDER BY file, line';
347
-
348
- const rows = ctx.db.prepare(query).all(...params) as {
349
- file: string;
350
- name: string;
351
- kind: string;
352
- line: number;
353
- endLine: number | null;
354
- }[];
355
-
356
- const fileSymbols = new Map<string, ExtractorOutput>();
357
- for (const row of rows) {
358
- let entry = fileSymbols.get(row.file);
359
- if (!entry) {
360
- entry = {
361
- definitions: [],
362
- calls: [],
363
- imports: [],
364
- classes: [],
365
- exports: [],
366
- typeMap: new Map(),
367
- };
368
- fileSymbols.set(row.file, entry);
369
- }
370
- entry.definitions.push({
371
- name: row.name,
372
- kind: row.kind as Definition['kind'],
373
- line: row.line,
374
- endLine: row.endLine ?? undefined,
375
- });
376
- }
377
-
378
- // Populate import/export counts from DB edges so buildStructure
379
- // computes correct import_count/export_count in node_metrics.
380
- // The extractor arrays aren't persisted to the DB, so we derive
381
- // counts from edge data instead (#804).
382
- const importCountRows = ctx.db
383
- .prepare(
384
- `SELECT n.file, COUNT(*) AS cnt
385
- FROM edges e JOIN nodes n ON e.source_id = n.id
386
- WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
387
- AND n.file IS NOT NULL
388
- GROUP BY n.file`,
389
- )
390
- .all() as { file: string; cnt: number }[];
391
- for (const row of importCountRows) {
392
- const entry = fileSymbols.get(row.file);
393
- if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports'];
394
- }
395
-
396
- const exportCountRows = ctx.db
397
- .prepare(
398
- `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
399
- FROM edges e
400
- JOIN nodes n_tgt ON e.target_id = n_tgt.id
401
- JOIN nodes n_src ON e.source_id = n_src.id
402
- WHERE e.kind IN ('imports', 'imports-type', 'reexports')
403
- AND n_tgt.file IS NOT NULL
404
- AND n_src.file != n_tgt.file
405
- GROUP BY n_tgt.file`,
406
- )
407
- .all() as { file: string; cnt: number }[];
408
- for (const row of exportCountRows) {
409
- const entry = fileSymbols.get(row.file);
410
- if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports'];
411
- }
412
-
413
- return fileSymbols;
414
- }
415
-
416
- /**
417
- * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
418
- * For full builds, passes changedFiles=null (full rebuild).
419
- * For incremental builds, passes the changed file list to scope the update.
420
- */
421
- async function runPostNativeStructure(
422
- ctx: PipelineContext,
423
- allFileSymbols: Map<string, ExtractorOutput>,
424
- isFullBuild: boolean,
425
- changedFiles: string[] | undefined,
426
- ): Promise<number> {
427
- const structureStart = performance.now();
428
- try {
429
- const directories = new Set<string>();
430
- for (const relPath of allFileSymbols.keys()) {
431
- const parts = relPath.split('/');
432
- for (let i = 1; i < parts.length; i++) {
433
- directories.add(parts.slice(0, i).join('/'));
434
- }
435
- }
436
-
437
- const lineCountMap = new Map<string, number>();
438
- const cachedLineCounts = ctx.db
439
- .prepare(
440
- `SELECT n.name AS file, m.line_count
441
- FROM node_metrics m JOIN nodes n ON m.node_id = n.id
442
- WHERE n.kind = 'file'`,
443
- )
444
- .all() as Array<{ file: string; line_count: number }>;
445
- for (const row of cachedLineCounts) {
446
- lineCountMap.set(row.file, row.line_count);
447
- }
448
-
449
- // Full builds need null (rebuild everything). Incremental builds pass the
450
- // changed file list so buildStructure only updates those files' metrics
451
- // and contains edges — matching the JS pipeline's medium-incremental path.
452
- const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles;
453
- const { buildStructure: buildStructureFn } = (await import(
454
- '../../../features/structure.js'
455
- )) as {
456
- buildStructure: (
457
- db: typeof ctx.db,
458
- fileSymbols: Map<string, ExtractorOutput>,
459
- rootDir: string,
460
- lineCountMap: Map<string, number>,
461
- directories: Set<string>,
462
- changedFiles: string[] | null,
463
- ) => void;
464
- };
465
- buildStructureFn(
466
- ctx.db,
467
- allFileSymbols,
468
- ctx.rootDir,
469
- lineCountMap,
470
- directories,
471
- changedFilePaths,
472
- );
473
- debug(
474
- `Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`,
475
- );
476
- } catch (err) {
477
- warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
478
- }
479
- return performance.now() - structureStart;
480
- }
481
-
482
- /**
483
- * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator.
484
- * Used when the Rust addon doesn't include analysis persistence (older addon
485
- * version) or when analysis failed on the Rust side.
486
- */
487
- async function runPostNativeAnalysis(
488
- ctx: PipelineContext,
489
- allFileSymbols: Map<string, ExtractorOutput>,
490
- changedFiles: string[] | undefined,
491
- ): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> {
492
- const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
493
-
494
- // Scope analysis fileSymbols to changed files only
495
- let analysisFileSymbols: Map<string, ExtractorOutput>;
496
- if (changedFiles && changedFiles.length > 0) {
497
- analysisFileSymbols = new Map();
498
- for (const f of changedFiles) {
499
- const entry = allFileSymbols.get(f);
500
- if (entry) analysisFileSymbols.set(f, entry);
501
- }
502
- } else {
503
- analysisFileSymbols = allFileSymbols;
504
- }
505
-
506
- // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
507
- const native = loadNative();
508
- if (native?.NativeDatabase) {
509
- try {
510
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
511
- if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb;
512
- } catch {
513
- ctx.nativeDb = undefined;
514
- if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined;
515
- }
516
- }
517
-
518
- // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
519
- // Previously each feature called wal_checkpoint(TRUNCATE) individually
520
- // (~68ms each × 3-4 features). One FULL checkpoint suffices.
521
- if (ctx.nativeDb && ctx.engineOpts) {
522
- ctx.db.pragma('wal_checkpoint(FULL)');
523
- ctx.engineOpts.suspendJsDb = () => {};
524
- ctx.engineOpts.resumeJsDb = () => {};
525
- }
526
-
527
- try {
528
- const { runAnalyses: runAnalysesFn } = (await import('../../../ast-analysis/engine.js')) as {
529
- runAnalyses: (
530
- db: BetterSqlite3Database,
531
- fileSymbols: Map<string, ExtractorOutput>,
532
- rootDir: string,
533
- opts: Record<string, unknown>,
534
- engineOpts?: Record<string, unknown>,
535
- ) => Promise<{ astMs?: number; complexityMs?: number; cfgMs?: number; dataflowMs?: number }>;
536
- };
537
- const result = await runAnalysesFn(
538
- ctx.db,
539
- analysisFileSymbols,
540
- ctx.rootDir,
541
- ctx.opts as Record<string, unknown>,
542
- ctx.engineOpts as unknown as Record<string, unknown> | undefined,
543
- );
544
- timing.astMs = result.astMs ?? 0;
545
- timing.complexityMs = result.complexityMs ?? 0;
546
- timing.cfgMs = result.cfgMs ?? 0;
547
- timing.dataflowMs = result.dataflowMs ?? 0;
548
- } catch (err) {
549
- warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
550
- }
551
-
552
- // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
553
- // WAL writes so JS and external readers can see them. Runs once after
554
- // all analysis features complete (not per-feature).
555
- if (ctx.nativeDb) {
556
- try {
557
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
558
- } catch {
559
- /* ignore checkpoint errors */
560
- }
561
- try {
562
- ctx.nativeDb.close();
563
- } catch {
564
- /* ignore close errors */
565
- }
566
- ctx.nativeDb = undefined;
567
- if (ctx.engineOpts) {
568
- ctx.engineOpts.nativeDb = undefined;
569
- ctx.engineOpts.suspendJsDb = undefined;
570
- ctx.engineOpts.resumeJsDb = undefined;
571
- }
572
- }
573
-
574
- return timing;
575
- }
576
-
577
- /** Format timing result from native orchestrator phases + JS post-processing. */
578
- function formatNativeTimingResult(
579
- p: Record<string, number>,
580
- structurePatchMs: number,
581
- analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
582
- ): BuildResult {
583
- return {
584
- phases: {
585
- setupMs: +(p.setupMs ?? 0).toFixed(1),
586
- collectMs: +(p.collectMs ?? 0).toFixed(1),
587
- detectMs: +(p.detectMs ?? 0).toFixed(1),
588
- parseMs: +(p.parseMs ?? 0).toFixed(1),
589
- insertMs: +(p.insertMs ?? 0).toFixed(1),
590
- resolveMs: +(p.resolveMs ?? 0).toFixed(1),
591
- edgesMs: +(p.edgesMs ?? 0).toFixed(1),
592
- structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1),
593
- rolesMs: +(p.rolesMs ?? 0).toFixed(1),
594
- astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
595
- complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
596
- cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
597
- dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1),
598
- finalizeMs: +(p.finalizeMs ?? 0).toFixed(1),
599
- },
600
- };
601
- }
602
-
603
- /** Try the native build orchestrator. Returns a BuildResult on success, undefined to fall through to JS pipeline. */
604
- async function tryNativeOrchestrator(
605
- ctx: PipelineContext,
606
- ): Promise<BuildResult | undefined | 'early-exit'> {
607
- const skipReason = shouldSkipNativeOrchestrator(ctx);
608
- if (skipReason) {
609
- debug(`Skipping native orchestrator: ${skipReason}`);
610
- return undefined;
611
- }
612
-
613
- // Open NativeDatabase on demand — deferred from setupPipeline to skip the
614
- // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
615
- // first to avoid dual-connection WAL corruption.
616
- if (!ctx.nativeDb && ctx.nativeAvailable) {
617
- const native = loadNative();
618
- if (native?.NativeDatabase) {
619
- try {
620
- // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
621
- // Uses raw close() instead of closeDb() intentionally — the advisory lock
622
- // is kept and transferred to the NativeDbProxy below, not released here.
623
- ctx.db.close();
624
- acquireAdvisoryLock(ctx.dbPath);
625
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
626
- ctx.nativeDb.initSchema();
627
- // Replace ctx.db with a NativeDbProxy so post-native JS fallback
628
- // (structure, analysis) can use it without reopening better-sqlite3.
629
- const proxy = new NativeDbProxy(ctx.nativeDb);
630
- proxy.__lockPath = `${ctx.dbPath}.lock`;
631
- ctx.db = proxy as unknown as typeof ctx.db;
632
- ctx.nativeFirstProxy = true;
633
- } catch (err) {
634
- warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
635
- try {
636
- ctx.nativeDb?.close();
637
- } catch (e) {
638
- debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
639
- }
640
- ctx.nativeDb = undefined;
641
- ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
642
- releaseAdvisoryLock(`${ctx.dbPath}.lock`);
643
- // Reopen better-sqlite3 for JS pipeline fallback
644
- ctx.db = openDb(ctx.dbPath);
645
- }
646
- }
647
- }
648
-
649
- if (!ctx.nativeDb?.buildGraph) return undefined;
650
-
651
- const resultJson = ctx.nativeDb.buildGraph(
652
- ctx.rootDir,
653
- JSON.stringify(ctx.config),
654
- JSON.stringify(ctx.aliases),
655
- JSON.stringify(ctx.opts),
656
- );
657
- const result = JSON.parse(resultJson) as NativeOrchestratorResult;
658
-
659
- if (result.earlyExit) {
660
- info('No changes detected');
661
- closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
662
- return 'early-exit';
663
- }
664
-
665
- // Log incremental status to match JS pipeline output
666
- const changed = result.changedCount ?? 0;
667
- const removed = result.removedCount ?? 0;
668
- if (!result.isFullBuild && (changed > 0 || removed > 0)) {
669
- info(`Incremental: ${changed} changed, ${removed} removed`);
670
- }
671
-
672
- const p = result.phases;
673
-
674
- // Sync build_meta so JS-side version/engine checks work on next build.
675
- // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
676
- // platform package.json version (ctx.engineVersion). The Rust side's
677
- // check_version_mismatch compares against CARGO_PKG_VERSION; writing
678
- // the package.json value would create a permanent mismatch whenever
679
- // the binary and platform package.json diverge — e.g., CI hot-swap
680
- // via ci-install-native.mjs (#1066) — forcing every subsequent build
681
- // to be a full rebuild.
682
- //
683
- // When the native addon doesn't expose engineVersion() (older addon),
684
- // fall back to CODEGRAPH_VERSION — same fallback used by both
685
- // checkEngineSchemaMismatch (read path) and persistBuildMetadata
686
- // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
687
- // here would re-introduce the asymmetry this PR fixes for that case.
688
- const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
689
- setBuildMeta(ctx.db, {
690
- engine: ctx.engineName,
691
- engine_version: nativeVersionForMeta,
692
- codegraph_version: nativeVersionForMeta,
693
- schema_version: String(ctx.schemaVersion),
694
- built_at: new Date().toISOString(),
695
- });
696
-
697
- info(
698
- `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`,
699
- );
700
-
701
- // ── Post-native structure + analysis ──────────────────────────────
702
- let analysisTiming = {
703
- astMs: +(p.astMs ?? 0),
704
- complexityMs: +(p.complexityMs ?? 0),
705
- cfgMs: +(p.cfgMs ?? 0),
706
- dataflowMs: +(p.dataflowMs ?? 0),
707
- };
708
- let structurePatchMs = 0;
709
- // Skip JS structure when the Rust pipeline's small-incremental fast path
710
- // already handled it. For full builds and large incrementals where Rust
711
- // skipped structure, we must run the JS fallback.
712
- const needsStructure = !result.structureHandled;
713
- // When the Rust addon doesn't include analysis persistence (older addon
714
- // version or analysis failed), fall back to JS-side analysis.
715
- const needsAnalysisFallback =
716
- !result.analysisComplete &&
717
- (ctx.opts.ast !== false ||
718
- ctx.opts.complexity !== false ||
719
- ctx.opts.cfg !== false ||
720
- ctx.opts.dataflow !== false);
721
-
722
- if (needsStructure || needsAnalysisFallback) {
723
- // When analysis fallback is needed, handoff to better-sqlite3 — the
724
- // analysis engine uses the suspend/resume WAL pattern that requires a
725
- // real better-sqlite3 connection, not the NativeDbProxy.
726
- if (needsAnalysisFallback && ctx.nativeFirstProxy) {
727
- closeNativeDb(ctx, 'pre-analysis-fallback');
728
- ctx.db = openDb(ctx.dbPath);
729
- ctx.nativeFirstProxy = false;
730
- } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
731
- // DB reopen failed — return partial result
732
- return formatNativeTimingResult(p, 0, analysisTiming);
733
- }
734
-
735
- const fileSymbols = reconstructFileSymbolsFromDb(ctx);
736
-
737
- if (needsStructure) {
738
- structurePatchMs = await runPostNativeStructure(
739
- ctx,
740
- fileSymbols,
741
- !!result.isFullBuild,
742
- result.changedFiles,
743
- );
744
- }
745
-
746
- if (needsAnalysisFallback) {
747
- analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
748
- }
749
- }
750
-
751
- // Engine parity: the native orchestrator silently drops files whose
752
- // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
753
- // stale native binaries). WASM handles those — backfill via WASM so both
754
- // engines process the same file set (#967).
755
- //
756
- // Runs on full builds and on incrementals when the orchestrator reports
757
- // any file activity (removals or changes). The orchestrator's
758
- // `detect_removed_files` filter (#1070) skips files outside its narrower
759
- // file_collector, so on a current binary a no-op rebuild reports
760
- // `removedCount=0` and `changedCount=0`, making the backfill call pure
761
- // overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
762
- // binaries lacking the filter still report `removedCount>0` and get the
763
- // gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
764
- // narrows (but does not fully close) the gap where a brand-new
765
- // unsupported-extension file is added on an otherwise-quiet incremental
766
- // — see #1091 for the residual gap.
767
- const removedCount = result.removedCount ?? 0;
768
- const changedCount = result.changedCount ?? 0;
769
- if (result.isFullBuild || removedCount > 0 || changedCount > 0) {
770
- await backfillNativeDroppedFiles(ctx);
771
- }
772
-
773
- closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
774
- return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
775
- }
776
-
777
- /**
778
- * Backfill files that the native orchestrator silently dropped during parse.
779
- * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
780
- */
781
- async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
782
- // Compute the missing-file set FIRST, before any expensive DB handoff.
783
- // NativeDbProxy supports .prepare().all(), so the upfront query works
784
- // whether ctx.db is a proxy or a real better-sqlite3 connection. On
785
- // incremental no-op rebuilds nothing is missing, so we want to early-return
786
- // without paying the close-native / reopen-better-sqlite3 cost.
787
- const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
788
- const expected = new Set(
789
- collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
790
- );
791
-
792
- const existingNodeRows = ctx.db
793
- .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
794
- .all() as Array<{ file: string }>;
795
- const existingNodes = new Set(existingNodeRows.map((r) => r.file));
796
-
797
- // Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
798
- // (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
799
- // (e.g. a DB written by old code where `nodes` was populated but
800
- // `file_hashes` was not). Treating "in nodes but not in file_hashes" as
801
- // missing closes the gap so the backfill repairs the file_hashes row even
802
- // when the node row already exists.
803
- let existingHashes = new Set<string>();
804
- try {
805
- const existingHashRows = ctx.db
806
- .prepare('SELECT DISTINCT file FROM file_hashes')
807
- .all() as Array<{ file: string }>;
808
- existingHashes = new Set(existingHashRows.map((r) => r.file));
809
- } catch (e) {
810
- // file_hashes table may not exist on legacy DBs; treat as fully missing
811
- // so the backfill writes rows on the upsert path below.
812
- debug(
813
- `backfillNativeDroppedFiles: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
814
- );
815
- }
816
-
817
- // Restrict backfill to files with an installed WASM grammar. Extensions in
818
- // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
819
- // minimal installs) can't be parsed by either engine, so they're not a
820
- // native regression — excluding them keeps the warn count meaningful.
821
- const installedExts = getInstalledWasmExtensions();
822
- const missingRel: string[] = [];
823
- const missingAbs: string[] = [];
824
- for (const rel of expected) {
825
- // A file is "missing" if it's absent from EITHER nodes OR file_hashes.
826
- // Both must be present for fast-skip to work correctly.
827
- if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
828
- const ext = path.extname(rel).toLowerCase();
829
- if (!installedExts.has(ext)) continue;
830
- missingRel.push(rel);
831
- missingAbs.push(path.join(ctx.rootDir, rel));
832
- }
833
- if (missingAbs.length === 0) return;
834
-
835
- // Now that we know there's work to do, hand off to better-sqlite3 (needed
836
- // for the INSERT path below).
837
- if (ctx.nativeFirstProxy) {
838
- closeNativeDb(ctx, 'pre-parity-backfill');
839
- ctx.db = openDb(ctx.dbPath);
840
- ctx.nativeFirstProxy = false;
841
- }
842
-
843
- // Classify drops so users see per-extension reasons instead of just a count
844
- // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
845
- // extractor); `native-extractor-failure` indicates a real native bug since
846
- // the language IS supported by the addon yet the file was dropped anyway.
847
- const { byReason, totals } = classifyNativeDrops(missingRel);
848
- if (totals['unsupported-by-native'] > 0) {
849
- info(
850
- `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`,
851
- );
852
- }
853
- if (totals['native-extractor-failure'] > 0) {
854
- warn(
855
- `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
856
- );
857
- }
858
- const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
859
-
860
- const rows: unknown[][] = [];
861
- const exportKeys: unknown[][] = [];
862
- for (const [relPath, symbols] of wasmResults) {
863
- // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
864
- rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
865
- for (const def of symbols.definitions ?? []) {
866
- // Populate qualified_name/scope the same way the JS fallback does so
867
- // downstream queries (cross-file references, "go to definition") find
868
- // these symbols.
869
- const dotIdx = def.name.lastIndexOf('.');
870
- const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
871
- rows.push([
872
- def.name,
873
- def.kind,
874
- relPath,
875
- def.line,
876
- def.endLine ?? null,
877
- null,
878
- def.name,
879
- scope,
880
- def.visibility ?? null,
881
- ]);
882
- }
883
- // Exports: insert the row (INSERT OR IGNORE — a matching definition row
884
- // is a no-op) and queue a key for the second-pass exported=1 update, so
885
- // queries filtering on exported=1 find backfilled symbols (#970).
886
- for (const exp of symbols.exports ?? []) {
887
- rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
888
- exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
889
- }
890
- }
891
- const db = ctx.db as unknown as BetterSqlite3Database;
892
- batchInsertNodes(db, rows);
893
-
894
- // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
895
- if (exportKeys.length > 0) {
896
- const EXPORT_CHUNK = 500;
897
- const exportStmtCache = new Map<number, SqliteStatement>();
898
- for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
899
- const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
900
- const chunkSize = end - i;
901
- let updateStmt = exportStmtCache.get(chunkSize);
902
- if (!updateStmt) {
903
- const conditions = Array.from(
904
- { length: chunkSize },
905
- () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
906
- ).join(' OR ');
907
- updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
908
- exportStmtCache.set(chunkSize, updateStmt);
909
- }
910
- const vals: unknown[] = [];
911
- for (let j = i; j < end; j++) {
912
- const k = exportKeys[j] as unknown[];
913
- vals.push(k[0], k[1], k[2], k[3]);
914
- }
915
- updateStmt.run(...vals);
916
- }
917
- }
918
-
919
- // Persist file_hashes rows for every backfilled file. The Rust orchestrator
920
- // only hashes files it parsed itself, so without this step files in
921
- // optional-language extensions (e.g. .clj when no Rust extractor exists)
922
- // would be missing from `file_hashes` — permanently breaking the JS-side
923
- // fast-skip pre-flight (#1054), which rejects on `collected file missing
924
- // from file_hashes` and forces every no-op rebuild back through the full
925
- // ~2s native pipeline (#1068).
926
- //
927
- // Iterates `missingRel` (every collected file the Rust orchestrator
928
- // dropped), not `wasmResults`, so files that produced zero symbols still
929
- // get a row.
930
- try {
931
- const upsertHash = db.prepare(
932
- 'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
933
- );
934
- const writeHashes = db.transaction(() => {
935
- for (let i = 0; i < missingRel.length; i++) {
936
- const relPath = missingRel[i];
937
- const absPath = missingAbs[i];
938
- if (!relPath || !absPath) continue;
939
- let code: string | null;
940
- try {
941
- code = readFileSafe(absPath);
942
- } catch (e) {
943
- debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
944
- continue;
945
- }
946
- if (code === null) continue;
947
- const stat = fileStat(absPath);
948
- const mtime = stat ? stat.mtime : 0;
949
- const size = stat ? stat.size : 0;
950
- upsertHash.run(relPath, fileHash(code), mtime, size);
951
- }
952
- });
953
- writeHashes();
954
- } catch (e) {
955
- debug(
956
- `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
957
- );
958
- }
959
-
960
- // Free WASM parse trees from the inline backfill path (#1058).
961
- // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
962
- // backed by WASM linear memory) on every result, but these symbols are
963
- // consumed locally for DB row construction and never added to
964
- // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
965
- // sees them. Without this, trees leak WASM memory until process exit —
966
- // bounded per run but cumulative across in-process integration tests.
967
- // Mirrors the cleanup discipline established for #931.
968
- for (const [, symbols] of wasmResults) {
969
- const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
970
- if (tree && typeof tree.delete === 'function') {
971
- try {
972
- tree.delete();
973
- } catch {
974
- /* ignore cleanup errors */
975
- }
976
- }
977
- (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
978
- (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
979
- }
980
- }
226
+ // Native db lifecycle and orchestrator helpers live in dedicated stage
227
+ // modules — see `./stages/native-db-lifecycle.ts` and `./stages/native-orchestrator.ts`.
981
228
 
982
229
  // ── Pipeline stages execution ───────────────────────────────────────────
983
230