@optave/codegraph 3.10.0 → 3.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. package/README.md +40 -33
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/index.js +77 -0
  7. package/dist/ast-analysis/rules/index.js.map +1 -1
  8. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  9. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitor-utils.js +83 -49
  11. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  12. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  14. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  15. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  16. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  17. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  18. package/dist/cli/commands/audit.js +1 -1
  19. package/dist/cli/commands/audit.js.map +1 -1
  20. package/dist/cli/commands/build.d.ts.map +1 -1
  21. package/dist/cli/commands/build.js +2 -0
  22. package/dist/cli/commands/build.js.map +1 -1
  23. package/dist/cli/commands/check.js +1 -1
  24. package/dist/cli/commands/check.js.map +1 -1
  25. package/dist/cli/commands/children.js +1 -1
  26. package/dist/cli/commands/children.js.map +1 -1
  27. package/dist/cli/commands/diff-impact.js +1 -1
  28. package/dist/cli/commands/diff-impact.js.map +1 -1
  29. package/dist/cli/commands/embed.d.ts.map +1 -1
  30. package/dist/cli/commands/embed.js +49 -4
  31. package/dist/cli/commands/embed.js.map +1 -1
  32. package/dist/cli/commands/roles.js +1 -1
  33. package/dist/cli/commands/roles.js.map +1 -1
  34. package/dist/cli/commands/structure.js +1 -1
  35. package/dist/cli/commands/structure.js.map +1 -1
  36. package/dist/cli/shared/options.js +1 -1
  37. package/dist/cli/shared/options.js.map +1 -1
  38. package/dist/db/connection.d.ts.map +1 -1
  39. package/dist/db/connection.js +8 -0
  40. package/dist/db/connection.js.map +1 -1
  41. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  42. package/dist/domain/analysis/dependencies.js +106 -80
  43. package/dist/domain/analysis/dependencies.js.map +1 -1
  44. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  45. package/dist/domain/analysis/fn-impact.js +77 -52
  46. package/dist/domain/analysis/fn-impact.js.map +1 -1
  47. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  48. package/dist/domain/analysis/module-map.js +132 -121
  49. package/dist/domain/analysis/module-map.js.map +1 -1
  50. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  51. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  52. package/dist/domain/graph/builder/helpers.js +47 -33
  53. package/dist/domain/graph/builder/helpers.js.map +1 -1
  54. package/dist/domain/graph/builder/incremental.d.ts +6 -6
  55. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  56. package/dist/domain/graph/builder/incremental.js +148 -99
  57. package/dist/domain/graph/builder/incremental.js.map +1 -1
  58. package/dist/domain/graph/builder/pipeline.d.ts +1 -0
  59. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  60. package/dist/domain/graph/builder/pipeline.js +23 -637
  61. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  62. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  63. package/dist/domain/graph/builder/stages/build-edges.js +141 -98
  64. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  65. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  66. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  67. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  68. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  69. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  70. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  71. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  72. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  73. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  74. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  75. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  76. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  77. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  78. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  79. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  80. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  81. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  82. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  83. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  84. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  85. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  86. package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
  87. package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
  88. package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
  89. package/dist/domain/graph/cycles.d.ts +6 -4
  90. package/dist/domain/graph/cycles.d.ts.map +1 -1
  91. package/dist/domain/graph/cycles.js +50 -55
  92. package/dist/domain/graph/cycles.js.map +1 -1
  93. package/dist/domain/graph/journal.d.ts.map +1 -1
  94. package/dist/domain/graph/journal.js +89 -70
  95. package/dist/domain/graph/journal.js.map +1 -1
  96. package/dist/domain/graph/watcher.d.ts.map +1 -1
  97. package/dist/domain/graph/watcher.js +28 -20
  98. package/dist/domain/graph/watcher.js.map +1 -1
  99. package/dist/domain/parser.d.ts +12 -23
  100. package/dist/domain/parser.d.ts.map +1 -1
  101. package/dist/domain/parser.js +153 -80
  102. package/dist/domain/parser.js.map +1 -1
  103. package/dist/domain/search/generator.d.ts +3 -1
  104. package/dist/domain/search/generator.d.ts.map +1 -1
  105. package/dist/domain/search/generator.js +68 -45
  106. package/dist/domain/search/generator.js.map +1 -1
  107. package/dist/domain/search/models.d.ts +18 -0
  108. package/dist/domain/search/models.d.ts.map +1 -1
  109. package/dist/domain/search/models.js +72 -4
  110. package/dist/domain/search/models.js.map +1 -1
  111. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  112. package/dist/domain/search/search/hybrid.js +49 -40
  113. package/dist/domain/search/search/hybrid.js.map +1 -1
  114. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  115. package/dist/domain/search/search/semantic.js +69 -49
  116. package/dist/domain/search/search/semantic.js.map +1 -1
  117. package/dist/domain/wasm-worker-entry.js +209 -137
  118. package/dist/domain/wasm-worker-entry.js.map +1 -1
  119. package/dist/extractors/c.js +25 -6
  120. package/dist/extractors/c.js.map +1 -1
  121. package/dist/extractors/cpp.js +47 -6
  122. package/dist/extractors/cpp.js.map +1 -1
  123. package/dist/extractors/cuda.js +90 -14
  124. package/dist/extractors/cuda.js.map +1 -1
  125. package/dist/extractors/elixir.js +108 -4
  126. package/dist/extractors/elixir.js.map +1 -1
  127. package/dist/extractors/erlang.js +56 -20
  128. package/dist/extractors/erlang.js.map +1 -1
  129. package/dist/extractors/fsharp.d.ts +7 -0
  130. package/dist/extractors/fsharp.d.ts.map +1 -1
  131. package/dist/extractors/fsharp.js +94 -0
  132. package/dist/extractors/fsharp.js.map +1 -1
  133. package/dist/extractors/gleam.d.ts.map +1 -1
  134. package/dist/extractors/gleam.js +29 -33
  135. package/dist/extractors/gleam.js.map +1 -1
  136. package/dist/extractors/groovy.js +41 -1
  137. package/dist/extractors/groovy.js.map +1 -1
  138. package/dist/extractors/haskell.js +48 -4
  139. package/dist/extractors/haskell.js.map +1 -1
  140. package/dist/extractors/helpers.d.ts +79 -1
  141. package/dist/extractors/helpers.d.ts.map +1 -1
  142. package/dist/extractors/helpers.js +137 -0
  143. package/dist/extractors/helpers.js.map +1 -1
  144. package/dist/extractors/java.d.ts.map +1 -1
  145. package/dist/extractors/java.js +37 -49
  146. package/dist/extractors/java.js.map +1 -1
  147. package/dist/extractors/javascript.d.ts.map +1 -1
  148. package/dist/extractors/javascript.js +44 -44
  149. package/dist/extractors/javascript.js.map +1 -1
  150. package/dist/extractors/julia.js +198 -74
  151. package/dist/extractors/julia.js.map +1 -1
  152. package/dist/extractors/kotlin.js +4 -0
  153. package/dist/extractors/kotlin.js.map +1 -1
  154. package/dist/extractors/objc.js +184 -47
  155. package/dist/extractors/objc.js.map +1 -1
  156. package/dist/extractors/python.js +7 -4
  157. package/dist/extractors/python.js.map +1 -1
  158. package/dist/extractors/r.d.ts.map +1 -1
  159. package/dist/extractors/r.js +103 -87
  160. package/dist/extractors/r.js.map +1 -1
  161. package/dist/extractors/scala.d.ts.map +1 -1
  162. package/dist/extractors/scala.js +18 -32
  163. package/dist/extractors/scala.js.map +1 -1
  164. package/dist/extractors/solidity.d.ts.map +1 -1
  165. package/dist/extractors/solidity.js +55 -69
  166. package/dist/extractors/solidity.js.map +1 -1
  167. package/dist/extractors/verilog.js +80 -15
  168. package/dist/extractors/verilog.js.map +1 -1
  169. package/dist/features/boundaries.d.ts.map +1 -1
  170. package/dist/features/boundaries.js +49 -39
  171. package/dist/features/boundaries.js.map +1 -1
  172. package/dist/features/cfg.d.ts.map +1 -1
  173. package/dist/features/cfg.js +90 -63
  174. package/dist/features/cfg.js.map +1 -1
  175. package/dist/features/check.d.ts.map +1 -1
  176. package/dist/features/check.js +43 -34
  177. package/dist/features/check.js.map +1 -1
  178. package/dist/features/cochange.d.ts.map +1 -1
  179. package/dist/features/cochange.js +68 -56
  180. package/dist/features/cochange.js.map +1 -1
  181. package/dist/features/complexity.d.ts.map +1 -1
  182. package/dist/features/complexity.js +105 -75
  183. package/dist/features/complexity.js.map +1 -1
  184. package/dist/features/dataflow.d.ts.map +1 -1
  185. package/dist/features/dataflow.js +37 -29
  186. package/dist/features/dataflow.js.map +1 -1
  187. package/dist/features/flow.d.ts.map +1 -1
  188. package/dist/features/flow.js +31 -22
  189. package/dist/features/flow.js.map +1 -1
  190. package/dist/features/graph-enrichment.d.ts.map +1 -1
  191. package/dist/features/graph-enrichment.js +77 -70
  192. package/dist/features/graph-enrichment.js.map +1 -1
  193. package/dist/features/owners.d.ts +17 -26
  194. package/dist/features/owners.d.ts.map +1 -1
  195. package/dist/features/owners.js +120 -109
  196. package/dist/features/owners.js.map +1 -1
  197. package/dist/features/sequence.d.ts.map +1 -1
  198. package/dist/features/sequence.js +59 -54
  199. package/dist/features/sequence.js.map +1 -1
  200. package/dist/features/structure-query.d.ts.map +1 -1
  201. package/dist/features/structure-query.js +60 -60
  202. package/dist/features/structure-query.js.map +1 -1
  203. package/dist/features/structure.js +28 -36
  204. package/dist/features/structure.js.map +1 -1
  205. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  206. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  207. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  208. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  209. package/dist/graph/classifiers/roles.js +63 -59
  210. package/dist/graph/classifiers/roles.js.map +1 -1
  211. package/dist/infrastructure/config.d.ts +1 -1
  212. package/dist/infrastructure/config.d.ts.map +1 -1
  213. package/dist/infrastructure/config.js +1 -1
  214. package/dist/infrastructure/config.js.map +1 -1
  215. package/dist/mcp/tool-registry.d.ts.map +1 -1
  216. package/dist/mcp/tool-registry.js +4 -0
  217. package/dist/mcp/tool-registry.js.map +1 -1
  218. package/dist/mcp/tools/semantic-search.d.ts +1 -0
  219. package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
  220. package/dist/mcp/tools/semantic-search.js +1 -0
  221. package/dist/mcp/tools/semantic-search.js.map +1 -1
  222. package/dist/presentation/cfg.d.ts.map +1 -1
  223. package/dist/presentation/cfg.js +44 -29
  224. package/dist/presentation/cfg.js.map +1 -1
  225. package/dist/presentation/flow.d.ts.map +1 -1
  226. package/dist/presentation/flow.js +58 -38
  227. package/dist/presentation/flow.js.map +1 -1
  228. package/dist/types.d.ts +16 -2
  229. package/dist/types.d.ts.map +1 -1
  230. package/grammars/tree-sitter-erlang.wasm +0 -0
  231. package/grammars/tree-sitter-fsharp.wasm +0 -0
  232. package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
  233. package/grammars/tree-sitter-gleam.wasm +0 -0
  234. package/package.json +10 -10
  235. package/src/ast-analysis/engine.ts +145 -61
  236. package/src/ast-analysis/rules/index.ts +87 -0
  237. package/src/ast-analysis/visitor-utils.ts +86 -46
  238. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  239. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  240. package/src/cli/commands/audit.ts +1 -1
  241. package/src/cli/commands/build.ts +2 -0
  242. package/src/cli/commands/check.ts +1 -1
  243. package/src/cli/commands/children.ts +1 -1
  244. package/src/cli/commands/diff-impact.ts +1 -1
  245. package/src/cli/commands/embed.ts +54 -4
  246. package/src/cli/commands/roles.ts +1 -1
  247. package/src/cli/commands/structure.ts +1 -1
  248. package/src/cli/shared/options.ts +1 -1
  249. package/src/db/connection.ts +8 -0
  250. package/src/domain/analysis/dependencies.ts +166 -85
  251. package/src/domain/analysis/fn-impact.ts +120 -50
  252. package/src/domain/analysis/module-map.ts +175 -140
  253. package/src/domain/graph/builder/helpers.ts +85 -76
  254. package/src/domain/graph/builder/incremental.ts +223 -131
  255. package/src/domain/graph/builder/pipeline.ts +32 -785
  256. package/src/domain/graph/builder/stages/build-edges.ts +207 -142
  257. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  258. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  259. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  260. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  261. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  262. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  263. package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
  264. package/src/domain/graph/cycles.ts +51 -49
  265. package/src/domain/graph/journal.ts +84 -69
  266. package/src/domain/graph/watcher.ts +29 -25
  267. package/src/domain/parser.ts +170 -67
  268. package/src/domain/search/generator.ts +132 -74
  269. package/src/domain/search/models.ts +75 -4
  270. package/src/domain/search/search/hybrid.ts +53 -42
  271. package/src/domain/search/search/semantic.ts +105 -65
  272. package/src/domain/wasm-worker-entry.ts +243 -153
  273. package/src/extractors/c.ts +27 -8
  274. package/src/extractors/cpp.ts +50 -8
  275. package/src/extractors/cuda.ts +90 -16
  276. package/src/extractors/elixir.ts +103 -4
  277. package/src/extractors/erlang.ts +63 -20
  278. package/src/extractors/fsharp.ts +104 -0
  279. package/src/extractors/gleam.ts +40 -39
  280. package/src/extractors/groovy.ts +45 -1
  281. package/src/extractors/haskell.ts +45 -4
  282. package/src/extractors/helpers.ts +205 -1
  283. package/src/extractors/java.ts +42 -45
  284. package/src/extractors/javascript.ts +44 -43
  285. package/src/extractors/julia.ts +191 -77
  286. package/src/extractors/kotlin.ts +4 -0
  287. package/src/extractors/objc.ts +171 -47
  288. package/src/extractors/python.ts +5 -3
  289. package/src/extractors/r.ts +104 -82
  290. package/src/extractors/scala.ts +24 -36
  291. package/src/extractors/solidity.ts +59 -78
  292. package/src/extractors/verilog.ts +83 -15
  293. package/src/features/boundaries.ts +64 -46
  294. package/src/features/cfg.ts +145 -74
  295. package/src/features/check.ts +60 -43
  296. package/src/features/cochange.ts +95 -72
  297. package/src/features/complexity.ts +134 -79
  298. package/src/features/dataflow.ts +57 -34
  299. package/src/features/flow.ts +48 -24
  300. package/src/features/graph-enrichment.ts +105 -70
  301. package/src/features/owners.ts +186 -146
  302. package/src/features/sequence.ts +99 -69
  303. package/src/features/structure-query.ts +94 -79
  304. package/src/features/structure.ts +56 -56
  305. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  306. package/src/graph/classifiers/roles.ts +64 -54
  307. package/src/infrastructure/config.ts +1 -1
  308. package/src/mcp/tool-registry.ts +5 -0
  309. package/src/mcp/tools/semantic-search.ts +2 -0
  310. package/src/presentation/cfg.ts +48 -32
  311. package/src/presentation/flow.ts +100 -52
  312. package/src/types.ts +16 -1
@@ -7,20 +7,17 @@
7
7
  import fs from 'node:fs';
8
8
  import path from 'node:path';
9
9
  import { performance } from 'node:perf_hooks';
10
- import { acquireAdvisoryLock, closeDb, closeDbPair, getBuildMeta, initSchema, MIGRATIONS, openDb, releaseAdvisoryLock, setBuildMeta, } from '../../../db/index.js';
10
+ import { closeDb, closeDbPair, getBuildMeta, initSchema, MIGRATIONS, openDb, } from '../../../db/index.js';
11
11
  import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js';
12
12
  import { debug, info, warn } from '../../../infrastructure/logger.js';
13
13
  import { loadNative } from '../../../infrastructure/native.js';
14
- import { semverCompare } from '../../../infrastructure/update-check.js';
15
- import { normalizePath } from '../../../shared/constants.js';
16
14
  import { toErrorMessage } from '../../../shared/errors.js';
17
15
  import { CODEGRAPH_VERSION } from '../../../shared/version.js';
18
- import { classifyNativeDrops, formatDropExtensionSummary, getActiveEngine, getInstalledWasmExtensions, parseFilesWasmForBackfill, } from '../../parser.js';
16
+ import { getActiveEngine } from '../../parser.js';
19
17
  import { writeJournalHeader } from '../journal.js';
20
18
  import { setWorkspaces } from '../resolve.js';
21
19
  import { PipelineContext } from './context.js';
22
- import { batchInsertNodes, collectFiles as collectFilesUtil, fileHash, fileStat, loadPathAliases, readFileSafe, } from './helpers.js';
23
- import { NativeDbProxy } from './native-db-proxy.js';
20
+ import { loadPathAliases } from './helpers.js';
24
21
  import { buildEdges } from './stages/build-edges.js';
25
22
  import { buildStructure } from './stages/build-structure.js';
26
23
  // Pipeline stages
@@ -28,9 +25,14 @@ import { collectFiles } from './stages/collect-files.js';
28
25
  import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
29
26
  import { finalize } from './stages/finalize.js';
30
27
  import { insertNodes } from './stages/insert-nodes.js';
28
+ import { closeNativeDb, refreshJsDb, reopenNativeDb, suspendNativeDb, } from './stages/native-db-lifecycle.js';
29
+ import { tryNativeOrchestrator } from './stages/native-orchestrator.js';
31
30
  import { parseFiles } from './stages/parse-files.js';
32
31
  import { resolveImports } from './stages/resolve-imports.js';
33
32
  import { runAnalyses } from './stages/run-analyses.js';
33
+ // Re-export computeWasmOnlyStaleFiles for backward compatibility with tests
34
+ // that import from this module path (#1073 unit tests).
35
+ export { computeWasmOnlyStaleFiles, } from './stages/native-orchestrator.js';
34
36
  // ── Setup helpers ───────────────────────────────────────────────────────
35
37
  function initializeEngine(ctx) {
36
38
  ctx.engineOpts = {
@@ -117,7 +119,9 @@ function loadAliases(ctx) {
117
119
  }
118
120
  function setupPipeline(ctx) {
119
121
  ctx.rootDir = path.resolve(ctx.rootDir);
120
- ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db');
122
+ ctx.dbPath = ctx.opts.dbPath
123
+ ? path.resolve(ctx.opts.dbPath)
124
+ : path.join(ctx.rootDir, '.codegraph', 'graph.db');
121
125
  // Detect whether native engine is available.
122
126
  const enginePref = ctx.opts.engine || 'auto';
123
127
  const native = enginePref !== 'wasm' ? loadNative() : null;
@@ -133,6 +137,16 @@ function setupPipeline(ctx) {
133
137
  ctx.db = openDb(ctx.dbPath);
134
138
  initSchema(ctx.db);
135
139
  ctx.config = loadConfig(ctx.rootDir);
140
+ // Merge caller-supplied excludes on top of the file-config excludes so
141
+ // programmatic callers (e.g. benchmark scripts) can extend exclusion
142
+ // without mutating .codegraphrc.json. Native orchestrator picks this up
143
+ // automatically — it reads exclude off the serialized ctx.config below.
144
+ if (ctx.opts.exclude?.length) {
145
+ ctx.config = {
146
+ ...ctx.config,
147
+ exclude: [...(ctx.config.exclude ?? []), ...ctx.opts.exclude],
148
+ };
149
+ }
136
150
  ctx.incremental =
137
151
  ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false;
138
152
  initializeEngine(ctx);
@@ -168,636 +182,8 @@ function formatTimingResult(ctx) {
168
182
  },
169
183
  };
170
184
  }
171
- // ── NativeDb lifecycle helpers ──────────────────────────────────────────
172
- /** Checkpoint WAL through rusqlite and close the native connection. */
173
- function closeNativeDb(ctx, label) {
174
- if (!ctx.nativeDb)
175
- return;
176
- try {
177
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
178
- }
179
- catch (e) {
180
- debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`);
181
- }
182
- try {
183
- ctx.nativeDb.close();
184
- }
185
- catch (e) {
186
- debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`);
187
- }
188
- ctx.nativeDb = undefined;
189
- }
190
- /** Try to reopen the native connection for a given pipeline phase. */
191
- function reopenNativeDb(ctx, label) {
192
- if ((ctx.opts.engine ?? 'auto') === 'wasm')
193
- return;
194
- const native = loadNative();
195
- if (!native?.NativeDatabase)
196
- return;
197
- try {
198
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
199
- }
200
- catch (e) {
201
- debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
202
- ctx.nativeDb = undefined;
203
- }
204
- }
205
- /** Close nativeDb and clear stale references in engineOpts. */
206
- function suspendNativeDb(ctx, label) {
207
- closeNativeDb(ctx, label);
208
- if (ctx.engineOpts?.nativeDb) {
209
- ctx.engineOpts.nativeDb = undefined;
210
- }
211
- }
212
- /**
213
- * After native writes, reopen the JS db connection to get a fresh page cache.
214
- * Rusqlite WAL truncation invalidates better-sqlite3's internal WAL index,
215
- * causing SQLITE_CORRUPT on the next read (#715, #736).
216
- */
217
- function refreshJsDb(ctx) {
218
- try {
219
- ctx.db.close();
220
- }
221
- catch (e) {
222
- debug(`refreshJsDb close failed: ${toErrorMessage(e)}`);
223
- }
224
- ctx.db = openDb(ctx.dbPath);
225
- }
226
- // ── Native orchestrator helpers ───────────────────────────────────────
227
- /** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */
228
- function shouldSkipNativeOrchestrator(ctx) {
229
- if (ctx.forceFullRebuild)
230
- return 'forceFullRebuild';
231
- // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables,
232
- // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on
233
- // < 3.9.1 so v3.9.1+ uses the fast Rust orchestrator path.
234
- const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.9.1') < 0;
235
- if (orchestratorBuggy)
236
- return `buggy addon ${ctx.engineVersion}`;
237
- if (ctx.engineName !== 'native')
238
- return `engine=${ctx.engineName}`;
239
- return null;
240
- }
241
- /** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3.
242
- * Returns false if the DB reopen fails (caller should return partial result). */
243
- function handoffWalAfterNativeBuild(ctx) {
244
- closeNativeDb(ctx, 'post-native-build');
245
- try {
246
- ctx.db.close();
247
- }
248
- catch (e) {
249
- debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`);
250
- }
251
- try {
252
- ctx.db = openDb(ctx.dbPath);
253
- return true;
254
- }
255
- catch (reopenErr) {
256
- warn(`Failed to reopen DB after native build: ${reopenErr.message}`);
257
- return false;
258
- }
259
- }
260
- /**
261
- * Reconstruct fileSymbols from the DB after a native orchestrator build.
262
- * When `scopeFiles` is provided, only loads those files (for analysis-only).
263
- * When omitted, loads all files (needed for structure rebuilds).
264
- */
265
- function reconstructFileSymbolsFromDb(ctx, scopeFiles) {
266
- let query = 'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
267
- const params = [];
268
- if (scopeFiles && scopeFiles.length > 0) {
269
- const placeholders = scopeFiles.map(() => '?').join(',');
270
- query += ` AND file IN (${placeholders})`;
271
- params.push(...scopeFiles);
272
- }
273
- query += ' ORDER BY file, line';
274
- const rows = ctx.db.prepare(query).all(...params);
275
- const fileSymbols = new Map();
276
- for (const row of rows) {
277
- let entry = fileSymbols.get(row.file);
278
- if (!entry) {
279
- entry = {
280
- definitions: [],
281
- calls: [],
282
- imports: [],
283
- classes: [],
284
- exports: [],
285
- typeMap: new Map(),
286
- };
287
- fileSymbols.set(row.file, entry);
288
- }
289
- entry.definitions.push({
290
- name: row.name,
291
- kind: row.kind,
292
- line: row.line,
293
- endLine: row.endLine ?? undefined,
294
- });
295
- }
296
- // Populate import/export counts from DB edges so buildStructure
297
- // computes correct import_count/export_count in node_metrics.
298
- // The extractor arrays aren't persisted to the DB, so we derive
299
- // counts from edge data instead (#804).
300
- const importCountRows = ctx.db
301
- .prepare(`SELECT n.file, COUNT(*) AS cnt
302
- FROM edges e JOIN nodes n ON e.source_id = n.id
303
- WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
304
- AND n.file IS NOT NULL
305
- GROUP BY n.file`)
306
- .all();
307
- for (const row of importCountRows) {
308
- const entry = fileSymbols.get(row.file);
309
- if (entry)
310
- entry.imports = new Array(row.cnt);
311
- }
312
- const exportCountRows = ctx.db
313
- .prepare(`SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
314
- FROM edges e
315
- JOIN nodes n_tgt ON e.target_id = n_tgt.id
316
- JOIN nodes n_src ON e.source_id = n_src.id
317
- WHERE e.kind IN ('imports', 'imports-type', 'reexports')
318
- AND n_tgt.file IS NOT NULL
319
- AND n_src.file != n_tgt.file
320
- GROUP BY n_tgt.file`)
321
- .all();
322
- for (const row of exportCountRows) {
323
- const entry = fileSymbols.get(row.file);
324
- if (entry)
325
- entry.exports = new Array(row.cnt);
326
- }
327
- return fileSymbols;
328
- }
329
- /**
330
- * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
331
- * For full builds, passes changedFiles=null (full rebuild).
332
- * For incremental builds, passes the changed file list to scope the update.
333
- */
334
- async function runPostNativeStructure(ctx, allFileSymbols, isFullBuild, changedFiles) {
335
- const structureStart = performance.now();
336
- try {
337
- const directories = new Set();
338
- for (const relPath of allFileSymbols.keys()) {
339
- const parts = relPath.split('/');
340
- for (let i = 1; i < parts.length; i++) {
341
- directories.add(parts.slice(0, i).join('/'));
342
- }
343
- }
344
- const lineCountMap = new Map();
345
- const cachedLineCounts = ctx.db
346
- .prepare(`SELECT n.name AS file, m.line_count
347
- FROM node_metrics m JOIN nodes n ON m.node_id = n.id
348
- WHERE n.kind = 'file'`)
349
- .all();
350
- for (const row of cachedLineCounts) {
351
- lineCountMap.set(row.file, row.line_count);
352
- }
353
- // Full builds need null (rebuild everything). Incremental builds pass the
354
- // changed file list so buildStructure only updates those files' metrics
355
- // and contains edges — matching the JS pipeline's medium-incremental path.
356
- const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles;
357
- const { buildStructure: buildStructureFn } = (await import('../../../features/structure.js'));
358
- buildStructureFn(ctx.db, allFileSymbols, ctx.rootDir, lineCountMap, directories, changedFilePaths);
359
- debug(`Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`);
360
- }
361
- catch (err) {
362
- warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
363
- }
364
- return performance.now() - structureStart;
365
- }
366
- /**
367
- * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator.
368
- * Used when the Rust addon doesn't include analysis persistence (older addon
369
- * version) or when analysis failed on the Rust side.
370
- */
371
- async function runPostNativeAnalysis(ctx, allFileSymbols, changedFiles) {
372
- const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
373
- // Scope analysis fileSymbols to changed files only
374
- let analysisFileSymbols;
375
- if (changedFiles && changedFiles.length > 0) {
376
- analysisFileSymbols = new Map();
377
- for (const f of changedFiles) {
378
- const entry = allFileSymbols.get(f);
379
- if (entry)
380
- analysisFileSymbols.set(f, entry);
381
- }
382
- }
383
- else {
384
- analysisFileSymbols = allFileSymbols;
385
- }
386
- // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
387
- const native = loadNative();
388
- if (native?.NativeDatabase) {
389
- try {
390
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
391
- if (ctx.engineOpts)
392
- ctx.engineOpts.nativeDb = ctx.nativeDb;
393
- }
394
- catch {
395
- ctx.nativeDb = undefined;
396
- if (ctx.engineOpts)
397
- ctx.engineOpts.nativeDb = undefined;
398
- }
399
- }
400
- // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
401
- // Previously each feature called wal_checkpoint(TRUNCATE) individually
402
- // (~68ms each × 3-4 features). One FULL checkpoint suffices.
403
- if (ctx.nativeDb && ctx.engineOpts) {
404
- ctx.db.pragma('wal_checkpoint(FULL)');
405
- ctx.engineOpts.suspendJsDb = () => { };
406
- ctx.engineOpts.resumeJsDb = () => { };
407
- }
408
- try {
409
- const { runAnalyses: runAnalysesFn } = (await import('../../../ast-analysis/engine.js'));
410
- const result = await runAnalysesFn(ctx.db, analysisFileSymbols, ctx.rootDir, ctx.opts, ctx.engineOpts);
411
- timing.astMs = result.astMs ?? 0;
412
- timing.complexityMs = result.complexityMs ?? 0;
413
- timing.cfgMs = result.cfgMs ?? 0;
414
- timing.dataflowMs = result.dataflowMs ?? 0;
415
- }
416
- catch (err) {
417
- warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
418
- }
419
- // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
420
- // WAL writes so JS and external readers can see them. Runs once after
421
- // all analysis features complete (not per-feature).
422
- if (ctx.nativeDb) {
423
- try {
424
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
425
- }
426
- catch {
427
- /* ignore checkpoint errors */
428
- }
429
- try {
430
- ctx.nativeDb.close();
431
- }
432
- catch {
433
- /* ignore close errors */
434
- }
435
- ctx.nativeDb = undefined;
436
- if (ctx.engineOpts) {
437
- ctx.engineOpts.nativeDb = undefined;
438
- ctx.engineOpts.suspendJsDb = undefined;
439
- ctx.engineOpts.resumeJsDb = undefined;
440
- }
441
- }
442
- return timing;
443
- }
444
- /** Format timing result from native orchestrator phases + JS post-processing. */
445
- function formatNativeTimingResult(p, structurePatchMs, analysisTiming) {
446
- return {
447
- phases: {
448
- setupMs: +(p.setupMs ?? 0).toFixed(1),
449
- collectMs: +(p.collectMs ?? 0).toFixed(1),
450
- detectMs: +(p.detectMs ?? 0).toFixed(1),
451
- parseMs: +(p.parseMs ?? 0).toFixed(1),
452
- insertMs: +(p.insertMs ?? 0).toFixed(1),
453
- resolveMs: +(p.resolveMs ?? 0).toFixed(1),
454
- edgesMs: +(p.edgesMs ?? 0).toFixed(1),
455
- structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1),
456
- rolesMs: +(p.rolesMs ?? 0).toFixed(1),
457
- astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
458
- complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
459
- cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
460
- dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1),
461
- finalizeMs: +(p.finalizeMs ?? 0).toFixed(1),
462
- },
463
- };
464
- }
465
- /** Try the native build orchestrator. Returns a BuildResult on success, undefined to fall through to JS pipeline. */
466
- async function tryNativeOrchestrator(ctx) {
467
- const skipReason = shouldSkipNativeOrchestrator(ctx);
468
- if (skipReason) {
469
- debug(`Skipping native orchestrator: ${skipReason}`);
470
- return undefined;
471
- }
472
- // Open NativeDatabase on demand — deferred from setupPipeline to skip the
473
- // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
474
- // first to avoid dual-connection WAL corruption.
475
- if (!ctx.nativeDb && ctx.nativeAvailable) {
476
- const native = loadNative();
477
- if (native?.NativeDatabase) {
478
- try {
479
- // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
480
- // Uses raw close() instead of closeDb() intentionally — the advisory lock
481
- // is kept and transferred to the NativeDbProxy below, not released here.
482
- ctx.db.close();
483
- acquireAdvisoryLock(ctx.dbPath);
484
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
485
- ctx.nativeDb.initSchema();
486
- // Replace ctx.db with a NativeDbProxy so post-native JS fallback
487
- // (structure, analysis) can use it without reopening better-sqlite3.
488
- const proxy = new NativeDbProxy(ctx.nativeDb);
489
- proxy.__lockPath = `${ctx.dbPath}.lock`;
490
- ctx.db = proxy;
491
- ctx.nativeFirstProxy = true;
492
- }
493
- catch (err) {
494
- warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
495
- try {
496
- ctx.nativeDb?.close();
497
- }
498
- catch (e) {
499
- debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
500
- }
501
- ctx.nativeDb = undefined;
502
- ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
503
- releaseAdvisoryLock(`${ctx.dbPath}.lock`);
504
- // Reopen better-sqlite3 for JS pipeline fallback
505
- ctx.db = openDb(ctx.dbPath);
506
- }
507
- }
508
- }
509
- if (!ctx.nativeDb?.buildGraph)
510
- return undefined;
511
- const resultJson = ctx.nativeDb.buildGraph(ctx.rootDir, JSON.stringify(ctx.config), JSON.stringify(ctx.aliases), JSON.stringify(ctx.opts));
512
- const result = JSON.parse(resultJson);
513
- if (result.earlyExit) {
514
- info('No changes detected');
515
- closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
516
- return 'early-exit';
517
- }
518
- // Log incremental status to match JS pipeline output
519
- const changed = result.changedCount ?? 0;
520
- const removed = result.removedCount ?? 0;
521
- if (!result.isFullBuild && (changed > 0 || removed > 0)) {
522
- info(`Incremental: ${changed} changed, ${removed} removed`);
523
- }
524
- const p = result.phases;
525
- // Sync build_meta so JS-side version/engine checks work on next build.
526
- // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
527
- // platform package.json version (ctx.engineVersion). The Rust side's
528
- // check_version_mismatch compares against CARGO_PKG_VERSION; writing
529
- // the package.json value would create a permanent mismatch whenever
530
- // the binary and platform package.json diverge — e.g., CI hot-swap
531
- // via ci-install-native.mjs (#1066) — forcing every subsequent build
532
- // to be a full rebuild.
533
- //
534
- // When the native addon doesn't expose engineVersion() (older addon),
535
- // fall back to CODEGRAPH_VERSION — same fallback used by both
536
- // checkEngineSchemaMismatch (read path) and persistBuildMetadata
537
- // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
538
- // here would re-introduce the asymmetry this PR fixes for that case.
539
- const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
540
- setBuildMeta(ctx.db, {
541
- engine: ctx.engineName,
542
- engine_version: nativeVersionForMeta,
543
- codegraph_version: nativeVersionForMeta,
544
- schema_version: String(ctx.schemaVersion),
545
- built_at: new Date().toISOString(),
546
- });
547
- info(`Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`);
548
- // ── Post-native structure + analysis ──────────────────────────────
549
- let analysisTiming = {
550
- astMs: +(p.astMs ?? 0),
551
- complexityMs: +(p.complexityMs ?? 0),
552
- cfgMs: +(p.cfgMs ?? 0),
553
- dataflowMs: +(p.dataflowMs ?? 0),
554
- };
555
- let structurePatchMs = 0;
556
- // Skip JS structure when the Rust pipeline's small-incremental fast path
557
- // already handled it. For full builds and large incrementals where Rust
558
- // skipped structure, we must run the JS fallback.
559
- const needsStructure = !result.structureHandled;
560
- // When the Rust addon doesn't include analysis persistence (older addon
561
- // version or analysis failed), fall back to JS-side analysis.
562
- const needsAnalysisFallback = !result.analysisComplete &&
563
- (ctx.opts.ast !== false ||
564
- ctx.opts.complexity !== false ||
565
- ctx.opts.cfg !== false ||
566
- ctx.opts.dataflow !== false);
567
- if (needsStructure || needsAnalysisFallback) {
568
- // When analysis fallback is needed, handoff to better-sqlite3 — the
569
- // analysis engine uses the suspend/resume WAL pattern that requires a
570
- // real better-sqlite3 connection, not the NativeDbProxy.
571
- if (needsAnalysisFallback && ctx.nativeFirstProxy) {
572
- closeNativeDb(ctx, 'pre-analysis-fallback');
573
- ctx.db = openDb(ctx.dbPath);
574
- ctx.nativeFirstProxy = false;
575
- }
576
- else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
577
- // DB reopen failed — return partial result
578
- return formatNativeTimingResult(p, 0, analysisTiming);
579
- }
580
- const fileSymbols = reconstructFileSymbolsFromDb(ctx);
581
- if (needsStructure) {
582
- structurePatchMs = await runPostNativeStructure(ctx, fileSymbols, !!result.isFullBuild, result.changedFiles);
583
- }
584
- if (needsAnalysisFallback) {
585
- analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
586
- }
587
- }
588
- // Engine parity: the native orchestrator silently drops files whose
589
- // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
590
- // stale native binaries). WASM handles those — backfill via WASM so both
591
- // engines process the same file set (#967).
592
- //
593
- // Runs on full builds and on incrementals when the orchestrator reports
594
- // any file activity (removals or changes). The orchestrator's
595
- // `detect_removed_files` filter (#1070) skips files outside its narrower
596
- // file_collector, so on a current binary a no-op rebuild reports
597
- // `removedCount=0` and `changedCount=0`, making the backfill call pure
598
- // overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
599
- // binaries lacking the filter still report `removedCount>0` and get the
600
- // gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
601
- // narrows (but does not fully close) the gap where a brand-new
602
- // unsupported-extension file is added on an otherwise-quiet incremental
603
- // — see #1091 for the residual gap.
604
- const removedCount = result.removedCount ?? 0;
605
- const changedCount = result.changedCount ?? 0;
606
- if (result.isFullBuild || removedCount > 0 || changedCount > 0) {
607
- await backfillNativeDroppedFiles(ctx);
608
- }
609
- closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
610
- return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
611
- }
612
- /**
613
- * Backfill files that the native orchestrator silently dropped during parse.
614
- * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
615
- */
616
- async function backfillNativeDroppedFiles(ctx) {
617
- // Compute the missing-file set FIRST, before any expensive DB handoff.
618
- // NativeDbProxy supports .prepare().all(), so the upfront query works
619
- // whether ctx.db is a proxy or a real better-sqlite3 connection. On
620
- // incremental no-op rebuilds nothing is missing, so we want to early-return
621
- // without paying the close-native / reopen-better-sqlite3 cost.
622
- const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set());
623
- const expected = new Set(collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))));
624
- const existingNodeRows = ctx.db
625
- .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
626
- .all();
627
- const existingNodes = new Set(existingNodeRows.map((r) => r.file));
628
- // Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
629
- // (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
630
- // (e.g. a DB written by old code where `nodes` was populated but
631
- // `file_hashes` was not). Treating "in nodes but not in file_hashes" as
632
- // missing closes the gap so the backfill repairs the file_hashes row even
633
- // when the node row already exists.
634
- let existingHashes = new Set();
635
- try {
636
- const existingHashRows = ctx.db
637
- .prepare('SELECT DISTINCT file FROM file_hashes')
638
- .all();
639
- existingHashes = new Set(existingHashRows.map((r) => r.file));
640
- }
641
- catch (e) {
642
- // file_hashes table may not exist on legacy DBs; treat as fully missing
643
- // so the backfill writes rows on the upsert path below.
644
- debug(`backfillNativeDroppedFiles: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`);
645
- }
646
- // Restrict backfill to files with an installed WASM grammar. Extensions in
647
- // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
648
- // minimal installs) can't be parsed by either engine, so they're not a
649
- // native regression — excluding them keeps the warn count meaningful.
650
- const installedExts = getInstalledWasmExtensions();
651
- const missingRel = [];
652
- const missingAbs = [];
653
- for (const rel of expected) {
654
- // A file is "missing" if it's absent from EITHER nodes OR file_hashes.
655
- // Both must be present for fast-skip to work correctly.
656
- if (existingNodes.has(rel) && existingHashes.has(rel))
657
- continue;
658
- const ext = path.extname(rel).toLowerCase();
659
- if (!installedExts.has(ext))
660
- continue;
661
- missingRel.push(rel);
662
- missingAbs.push(path.join(ctx.rootDir, rel));
663
- }
664
- if (missingAbs.length === 0)
665
- return;
666
- // Now that we know there's work to do, hand off to better-sqlite3 (needed
667
- // for the INSERT path below).
668
- if (ctx.nativeFirstProxy) {
669
- closeNativeDb(ctx, 'pre-parity-backfill');
670
- ctx.db = openDb(ctx.dbPath);
671
- ctx.nativeFirstProxy = false;
672
- }
673
- // Classify drops so users see per-extension reasons instead of just a count
674
- // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
675
- // extractor); `native-extractor-failure` indicates a real native bug since
676
- // the language IS supported by the addon yet the file was dropped anyway.
677
- const { byReason, totals } = classifyNativeDrops(missingRel);
678
- if (totals['unsupported-by-native'] > 0) {
679
- info(`Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`);
680
- }
681
- if (totals['native-extractor-failure'] > 0) {
682
- warn(`Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`);
683
- }
684
- const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
685
- const rows = [];
686
- const exportKeys = [];
687
- for (const [relPath, symbols] of wasmResults) {
688
- // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
689
- rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
690
- for (const def of symbols.definitions ?? []) {
691
- // Populate qualified_name/scope the same way the JS fallback does so
692
- // downstream queries (cross-file references, "go to definition") find
693
- // these symbols.
694
- const dotIdx = def.name.lastIndexOf('.');
695
- const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
696
- rows.push([
697
- def.name,
698
- def.kind,
699
- relPath,
700
- def.line,
701
- def.endLine ?? null,
702
- null,
703
- def.name,
704
- scope,
705
- def.visibility ?? null,
706
- ]);
707
- }
708
- // Exports: insert the row (INSERT OR IGNORE — a matching definition row
709
- // is a no-op) and queue a key for the second-pass exported=1 update, so
710
- // queries filtering on exported=1 find backfilled symbols (#970).
711
- for (const exp of symbols.exports ?? []) {
712
- rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
713
- exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
714
- }
715
- }
716
- const db = ctx.db;
717
- batchInsertNodes(db, rows);
718
- // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
719
- if (exportKeys.length > 0) {
720
- const EXPORT_CHUNK = 500;
721
- const exportStmtCache = new Map();
722
- for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
723
- const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
724
- const chunkSize = end - i;
725
- let updateStmt = exportStmtCache.get(chunkSize);
726
- if (!updateStmt) {
727
- const conditions = Array.from({ length: chunkSize }, () => '(name = ? AND kind = ? AND file = ? AND line = ?)').join(' OR ');
728
- updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
729
- exportStmtCache.set(chunkSize, updateStmt);
730
- }
731
- const vals = [];
732
- for (let j = i; j < end; j++) {
733
- const k = exportKeys[j];
734
- vals.push(k[0], k[1], k[2], k[3]);
735
- }
736
- updateStmt.run(...vals);
737
- }
738
- }
739
- // Persist file_hashes rows for every backfilled file. The Rust orchestrator
740
- // only hashes files it parsed itself, so without this step files in
741
- // optional-language extensions (e.g. .clj when no Rust extractor exists)
742
- // would be missing from `file_hashes` — permanently breaking the JS-side
743
- // fast-skip pre-flight (#1054), which rejects on `collected file missing
744
- // from file_hashes` and forces every no-op rebuild back through the full
745
- // ~2s native pipeline (#1068).
746
- //
747
- // Iterates `missingRel` (every collected file the Rust orchestrator
748
- // dropped), not `wasmResults`, so files that produced zero symbols still
749
- // get a row.
750
- try {
751
- const upsertHash = db.prepare('INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)');
752
- const writeHashes = db.transaction(() => {
753
- for (let i = 0; i < missingRel.length; i++) {
754
- const relPath = missingRel[i];
755
- const absPath = missingAbs[i];
756
- if (!relPath || !absPath)
757
- continue;
758
- let code;
759
- try {
760
- code = readFileSafe(absPath);
761
- }
762
- catch (e) {
763
- debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
764
- continue;
765
- }
766
- if (code === null)
767
- continue;
768
- const stat = fileStat(absPath);
769
- const mtime = stat ? stat.mtime : 0;
770
- const size = stat ? stat.size : 0;
771
- upsertHash.run(relPath, fileHash(code), mtime, size);
772
- }
773
- });
774
- writeHashes();
775
- }
776
- catch (e) {
777
- debug(`backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`);
778
- }
779
- // Free WASM parse trees from the inline backfill path (#1058).
780
- // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
781
- // backed by WASM linear memory) on every result, but these symbols are
782
- // consumed locally for DB row construction and never added to
783
- // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
784
- // sees them. Without this, trees leak WASM memory until process exit —
785
- // bounded per run but cumulative across in-process integration tests.
786
- // Mirrors the cleanup discipline established for #931.
787
- for (const [, symbols] of wasmResults) {
788
- const tree = symbols._tree;
789
- if (tree && typeof tree.delete === 'function') {
790
- try {
791
- tree.delete();
792
- }
793
- catch {
794
- /* ignore cleanup errors */
795
- }
796
- }
797
- symbols._tree = undefined;
798
- symbols._langId = undefined;
799
- }
800
- }
185
+ // Native db lifecycle and orchestrator helpers live in dedicated stage
186
+ // modules see `./stages/native-db-lifecycle.ts` and `./stages/native-orchestrator.ts`.
801
187
  // ── Pipeline stages execution ───────────────────────────────────────────
802
188
  async function runPipelineStages(ctx) {
803
189
  // ── WASM / fallback dual-connection mode ─────────────────────────────