@optave/codegraph 3.11.0 → 3.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. package/README.md +38 -31
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  6. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  7. package/dist/ast-analysis/visitor-utils.js +83 -49
  8. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  9. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  11. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  12. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  14. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  15. package/dist/cli/commands/embed.d.ts.map +1 -1
  16. package/dist/cli/commands/embed.js +49 -4
  17. package/dist/cli/commands/embed.js.map +1 -1
  18. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  19. package/dist/domain/analysis/dependencies.js +106 -80
  20. package/dist/domain/analysis/dependencies.js.map +1 -1
  21. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  22. package/dist/domain/analysis/fn-impact.js +77 -52
  23. package/dist/domain/analysis/fn-impact.js.map +1 -1
  24. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  25. package/dist/domain/analysis/module-map.js +132 -121
  26. package/dist/domain/analysis/module-map.js.map +1 -1
  27. package/dist/domain/graph/builder/call-resolver.d.ts +71 -0
  28. package/dist/domain/graph/builder/call-resolver.d.ts.map +1 -0
  29. package/dist/domain/graph/builder/call-resolver.js +130 -0
  30. package/dist/domain/graph/builder/call-resolver.js.map +1 -0
  31. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  32. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  33. package/dist/domain/graph/builder/helpers.js +47 -33
  34. package/dist/domain/graph/builder/helpers.js.map +1 -1
  35. package/dist/domain/graph/builder/incremental.d.ts +6 -0
  36. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  37. package/dist/domain/graph/builder/incremental.js +214 -127
  38. package/dist/domain/graph/builder/incremental.js.map +1 -1
  39. package/dist/domain/graph/builder/pipeline.d.ts +1 -44
  40. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  41. package/dist/domain/graph/builder/pipeline.js +10 -766
  42. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  43. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  44. package/dist/domain/graph/builder/stages/build-edges.js +151 -192
  45. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  46. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  47. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  48. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  49. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  50. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  51. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  52. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  54. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  56. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  57. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  58. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  59. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  60. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  61. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  62. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  63. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  64. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  65. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  66. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  67. package/dist/domain/graph/cycles.d.ts +6 -4
  68. package/dist/domain/graph/cycles.d.ts.map +1 -1
  69. package/dist/domain/graph/cycles.js +50 -55
  70. package/dist/domain/graph/cycles.js.map +1 -1
  71. package/dist/domain/graph/journal.d.ts.map +1 -1
  72. package/dist/domain/graph/journal.js +89 -70
  73. package/dist/domain/graph/journal.js.map +1 -1
  74. package/dist/domain/graph/watcher.d.ts.map +1 -1
  75. package/dist/domain/graph/watcher.js +10 -4
  76. package/dist/domain/graph/watcher.js.map +1 -1
  77. package/dist/domain/parser.d.ts +12 -23
  78. package/dist/domain/parser.d.ts.map +1 -1
  79. package/dist/domain/parser.js +126 -79
  80. package/dist/domain/parser.js.map +1 -1
  81. package/dist/domain/search/generator.d.ts +3 -1
  82. package/dist/domain/search/generator.d.ts.map +1 -1
  83. package/dist/domain/search/generator.js +68 -45
  84. package/dist/domain/search/generator.js.map +1 -1
  85. package/dist/domain/search/models.d.ts +2 -0
  86. package/dist/domain/search/models.d.ts.map +1 -1
  87. package/dist/domain/search/models.js +37 -3
  88. package/dist/domain/search/models.js.map +1 -1
  89. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  90. package/dist/domain/search/search/hybrid.js +49 -40
  91. package/dist/domain/search/search/hybrid.js.map +1 -1
  92. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  93. package/dist/domain/search/search/semantic.js +69 -49
  94. package/dist/domain/search/search/semantic.js.map +1 -1
  95. package/dist/domain/wasm-worker-entry.js +201 -136
  96. package/dist/domain/wasm-worker-entry.js.map +1 -1
  97. package/dist/extractors/elixir.js +95 -71
  98. package/dist/extractors/elixir.js.map +1 -1
  99. package/dist/extractors/gleam.d.ts.map +1 -1
  100. package/dist/extractors/gleam.js +23 -31
  101. package/dist/extractors/gleam.js.map +1 -1
  102. package/dist/extractors/helpers.d.ts +79 -1
  103. package/dist/extractors/helpers.d.ts.map +1 -1
  104. package/dist/extractors/helpers.js +137 -0
  105. package/dist/extractors/helpers.js.map +1 -1
  106. package/dist/extractors/java.d.ts.map +1 -1
  107. package/dist/extractors/java.js +37 -49
  108. package/dist/extractors/java.js.map +1 -1
  109. package/dist/extractors/javascript.d.ts.map +1 -1
  110. package/dist/extractors/javascript.js +44 -44
  111. package/dist/extractors/javascript.js.map +1 -1
  112. package/dist/extractors/julia.js +27 -34
  113. package/dist/extractors/julia.js.map +1 -1
  114. package/dist/extractors/r.d.ts.map +1 -1
  115. package/dist/extractors/r.js +33 -58
  116. package/dist/extractors/r.js.map +1 -1
  117. package/dist/extractors/solidity.d.ts.map +1 -1
  118. package/dist/extractors/solidity.js +38 -61
  119. package/dist/extractors/solidity.js.map +1 -1
  120. package/dist/features/boundaries.d.ts.map +1 -1
  121. package/dist/features/boundaries.js +49 -39
  122. package/dist/features/boundaries.js.map +1 -1
  123. package/dist/features/cfg.d.ts.map +1 -1
  124. package/dist/features/cfg.js +90 -63
  125. package/dist/features/cfg.js.map +1 -1
  126. package/dist/features/check.d.ts.map +1 -1
  127. package/dist/features/check.js +43 -34
  128. package/dist/features/check.js.map +1 -1
  129. package/dist/features/cochange.d.ts.map +1 -1
  130. package/dist/features/cochange.js +68 -56
  131. package/dist/features/cochange.js.map +1 -1
  132. package/dist/features/complexity.d.ts.map +1 -1
  133. package/dist/features/complexity.js +105 -75
  134. package/dist/features/complexity.js.map +1 -1
  135. package/dist/features/dataflow.d.ts.map +1 -1
  136. package/dist/features/dataflow.js +37 -29
  137. package/dist/features/dataflow.js.map +1 -1
  138. package/dist/features/flow.d.ts.map +1 -1
  139. package/dist/features/flow.js +31 -22
  140. package/dist/features/flow.js.map +1 -1
  141. package/dist/features/graph-enrichment.d.ts.map +1 -1
  142. package/dist/features/graph-enrichment.js +77 -70
  143. package/dist/features/graph-enrichment.js.map +1 -1
  144. package/dist/features/owners.d.ts +17 -26
  145. package/dist/features/owners.d.ts.map +1 -1
  146. package/dist/features/owners.js +120 -109
  147. package/dist/features/owners.js.map +1 -1
  148. package/dist/features/sequence.d.ts.map +1 -1
  149. package/dist/features/sequence.js +59 -54
  150. package/dist/features/sequence.js.map +1 -1
  151. package/dist/features/structure-query.d.ts.map +1 -1
  152. package/dist/features/structure-query.js +60 -60
  153. package/dist/features/structure-query.js.map +1 -1
  154. package/dist/features/structure.d.ts.map +1 -1
  155. package/dist/features/structure.js +149 -52
  156. package/dist/features/structure.js.map +1 -1
  157. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  158. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  159. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  160. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  161. package/dist/graph/classifiers/roles.js +63 -59
  162. package/dist/graph/classifiers/roles.js.map +1 -1
  163. package/dist/infrastructure/config.d.ts +1 -1
  164. package/dist/infrastructure/config.d.ts.map +1 -1
  165. package/dist/infrastructure/config.js +1 -1
  166. package/dist/infrastructure/config.js.map +1 -1
  167. package/dist/presentation/cfg.d.ts.map +1 -1
  168. package/dist/presentation/cfg.js +44 -29
  169. package/dist/presentation/cfg.js.map +1 -1
  170. package/dist/presentation/flow.d.ts.map +1 -1
  171. package/dist/presentation/flow.js +58 -38
  172. package/dist/presentation/flow.js.map +1 -1
  173. package/dist/types.d.ts +1 -1
  174. package/dist/types.d.ts.map +1 -1
  175. package/grammars/tree-sitter-erlang.wasm +0 -0
  176. package/package.json +9 -9
  177. package/src/ast-analysis/engine.ts +145 -61
  178. package/src/ast-analysis/visitor-utils.ts +86 -46
  179. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  180. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  181. package/src/cli/commands/embed.ts +54 -4
  182. package/src/domain/analysis/dependencies.ts +166 -85
  183. package/src/domain/analysis/fn-impact.ts +120 -50
  184. package/src/domain/analysis/module-map.ts +175 -140
  185. package/src/domain/graph/builder/call-resolver.ts +181 -0
  186. package/src/domain/graph/builder/helpers.ts +85 -76
  187. package/src/domain/graph/builder/incremental.ts +321 -152
  188. package/src/domain/graph/builder/pipeline.ts +19 -957
  189. package/src/domain/graph/builder/stages/build-edges.ts +229 -275
  190. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  191. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  192. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  193. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  194. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  195. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  196. package/src/domain/graph/cycles.ts +51 -49
  197. package/src/domain/graph/journal.ts +84 -69
  198. package/src/domain/graph/watcher.ts +12 -4
  199. package/src/domain/parser.ts +143 -66
  200. package/src/domain/search/generator.ts +132 -74
  201. package/src/domain/search/models.ts +39 -3
  202. package/src/domain/search/search/hybrid.ts +53 -42
  203. package/src/domain/search/search/semantic.ts +105 -65
  204. package/src/domain/wasm-worker-entry.ts +235 -152
  205. package/src/extractors/elixir.ts +91 -64
  206. package/src/extractors/gleam.ts +33 -37
  207. package/src/extractors/helpers.ts +205 -1
  208. package/src/extractors/java.ts +42 -45
  209. package/src/extractors/javascript.ts +44 -43
  210. package/src/extractors/julia.ts +28 -35
  211. package/src/extractors/r.ts +38 -56
  212. package/src/extractors/solidity.ts +43 -71
  213. package/src/features/boundaries.ts +64 -46
  214. package/src/features/cfg.ts +145 -74
  215. package/src/features/check.ts +60 -43
  216. package/src/features/cochange.ts +95 -72
  217. package/src/features/complexity.ts +134 -79
  218. package/src/features/dataflow.ts +57 -34
  219. package/src/features/flow.ts +48 -24
  220. package/src/features/graph-enrichment.ts +105 -70
  221. package/src/features/owners.ts +186 -146
  222. package/src/features/sequence.ts +99 -69
  223. package/src/features/structure-query.ts +94 -79
  224. package/src/features/structure.ts +199 -79
  225. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  226. package/src/graph/classifiers/roles.ts +64 -54
  227. package/src/infrastructure/config.ts +1 -1
  228. package/src/presentation/cfg.ts +48 -32
  229. package/src/presentation/flow.ts +100 -52
  230. package/src/types.ts +1 -1
@@ -7,20 +7,17 @@
7
7
  import fs from 'node:fs';
8
8
  import path from 'node:path';
9
9
  import { performance } from 'node:perf_hooks';
10
- import { acquireAdvisoryLock, closeDb, closeDbPair, getBuildMeta, initSchema, MIGRATIONS, openDb, purgeFilesData, releaseAdvisoryLock, setBuildMeta, } from '../../../db/index.js';
10
+ import { closeDb, closeDbPair, getBuildMeta, initSchema, MIGRATIONS, openDb, } from '../../../db/index.js';
11
11
  import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js';
12
12
  import { debug, info, warn } from '../../../infrastructure/logger.js';
13
13
  import { loadNative } from '../../../infrastructure/native.js';
14
- import { semverCompare } from '../../../infrastructure/update-check.js';
15
- import { normalizePath } from '../../../shared/constants.js';
16
14
  import { toErrorMessage } from '../../../shared/errors.js';
17
15
  import { CODEGRAPH_VERSION } from '../../../shared/version.js';
18
- import { classifyNativeDrops, formatDropExtensionSummary, getActiveEngine, getInstalledWasmExtensions, NATIVE_SUPPORTED_EXTENSIONS, parseFilesWasmForBackfill, } from '../../parser.js';
16
+ import { getActiveEngine } from '../../parser.js';
19
17
  import { writeJournalHeader } from '../journal.js';
20
18
  import { setWorkspaces } from '../resolve.js';
21
19
  import { PipelineContext } from './context.js';
22
- import { batchInsertNodes, collectFiles as collectFilesUtil, fileHash, fileStat, loadPathAliases, readFileSafe, } from './helpers.js';
23
- import { NativeDbProxy } from './native-db-proxy.js';
20
+ import { loadPathAliases } from './helpers.js';
24
21
  import { buildEdges } from './stages/build-edges.js';
25
22
  import { buildStructure } from './stages/build-structure.js';
26
23
  // Pipeline stages
@@ -28,9 +25,14 @@ import { collectFiles } from './stages/collect-files.js';
28
25
  import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
29
26
  import { finalize } from './stages/finalize.js';
30
27
  import { insertNodes } from './stages/insert-nodes.js';
28
+ import { closeNativeDb, refreshJsDb, reopenNativeDb, suspendNativeDb, } from './stages/native-db-lifecycle.js';
29
+ import { tryNativeOrchestrator } from './stages/native-orchestrator.js';
31
30
  import { parseFiles } from './stages/parse-files.js';
32
31
  import { resolveImports } from './stages/resolve-imports.js';
33
32
  import { runAnalyses } from './stages/run-analyses.js';
33
+ // Re-export computeWasmOnlyStaleFiles for backward compatibility with tests
34
+ // that import from this module path (#1073 unit tests).
35
+ export { computeWasmOnlyStaleFiles, } from './stages/native-orchestrator.js';
34
36
  // ── Setup helpers ───────────────────────────────────────────────────────
35
37
  function initializeEngine(ctx) {
36
38
  ctx.engineOpts = {
@@ -180,766 +182,8 @@ function formatTimingResult(ctx) {
180
182
  },
181
183
  };
182
184
  }
183
- // ── NativeDb lifecycle helpers ──────────────────────────────────────────
184
- /** Checkpoint WAL through rusqlite and close the native connection. */
185
- function closeNativeDb(ctx, label) {
186
- if (!ctx.nativeDb)
187
- return;
188
- try {
189
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
190
- }
191
- catch (e) {
192
- debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`);
193
- }
194
- try {
195
- ctx.nativeDb.close();
196
- }
197
- catch (e) {
198
- debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`);
199
- }
200
- ctx.nativeDb = undefined;
201
- }
202
- /** Try to reopen the native connection for a given pipeline phase. */
203
- function reopenNativeDb(ctx, label) {
204
- if ((ctx.opts.engine ?? 'auto') === 'wasm')
205
- return;
206
- const native = loadNative();
207
- if (!native?.NativeDatabase)
208
- return;
209
- try {
210
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
211
- }
212
- catch (e) {
213
- debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
214
- ctx.nativeDb = undefined;
215
- }
216
- }
217
- /** Close nativeDb and clear stale references in engineOpts. */
218
- function suspendNativeDb(ctx, label) {
219
- closeNativeDb(ctx, label);
220
- if (ctx.engineOpts?.nativeDb) {
221
- ctx.engineOpts.nativeDb = undefined;
222
- }
223
- }
224
- /**
225
- * After native writes, reopen the JS db connection to get a fresh page cache.
226
- * Rusqlite WAL truncation invalidates better-sqlite3's internal WAL index,
227
- * causing SQLITE_CORRUPT on the next read (#715, #736).
228
- */
229
- function refreshJsDb(ctx) {
230
- try {
231
- ctx.db.close();
232
- }
233
- catch (e) {
234
- debug(`refreshJsDb close failed: ${toErrorMessage(e)}`);
235
- }
236
- ctx.db = openDb(ctx.dbPath);
237
- }
238
- // ── Native orchestrator helpers ───────────────────────────────────────
239
- /** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */
240
- function shouldSkipNativeOrchestrator(ctx) {
241
- if (ctx.forceFullRebuild)
242
- return 'forceFullRebuild';
243
- // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables,
244
- // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on
245
- // < 3.9.1 so v3.9.1+ uses the fast Rust orchestrator path.
246
- const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.9.1') < 0;
247
- if (orchestratorBuggy)
248
- return `buggy addon ${ctx.engineVersion}`;
249
- if (ctx.engineName !== 'native')
250
- return `engine=${ctx.engineName}`;
251
- return null;
252
- }
253
- /** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3.
254
- * Returns false if the DB reopen fails (caller should return partial result). */
255
- function handoffWalAfterNativeBuild(ctx) {
256
- closeNativeDb(ctx, 'post-native-build');
257
- try {
258
- ctx.db.close();
259
- }
260
- catch (e) {
261
- debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`);
262
- }
263
- try {
264
- ctx.db = openDb(ctx.dbPath);
265
- return true;
266
- }
267
- catch (reopenErr) {
268
- warn(`Failed to reopen DB after native build: ${reopenErr.message}`);
269
- return false;
270
- }
271
- }
272
- /**
273
- * Reconstruct fileSymbols from the DB after a native orchestrator build.
274
- * When `scopeFiles` is provided, only loads those files (for analysis-only).
275
- * When omitted, loads all files (needed for structure rebuilds).
276
- */
277
- function reconstructFileSymbolsFromDb(ctx, scopeFiles) {
278
- let query = 'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
279
- const params = [];
280
- if (scopeFiles && scopeFiles.length > 0) {
281
- const placeholders = scopeFiles.map(() => '?').join(',');
282
- query += ` AND file IN (${placeholders})`;
283
- params.push(...scopeFiles);
284
- }
285
- query += ' ORDER BY file, line';
286
- const rows = ctx.db.prepare(query).all(...params);
287
- const fileSymbols = new Map();
288
- for (const row of rows) {
289
- let entry = fileSymbols.get(row.file);
290
- if (!entry) {
291
- entry = {
292
- definitions: [],
293
- calls: [],
294
- imports: [],
295
- classes: [],
296
- exports: [],
297
- typeMap: new Map(),
298
- };
299
- fileSymbols.set(row.file, entry);
300
- }
301
- entry.definitions.push({
302
- name: row.name,
303
- kind: row.kind,
304
- line: row.line,
305
- endLine: row.endLine ?? undefined,
306
- });
307
- }
308
- // Populate import/export counts from DB edges so buildStructure
309
- // computes correct import_count/export_count in node_metrics.
310
- // The extractor arrays aren't persisted to the DB, so we derive
311
- // counts from edge data instead (#804).
312
- const importCountRows = ctx.db
313
- .prepare(`SELECT n.file, COUNT(*) AS cnt
314
- FROM edges e JOIN nodes n ON e.source_id = n.id
315
- WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
316
- AND n.file IS NOT NULL
317
- GROUP BY n.file`)
318
- .all();
319
- for (const row of importCountRows) {
320
- const entry = fileSymbols.get(row.file);
321
- if (entry)
322
- entry.imports = new Array(row.cnt);
323
- }
324
- const exportCountRows = ctx.db
325
- .prepare(`SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
326
- FROM edges e
327
- JOIN nodes n_tgt ON e.target_id = n_tgt.id
328
- JOIN nodes n_src ON e.source_id = n_src.id
329
- WHERE e.kind IN ('imports', 'imports-type', 'reexports')
330
- AND n_tgt.file IS NOT NULL
331
- AND n_src.file != n_tgt.file
332
- GROUP BY n_tgt.file`)
333
- .all();
334
- for (const row of exportCountRows) {
335
- const entry = fileSymbols.get(row.file);
336
- if (entry)
337
- entry.exports = new Array(row.cnt);
338
- }
339
- return fileSymbols;
340
- }
341
- /**
342
- * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
343
- * For full builds, passes changedFiles=null (full rebuild).
344
- * For incremental builds, passes the changed file list to scope the update.
345
- */
346
- async function runPostNativeStructure(ctx, allFileSymbols, isFullBuild, changedFiles) {
347
- const structureStart = performance.now();
348
- try {
349
- const directories = new Set();
350
- for (const relPath of allFileSymbols.keys()) {
351
- const parts = relPath.split('/');
352
- for (let i = 1; i < parts.length; i++) {
353
- directories.add(parts.slice(0, i).join('/'));
354
- }
355
- }
356
- const lineCountMap = new Map();
357
- const cachedLineCounts = ctx.db
358
- .prepare(`SELECT n.name AS file, m.line_count
359
- FROM node_metrics m JOIN nodes n ON m.node_id = n.id
360
- WHERE n.kind = 'file'`)
361
- .all();
362
- for (const row of cachedLineCounts) {
363
- lineCountMap.set(row.file, row.line_count);
364
- }
365
- // Full builds need null (rebuild everything). Incremental builds pass the
366
- // changed file list so buildStructure only updates those files' metrics
367
- // and contains edges — matching the JS pipeline's medium-incremental path.
368
- const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles;
369
- const { buildStructure: buildStructureFn } = (await import('../../../features/structure.js'));
370
- buildStructureFn(ctx.db, allFileSymbols, ctx.rootDir, lineCountMap, directories, changedFilePaths);
371
- debug(`Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`);
372
- }
373
- catch (err) {
374
- warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
375
- }
376
- return performance.now() - structureStart;
377
- }
378
- /**
379
- * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator.
380
- * Used when the Rust addon doesn't include analysis persistence (older addon
381
- * version) or when analysis failed on the Rust side.
382
- */
383
- async function runPostNativeAnalysis(ctx, allFileSymbols, changedFiles) {
384
- const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
385
- // Scope analysis fileSymbols to changed files only
386
- let analysisFileSymbols;
387
- if (changedFiles && changedFiles.length > 0) {
388
- analysisFileSymbols = new Map();
389
- for (const f of changedFiles) {
390
- const entry = allFileSymbols.get(f);
391
- if (entry)
392
- analysisFileSymbols.set(f, entry);
393
- }
394
- }
395
- else {
396
- analysisFileSymbols = allFileSymbols;
397
- }
398
- // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
399
- const native = loadNative();
400
- if (native?.NativeDatabase) {
401
- try {
402
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
403
- if (ctx.engineOpts)
404
- ctx.engineOpts.nativeDb = ctx.nativeDb;
405
- }
406
- catch {
407
- ctx.nativeDb = undefined;
408
- if (ctx.engineOpts)
409
- ctx.engineOpts.nativeDb = undefined;
410
- }
411
- }
412
- // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
413
- // Previously each feature called wal_checkpoint(TRUNCATE) individually
414
- // (~68ms each × 3-4 features). One FULL checkpoint suffices.
415
- if (ctx.nativeDb && ctx.engineOpts) {
416
- ctx.db.pragma('wal_checkpoint(FULL)');
417
- ctx.engineOpts.suspendJsDb = () => { };
418
- ctx.engineOpts.resumeJsDb = () => { };
419
- }
420
- try {
421
- const { runAnalyses: runAnalysesFn } = (await import('../../../ast-analysis/engine.js'));
422
- const result = await runAnalysesFn(ctx.db, analysisFileSymbols, ctx.rootDir, ctx.opts, ctx.engineOpts);
423
- timing.astMs = result.astMs ?? 0;
424
- timing.complexityMs = result.complexityMs ?? 0;
425
- timing.cfgMs = result.cfgMs ?? 0;
426
- timing.dataflowMs = result.dataflowMs ?? 0;
427
- }
428
- catch (err) {
429
- warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
430
- }
431
- // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
432
- // WAL writes so JS and external readers can see them. Runs once after
433
- // all analysis features complete (not per-feature).
434
- if (ctx.nativeDb) {
435
- try {
436
- ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
437
- }
438
- catch {
439
- /* ignore checkpoint errors */
440
- }
441
- try {
442
- ctx.nativeDb.close();
443
- }
444
- catch {
445
- /* ignore close errors */
446
- }
447
- ctx.nativeDb = undefined;
448
- if (ctx.engineOpts) {
449
- ctx.engineOpts.nativeDb = undefined;
450
- ctx.engineOpts.suspendJsDb = undefined;
451
- ctx.engineOpts.resumeJsDb = undefined;
452
- }
453
- }
454
- return timing;
455
- }
456
- /** Format timing result from native orchestrator phases + JS post-processing. */
457
- function formatNativeTimingResult(p, structurePatchMs, analysisTiming) {
458
- return {
459
- phases: {
460
- setupMs: +(p.setupMs ?? 0).toFixed(1),
461
- collectMs: +(p.collectMs ?? 0).toFixed(1),
462
- detectMs: +(p.detectMs ?? 0).toFixed(1),
463
- parseMs: +(p.parseMs ?? 0).toFixed(1),
464
- insertMs: +(p.insertMs ?? 0).toFixed(1),
465
- resolveMs: +(p.resolveMs ?? 0).toFixed(1),
466
- edgesMs: +(p.edgesMs ?? 0).toFixed(1),
467
- structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1),
468
- rolesMs: +(p.rolesMs ?? 0).toFixed(1),
469
- astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
470
- complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
471
- cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
472
- dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1),
473
- finalizeMs: +(p.finalizeMs ?? 0).toFixed(1),
474
- },
475
- };
476
- }
477
- /** Try the native build orchestrator. Returns a BuildResult on success, undefined to fall through to JS pipeline. */
478
- async function tryNativeOrchestrator(ctx) {
479
- const skipReason = shouldSkipNativeOrchestrator(ctx);
480
- if (skipReason) {
481
- debug(`Skipping native orchestrator: ${skipReason}`);
482
- return undefined;
483
- }
484
- // Open NativeDatabase on demand — deferred from setupPipeline to skip the
485
- // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
486
- // first to avoid dual-connection WAL corruption.
487
- if (!ctx.nativeDb && ctx.nativeAvailable) {
488
- const native = loadNative();
489
- if (native?.NativeDatabase) {
490
- try {
491
- // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
492
- // Uses raw close() instead of closeDb() intentionally — the advisory lock
493
- // is kept and transferred to the NativeDbProxy below, not released here.
494
- ctx.db.close();
495
- acquireAdvisoryLock(ctx.dbPath);
496
- ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
497
- ctx.nativeDb.initSchema();
498
- // Replace ctx.db with a NativeDbProxy so post-native JS fallback
499
- // (structure, analysis) can use it without reopening better-sqlite3.
500
- const proxy = new NativeDbProxy(ctx.nativeDb);
501
- proxy.__lockPath = `${ctx.dbPath}.lock`;
502
- ctx.db = proxy;
503
- ctx.nativeFirstProxy = true;
504
- }
505
- catch (err) {
506
- warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
507
- try {
508
- ctx.nativeDb?.close();
509
- }
510
- catch (e) {
511
- debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
512
- }
513
- ctx.nativeDb = undefined;
514
- ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
515
- releaseAdvisoryLock(`${ctx.dbPath}.lock`);
516
- // Reopen better-sqlite3 for JS pipeline fallback
517
- ctx.db = openDb(ctx.dbPath);
518
- }
519
- }
520
- }
521
- if (!ctx.nativeDb?.buildGraph)
522
- return undefined;
523
- const resultJson = ctx.nativeDb.buildGraph(ctx.rootDir, JSON.stringify(ctx.config), JSON.stringify(ctx.aliases), JSON.stringify(ctx.opts));
524
- const result = JSON.parse(resultJson);
525
- if (result.earlyExit) {
526
- info('No changes detected');
527
- // Even on no-op rebuilds, dropped-language files added since the last
528
- // full build are still missing from `nodes`/`file_hashes` (#1083), and
529
- // WASM-only files deleted from disk leave stale rows behind (#1073).
530
- // The orchestrator's file_collector skipped them, so its earlyExit
531
- // doesn't imply DB consistency. Run the gap repair before returning.
532
- const gap = detectDroppedLanguageGap(ctx);
533
- if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
534
- await backfillNativeDroppedFiles(ctx, gap);
535
- }
536
- closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
537
- return 'early-exit';
538
- }
539
- // Log incremental status to match JS pipeline output
540
- const changed = result.changedCount ?? 0;
541
- const removed = result.removedCount ?? 0;
542
- if (!result.isFullBuild && (changed > 0 || removed > 0)) {
543
- info(`Incremental: ${changed} changed, ${removed} removed`);
544
- }
545
- const p = result.phases;
546
- // Sync build_meta so JS-side version/engine checks work on next build.
547
- // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
548
- // platform package.json version (ctx.engineVersion). The Rust side's
549
- // check_version_mismatch compares against CARGO_PKG_VERSION; writing
550
- // the package.json value would create a permanent mismatch whenever
551
- // the binary and platform package.json diverge — e.g., CI hot-swap
552
- // via ci-install-native.mjs (#1066) — forcing every subsequent build
553
- // to be a full rebuild.
554
- //
555
- // When the native addon doesn't expose engineVersion() (older addon),
556
- // fall back to CODEGRAPH_VERSION — same fallback used by both
557
- // checkEngineSchemaMismatch (read path) and persistBuildMetadata
558
- // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
559
- // here would re-introduce the asymmetry this PR fixes for that case.
560
- const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
561
- setBuildMeta(ctx.db, {
562
- engine: ctx.engineName,
563
- engine_version: nativeVersionForMeta,
564
- codegraph_version: nativeVersionForMeta,
565
- schema_version: String(ctx.schemaVersion),
566
- built_at: new Date().toISOString(),
567
- });
568
- info(`Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`);
569
- // ── Post-native structure + analysis ──────────────────────────────
570
- let analysisTiming = {
571
- astMs: +(p.astMs ?? 0),
572
- complexityMs: +(p.complexityMs ?? 0),
573
- cfgMs: +(p.cfgMs ?? 0),
574
- dataflowMs: +(p.dataflowMs ?? 0),
575
- };
576
- let structurePatchMs = 0;
577
- // Skip JS structure when the Rust pipeline's small-incremental fast path
578
- // already handled it. For full builds and large incrementals where Rust
579
- // skipped structure, we must run the JS fallback.
580
- const needsStructure = !result.structureHandled;
581
- // When the Rust addon doesn't include analysis persistence (older addon
582
- // version or analysis failed), fall back to JS-side analysis.
583
- const needsAnalysisFallback = !result.analysisComplete &&
584
- (ctx.opts.ast !== false ||
585
- ctx.opts.complexity !== false ||
586
- ctx.opts.cfg !== false ||
587
- ctx.opts.dataflow !== false);
588
- if (needsStructure || needsAnalysisFallback) {
589
- // When analysis fallback is needed, handoff to better-sqlite3 — the
590
- // analysis engine uses the suspend/resume WAL pattern that requires a
591
- // real better-sqlite3 connection, not the NativeDbProxy.
592
- if (needsAnalysisFallback && ctx.nativeFirstProxy) {
593
- closeNativeDb(ctx, 'pre-analysis-fallback');
594
- ctx.db = openDb(ctx.dbPath);
595
- ctx.nativeFirstProxy = false;
596
- }
597
- else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
598
- // DB reopen failed — return partial result
599
- return formatNativeTimingResult(p, 0, analysisTiming);
600
- }
601
- const fileSymbols = reconstructFileSymbolsFromDb(ctx);
602
- if (needsStructure) {
603
- structurePatchMs = await runPostNativeStructure(ctx, fileSymbols, !!result.isFullBuild, result.changedFiles);
604
- }
605
- if (needsAnalysisFallback) {
606
- analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
607
- }
608
- }
609
- // Engine parity: the native orchestrator silently drops files whose
610
- // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
611
- // stale native binaries). WASM handles those — backfill via WASM so both
612
- // engines process the same file set (#967).
613
- //
614
- // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
615
- // both gating and the backfill itself. On dirty incrementals/full builds
616
- // the orchestrator signals trigger backfill, so the walk happens once
617
- // (instead of redundantly inside backfill). On quiet incrementals we
618
- // still pay the walk so we can detect brand-new files in dropped-language
619
- // extensions — a gap that the orchestrator's `detect_removed_files`
620
- // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
621
- // because the expensive part (WASM re-parse of the missing set) is
622
- // gated below.
623
- const removedCount = result.removedCount ?? 0;
624
- const changedCount = result.changedCount ?? 0;
625
- const gap = detectDroppedLanguageGap(ctx);
626
- if (result.isFullBuild ||
627
- removedCount > 0 ||
628
- changedCount > 0 ||
629
- gap.missingAbs.length > 0 ||
630
- gap.staleRel.length > 0) {
631
- await backfillNativeDroppedFiles(ctx, gap);
632
- }
633
- closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
634
- return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
635
- }
636
/**
 * Compute the WASM-only files present in the DB but missing from disk (#1073).
 *
 * A stored path is reported as stale when it:
 *   - appears in `existingNodes` or `existingHashes` (in DB),
 *   - is absent from `expected` (not on disk),
 *   - has an extension installed for WASM, AND
 *   - has an extension NOT covered by `nativeSupported` — the native side
 *     handles deletion for natively-supported extensions itself, so the
 *     caller must not double-purge.
 *
 * Extensions are lowercased before lookup so `.R` and `.r` hit the same
 * registry entry.
 *
 * DB paths are forced to forward slashes before comparison with `expected`
 * (which is always normalised). A stale row written by older code on Windows
 * could carry back-slashes — normalising here prevents false-positive purges
 * of live rows. `\` is replaced explicitly so the defence also works when
 * running on POSIX against a DB that was migrated from Windows.
 *
 * The returned values are the ORIGINAL stored strings, because the eventual
 * `DELETE ... WHERE file = ?` predicate has to be byte-identical to the row.
 * Deduplication is therefore keyed on the raw string too: if the same file
 * is stored once as `a\b.ext` and once as `a/b.ext`, BOTH spellings are
 * returned so neither row survives the purge. (Keying the dedup on the
 * normalised form would return only the first spelling and leave the other
 * row behind.)
 *
 * Exported for unit testing.
 *
 * @param {object} input
 * @param {Iterable<string>} input.existingNodes  File paths stored in `nodes`.
 * @param {Iterable<string>} input.existingHashes File paths stored in `file_hashes`.
 * @param {Set<string>} input.expected        Normalised (forward-slash) relative paths currently on disk.
 * @param {Set<string>} input.installedExts   Lowercased extensions with an installed WASM grammar.
 * @param {Set<string>} input.nativeSupported Lowercased extensions handled by the native engine.
 * @returns {string[]} Raw stored paths to purge, in first-seen order
 *   (`existingNodes` first, then `existingHashes`), without exact duplicates.
 */
export function computeWasmOnlyStaleFiles(input) {
    const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
    const stale = [];
    // Keyed on the raw stored string: the same path showing up in both
    // tables collapses to one entry, while differently-spelled rows for the
    // same file are each kept so the SQL purge can match every one of them.
    const seenRaw = new Set();
    const consider = (rawRel) => {
        if (seenRaw.has(rawRel))
            return;
        // Normalise only for the on-disk comparison and extension lookup.
        const rel = rawRel.replace(/\\/g, '/');
        if (expected.has(rel))
            return;
        const ext = path.extname(rel).toLowerCase();
        if (nativeSupported.has(ext))
            return;
        if (!installedExts.has(ext))
            return;
        seenRaw.add(rawRel);
        stale.push(rawRel);
    };
    for (const rel of existingNodes)
        consider(rel);
    for (const rel of existingHashes)
        consider(rel);
    return stale;
}
690
/**
 * Bucket relative paths by lowercased file extension.
 *
 * The result is a `Map` from extension (including the leading dot, or `''`
 * when the path has none) to the list of paths carrying it, in input order.
 * This is the bucket shape `formatDropExtensionSummary` is handed at the
 * call site, so a per-extension summary can be rendered without a
 * classification pass when the drop reason is already known.
 *
 * @param {Iterable<string>} relPaths Relative file paths.
 * @returns {Map<string, string[]>} Paths grouped by lowercased extension.
 */
function groupByExtension(relPaths) {
    const byExt = new Map();
    for (const relPath of relPaths) {
        const key = path.extname(relPath).toLowerCase();
        const bucket = byExt.get(key);
        if (bucket === undefined) {
            // First path seen for this extension: start its bucket.
            byExt.set(key, [relPath]);
        }
        else {
            bucket.push(relPath);
        }
    }
    return byExt;
}
710
/**
 * Detect files the native orchestrator silently dropped, plus WASM-only
 * files that were deleted from disk but still have DB rows (#1073).
 *
 * The on-disk file set (via `collectFilesUtil`) is compared against the
 * distinct `file` values in `nodes` (kind = 'file') and in `file_hashes`.
 * A collected file counts as "missing" when it is absent from EITHER table
 * and its lowercased extension has an installed WASM grammar. Files whose
 * extension has no installed grammar are ignored, which keeps the counts
 * reported by `backfillNativeDroppedFiles` meaningful.
 *
 * If reading `file_hashes` throws (e.g. the table does not exist on a legacy
 * DB), the failure is logged at debug level and the table is treated as
 * empty, so every eligible collected file is reported as missing.
 *
 * Only `.prepare(sql).all()` is used on `ctx.db`, and nothing is parsed or
 * written here.
 *
 * @param {object} ctx Build context; reads `rootDir`, `config` and `db`.
 * @returns {{missingRel: string[], missingAbs: string[], staleRel: string[]}}
 *   `missingRel`/`missingAbs` are index-aligned relative/absolute paths of
 *   files to backfill; `staleRel` holds the stored paths to purge.
 */
function detectDroppedLanguageGap(ctx) {
    // Run a single-column query and collect its `file` values into a Set.
    const readDistinctFiles = (sql) => new Set(ctx.db.prepare(sql).all().map((row) => row.file));
    const onDisk = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set());
    const expected = new Set();
    for (const absFile of onDisk.files) {
        expected.add(normalizePath(path.relative(ctx.rootDir, absFile)));
    }
    const existingNodes = readDistinctFiles("SELECT DISTINCT file FROM nodes WHERE kind = 'file'");
    let existingHashes;
    try {
        existingHashes = readDistinctFiles('SELECT DISTINCT file FROM file_hashes');
    }
    catch (e) {
        // file_hashes table may not exist on legacy DBs; treat as fully missing
        // so the backfill writes rows on its upsert path.
        debug(`detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`);
        existingHashes = new Set();
    }
    const installedExts = getInstalledWasmExtensions();
    const missingRel = [];
    const missingAbs = [];
    for (const rel of expected) {
        const fullyIndexed = existingNodes.has(rel) && existingHashes.has(rel);
        if (fullyIndexed || !installedExts.has(path.extname(rel).toLowerCase())) {
            continue;
        }
        missingRel.push(rel);
        missingAbs.push(path.join(ctx.rootDir, rel));
    }
    const staleRel = computeWasmOnlyStaleFiles({
        existingNodes,
        existingHashes,
        expected,
        installedExts,
        nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
    });
    return { missingRel, missingAbs, staleRel };
}
777
/**
 * Backfill files that the native orchestrator silently dropped during parse.
 * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
 *
 * Also purges stale rows for WASM-only files deleted from disk (#1073).
 *
 * Steps, in order:
 *   1. Return immediately when the gap is empty.
 *   2. If `ctx.nativeFirstProxy` is set, close the native DB and reopen
 *      `ctx.dbPath` via `openDb` (the write path below needs that connection).
 *   3. Purge `gap.staleRel` rows via `purgeFilesData`.
 *   4. Parse `gap.missingAbs` with WASM, insert file/definition/export rows,
 *      flag exported symbols, and upsert `file_hashes` rows.
 *   5. Release the WASM parse trees — in a `finally`, so they are freed even
 *      when a DB write in step 4 throws.
 *
 * @param {object} ctx Build context; reads `nativeFirstProxy`, `dbPath`,
 *   `db`, `rootDir` and may replace `ctx.db` / clear `ctx.nativeFirstProxy`.
 * @param {{missingRel: string[], missingAbs: string[], staleRel: string[]}} gap
 *   Pre-computed result of `detectDroppedLanguageGap`, so the caller can use
 *   one scan for both gating and the backfill itself. `missingRel` and
 *   `missingAbs` must be index-aligned.
 * @returns {Promise<void>}
 */
async function backfillNativeDroppedFiles(ctx, gap) {
    const { missingRel, missingAbs, staleRel } = gap;
    if (missingAbs.length === 0 && staleRel.length === 0)
        return;
    // Now that we know there's work to do, hand off to better-sqlite3 (needed
    // for the INSERT path below).
    if (ctx.nativeFirstProxy) {
        closeNativeDb(ctx, 'pre-parity-backfill');
        ctx.db = openDb(ctx.dbPath);
        ctx.nativeFirstProxy = false;
    }
    const dbConn = ctx.db;
    // Purge WASM-only files that were deleted from disk (#1073). The insert
    // path below never visits them, so without this their rows would persist
    // across rebuilds.
    if (staleRel.length > 0) {
        // Every stale path is already known to be outside the natively
        // supported extensions, so build the per-extension summary directly
        // instead of running a classification pass.
        const staleByExt = groupByExtension(staleRel);
        info(`Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`);
        purgeFilesData(dbConn, staleRel);
    }
    if (missingAbs.length === 0)
        return;
    // Classify drops so users see per-extension reasons instead of just a count
    // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
    // extractor); `native-extractor-failure` indicates a real native bug since
    // the language IS supported by the addon yet the file was dropped anyway.
    const { byReason, totals } = classifyNativeDrops(missingRel);
    if (totals['unsupported-by-native'] > 0) {
        info(`Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`);
    }
    if (totals['native-extractor-failure'] > 0) {
        warn(`Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`);
    }
    const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
    try {
        const rows = [];
        const exportKeys = [];
        for (const [relPath, symbols] of wasmResults) {
            // File row: qualified_name is null.
            rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
            for (const def of symbols.definitions ?? []) {
                // Derive scope from the dotted name so downstream queries
                // (cross-file references, "go to definition") find these symbols.
                const dotIdx = def.name.lastIndexOf('.');
                const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
                rows.push([
                    def.name,
                    def.kind,
                    relPath,
                    def.line,
                    def.endLine ?? null,
                    null,
                    def.name,
                    scope,
                    def.visibility ?? null,
                ]);
            }
            // Exports: insert the row and queue a key for the second-pass
            // exported=1 update, so queries filtering on exported=1 find
            // backfilled symbols (#970).
            for (const exp of symbols.exports ?? []) {
                rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
                exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
            }
        }
        batchInsertNodes(dbConn, rows);
        // Mark exported symbols in batches. Statements are cached per chunk
        // size, so at most two are prepared (full chunks + the final partial).
        if (exportKeys.length > 0) {
            const EXPORT_CHUNK = 500;
            const exportStmtCache = new Map();
            for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
                const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
                const chunkSize = end - i;
                let updateStmt = exportStmtCache.get(chunkSize);
                if (!updateStmt) {
                    const conditions = Array.from({ length: chunkSize }, () => '(name = ? AND kind = ? AND file = ? AND line = ?)').join(' OR ');
                    updateStmt = dbConn.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
                    exportStmtCache.set(chunkSize, updateStmt);
                }
                const vals = [];
                for (let j = i; j < end; j++) {
                    const k = exportKeys[j];
                    vals.push(k[0], k[1], k[2], k[3]);
                }
                updateStmt.run(...vals);
            }
        }
        // Persist file_hashes rows for every backfilled file, so files the
        // native side never hashed are not permanently missing from
        // `file_hashes` (#1054, #1068).
        //
        // Iterates `missingRel` (every collected file that was dropped), not
        // `wasmResults`, so files that produced zero symbols still get a row.
        try {
            const upsertHash = dbConn.prepare('INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)');
            const writeHashes = dbConn.transaction(() => {
                for (let i = 0; i < missingRel.length; i++) {
                    const relPath = missingRel[i];
                    const absPath = missingAbs[i];
                    if (!relPath || !absPath)
                        continue;
                    let code;
                    try {
                        code = readFileSafe(absPath);
                    }
                    catch (e) {
                        debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
                        continue;
                    }
                    if (code === null)
                        continue;
                    const stat = fileStat(absPath);
                    const mtime = stat ? stat.mtime : 0;
                    const size = stat ? stat.size : 0;
                    upsertHash.run(relPath, fileHash(code), mtime, size);
                }
            });
            writeHashes();
        }
        catch (e) {
            // Best-effort: a missing file_hashes table must not fail the build.
            debug(`backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`);
        }
    }
    finally {
        // Free WASM parse trees from the backfill parse (#1058). Results may
        // carry `symbols._tree`; these symbols are consumed locally and never
        // handed to a later stage, so nothing else would release them. Doing
        // this in `finally` keeps a failed insert/update above from leaking
        // the trees for the rest of the process lifetime.
        for (const [, symbols] of wasmResults) {
            const tree = symbols._tree;
            if (tree && typeof tree.delete === 'function') {
                try {
                    tree.delete();
                }
                catch {
                    /* ignore cleanup errors */
                }
            }
            symbols._tree = undefined;
            symbols._langId = undefined;
        }
    }
}
185
+ // Native db lifecycle and orchestrator helpers live in dedicated stage
186
+ // modules — see `./stages/native-db-lifecycle.ts` and `./stages/native-orchestrator.ts`.
943
187
  // ── Pipeline stages execution ───────────────────────────────────────────
944
188
  async function runPipelineStages(ctx) {
945
189
  // ── WASM / fallback dual-connection mode ─────────────────────────────