@optave/codegraph 3.10.0 → 3.11.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (312)
  1. package/README.md +40 -33
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/index.js +77 -0
  7. package/dist/ast-analysis/rules/index.js.map +1 -1
  8. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  9. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitor-utils.js +83 -49
  11. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  12. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  14. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  15. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  16. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  17. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  18. package/dist/cli/commands/audit.js +1 -1
  19. package/dist/cli/commands/audit.js.map +1 -1
  20. package/dist/cli/commands/build.d.ts.map +1 -1
  21. package/dist/cli/commands/build.js +2 -0
  22. package/dist/cli/commands/build.js.map +1 -1
  23. package/dist/cli/commands/check.js +1 -1
  24. package/dist/cli/commands/check.js.map +1 -1
  25. package/dist/cli/commands/children.js +1 -1
  26. package/dist/cli/commands/children.js.map +1 -1
  27. package/dist/cli/commands/diff-impact.js +1 -1
  28. package/dist/cli/commands/diff-impact.js.map +1 -1
  29. package/dist/cli/commands/embed.d.ts.map +1 -1
  30. package/dist/cli/commands/embed.js +49 -4
  31. package/dist/cli/commands/embed.js.map +1 -1
  32. package/dist/cli/commands/roles.js +1 -1
  33. package/dist/cli/commands/roles.js.map +1 -1
  34. package/dist/cli/commands/structure.js +1 -1
  35. package/dist/cli/commands/structure.js.map +1 -1
  36. package/dist/cli/shared/options.js +1 -1
  37. package/dist/cli/shared/options.js.map +1 -1
  38. package/dist/db/connection.d.ts.map +1 -1
  39. package/dist/db/connection.js +8 -0
  40. package/dist/db/connection.js.map +1 -1
  41. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  42. package/dist/domain/analysis/dependencies.js +106 -80
  43. package/dist/domain/analysis/dependencies.js.map +1 -1
  44. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  45. package/dist/domain/analysis/fn-impact.js +77 -52
  46. package/dist/domain/analysis/fn-impact.js.map +1 -1
  47. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  48. package/dist/domain/analysis/module-map.js +132 -121
  49. package/dist/domain/analysis/module-map.js.map +1 -1
  50. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  51. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  52. package/dist/domain/graph/builder/helpers.js +47 -33
  53. package/dist/domain/graph/builder/helpers.js.map +1 -1
  54. package/dist/domain/graph/builder/incremental.d.ts +6 -6
  55. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  56. package/dist/domain/graph/builder/incremental.js +148 -99
  57. package/dist/domain/graph/builder/incremental.js.map +1 -1
  58. package/dist/domain/graph/builder/pipeline.d.ts +1 -0
  59. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  60. package/dist/domain/graph/builder/pipeline.js +23 -637
  61. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  62. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  63. package/dist/domain/graph/builder/stages/build-edges.js +141 -98
  64. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  65. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  66. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  67. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  68. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  69. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  70. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  71. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  72. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  73. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  74. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  75. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  76. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  77. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  78. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  79. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  80. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  81. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  82. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  83. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  84. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  85. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  86. package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
  87. package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
  88. package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
  89. package/dist/domain/graph/cycles.d.ts +6 -4
  90. package/dist/domain/graph/cycles.d.ts.map +1 -1
  91. package/dist/domain/graph/cycles.js +50 -55
  92. package/dist/domain/graph/cycles.js.map +1 -1
  93. package/dist/domain/graph/journal.d.ts.map +1 -1
  94. package/dist/domain/graph/journal.js +89 -70
  95. package/dist/domain/graph/journal.js.map +1 -1
  96. package/dist/domain/graph/watcher.d.ts.map +1 -1
  97. package/dist/domain/graph/watcher.js +28 -20
  98. package/dist/domain/graph/watcher.js.map +1 -1
  99. package/dist/domain/parser.d.ts +12 -23
  100. package/dist/domain/parser.d.ts.map +1 -1
  101. package/dist/domain/parser.js +153 -80
  102. package/dist/domain/parser.js.map +1 -1
  103. package/dist/domain/search/generator.d.ts +3 -1
  104. package/dist/domain/search/generator.d.ts.map +1 -1
  105. package/dist/domain/search/generator.js +68 -45
  106. package/dist/domain/search/generator.js.map +1 -1
  107. package/dist/domain/search/models.d.ts +18 -0
  108. package/dist/domain/search/models.d.ts.map +1 -1
  109. package/dist/domain/search/models.js +72 -4
  110. package/dist/domain/search/models.js.map +1 -1
  111. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  112. package/dist/domain/search/search/hybrid.js +49 -40
  113. package/dist/domain/search/search/hybrid.js.map +1 -1
  114. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  115. package/dist/domain/search/search/semantic.js +69 -49
  116. package/dist/domain/search/search/semantic.js.map +1 -1
  117. package/dist/domain/wasm-worker-entry.js +209 -137
  118. package/dist/domain/wasm-worker-entry.js.map +1 -1
  119. package/dist/extractors/c.js +25 -6
  120. package/dist/extractors/c.js.map +1 -1
  121. package/dist/extractors/cpp.js +47 -6
  122. package/dist/extractors/cpp.js.map +1 -1
  123. package/dist/extractors/cuda.js +90 -14
  124. package/dist/extractors/cuda.js.map +1 -1
  125. package/dist/extractors/elixir.js +108 -4
  126. package/dist/extractors/elixir.js.map +1 -1
  127. package/dist/extractors/erlang.js +56 -20
  128. package/dist/extractors/erlang.js.map +1 -1
  129. package/dist/extractors/fsharp.d.ts +7 -0
  130. package/dist/extractors/fsharp.d.ts.map +1 -1
  131. package/dist/extractors/fsharp.js +94 -0
  132. package/dist/extractors/fsharp.js.map +1 -1
  133. package/dist/extractors/gleam.d.ts.map +1 -1
  134. package/dist/extractors/gleam.js +29 -33
  135. package/dist/extractors/gleam.js.map +1 -1
  136. package/dist/extractors/groovy.js +41 -1
  137. package/dist/extractors/groovy.js.map +1 -1
  138. package/dist/extractors/haskell.js +48 -4
  139. package/dist/extractors/haskell.js.map +1 -1
  140. package/dist/extractors/helpers.d.ts +79 -1
  141. package/dist/extractors/helpers.d.ts.map +1 -1
  142. package/dist/extractors/helpers.js +137 -0
  143. package/dist/extractors/helpers.js.map +1 -1
  144. package/dist/extractors/java.d.ts.map +1 -1
  145. package/dist/extractors/java.js +37 -49
  146. package/dist/extractors/java.js.map +1 -1
  147. package/dist/extractors/javascript.d.ts.map +1 -1
  148. package/dist/extractors/javascript.js +44 -44
  149. package/dist/extractors/javascript.js.map +1 -1
  150. package/dist/extractors/julia.js +198 -74
  151. package/dist/extractors/julia.js.map +1 -1
  152. package/dist/extractors/kotlin.js +4 -0
  153. package/dist/extractors/kotlin.js.map +1 -1
  154. package/dist/extractors/objc.js +184 -47
  155. package/dist/extractors/objc.js.map +1 -1
  156. package/dist/extractors/python.js +7 -4
  157. package/dist/extractors/python.js.map +1 -1
  158. package/dist/extractors/r.d.ts.map +1 -1
  159. package/dist/extractors/r.js +103 -87
  160. package/dist/extractors/r.js.map +1 -1
  161. package/dist/extractors/scala.d.ts.map +1 -1
  162. package/dist/extractors/scala.js +18 -32
  163. package/dist/extractors/scala.js.map +1 -1
  164. package/dist/extractors/solidity.d.ts.map +1 -1
  165. package/dist/extractors/solidity.js +55 -69
  166. package/dist/extractors/solidity.js.map +1 -1
  167. package/dist/extractors/verilog.js +80 -15
  168. package/dist/extractors/verilog.js.map +1 -1
  169. package/dist/features/boundaries.d.ts.map +1 -1
  170. package/dist/features/boundaries.js +49 -39
  171. package/dist/features/boundaries.js.map +1 -1
  172. package/dist/features/cfg.d.ts.map +1 -1
  173. package/dist/features/cfg.js +90 -63
  174. package/dist/features/cfg.js.map +1 -1
  175. package/dist/features/check.d.ts.map +1 -1
  176. package/dist/features/check.js +43 -34
  177. package/dist/features/check.js.map +1 -1
  178. package/dist/features/cochange.d.ts.map +1 -1
  179. package/dist/features/cochange.js +68 -56
  180. package/dist/features/cochange.js.map +1 -1
  181. package/dist/features/complexity.d.ts.map +1 -1
  182. package/dist/features/complexity.js +105 -75
  183. package/dist/features/complexity.js.map +1 -1
  184. package/dist/features/dataflow.d.ts.map +1 -1
  185. package/dist/features/dataflow.js +37 -29
  186. package/dist/features/dataflow.js.map +1 -1
  187. package/dist/features/flow.d.ts.map +1 -1
  188. package/dist/features/flow.js +31 -22
  189. package/dist/features/flow.js.map +1 -1
  190. package/dist/features/graph-enrichment.d.ts.map +1 -1
  191. package/dist/features/graph-enrichment.js +77 -70
  192. package/dist/features/graph-enrichment.js.map +1 -1
  193. package/dist/features/owners.d.ts +17 -26
  194. package/dist/features/owners.d.ts.map +1 -1
  195. package/dist/features/owners.js +120 -109
  196. package/dist/features/owners.js.map +1 -1
  197. package/dist/features/sequence.d.ts.map +1 -1
  198. package/dist/features/sequence.js +59 -54
  199. package/dist/features/sequence.js.map +1 -1
  200. package/dist/features/structure-query.d.ts.map +1 -1
  201. package/dist/features/structure-query.js +60 -60
  202. package/dist/features/structure-query.js.map +1 -1
  203. package/dist/features/structure.js +28 -36
  204. package/dist/features/structure.js.map +1 -1
  205. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  206. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  207. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  208. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  209. package/dist/graph/classifiers/roles.js +63 -59
  210. package/dist/graph/classifiers/roles.js.map +1 -1
  211. package/dist/infrastructure/config.d.ts +1 -1
  212. package/dist/infrastructure/config.d.ts.map +1 -1
  213. package/dist/infrastructure/config.js +1 -1
  214. package/dist/infrastructure/config.js.map +1 -1
  215. package/dist/mcp/tool-registry.d.ts.map +1 -1
  216. package/dist/mcp/tool-registry.js +4 -0
  217. package/dist/mcp/tool-registry.js.map +1 -1
  218. package/dist/mcp/tools/semantic-search.d.ts +1 -0
  219. package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
  220. package/dist/mcp/tools/semantic-search.js +1 -0
  221. package/dist/mcp/tools/semantic-search.js.map +1 -1
  222. package/dist/presentation/cfg.d.ts.map +1 -1
  223. package/dist/presentation/cfg.js +44 -29
  224. package/dist/presentation/cfg.js.map +1 -1
  225. package/dist/presentation/flow.d.ts.map +1 -1
  226. package/dist/presentation/flow.js +58 -38
  227. package/dist/presentation/flow.js.map +1 -1
  228. package/dist/types.d.ts +16 -2
  229. package/dist/types.d.ts.map +1 -1
  230. package/grammars/tree-sitter-erlang.wasm +0 -0
  231. package/grammars/tree-sitter-fsharp.wasm +0 -0
  232. package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
  233. package/grammars/tree-sitter-gleam.wasm +0 -0
  234. package/package.json +10 -10
  235. package/src/ast-analysis/engine.ts +145 -61
  236. package/src/ast-analysis/rules/index.ts +87 -0
  237. package/src/ast-analysis/visitor-utils.ts +86 -46
  238. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  239. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  240. package/src/cli/commands/audit.ts +1 -1
  241. package/src/cli/commands/build.ts +2 -0
  242. package/src/cli/commands/check.ts +1 -1
  243. package/src/cli/commands/children.ts +1 -1
  244. package/src/cli/commands/diff-impact.ts +1 -1
  245. package/src/cli/commands/embed.ts +54 -4
  246. package/src/cli/commands/roles.ts +1 -1
  247. package/src/cli/commands/structure.ts +1 -1
  248. package/src/cli/shared/options.ts +1 -1
  249. package/src/db/connection.ts +8 -0
  250. package/src/domain/analysis/dependencies.ts +166 -85
  251. package/src/domain/analysis/fn-impact.ts +120 -50
  252. package/src/domain/analysis/module-map.ts +175 -140
  253. package/src/domain/graph/builder/helpers.ts +85 -76
  254. package/src/domain/graph/builder/incremental.ts +223 -131
  255. package/src/domain/graph/builder/pipeline.ts +32 -785
  256. package/src/domain/graph/builder/stages/build-edges.ts +207 -142
  257. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  258. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  259. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  260. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  261. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  262. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  263. package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
  264. package/src/domain/graph/cycles.ts +51 -49
  265. package/src/domain/graph/journal.ts +84 -69
  266. package/src/domain/graph/watcher.ts +29 -25
  267. package/src/domain/parser.ts +170 -67
  268. package/src/domain/search/generator.ts +132 -74
  269. package/src/domain/search/models.ts +75 -4
  270. package/src/domain/search/search/hybrid.ts +53 -42
  271. package/src/domain/search/search/semantic.ts +105 -65
  272. package/src/domain/wasm-worker-entry.ts +243 -153
  273. package/src/extractors/c.ts +27 -8
  274. package/src/extractors/cpp.ts +50 -8
  275. package/src/extractors/cuda.ts +90 -16
  276. package/src/extractors/elixir.ts +103 -4
  277. package/src/extractors/erlang.ts +63 -20
  278. package/src/extractors/fsharp.ts +104 -0
  279. package/src/extractors/gleam.ts +40 -39
  280. package/src/extractors/groovy.ts +45 -1
  281. package/src/extractors/haskell.ts +45 -4
  282. package/src/extractors/helpers.ts +205 -1
  283. package/src/extractors/java.ts +42 -45
  284. package/src/extractors/javascript.ts +44 -43
  285. package/src/extractors/julia.ts +191 -77
  286. package/src/extractors/kotlin.ts +4 -0
  287. package/src/extractors/objc.ts +171 -47
  288. package/src/extractors/python.ts +5 -3
  289. package/src/extractors/r.ts +104 -82
  290. package/src/extractors/scala.ts +24 -36
  291. package/src/extractors/solidity.ts +59 -78
  292. package/src/extractors/verilog.ts +83 -15
  293. package/src/features/boundaries.ts +64 -46
  294. package/src/features/cfg.ts +145 -74
  295. package/src/features/check.ts +60 -43
  296. package/src/features/cochange.ts +95 -72
  297. package/src/features/complexity.ts +134 -79
  298. package/src/features/dataflow.ts +57 -34
  299. package/src/features/flow.ts +48 -24
  300. package/src/features/graph-enrichment.ts +105 -70
  301. package/src/features/owners.ts +186 -146
  302. package/src/features/sequence.ts +99 -69
  303. package/src/features/structure-query.ts +94 -79
  304. package/src/features/structure.ts +56 -56
  305. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  306. package/src/graph/classifiers/roles.ts +64 -54
  307. package/src/infrastructure/config.ts +1 -1
  308. package/src/mcp/tool-registry.ts +5 -0
  309. package/src/mcp/tools/semantic-search.ts +2 -0
  310. package/src/presentation/cfg.ts +48 -32
  311. package/src/presentation/flow.ts +100 -52
  312. package/src/types.ts +16 -1
@@ -322,12 +322,15 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
322
322
  * without _tree", which was the source of #1036 — a single file missing one
323
323
  * analysis triggered a full-build re-parse of every WASM-parseable file.
324
324
  */
325
- export async function ensureWasmTrees(
325
+ /**
326
+ * Select files from `fileSymbols` that still need analysis data and are
327
+ * parseable by an installed WASM grammar. Pure (no I/O) — safe to unit-test.
328
+ */
329
+ function collectBackfillPending(
326
330
  fileSymbols: Map<string, any>,
327
331
  rootDir: string,
328
332
  needsFn?: (relPath: string, symbols: any) => boolean,
329
- ): Promise<void> {
330
- // Collect files that still need analysis data and are parseable by WASM.
333
+ ): Array<{ relPath: string; absPath: string; symbols: any }> {
331
334
  const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
332
335
  for (const [relPath, symbols] of fileSymbols) {
333
336
  if (symbols._tree) continue; // legacy path — leave existing trees alone
@@ -335,6 +338,15 @@ export async function ensureWasmTrees(
335
338
  if (needsFn && !needsFn(relPath, symbols)) continue;
336
339
  pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
337
340
  }
341
+ return pending;
342
+ }
343
+
344
+ export async function ensureWasmTrees(
345
+ fileSymbols: Map<string, any>,
346
+ rootDir: string,
347
+ needsFn?: (relPath: string, symbols: any) => boolean,
348
+ ): Promise<void> {
349
+ const pending = collectBackfillPending(fileSymbols, rootDir, needsFn);
338
350
  if (pending.length === 0) return;
339
351
 
340
352
  const pool = getWasmWorkerPool();
@@ -352,30 +364,37 @@ export async function ensureWasmTrees(
352
364
  }
353
365
  }
354
366
 
355
- /**
356
- * Merge pre-computed analysis data from a worker result onto existing symbols.
357
- * Only fills gaps — never overwrites fields the caller already populated.
358
- * Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
359
- * per-definition complexity and cfg.
360
- */
361
- function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
367
+ /** Fill gap-only scalar metadata (`_langId`, `_lineCount`) from the worker output. */
368
+ function mergeScalarMetadata(symbols: any, worker: ExtractorOutput): void {
362
369
  if (!symbols._langId && worker._langId) symbols._langId = worker._langId;
363
370
  if (!symbols._lineCount && worker._lineCount) symbols._lineCount = worker._lineCount;
371
+ }
372
+
373
+ /** Fill gap-only analysis arrays (`astNodes`, `dataflow`) from the worker output. */
374
+ function mergeAnalysisArrays(symbols: any, worker: ExtractorOutput): void {
364
375
  if (!Array.isArray(symbols.astNodes) && Array.isArray(worker.astNodes)) {
365
376
  symbols.astNodes = worker.astNodes;
366
377
  }
367
378
  if (!symbols.dataflow && worker.dataflow) symbols.dataflow = worker.dataflow;
368
- if (worker.typeMap && worker.typeMap.size > 0) {
369
- if (!symbols.typeMap || !(symbols.typeMap instanceof Map)) {
370
- symbols.typeMap = new Map(worker.typeMap);
371
- } else {
372
- for (const [k, v] of worker.typeMap) {
373
- if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
374
- }
375
- }
379
+ }
380
+
381
+ /** Merge worker typeMap into existing symbols.typeMap with first-wins semantics. */
382
+ function mergeTypeMap(symbols: any, worker: ExtractorOutput): void {
383
+ if (!worker.typeMap || worker.typeMap.size === 0) return;
384
+ if (!symbols.typeMap || !(symbols.typeMap instanceof Map)) {
385
+ symbols.typeMap = new Map(worker.typeMap);
386
+ return;
387
+ }
388
+ for (const [k, v] of worker.typeMap) {
389
+ if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
376
390
  }
391
+ }
392
+
393
+ /** Patch existing definitions with worker complexity/cfg when absent. */
394
+ function mergeDefinitionAnalysis(symbols: any, worker: ExtractorOutput): void {
377
395
  const existingDefs: any[] = Array.isArray(symbols.definitions) ? symbols.definitions : [];
378
396
  const workerDefs: any[] = Array.isArray(worker.definitions) ? worker.definitions : [];
397
+ if (existingDefs.length === 0 || workerDefs.length === 0) return;
379
398
  // Index existing defs by (kind, name, line) — mirrors engine.ts matching key.
380
399
  const byKey = new Map<string, any>();
381
400
  for (const d of existingDefs) byKey.set(`${d.kind}|${d.name}|${d.line}`, d);
@@ -389,6 +408,19 @@ function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
389
408
  }
390
409
  }
391
410
 
411
+ /**
412
+ * Merge pre-computed analysis data from a worker result onto existing symbols.
413
+ * Only fills gaps — never overwrites fields the caller already populated.
414
+ * Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
415
+ * per-definition complexity and cfg.
416
+ */
417
+ function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
418
+ mergeScalarMetadata(symbols, worker);
419
+ mergeAnalysisArrays(symbols, worker);
420
+ mergeTypeMap(symbols, worker);
421
+ mergeDefinitionAnalysis(symbols, worker);
422
+ }
423
+
392
424
  /**
393
425
  * Check whether the required WASM grammar files exist on disk.
394
426
  */
@@ -457,6 +489,8 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
457
489
  '.cc',
458
490
  '.cxx',
459
491
  '.hpp',
492
+ '.cu',
493
+ '.cuh',
460
494
  '.kt',
461
495
  '.kts',
462
496
  '.swift',
@@ -471,6 +505,23 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
471
505
  '.hs',
472
506
  '.ml',
473
507
  '.mli',
508
+ '.fs',
509
+ '.fsx',
510
+ '.fsi',
511
+ '.m',
512
+ '.gleam',
513
+ '.jl',
514
+ '.clj',
515
+ '.cljs',
516
+ '.cljc',
517
+ '.erl',
518
+ '.hrl',
519
+ '.groovy',
520
+ '.gvy',
521
+ '.r',
522
+ '.sol',
523
+ '.v',
524
+ '.sv',
474
525
  ]);
475
526
 
476
527
  /**
@@ -520,25 +571,36 @@ export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClass
520
571
  }
521
572
 
522
573
  /**
523
- * Render `{ ext → paths[] }` as `ext (n: sample.ext, ...)` slices for log lines.
524
- * Caps at 3 sample paths per extension and 6 extensions total to keep warnings
525
- * readable when many languages are dropped at once. Extensions are sorted by
526
- * descending file count so the loudest offender shows up first; ties keep
527
- * insertion order. Pure function safe to unit-test independently.
574
+ * Render `{ ext → paths[] }` as a multi-line tabular breakdown for log lines.
575
+ * Each extension occupies its own line so a long warning scans like a table
576
+ * instead of a wall of semicolon-separated slices. Caps at 3 sample paths per
577
+ * extension and 6 extensions total to keep output bounded when many languages
578
+ * are dropped at once. Extensions are sorted by descending file count so the
579
+ * loudest offender shows up first; ties keep insertion order.
580
+ *
581
+ * Returns the empty string for empty input, and otherwise a string that
582
+ * begins with `\n` so callers can append it directly after the header line
583
+ * (`"Backfilling via WASM:" + formatDropExtensionSummary(...)`).
584
+ *
585
+ * Pure function — safe to unit-test independently.
528
586
  */
529
587
  export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
530
588
  const MAX_EXTS = 6;
531
589
  const MAX_SAMPLES = 3;
532
590
  const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);
533
- const shown = entries.slice(0, MAX_EXTS).map(([ext, paths]) => {
591
+ if (entries.length === 0) return '';
592
+ const shown = entries.slice(0, MAX_EXTS);
593
+ const extWidth = Math.max(...shown.map(([ext]) => ext.length));
594
+ const countWidth = Math.max(...shown.map(([, paths]) => String(paths.length).length));
595
+ const lines = shown.map(([ext, paths]) => {
534
596
  const sample = paths.slice(0, MAX_SAMPLES).join(', ');
535
- const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
536
- return `${ext} (${paths.length}: ${sample}${more})`;
597
+ const more = paths.length > MAX_SAMPLES ? ` (+${paths.length - MAX_SAMPLES} more)` : '';
598
+ return ` ${ext.padEnd(extWidth)} ${String(paths.length).padStart(countWidth)} ${sample}${more}`;
537
599
  });
538
600
  if (entries.length > MAX_EXTS) {
539
- shown.push(`+${entries.length - MAX_EXTS} more extension(s)`);
601
+ lines.push(` (+${entries.length - MAX_EXTS} more extension(s))`);
540
602
  }
541
- return shown.join('; ');
603
+ return `\n${lines.join('\n')}`;
542
604
  }
543
605
 
544
606
  // ── Unified API ──────────────────────────────────────────────────────────────
@@ -573,24 +635,36 @@ function patchDefinitions(definitions: any[]): void {
573
635
  }
574
636
  }
575
637
 
638
+ /**
639
+ * Field renames applied to each import record to bridge older native binaries
640
+ * that emit snake_case names. Each `[camel, snake]` pair becomes:
641
+ * `if (imp[camel] === undefined) imp[camel] = imp[snake];`
642
+ * Defined as data so the loop body stays trivially linear in cognitive complexity.
643
+ */
644
+ const IMPORT_FIELD_RENAMES: ReadonlyArray<readonly [string, string]> = [
645
+ ['typeOnly', 'type_only'],
646
+ ['wildcardReexport', 'wildcard_reexport'],
647
+ ['pythonImport', 'python_import'],
648
+ ['goImport', 'go_import'],
649
+ ['rustUse', 'rust_use'],
650
+ ['javaImport', 'java_import'],
651
+ ['csharpUsing', 'csharp_using'],
652
+ ['rubyRequire', 'ruby_require'],
653
+ ['phpUse', 'php_use'],
654
+ ['cInclude', 'c_include'],
655
+ ['kotlinImport', 'kotlin_import'],
656
+ ['swiftImport', 'swift_import'],
657
+ ['scalaImport', 'scala_import'],
658
+ ['bashSource', 'bash_source'],
659
+ ['dynamicImport', 'dynamic_import'],
660
+ ];
661
+
576
662
  /** Patch import fields for backward compat with older native binaries. */
577
663
  function patchImports(imports: any[]): void {
578
664
  for (const i of imports) {
579
- if (i.typeOnly === undefined) i.typeOnly = i.type_only;
580
- if (i.wildcardReexport === undefined) i.wildcardReexport = i.wildcard_reexport;
581
- if (i.pythonImport === undefined) i.pythonImport = i.python_import;
582
- if (i.goImport === undefined) i.goImport = i.go_import;
583
- if (i.rustUse === undefined) i.rustUse = i.rust_use;
584
- if (i.javaImport === undefined) i.javaImport = i.java_import;
585
- if (i.csharpUsing === undefined) i.csharpUsing = i.csharp_using;
586
- if (i.rubyRequire === undefined) i.rubyRequire = i.ruby_require;
587
- if (i.phpUse === undefined) i.phpUse = i.php_use;
588
- if (i.cInclude === undefined) i.cInclude = i.c_include;
589
- if (i.kotlinImport === undefined) i.kotlinImport = i.kotlin_import;
590
- if (i.swiftImport === undefined) i.swiftImport = i.swift_import;
591
- if (i.scalaImport === undefined) i.scalaImport = i.scala_import;
592
- if (i.bashSource === undefined) i.bashSource = i.bash_source;
593
- if (i.dynamicImport === undefined) i.dynamicImport = i.dynamic_import;
665
+ for (const [camel, snake] of IMPORT_FIELD_RENAMES) {
666
+ if (i[camel] === undefined) i[camel] = i[snake];
667
+ }
594
668
  }
595
669
  }
596
670
 
@@ -812,11 +886,18 @@ export const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [
812
886
  },
813
887
  {
814
888
  id: 'fsharp',
815
- extensions: ['.fs', '.fsx', '.fsi'],
889
+ extensions: ['.fs', '.fsx'],
816
890
  grammarFile: 'tree-sitter-fsharp.wasm',
817
891
  extractor: extractFSharpSymbols,
818
892
  required: false,
819
893
  },
894
+ {
895
+ id: 'fsharp-signature',
896
+ extensions: ['.fsi'],
897
+ grammarFile: 'tree-sitter-fsharp_signature.wasm',
898
+ extractor: extractFSharpSymbols,
899
+ required: false,
900
+ },
820
901
  {
821
902
  id: 'gleam',
822
903
  extensions: ['.gleam'],
@@ -1133,18 +1214,16 @@ export async function parseFilesWasmForBackfill(
1133
1214
  }
1134
1215
 
1135
1216
  /**
1136
- * Parse multiple files in bulk and return a Map<relPath, symbols>.
1217
+ * Run the native engine over `filePaths` and ingest the results into `result`.
1218
+ * Returns the set of file paths the native engine successfully parsed and the
1219
+ * TS/TSX files that need a typeMap backfill pass.
1137
1220
  */
1138
- export async function parseFilesAuto(
1221
+ function ingestNativeResults(
1222
+ native: any,
1139
1223
  filePaths: string[],
1140
1224
  rootDir: string,
1141
- opts: ParseEngineOpts = {},
1142
- ): Promise<Map<string, ExtractorOutput>> {
1143
- const { native } = resolveEngine(opts);
1144
-
1145
- if (!native) return parseFilesWasm(filePaths, rootDir);
1146
-
1147
- const result = new Map<string, ExtractorOutput>();
1225
+ result: Map<string, ExtractorOutput>,
1226
+ ): { nativeParsed: Set<string>; needsTypeMap: { filePath: string; relPath: string }[] } {
1148
1227
  // Always extract all analysis data (dataflow + AST nodes) during native parse.
1149
1228
  // This eliminates the need for any downstream WASM re-parse or native standalone calls.
1150
1229
  const nativeResults = native.parseFilesFull
@@ -1167,27 +1246,51 @@ export async function parseFilesAuto(
1167
1246
  needsTypeMap.push({ filePath: r.file, relPath });
1168
1247
  }
1169
1248
  }
1170
- if (needsTypeMap.length > 0) {
1171
- await backfillTypeMapBatch(needsTypeMap, result);
1172
- }
1249
+ return { nativeParsed, needsTypeMap };
1250
+ }
1173
1251
 
1174
- // Engine parity: native may silently drop files whose extensions are in
1175
- // SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
1176
- // extractor/grammar is missing or fails. WASM handles these fall back so
1177
- // both engines process the same file set (#967). Restrict to installed WASM
1178
- // grammars so we don't warn about files that neither engine can parse.
1252
+ /**
1253
+ * Engine parity: native may silently drop files whose extensions are in
1254
+ * SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
1255
+ * extractor/grammar is missing or fails. WASM handles these fall back so
1256
+ * both engines process the same file set (#967). Restrict to installed WASM
1257
+ * grammars so we don't warn about files that neither engine can parse.
1258
+ */
1259
+ async function backfillNativeDrops(
1260
+ filePaths: string[],
1261
+ nativeParsed: Set<string>,
1262
+ rootDir: string,
1263
+ result: Map<string, ExtractorOutput>,
1264
+ ): Promise<void> {
1179
1265
  const installedExts = getInstalledWasmExtensions();
1180
1266
  const dropped = filePaths.filter(
1181
1267
  (f) => !nativeParsed.has(f) && installedExts.has(path.extname(f).toLowerCase()),
1182
1268
  );
1183
- if (dropped.length > 0) {
1184
- warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
1185
- const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
1186
- for (const [relPath, symbols] of wasmResults) {
1187
- result.set(relPath, symbols);
1188
- }
1269
+ if (dropped.length === 0) return;
1270
+ warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
1271
+ const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
1272
+ for (const [relPath, symbols] of wasmResults) {
1273
+ result.set(relPath, symbols);
1189
1274
  }
1275
+ }
1190
1276
 
1277
+ /**
1278
+ * Parse multiple files in bulk and return a Map<relPath, symbols>.
1279
+ */
1280
+ export async function parseFilesAuto(
1281
+ filePaths: string[],
1282
+ rootDir: string,
1283
+ opts: ParseEngineOpts = {},
1284
+ ): Promise<Map<string, ExtractorOutput>> {
1285
+ const { native } = resolveEngine(opts);
1286
+ if (!native) return parseFilesWasm(filePaths, rootDir);
1287
+
1288
+ const result = new Map<string, ExtractorOutput>();
1289
+ const { nativeParsed, needsTypeMap } = ingestNativeResults(native, filePaths, rootDir, result);
1290
+ if (needsTypeMap.length > 0) {
1291
+ await backfillTypeMapBatch(needsTypeMap, result);
1292
+ }
1293
+ await backfillNativeDrops(filePaths, nativeParsed, rootDir, result);
1191
1294
  return result;
1192
1295
  }
1193
1296
 
@@ -8,6 +8,19 @@ import { embed, getModelConfig } from './models.js';
8
8
  import { buildSourceText } from './strategies/source.js';
9
9
  import { buildStructuredText } from './strategies/structured.js';
10
10
 
11
+ type EmbeddingNode = NodeRow & { id: number };
12
+ type EmbeddingStrategy = 'structured' | 'source';
13
+
14
+ interface PreparedEmbeddings {
15
+ texts: string[];
16
+ nodeIds: number[];
17
+ nodeNames: string[];
18
+ previews: string[];
19
+ overflowCount: number;
20
+ filesRead: number;
21
+ filesSkipped: number;
22
+ }
23
+
11
24
  /**
12
25
  * Rough token estimate (~4 chars per token for code/English).
13
26
  * Conservative — avoids adding a tokenizer dependency.
@@ -47,47 +60,22 @@ function initEmbeddingsSchema(db: BetterSqlite3Database): void {
47
60
  `);
48
61
  }
49
62
 
50
- export interface BuildEmbeddingsOptions {
51
- strategy?: 'structured' | 'source';
52
- }
53
-
54
63
  /**
55
- * Build embeddings for all functions/methods/classes in the graph.
64
+ * Resolve the repo root for embedding. Prefer the root recorded at build time;
65
+ * fall back to `<dbParent>` only when the DB lives at the conventional
66
+ * `<root>/.codegraph/graph.db` layout — otherwise trust the caller's rootDir.
56
67
  */
57
- export async function buildEmbeddings(
58
- rootDir: string,
59
- modelKey: string,
60
- customDbPath?: string,
61
- options: BuildEmbeddingsOptions = {},
62
- ): Promise<void> {
63
- const strategy = options.strategy || 'structured';
64
- const dbPath = customDbPath || findDbPath(undefined);
65
-
66
- if (!fs.existsSync(dbPath)) {
67
- throw new DbError(
68
- `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
69
- { file: dbPath },
70
- );
71
- }
72
-
73
- const db = openDb(dbPath) as BetterSqlite3Database;
74
- initEmbeddingsSchema(db);
75
-
76
- // Prefer the repo root recorded at build time — embed may be invoked from a
77
- // different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
78
- // positional rootDir will be wrong in that case. For legacy DBs without
79
- // root_dir metadata, fall back to `<dbParent>` only when the DB lives at
80
- // the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
81
- // the caller-provided rootDir (which may be an explicit positional arg).
82
- // `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
83
- // conventional-layout check is required to keep the rootDir path reachable.
68
+ function resolveRoot(db: BetterSqlite3Database, dbPath: string, rootDir: string): string {
84
69
  const metaRoot = getBuildMeta(db, 'root_dir');
85
70
  const resolvedDbPath = path.resolve(dbPath);
86
71
  const dbDirName = path.basename(path.dirname(resolvedDbPath));
87
72
  const dbParent =
88
73
  dbDirName === '.codegraph' ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
89
- const resolvedRoot = metaRoot || dbParent || rootDir;
74
+ return metaRoot || dbParent || rootDir;
75
+ }
90
76
 
77
+ /** Reset embedding tables and load eligible symbols grouped by file. */
78
+ function loadNodesByFile(db: BetterSqlite3Database): Map<string, EmbeddingNode[]> {
91
79
  db.exec('DELETE FROM embeddings');
92
80
  db.exec('DELETE FROM embedding_meta');
93
81
  db.exec('DELETE FROM fts_index');
@@ -96,22 +84,52 @@ export async function buildEmbeddings(
96
84
  .prepare(
97
85
  `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
98
86
  )
99
- .all() as Array<NodeRow & { id: number }>;
87
+ .all() as EmbeddingNode[];
100
88
 
101
- console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
102
-
103
- const byFile = new Map<string, typeof nodes>();
89
+ const byFile = new Map<string, EmbeddingNode[]>();
104
90
  for (const node of nodes) {
105
91
  if (!byFile.has(node.file)) byFile.set(node.file, []);
106
92
  byFile.get(node.file)?.push(node);
107
93
  }
94
+ return byFile;
95
+ }
96
+
97
+ /** Build embedding text for a single node, truncating if it would overflow. */
98
+ function buildNodeText(
99
+ node: EmbeddingNode,
100
+ file: string,
101
+ lines: string[],
102
+ db: BetterSqlite3Database,
103
+ strategy: EmbeddingStrategy,
104
+ contextWindow: number,
105
+ ): { text: string; overflowed: boolean } {
106
+ let text =
107
+ strategy === 'structured'
108
+ ? buildStructuredText(node, file, lines, db)
109
+ : buildSourceText(node, file, lines);
110
+ const tokens = estimateTokens(text);
111
+ if (tokens > contextWindow) {
112
+ text = text.slice(0, contextWindow * 4);
113
+ return { text, overflowed: true };
114
+ }
115
+ return { text, overflowed: false };
116
+ }
108
117
 
118
+ /**
119
+ * Walk files in the graph, read source, and produce parallel arrays of
120
+ * texts / nodeIds / nodeNames / previews ready for embedding.
121
+ */
122
+ function prepareEmbeddingTexts(
123
+ byFile: Map<string, EmbeddingNode[]>,
124
+ db: BetterSqlite3Database,
125
+ resolvedRoot: string,
126
+ strategy: EmbeddingStrategy,
127
+ contextWindow: number,
128
+ ): PreparedEmbeddings {
109
129
  const texts: string[] = [];
110
130
  const nodeIds: number[] = [];
111
131
  const nodeNames: string[] = [];
112
132
  const previews: string[] = [];
113
- const config = getModelConfig(modelKey);
114
- const contextWindow = config.contextWindow;
115
133
  let overflowCount = 0;
116
134
  let filesRead = 0;
117
135
  let filesSkipped = 0;
@@ -129,19 +147,8 @@ export async function buildEmbeddings(
129
147
  }
130
148
 
131
149
  for (const node of fileNodes) {
132
- let text =
133
- strategy === 'structured'
134
- ? buildStructuredText(node, file, lines, db)
135
- : buildSourceText(node, file, lines);
136
-
137
- // Detect and handle context window overflow
138
- const tokens = estimateTokens(text);
139
- if (tokens > contextWindow) {
140
- overflowCount++;
141
- const maxChars = contextWindow * 4;
142
- text = text.slice(0, maxChars);
143
- }
144
-
150
+ const { text, overflowed } = buildNodeText(node, file, lines, db, strategy, contextWindow);
151
+ if (overflowed) overflowCount++;
145
152
  texts.push(text);
146
153
  nodeIds.push(node.id);
147
154
  nodeNames.push(node.name);
@@ -149,28 +156,19 @@ export async function buildEmbeddings(
149
156
  }
150
157
  }
151
158
 
152
- if (overflowCount > 0) {
153
- warn(
154
- `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
155
- );
156
- }
157
-
158
- // If there were symbols to embed but every file failed to read, the DB was
159
- // almost certainly built from a different location than the current cwd.
160
- // Surface this clearly instead of emitting a silent "Stored 0 embeddings".
161
- if (byFile.size > 0 && filesRead === 0) {
162
- closeDb(db);
163
- throw new DbError(
164
- `embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
165
- `Tried resolving against: ${resolvedRoot}\n` +
166
- 'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
167
- { file: dbPath },
168
- );
169
- }
170
-
171
- console.log(`Embedding ${texts.length} symbols...`);
172
- const { vectors, dim } = await embed(texts, modelKey);
159
+ return { texts, nodeIds, nodeNames, previews, overflowCount, filesRead, filesSkipped };
160
+ }
173
161
 
162
+ /** Persist vectors, FTS rows, and embedding metadata in a single transaction. */
163
+ function persistEmbeddings(
164
+ db: BetterSqlite3Database,
165
+ prepared: PreparedEmbeddings,
166
+ vectors: Float32Array[],
167
+ dim: number,
168
+ modelName: string,
169
+ strategy: EmbeddingStrategy,
170
+ ): void {
171
+ const { nodeIds, nodeNames, previews, texts, overflowCount } = prepared;
174
172
  const insert = db.prepare(
175
173
  'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)',
176
174
  );
@@ -182,7 +180,7 @@ export async function buildEmbeddings(
182
180
  insert.run(nodeIds[i], Buffer.from(vec.buffer), previews[i], texts[i]);
183
181
  insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
184
182
  }
185
- insertMeta.run('model', config.name);
183
+ insertMeta.run('model', modelName);
186
184
  insertMeta.run('dim', String(dim));
187
185
  insertMeta.run('count', String(vectors.length));
188
186
  insertMeta.run('fts_count', String(vectors.length));
@@ -193,6 +191,66 @@ export async function buildEmbeddings(
193
191
  }
194
192
  });
195
193
  insertAll();
194
+ }
195
+
196
+ export interface BuildEmbeddingsOptions {
197
+ strategy?: EmbeddingStrategy;
198
+ }
199
+
200
+ /**
201
+ * Build embeddings for all functions/methods/classes in the graph.
202
+ */
203
+ export async function buildEmbeddings(
204
+ rootDir: string,
205
+ modelKey: string,
206
+ customDbPath?: string,
207
+ options: BuildEmbeddingsOptions = {},
208
+ ): Promise<void> {
209
+ const strategy = options.strategy || 'structured';
210
+ const dbPath = customDbPath || findDbPath(undefined);
211
+
212
+ if (!fs.existsSync(dbPath)) {
213
+ throw new DbError(
214
+ `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
215
+ { file: dbPath },
216
+ );
217
+ }
218
+
219
+ const db = openDb(dbPath) as BetterSqlite3Database;
220
+ initEmbeddingsSchema(db);
221
+
222
+ const resolvedRoot = resolveRoot(db, dbPath, rootDir);
223
+ const byFile = loadNodesByFile(db);
224
+
225
+ const nodeCount = [...byFile.values()].reduce((acc, list) => acc + list.length, 0);
226
+ console.log(`Building embeddings for ${nodeCount} symbols (strategy: ${strategy})...`);
227
+
228
+ const config = getModelConfig(modelKey);
229
+ const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, config.contextWindow);
230
+
231
+ if (prepared.overflowCount > 0) {
232
+ warn(
233
+ `${prepared.overflowCount} symbol(s) exceeded model context window (${config.contextWindow} tokens) and were truncated`,
234
+ );
235
+ }
236
+
237
+ // If there were symbols to embed but every file failed to read, the DB was
238
+ // almost certainly built from a different location than the current cwd.
239
+ // Surface this clearly instead of emitting a silent "Stored 0 embeddings".
240
+ if (byFile.size > 0 && prepared.filesRead === 0) {
241
+ closeDb(db);
242
+ throw new DbError(
243
+ `embed: could not read any of the ${prepared.filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
244
+ `Tried resolving against: ${resolvedRoot}\n` +
245
+ 'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
246
+ { file: dbPath },
247
+ );
248
+ }
249
+
250
+ console.log(`Embedding ${prepared.texts.length} symbols...`);
251
+ const { vectors, dim } = await embed(prepared.texts, modelKey);
252
+
253
+ persistEmbeddings(db, prepared, vectors as Float32Array[], dim, config.name, strategy);
196
254
 
197
255
  console.log(
198
256
  `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,