@optave/codegraph 3.10.0 → 3.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312)
  1. package/README.md +40 -33
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +91 -60
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/index.js +77 -0
  7. package/dist/ast-analysis/rules/index.js.map +1 -1
  8. package/dist/ast-analysis/visitor-utils.d.ts +3 -0
  9. package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
  10. package/dist/ast-analysis/visitor-utils.js +83 -49
  11. package/dist/ast-analysis/visitor-utils.js.map +1 -1
  12. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  13. package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
  14. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  15. package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
  16. package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
  17. package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
  18. package/dist/cli/commands/audit.js +1 -1
  19. package/dist/cli/commands/audit.js.map +1 -1
  20. package/dist/cli/commands/build.d.ts.map +1 -1
  21. package/dist/cli/commands/build.js +2 -0
  22. package/dist/cli/commands/build.js.map +1 -1
  23. package/dist/cli/commands/check.js +1 -1
  24. package/dist/cli/commands/check.js.map +1 -1
  25. package/dist/cli/commands/children.js +1 -1
  26. package/dist/cli/commands/children.js.map +1 -1
  27. package/dist/cli/commands/diff-impact.js +1 -1
  28. package/dist/cli/commands/diff-impact.js.map +1 -1
  29. package/dist/cli/commands/embed.d.ts.map +1 -1
  30. package/dist/cli/commands/embed.js +49 -4
  31. package/dist/cli/commands/embed.js.map +1 -1
  32. package/dist/cli/commands/roles.js +1 -1
  33. package/dist/cli/commands/roles.js.map +1 -1
  34. package/dist/cli/commands/structure.js +1 -1
  35. package/dist/cli/commands/structure.js.map +1 -1
  36. package/dist/cli/shared/options.js +1 -1
  37. package/dist/cli/shared/options.js.map +1 -1
  38. package/dist/db/connection.d.ts.map +1 -1
  39. package/dist/db/connection.js +8 -0
  40. package/dist/db/connection.js.map +1 -1
  41. package/dist/domain/analysis/dependencies.d.ts.map +1 -1
  42. package/dist/domain/analysis/dependencies.js +106 -80
  43. package/dist/domain/analysis/dependencies.js.map +1 -1
  44. package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
  45. package/dist/domain/analysis/fn-impact.js +77 -52
  46. package/dist/domain/analysis/fn-impact.js.map +1 -1
  47. package/dist/domain/analysis/module-map.d.ts.map +1 -1
  48. package/dist/domain/analysis/module-map.js +132 -121
  49. package/dist/domain/analysis/module-map.js.map +1 -1
  50. package/dist/domain/graph/builder/helpers.d.ts +4 -4
  51. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  52. package/dist/domain/graph/builder/helpers.js +47 -33
  53. package/dist/domain/graph/builder/helpers.js.map +1 -1
  54. package/dist/domain/graph/builder/incremental.d.ts +6 -6
  55. package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
  56. package/dist/domain/graph/builder/incremental.js +148 -99
  57. package/dist/domain/graph/builder/incremental.js.map +1 -1
  58. package/dist/domain/graph/builder/pipeline.d.ts +1 -0
  59. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  60. package/dist/domain/graph/builder/pipeline.js +23 -637
  61. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  62. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  63. package/dist/domain/graph/builder/stages/build-edges.js +141 -98
  64. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  65. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  66. package/dist/domain/graph/builder/stages/build-structure.js +82 -65
  67. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  68. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  69. package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
  70. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  71. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  72. package/dist/domain/graph/builder/stages/finalize.js +60 -51
  73. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  74. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
  75. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  76. package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
  77. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  78. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
  79. package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
  80. package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
  81. package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
  82. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
  83. package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
  84. package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
  85. package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
  86. package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
  87. package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
  88. package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
  89. package/dist/domain/graph/cycles.d.ts +6 -4
  90. package/dist/domain/graph/cycles.d.ts.map +1 -1
  91. package/dist/domain/graph/cycles.js +50 -55
  92. package/dist/domain/graph/cycles.js.map +1 -1
  93. package/dist/domain/graph/journal.d.ts.map +1 -1
  94. package/dist/domain/graph/journal.js +89 -70
  95. package/dist/domain/graph/journal.js.map +1 -1
  96. package/dist/domain/graph/watcher.d.ts.map +1 -1
  97. package/dist/domain/graph/watcher.js +28 -20
  98. package/dist/domain/graph/watcher.js.map +1 -1
  99. package/dist/domain/parser.d.ts +12 -23
  100. package/dist/domain/parser.d.ts.map +1 -1
  101. package/dist/domain/parser.js +153 -80
  102. package/dist/domain/parser.js.map +1 -1
  103. package/dist/domain/search/generator.d.ts +3 -1
  104. package/dist/domain/search/generator.d.ts.map +1 -1
  105. package/dist/domain/search/generator.js +68 -45
  106. package/dist/domain/search/generator.js.map +1 -1
  107. package/dist/domain/search/models.d.ts +18 -0
  108. package/dist/domain/search/models.d.ts.map +1 -1
  109. package/dist/domain/search/models.js +72 -4
  110. package/dist/domain/search/models.js.map +1 -1
  111. package/dist/domain/search/search/hybrid.d.ts.map +1 -1
  112. package/dist/domain/search/search/hybrid.js +49 -40
  113. package/dist/domain/search/search/hybrid.js.map +1 -1
  114. package/dist/domain/search/search/semantic.d.ts.map +1 -1
  115. package/dist/domain/search/search/semantic.js +69 -49
  116. package/dist/domain/search/search/semantic.js.map +1 -1
  117. package/dist/domain/wasm-worker-entry.js +209 -137
  118. package/dist/domain/wasm-worker-entry.js.map +1 -1
  119. package/dist/extractors/c.js +25 -6
  120. package/dist/extractors/c.js.map +1 -1
  121. package/dist/extractors/cpp.js +47 -6
  122. package/dist/extractors/cpp.js.map +1 -1
  123. package/dist/extractors/cuda.js +90 -14
  124. package/dist/extractors/cuda.js.map +1 -1
  125. package/dist/extractors/elixir.js +108 -4
  126. package/dist/extractors/elixir.js.map +1 -1
  127. package/dist/extractors/erlang.js +56 -20
  128. package/dist/extractors/erlang.js.map +1 -1
  129. package/dist/extractors/fsharp.d.ts +7 -0
  130. package/dist/extractors/fsharp.d.ts.map +1 -1
  131. package/dist/extractors/fsharp.js +94 -0
  132. package/dist/extractors/fsharp.js.map +1 -1
  133. package/dist/extractors/gleam.d.ts.map +1 -1
  134. package/dist/extractors/gleam.js +29 -33
  135. package/dist/extractors/gleam.js.map +1 -1
  136. package/dist/extractors/groovy.js +41 -1
  137. package/dist/extractors/groovy.js.map +1 -1
  138. package/dist/extractors/haskell.js +48 -4
  139. package/dist/extractors/haskell.js.map +1 -1
  140. package/dist/extractors/helpers.d.ts +79 -1
  141. package/dist/extractors/helpers.d.ts.map +1 -1
  142. package/dist/extractors/helpers.js +137 -0
  143. package/dist/extractors/helpers.js.map +1 -1
  144. package/dist/extractors/java.d.ts.map +1 -1
  145. package/dist/extractors/java.js +37 -49
  146. package/dist/extractors/java.js.map +1 -1
  147. package/dist/extractors/javascript.d.ts.map +1 -1
  148. package/dist/extractors/javascript.js +44 -44
  149. package/dist/extractors/javascript.js.map +1 -1
  150. package/dist/extractors/julia.js +198 -74
  151. package/dist/extractors/julia.js.map +1 -1
  152. package/dist/extractors/kotlin.js +4 -0
  153. package/dist/extractors/kotlin.js.map +1 -1
  154. package/dist/extractors/objc.js +184 -47
  155. package/dist/extractors/objc.js.map +1 -1
  156. package/dist/extractors/python.js +7 -4
  157. package/dist/extractors/python.js.map +1 -1
  158. package/dist/extractors/r.d.ts.map +1 -1
  159. package/dist/extractors/r.js +103 -87
  160. package/dist/extractors/r.js.map +1 -1
  161. package/dist/extractors/scala.d.ts.map +1 -1
  162. package/dist/extractors/scala.js +18 -32
  163. package/dist/extractors/scala.js.map +1 -1
  164. package/dist/extractors/solidity.d.ts.map +1 -1
  165. package/dist/extractors/solidity.js +55 -69
  166. package/dist/extractors/solidity.js.map +1 -1
  167. package/dist/extractors/verilog.js +80 -15
  168. package/dist/extractors/verilog.js.map +1 -1
  169. package/dist/features/boundaries.d.ts.map +1 -1
  170. package/dist/features/boundaries.js +49 -39
  171. package/dist/features/boundaries.js.map +1 -1
  172. package/dist/features/cfg.d.ts.map +1 -1
  173. package/dist/features/cfg.js +90 -63
  174. package/dist/features/cfg.js.map +1 -1
  175. package/dist/features/check.d.ts.map +1 -1
  176. package/dist/features/check.js +43 -34
  177. package/dist/features/check.js.map +1 -1
  178. package/dist/features/cochange.d.ts.map +1 -1
  179. package/dist/features/cochange.js +68 -56
  180. package/dist/features/cochange.js.map +1 -1
  181. package/dist/features/complexity.d.ts.map +1 -1
  182. package/dist/features/complexity.js +105 -75
  183. package/dist/features/complexity.js.map +1 -1
  184. package/dist/features/dataflow.d.ts.map +1 -1
  185. package/dist/features/dataflow.js +37 -29
  186. package/dist/features/dataflow.js.map +1 -1
  187. package/dist/features/flow.d.ts.map +1 -1
  188. package/dist/features/flow.js +31 -22
  189. package/dist/features/flow.js.map +1 -1
  190. package/dist/features/graph-enrichment.d.ts.map +1 -1
  191. package/dist/features/graph-enrichment.js +77 -70
  192. package/dist/features/graph-enrichment.js.map +1 -1
  193. package/dist/features/owners.d.ts +17 -26
  194. package/dist/features/owners.d.ts.map +1 -1
  195. package/dist/features/owners.js +120 -109
  196. package/dist/features/owners.js.map +1 -1
  197. package/dist/features/sequence.d.ts.map +1 -1
  198. package/dist/features/sequence.js +59 -54
  199. package/dist/features/sequence.js.map +1 -1
  200. package/dist/features/structure-query.d.ts.map +1 -1
  201. package/dist/features/structure-query.js +60 -60
  202. package/dist/features/structure-query.js.map +1 -1
  203. package/dist/features/structure.js +28 -36
  204. package/dist/features/structure.js.map +1 -1
  205. package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
  206. package/dist/graph/algorithms/leiden/optimiser.js +100 -69
  207. package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
  208. package/dist/graph/classifiers/roles.d.ts.map +1 -1
  209. package/dist/graph/classifiers/roles.js +63 -59
  210. package/dist/graph/classifiers/roles.js.map +1 -1
  211. package/dist/infrastructure/config.d.ts +1 -1
  212. package/dist/infrastructure/config.d.ts.map +1 -1
  213. package/dist/infrastructure/config.js +1 -1
  214. package/dist/infrastructure/config.js.map +1 -1
  215. package/dist/mcp/tool-registry.d.ts.map +1 -1
  216. package/dist/mcp/tool-registry.js +4 -0
  217. package/dist/mcp/tool-registry.js.map +1 -1
  218. package/dist/mcp/tools/semantic-search.d.ts +1 -0
  219. package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
  220. package/dist/mcp/tools/semantic-search.js +1 -0
  221. package/dist/mcp/tools/semantic-search.js.map +1 -1
  222. package/dist/presentation/cfg.d.ts.map +1 -1
  223. package/dist/presentation/cfg.js +44 -29
  224. package/dist/presentation/cfg.js.map +1 -1
  225. package/dist/presentation/flow.d.ts.map +1 -1
  226. package/dist/presentation/flow.js +58 -38
  227. package/dist/presentation/flow.js.map +1 -1
  228. package/dist/types.d.ts +16 -2
  229. package/dist/types.d.ts.map +1 -1
  230. package/grammars/tree-sitter-erlang.wasm +0 -0
  231. package/grammars/tree-sitter-fsharp.wasm +0 -0
  232. package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
  233. package/grammars/tree-sitter-gleam.wasm +0 -0
  234. package/package.json +10 -10
  235. package/src/ast-analysis/engine.ts +145 -61
  236. package/src/ast-analysis/rules/index.ts +87 -0
  237. package/src/ast-analysis/visitor-utils.ts +86 -46
  238. package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
  239. package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
  240. package/src/cli/commands/audit.ts +1 -1
  241. package/src/cli/commands/build.ts +2 -0
  242. package/src/cli/commands/check.ts +1 -1
  243. package/src/cli/commands/children.ts +1 -1
  244. package/src/cli/commands/diff-impact.ts +1 -1
  245. package/src/cli/commands/embed.ts +54 -4
  246. package/src/cli/commands/roles.ts +1 -1
  247. package/src/cli/commands/structure.ts +1 -1
  248. package/src/cli/shared/options.ts +1 -1
  249. package/src/db/connection.ts +8 -0
  250. package/src/domain/analysis/dependencies.ts +166 -85
  251. package/src/domain/analysis/fn-impact.ts +120 -50
  252. package/src/domain/analysis/module-map.ts +175 -140
  253. package/src/domain/graph/builder/helpers.ts +85 -76
  254. package/src/domain/graph/builder/incremental.ts +223 -131
  255. package/src/domain/graph/builder/pipeline.ts +32 -785
  256. package/src/domain/graph/builder/stages/build-edges.ts +207 -142
  257. package/src/domain/graph/builder/stages/build-structure.ts +115 -82
  258. package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
  259. package/src/domain/graph/builder/stages/finalize.ts +72 -70
  260. package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
  261. package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
  262. package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
  263. package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
  264. package/src/domain/graph/cycles.ts +51 -49
  265. package/src/domain/graph/journal.ts +84 -69
  266. package/src/domain/graph/watcher.ts +29 -25
  267. package/src/domain/parser.ts +170 -67
  268. package/src/domain/search/generator.ts +132 -74
  269. package/src/domain/search/models.ts +75 -4
  270. package/src/domain/search/search/hybrid.ts +53 -42
  271. package/src/domain/search/search/semantic.ts +105 -65
  272. package/src/domain/wasm-worker-entry.ts +243 -153
  273. package/src/extractors/c.ts +27 -8
  274. package/src/extractors/cpp.ts +50 -8
  275. package/src/extractors/cuda.ts +90 -16
  276. package/src/extractors/elixir.ts +103 -4
  277. package/src/extractors/erlang.ts +63 -20
  278. package/src/extractors/fsharp.ts +104 -0
  279. package/src/extractors/gleam.ts +40 -39
  280. package/src/extractors/groovy.ts +45 -1
  281. package/src/extractors/haskell.ts +45 -4
  282. package/src/extractors/helpers.ts +205 -1
  283. package/src/extractors/java.ts +42 -45
  284. package/src/extractors/javascript.ts +44 -43
  285. package/src/extractors/julia.ts +191 -77
  286. package/src/extractors/kotlin.ts +4 -0
  287. package/src/extractors/objc.ts +171 -47
  288. package/src/extractors/python.ts +5 -3
  289. package/src/extractors/r.ts +104 -82
  290. package/src/extractors/scala.ts +24 -36
  291. package/src/extractors/solidity.ts +59 -78
  292. package/src/extractors/verilog.ts +83 -15
  293. package/src/features/boundaries.ts +64 -46
  294. package/src/features/cfg.ts +145 -74
  295. package/src/features/check.ts +60 -43
  296. package/src/features/cochange.ts +95 -72
  297. package/src/features/complexity.ts +134 -79
  298. package/src/features/dataflow.ts +57 -34
  299. package/src/features/flow.ts +48 -24
  300. package/src/features/graph-enrichment.ts +105 -70
  301. package/src/features/owners.ts +186 -146
  302. package/src/features/sequence.ts +99 -69
  303. package/src/features/structure-query.ts +94 -79
  304. package/src/features/structure.ts +56 -56
  305. package/src/graph/algorithms/leiden/optimiser.ts +142 -87
  306. package/src/graph/classifiers/roles.ts +64 -54
  307. package/src/infrastructure/config.ts +1 -1
  308. package/src/mcp/tool-registry.ts +5 -0
  309. package/src/mcp/tools/semantic-search.ts +2 -0
  310. package/src/presentation/cfg.ts +48 -32
  311. package/src/presentation/flow.ts +100 -52
  312. package/src/types.ts +16 -1
@@ -1,14 +1,48 @@
1
1
  import { execFileSync } from 'node:child_process';
2
+ import { createRequire } from 'node:module';
3
+ import path from 'node:path';
2
4
  import { createInterface } from 'node:readline';
3
5
  import { info } from '../../infrastructure/logger.js';
4
6
  import { ConfigError, EngineError } from '../../shared/errors.js';
5
7
 
8
+ const _require = createRequire(import.meta.url);
9
+
10
+ /**
11
+ * Resolve the directory where `npm install` should run so the installed
12
+ * package ends up reachable by `await import(pkg)` from inside this module.
13
+ *
14
+ * Without a `cwd`, `execFileSync('npm', ['install', ...])` operates on
15
+ * `process.cwd()` — when the user runs codegraph against a repo that is *not*
16
+ * the directory where codegraph itself is installed, npm installs into the
17
+ * wrong `node_modules`, the dynamic import still fails, and the user gets
18
+ * `ENGINE_UNAVAILABLE: ... installed but failed to load`.
19
+ *
20
+ * Pin cwd to the directory that contains @optave/codegraph's `node_modules`
21
+ * so the install lands where Node's resolution algorithm will find it.
22
+ *
23
+ * @internal Exported for unit tests; not part of the public barrel.
24
+ */
25
+ export function resolveNpmInstallCwd(): string | undefined {
26
+ try {
27
+ const pkgJsonPath = _require.resolve('@optave/codegraph/package.json');
28
+ // pkgJsonPath = <host>/node_modules/@optave/codegraph/package.json
29
+ // dirname x4: package.json → codegraph → @optave → node_modules → <host>
30
+ return path.dirname(path.dirname(path.dirname(path.dirname(pkgJsonPath))));
31
+ } catch {
32
+ // Source-of-truth checkout (no @optave/codegraph in node_modules) — fall back
33
+ // to process.cwd() so legacy behavior survives in tests.
34
+ return undefined;
35
+ }
36
+ }
37
+
6
38
  export interface ModelConfig {
7
39
  name: string;
8
40
  dim: number;
9
41
  contextWindow: number;
10
42
  desc: string;
11
43
  quantized: boolean;
44
+ /** Pooling strategy passed to the transformers pipeline. Defaults to 'mean'. */
45
+ pooling?: 'mean' | 'cls';
12
46
  }
13
47
 
14
48
  // Lazy-load transformers (heavy, optional module)
@@ -59,7 +93,7 @@ export const MODELS: Record<string, ModelConfig> = {
59
93
  name: 'nomic-ai/nomic-embed-text-v1.5',
60
94
  dim: 768,
61
95
  contextWindow: 8192,
62
- desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
96
+ desc: 'Matryoshka MRL trained (~137MB). 8192 context. Codegraph stores full 768d (no truncation); v1 scores higher on our benchmark.',
63
97
  quantized: false,
64
98
  },
65
99
  'bge-large': {
@@ -69,11 +103,41 @@ export const MODELS: Record<string, ModelConfig> = {
69
103
  desc: 'Best general retrieval (~335MB). Top MTEB scores.',
70
104
  quantized: false,
71
105
  },
106
+ 'mxbai-xsmall': {
107
+ name: 'mixedbread-ai/mxbai-embed-xsmall-v1',
108
+ dim: 384,
109
+ contextWindow: 4096,
110
+ desc: 'Tiny model with long context (~50MB). 4096 ctx.',
111
+ quantized: false,
112
+ pooling: 'cls',
113
+ },
114
+ 'mxbai-large': {
115
+ name: 'mixedbread-ai/mxbai-embed-large-v1',
116
+ dim: 1024,
117
+ contextWindow: 512,
118
+ desc: 'Top MTEB BERT-large, Matryoshka dimensions (~400MB). 512 ctx.',
119
+ quantized: false,
120
+ pooling: 'cls',
121
+ },
122
+ 'bge-m3': {
123
+ name: 'Xenova/bge-m3',
124
+ dim: 1024,
125
+ contextWindow: 8192,
126
+ desc: 'Multilingual, multi-task (~600MB). 100+ languages, 8192 context.',
127
+ quantized: false,
128
+ },
129
+ modernbert: {
130
+ name: 'nomic-ai/modernbert-embed-base',
131
+ dim: 768,
132
+ contextWindow: 8192,
133
+ desc: 'ModernBERT base (~150MB). Newer architecture, 8192 ctx, English.',
134
+ quantized: false,
135
+ },
72
136
  };
73
137
 
74
138
  export const EMBEDDING_STRATEGIES: readonly string[] = ['structured', 'source'];
75
139
 
76
- export const DEFAULT_MODEL: string = 'nomic-v1.5';
140
+ export const DEFAULT_MODEL: string = 'nomic';
77
141
  const NPM_BIN = process.platform === 'win32' ? 'npm.cmd' : 'npm';
78
142
  const BATCH_SIZE_MAP: Record<string, number> = {
79
143
  minilm: 32,
@@ -83,6 +147,10 @@ const BATCH_SIZE_MAP: Record<string, number> = {
83
147
  nomic: 8,
84
148
  'nomic-v1.5': 8,
85
149
  'bge-large': 4,
150
+ 'mxbai-xsmall': 32,
151
+ 'mxbai-large': 4,
152
+ 'bge-m3': 4,
153
+ modernbert: 8,
86
154
  };
87
155
  const DEFAULT_BATCH_SIZE = 32;
88
156
 
@@ -104,12 +172,14 @@ export function getModelConfig(modelKey?: string): ModelConfig {
104
172
  * @internal Not part of the public barrel.
105
173
  */
106
174
  export function promptInstall(packageName: string): Promise<boolean> {
175
+ const installCwd = resolveNpmInstallCwd();
107
176
  if (!process.stdin.isTTY) {
108
177
  info(`Installing ${packageName} (optional dependency for semantic search)…`);
109
178
  try {
110
179
  execFileSync(NPM_BIN, ['install', '--no-save', packageName], {
111
180
  stdio: 'inherit',
112
181
  timeout: 300_000,
182
+ cwd: installCwd,
113
183
  });
114
184
  return Promise.resolve(true);
115
185
  } catch (err) {
@@ -128,9 +198,10 @@ export function promptInstall(packageName: string): Promise<boolean> {
128
198
  rl.close();
129
199
  if (answer.trim().toLowerCase() !== 'y') return resolve(false);
130
200
  try {
131
- execFileSync(NPM_BIN, ['install', packageName], {
201
+ execFileSync(NPM_BIN, ['install', '--no-save', packageName], {
132
202
  stdio: 'inherit',
133
203
  timeout: 300_000,
204
+ cwd: installCwd,
134
205
  });
135
206
  resolve(true);
136
207
  } catch (err) {
@@ -239,7 +310,7 @@ export async function embed(
239
310
  const batch = texts.slice(i, i + batchSize);
240
311
  const output =
241
312
  (await // biome-ignore lint/complexity/noBannedTypes: dynamically loaded extractor is untyped
242
- (ext as Function)(batch, { pooling: 'mean', normalize: true })) as {
313
+ (ext as Function)(batch, { pooling: config.pooling ?? 'mean', normalize: true })) as {
243
314
  data: number[];
244
315
  };
245
316
 
@@ -105,61 +105,72 @@ async function collectRankedLists(
105
105
  return rankedLists;
106
106
  }
107
107
 
108
+ /** Initialise a fusion entry seeded from the first ranked item we see for a key. */
109
+ function createFusionEntry(item: RankedItem): FusionEntry {
110
+ return {
111
+ name: item.name,
112
+ kind: item.kind,
113
+ file: item.file,
114
+ line: item.line,
115
+ endLine: (item.endLine as number | null) ?? null,
116
+ role: (item.role as string | null) ?? null,
117
+ fileHash: (item.fileHash as string | null) ?? null,
118
+ rrfScore: 0,
119
+ bm25Score: null,
120
+ bm25Rank: null,
121
+ similarity: null,
122
+ semanticRank: null,
123
+ };
124
+ }
125
+
126
+ /** Merge a single ranked item into its fusion entry: update RRF and best per-source rank. */
127
+ function mergeRankedItem(entry: FusionEntry, item: RankedItem, k: number): void {
128
+ entry.rrfScore += 1 / (k + item.rank);
129
+ if (item.source === 'bm25') {
130
+ if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
131
+ entry.bm25Score = item.bm25Score ?? null;
132
+ entry.bm25Rank = item.rank;
133
+ }
134
+ } else if (entry.semanticRank === null || item.rank < entry.semanticRank) {
135
+ entry.similarity = item.similarity ?? null;
136
+ entry.semanticRank = item.rank;
137
+ }
138
+ }
139
+
140
+ /** Flatten a fusion entry into the public-facing hybrid result shape. */
141
+ function toHybridResult(e: FusionEntry): HybridResult {
142
+ return {
143
+ name: e.name,
144
+ kind: e.kind,
145
+ file: e.file,
146
+ line: e.line,
147
+ endLine: e.endLine,
148
+ role: e.role,
149
+ fileHash: e.fileHash,
150
+ rrf: e.rrfScore,
151
+ bm25Score: e.bm25Score,
152
+ bm25Rank: e.bm25Rank,
153
+ similarity: e.similarity,
154
+ semanticRank: e.semanticRank,
155
+ };
156
+ }
157
+
108
158
  /** Reciprocal Rank Fusion: merge ranked lists into a single scored result set. */
109
159
  function fuseResults(rankedLists: RankedItem[][], k: number, limit: number): HybridResult[] {
110
160
  const fusionMap = new Map<string, FusionEntry>();
111
-
112
161
  for (const list of rankedLists) {
113
162
  for (const item of list) {
114
163
  if (!fusionMap.has(item.key)) {
115
- fusionMap.set(item.key, {
116
- name: item.name,
117
- kind: item.kind,
118
- file: item.file,
119
- line: item.line,
120
- endLine: (item.endLine as number | null) ?? null,
121
- role: (item.role as string | null) ?? null,
122
- fileHash: (item.fileHash as string | null) ?? null,
123
- rrfScore: 0,
124
- bm25Score: null,
125
- bm25Rank: null,
126
- similarity: null,
127
- semanticRank: null,
128
- });
129
- }
130
- const entry = fusionMap.get(item.key)!;
131
- entry.rrfScore += 1 / (k + item.rank);
132
- if (item.source === 'bm25') {
133
- if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
134
- entry.bm25Score = (item as RankedItem & { bm25Score?: number }).bm25Score ?? null;
135
- entry.bm25Rank = item.rank;
136
- }
137
- } else {
138
- if (entry.semanticRank === null || item.rank < entry.semanticRank) {
139
- entry.similarity = (item as RankedItem & { similarity?: number }).similarity ?? null;
140
- entry.semanticRank = item.rank;
141
- }
164
+ fusionMap.set(item.key, createFusionEntry(item));
142
165
  }
166
+ mergeRankedItem(fusionMap.get(item.key)!, item, k);
143
167
  }
144
168
  }
145
169
 
146
170
  return [...fusionMap.values()]
147
171
  .sort((a, b) => b.rrfScore - a.rrfScore)
148
172
  .slice(0, limit)
149
- .map((e) => ({
150
- name: e.name,
151
- kind: e.kind,
152
- file: e.file,
153
- line: e.line,
154
- endLine: e.endLine,
155
- role: e.role,
156
- fileHash: e.fileHash,
157
- rrf: e.rrfScore,
158
- bm25Score: e.bm25Score,
159
- bm25Rank: e.bm25Rank,
160
- similarity: e.similarity,
161
- semanticRank: e.semanticRank,
162
- }));
173
+ .map(toHybridResult);
163
174
  }
164
175
 
165
176
  export async function hybridSearchData(
@@ -4,7 +4,7 @@ import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js';
4
4
  import { normalizeSymbol } from '../../queries.js';
5
5
  import { embed } from '../models.js';
6
6
  import { cosineSim } from '../stores/sqlite-blob.js';
7
- import { prepareSearch } from './prepare.js';
7
+ import { type PreparedSearch, prepareSearch } from './prepare.js';
8
8
 
9
9
  export interface SemanticSearchOpts {
10
10
  config?: CodegraphConfig;
@@ -30,6 +30,25 @@ export interface SearchDataResult {
30
30
  results: SemanticResult[];
31
31
  }
32
32
 
33
+ type StoredRow = PreparedSearch['rows'][number];
34
+
35
+ /** Reconstitute a stored embedding row's vector blob into a Float32Array. */
36
+ function rowVector(row: StoredRow): Float32Array {
37
+ return new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer);
38
+ }
39
+
40
+ /** Warn when stored embeddings and the query model use different dimensions. */
41
+ function checkDimensionMismatch(storedDim: number | null, dim: number): boolean {
42
+ if (storedDim && dim !== storedDim) {
43
+ console.log(
44
+ `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
45
+ );
46
+ console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
47
+ return true;
48
+ }
49
+ return false;
50
+ }
51
+
33
52
  export async function searchData(
34
53
  query: string,
35
54
  customDbPath: string | undefined,
@@ -50,20 +69,12 @@ export async function searchData(
50
69
  dim,
51
70
  } = await embed([query], modelKey ?? undefined);
52
71
 
53
- if (storedDim && dim !== storedDim) {
54
- console.log(
55
- `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
56
- );
57
- console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
58
- return null;
59
- }
72
+ if (checkDimensionMismatch(storedDim, dim)) return null;
60
73
 
61
74
  const hc = new Map<string, string>();
62
75
  const results: SemanticResult[] = [];
63
76
  for (const row of rows) {
64
- const vec = new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer);
65
- const sim = cosineSim(queryVec!, vec);
66
-
77
+ const sim = cosineSim(queryVec!, rowVector(row));
67
78
  if (sim >= minScore) {
68
79
  results.push({
69
80
  ...normalizeSymbol(row, db as BetterSqlite3Database, hc),
@@ -91,6 +102,82 @@ export interface MultiSearchResult {
91
102
  }>;
92
103
  }
93
104
 
105
+ interface RankedHit {
106
+ rowIndex: number;
107
+ similarity: number;
108
+ rank: number;
109
+ }
110
+
111
+ interface FusionEntry {
112
+ rrfScore: number;
113
+ queryScores: Array<{ query: string; similarity: number; rank: number }>;
114
+ }
115
+
116
+ /**
117
+ * Emit a warning for any query pair whose embeddings are nearly identical,
118
+ * since RRF would over-weight matches shared between them.
119
+ */
120
+ function warnOnSimilarQueries(
121
+ queries: string[],
122
+ queryVecs: Float32Array[],
123
+ threshold: number,
124
+ ): void {
125
+ for (let i = 0; i < queryVecs.length; i++) {
126
+ for (let j = i + 1; j < queryVecs.length; j++) {
127
+ const sim = cosineSim(queryVecs[i]!, queryVecs[j]!);
128
+ if (sim >= threshold) {
129
+ warn(
130
+ `Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
131
+ `(${(sim * 100).toFixed(0)}% cosine similarity). ` +
132
+ `This may bias RRF results toward their shared matches. ` +
133
+ `Consider using more distinct queries.`,
134
+ );
135
+ }
136
+ }
137
+ }
138
+ }
139
+
140
+ /** Rank stored rows for a single query, keeping only those above minScore. */
141
+ function rankRowsForQuery(
142
+ queryVec: Float32Array,
143
+ rowVecs: Float32Array[],
144
+ minScore: number,
145
+ ): RankedHit[] {
146
+ const scored: Array<{ rowIndex: number; similarity: number }> = [];
147
+ for (let ri = 0; ri < rowVecs.length; ri++) {
148
+ const sim = cosineSim(queryVec, rowVecs[ri]!);
149
+ if (sim >= minScore) {
150
+ scored.push({ rowIndex: ri, similarity: sim });
151
+ }
152
+ }
153
+ scored.sort((a, b) => b.similarity - a.similarity);
154
+ return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
155
+ }
156
+
157
+ /** Reciprocal Rank Fusion across each query's ranked hits. */
158
+ function fuseRankedHits(
159
+ queries: string[],
160
+ perQueryRanked: RankedHit[][],
161
+ k: number,
162
+ ): Map<number, FusionEntry> {
163
+ const fusionMap = new Map<number, FusionEntry>();
164
+ for (let qi = 0; qi < queries.length; qi++) {
165
+ for (const item of perQueryRanked[qi]!) {
166
+ if (!fusionMap.has(item.rowIndex)) {
167
+ fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
168
+ }
169
+ const entry = fusionMap.get(item.rowIndex)!;
170
+ entry.rrfScore += 1 / (k + item.rank);
171
+ entry.queryScores.push({
172
+ query: queries[qi]!,
173
+ similarity: item.similarity,
174
+ rank: item.rank,
175
+ });
176
+ }
177
+ }
178
+ return fusionMap;
179
+ }
180
+
94
181
  export async function multiSearchData(
95
182
  queries: string[],
96
183
  customDbPath: string | undefined,
@@ -101,6 +188,7 @@ export async function multiSearchData(
101
188
  const limit = opts.limit ?? searchCfg.topK ?? 15;
102
189
  const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2;
103
190
  const k = opts.rrfK ?? searchCfg.rrfK ?? 60;
191
+ const similarityWarnThreshold = searchCfg.similarityWarnThreshold ?? 0.85;
104
192
 
105
193
  const prepared = prepareSearch(customDbPath, opts);
106
194
  if (!prepared) return null;
@@ -109,63 +197,15 @@ export async function multiSearchData(
109
197
  try {
110
198
  const { vectors: queryVecs, dim } = await embed(queries, modelKey ?? undefined);
111
199
 
112
- const SIMILARITY_WARN_THRESHOLD = searchCfg.similarityWarnThreshold ?? 0.85;
113
- for (let i = 0; i < queryVecs.length; i++) {
114
- for (let j = i + 1; j < queryVecs.length; j++) {
115
- const sim = cosineSim(queryVecs[i]!, queryVecs[j]!);
116
- if (sim >= SIMILARITY_WARN_THRESHOLD) {
117
- warn(
118
- `Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
119
- `(${(sim * 100).toFixed(0)}% cosine similarity). ` +
120
- `This may bias RRF results toward their shared matches. ` +
121
- `Consider using more distinct queries.`,
122
- );
123
- }
124
- }
125
- }
200
+ warnOnSimilarQueries(queries, queryVecs as Float32Array[], similarityWarnThreshold);
126
201
 
127
- if (storedDim && dim !== storedDim) {
128
- console.log(
129
- `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
130
- );
131
- console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
132
- return null;
133
- }
202
+ if (checkDimensionMismatch(storedDim, dim)) return null;
134
203
 
135
- const rowVecs = rows.map(
136
- (row) => new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer),
204
+ const rowVecs = rows.map(rowVector);
205
+ const perQueryRanked = queries.map((_q, qi) =>
206
+ rankRowsForQuery(queryVecs[qi]!, rowVecs, minScore),
137
207
  );
138
-
139
- const perQueryRanked = queries.map((_query, qi) => {
140
- const scored: Array<{ rowIndex: number; similarity: number }> = [];
141
- for (let ri = 0; ri < rows.length; ri++) {
142
- const sim = cosineSim(queryVecs[qi]!, rowVecs[ri]!);
143
- if (sim >= minScore) {
144
- scored.push({ rowIndex: ri, similarity: sim });
145
- }
146
- }
147
- scored.sort((a, b) => b.similarity - a.similarity);
148
- return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
149
- });
150
-
151
- const fusionMap = new Map<
152
- number,
153
- { rrfScore: number; queryScores: Array<{ query: string; similarity: number; rank: number }> }
154
- >();
155
- for (let qi = 0; qi < queries.length; qi++) {
156
- for (const item of perQueryRanked[qi]!) {
157
- if (!fusionMap.has(item.rowIndex)) {
158
- fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
159
- }
160
- const entry = fusionMap.get(item.rowIndex)!;
161
- entry.rrfScore += 1 / (k + item.rank);
162
- entry.queryScores.push({
163
- query: queries[qi]!,
164
- similarity: item.similarity,
165
- rank: item.rank,
166
- });
167
- }
168
- }
208
+ const fusionMap = fuseRankedHits(queries, perQueryRanked, k);
169
209
 
170
210
  const hc = new Map<string, string>();
171
211
  const results: MultiSearchResult['results'] = [];