@opencodehub/ingestion 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (563) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +69 -0
  3. package/dist/extract/index.d.ts +8 -0
  4. package/dist/extract/index.d.ts.map +1 -0
  5. package/dist/extract/index.js +6 -0
  6. package/dist/extract/index.js.map +1 -0
  7. package/dist/extract/orm-detector.d.ts +19 -0
  8. package/dist/extract/orm-detector.d.ts.map +1 -0
  9. package/dist/extract/orm-detector.js +209 -0
  10. package/dist/extract/orm-detector.js.map +1 -0
  11. package/dist/extract/property-access.d.ts +76 -0
  12. package/dist/extract/property-access.d.ts.map +1 -0
  13. package/dist/extract/property-access.js +260 -0
  14. package/dist/extract/property-access.js.map +1 -0
  15. package/dist/extract/receiver-resolver.d.ts +86 -0
  16. package/dist/extract/receiver-resolver.d.ts.map +1 -0
  17. package/dist/extract/receiver-resolver.js +77 -0
  18. package/dist/extract/receiver-resolver.js.map +1 -0
  19. package/dist/extract/route-detector-java.d.ts +29 -0
  20. package/dist/extract/route-detector-java.d.ts.map +1 -0
  21. package/dist/extract/route-detector-java.js +190 -0
  22. package/dist/extract/route-detector-java.js.map +1 -0
  23. package/dist/extract/route-detector-nestjs.d.ts +30 -0
  24. package/dist/extract/route-detector-nestjs.d.ts.map +1 -0
  25. package/dist/extract/route-detector-nestjs.js +134 -0
  26. package/dist/extract/route-detector-nestjs.js.map +1 -0
  27. package/dist/extract/route-detector-python.d.ts +28 -0
  28. package/dist/extract/route-detector-python.d.ts.map +1 -0
  29. package/dist/extract/route-detector-python.js +100 -0
  30. package/dist/extract/route-detector-python.js.map +1 -0
  31. package/dist/extract/route-detector-rails.d.ts +28 -0
  32. package/dist/extract/route-detector-rails.d.ts.map +1 -0
  33. package/dist/extract/route-detector-rails.js +162 -0
  34. package/dist/extract/route-detector-rails.js.map +1 -0
  35. package/dist/extract/route-detector.d.ts +45 -0
  36. package/dist/extract/route-detector.d.ts.map +1 -0
  37. package/dist/extract/route-detector.js +467 -0
  38. package/dist/extract/route-detector.js.map +1 -0
  39. package/dist/extract/tool-detector.d.ts +26 -0
  40. package/dist/extract/tool-detector.d.ts.map +1 -0
  41. package/dist/extract/tool-detector.js +364 -0
  42. package/dist/extract/tool-detector.js.map +1 -0
  43. package/dist/extract/types.d.ts +89 -0
  44. package/dist/extract/types.d.ts.map +1 -0
  45. package/dist/extract/types.js +11 -0
  46. package/dist/extract/types.js.map +1 -0
  47. package/dist/index.d.ts +10 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +10 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/parse/cobol-regex.d.ts +85 -0
  52. package/dist/parse/cobol-regex.d.ts.map +1 -0
  53. package/dist/parse/cobol-regex.js +355 -0
  54. package/dist/parse/cobol-regex.js.map +1 -0
  55. package/dist/parse/grammar-registry.d.ts +115 -0
  56. package/dist/parse/grammar-registry.d.ts.map +1 -0
  57. package/dist/parse/grammar-registry.js +278 -0
  58. package/dist/parse/grammar-registry.js.map +1 -0
  59. package/dist/parse/index.d.ts +14 -0
  60. package/dist/parse/index.d.ts.map +1 -0
  61. package/dist/parse/index.js +10 -0
  62. package/dist/parse/index.js.map +1 -0
  63. package/dist/parse/language-detector.d.ts +17 -0
  64. package/dist/parse/language-detector.d.ts.map +1 -0
  65. package/dist/parse/language-detector.js +104 -0
  66. package/dist/parse/language-detector.js.map +1 -0
  67. package/dist/parse/parse-worker.d.ts +24 -0
  68. package/dist/parse/parse-worker.d.ts.map +1 -0
  69. package/dist/parse/parse-worker.js +230 -0
  70. package/dist/parse/parse-worker.js.map +1 -0
  71. package/dist/parse/types.d.ts +49 -0
  72. package/dist/parse/types.d.ts.map +1 -0
  73. package/dist/parse/types.js +11 -0
  74. package/dist/parse/types.js.map +1 -0
  75. package/dist/parse/unified-queries.d.ts +37 -0
  76. package/dist/parse/unified-queries.d.ts.map +1 -0
  77. package/dist/parse/unified-queries.js +623 -0
  78. package/dist/parse/unified-queries.js.map +1 -0
  79. package/dist/parse/wasm-fallback.d.ts +88 -0
  80. package/dist/parse/wasm-fallback.d.ts.map +1 -0
  81. package/dist/parse/wasm-fallback.js +258 -0
  82. package/dist/parse/wasm-fallback.js.map +1 -0
  83. package/dist/parse/worker-pool.d.ts +48 -0
  84. package/dist/parse/worker-pool.d.ts.map +1 -0
  85. package/dist/parse/worker-pool.js +97 -0
  86. package/dist/parse/worker-pool.js.map +1 -0
  87. package/dist/pipeline/dep-parsers/go.d.ts +25 -0
  88. package/dist/pipeline/dep-parsers/go.d.ts.map +1 -0
  89. package/dist/pipeline/dep-parsers/go.js +146 -0
  90. package/dist/pipeline/dep-parsers/go.js.map +1 -0
  91. package/dist/pipeline/dep-parsers/index.d.ts +17 -0
  92. package/dist/pipeline/dep-parsers/index.d.ts.map +1 -0
  93. package/dist/pipeline/dep-parsers/index.js +16 -0
  94. package/dist/pipeline/dep-parsers/index.js.map +1 -0
  95. package/dist/pipeline/dep-parsers/maven.d.ts +24 -0
  96. package/dist/pipeline/dep-parsers/maven.d.ts.map +1 -0
  97. package/dist/pipeline/dep-parsers/maven.js +131 -0
  98. package/dist/pipeline/dep-parsers/maven.js.map +1 -0
  99. package/dist/pipeline/dep-parsers/npm.d.ts +30 -0
  100. package/dist/pipeline/dep-parsers/npm.d.ts.map +1 -0
  101. package/dist/pipeline/dep-parsers/npm.js +309 -0
  102. package/dist/pipeline/dep-parsers/npm.js.map +1 -0
  103. package/dist/pipeline/dep-parsers/nuget.d.ts +24 -0
  104. package/dist/pipeline/dep-parsers/nuget.d.ts.map +1 -0
  105. package/dist/pipeline/dep-parsers/nuget.js +178 -0
  106. package/dist/pipeline/dep-parsers/nuget.js.map +1 -0
  107. package/dist/pipeline/dep-parsers/python.d.ts +21 -0
  108. package/dist/pipeline/dep-parsers/python.d.ts.map +1 -0
  109. package/dist/pipeline/dep-parsers/python.js +369 -0
  110. package/dist/pipeline/dep-parsers/python.js.map +1 -0
  111. package/dist/pipeline/dep-parsers/rust.d.ts +18 -0
  112. package/dist/pipeline/dep-parsers/rust.d.ts.map +1 -0
  113. package/dist/pipeline/dep-parsers/rust.js +134 -0
  114. package/dist/pipeline/dep-parsers/rust.js.map +1 -0
  115. package/dist/pipeline/dep-parsers/spdx-normalize.d.ts +15 -0
  116. package/dist/pipeline/dep-parsers/spdx-normalize.d.ts.map +1 -0
  117. package/dist/pipeline/dep-parsers/spdx-normalize.js +31 -0
  118. package/dist/pipeline/dep-parsers/spdx-normalize.js.map +1 -0
  119. package/dist/pipeline/dep-parsers/types.d.ts +63 -0
  120. package/dist/pipeline/dep-parsers/types.d.ts.map +1 -0
  121. package/dist/pipeline/dep-parsers/types.js +56 -0
  122. package/dist/pipeline/dep-parsers/types.js.map +1 -0
  123. package/dist/pipeline/gitignore-stack.d.ts +44 -0
  124. package/dist/pipeline/gitignore-stack.d.ts.map +1 -0
  125. package/dist/pipeline/gitignore-stack.js +69 -0
  126. package/dist/pipeline/gitignore-stack.js.map +1 -0
  127. package/dist/pipeline/gitignore.d.ts +67 -0
  128. package/dist/pipeline/gitignore.d.ts.map +1 -0
  129. package/dist/pipeline/gitignore.js +210 -0
  130. package/dist/pipeline/gitignore.js.map +1 -0
  131. package/dist/pipeline/index.d.ts +53 -0
  132. package/dist/pipeline/index.d.ts.map +1 -0
  133. package/dist/pipeline/index.js +29 -0
  134. package/dist/pipeline/index.js.map +1 -0
  135. package/dist/pipeline/orchestrator.d.ts +105 -0
  136. package/dist/pipeline/orchestrator.d.ts.map +1 -0
  137. package/dist/pipeline/orchestrator.js +175 -0
  138. package/dist/pipeline/orchestrator.js.map +1 -0
  139. package/dist/pipeline/ownership-helpers/drift.d.ts +41 -0
  140. package/dist/pipeline/ownership-helpers/drift.d.ts.map +1 -0
  141. package/dist/pipeline/ownership-helpers/drift.js +122 -0
  142. package/dist/pipeline/ownership-helpers/drift.js.map +1 -0
  143. package/dist/pipeline/ownership-helpers/gini-community.d.ts +24 -0
  144. package/dist/pipeline/ownership-helpers/gini-community.d.ts.map +1 -0
  145. package/dist/pipeline/ownership-helpers/gini-community.js +32 -0
  146. package/dist/pipeline/ownership-helpers/gini-community.js.map +1 -0
  147. package/dist/pipeline/ownership-helpers/git-blame-batcher.d.ts +71 -0
  148. package/dist/pipeline/ownership-helpers/git-blame-batcher.d.ts.map +1 -0
  149. package/dist/pipeline/ownership-helpers/git-blame-batcher.js +178 -0
  150. package/dist/pipeline/ownership-helpers/git-blame-batcher.js.map +1 -0
  151. package/dist/pipeline/ownership-helpers/line-overlap.d.ts +35 -0
  152. package/dist/pipeline/ownership-helpers/line-overlap.d.ts.map +1 -0
  153. package/dist/pipeline/ownership-helpers/line-overlap.js +62 -0
  154. package/dist/pipeline/ownership-helpers/line-overlap.js.map +1 -0
  155. package/dist/pipeline/ownership-helpers/orphan.d.ts +73 -0
  156. package/dist/pipeline/ownership-helpers/orphan.d.ts.map +1 -0
  157. package/dist/pipeline/ownership-helpers/orphan.js +117 -0
  158. package/dist/pipeline/ownership-helpers/orphan.js.map +1 -0
  159. package/dist/pipeline/phases/accesses.d.ts +44 -0
  160. package/dist/pipeline/phases/accesses.d.ts.map +1 -0
  161. package/dist/pipeline/phases/accesses.js +194 -0
  162. package/dist/pipeline/phases/accesses.js.map +1 -0
  163. package/dist/pipeline/phases/annotate.d.ts +28 -0
  164. package/dist/pipeline/phases/annotate.d.ts.map +1 -0
  165. package/dist/pipeline/phases/annotate.js +60 -0
  166. package/dist/pipeline/phases/annotate.js.map +1 -0
  167. package/dist/pipeline/phases/cochange.d.ts +42 -0
  168. package/dist/pipeline/phases/cochange.d.ts.map +1 -0
  169. package/dist/pipeline/phases/cochange.js +0 -0
  170. package/dist/pipeline/phases/cochange.js.map +1 -0
  171. package/dist/pipeline/phases/communities.d.ts +34 -0
  172. package/dist/pipeline/phases/communities.d.ts.map +1 -0
  173. package/dist/pipeline/phases/communities.js +412 -0
  174. package/dist/pipeline/phases/communities.js.map +1 -0
  175. package/dist/pipeline/phases/complexity.d.ts +50 -0
  176. package/dist/pipeline/phases/complexity.d.ts.map +1 -0
  177. package/dist/pipeline/phases/complexity.js +794 -0
  178. package/dist/pipeline/phases/complexity.js.map +1 -0
  179. package/dist/pipeline/phases/confidence-demote.d.ts +23 -0
  180. package/dist/pipeline/phases/confidence-demote.d.ts.map +1 -0
  181. package/dist/pipeline/phases/confidence-demote.js +113 -0
  182. package/dist/pipeline/phases/confidence-demote.js.map +1 -0
  183. package/dist/pipeline/phases/content-cache.d.ts +166 -0
  184. package/dist/pipeline/phases/content-cache.d.ts.map +1 -0
  185. package/dist/pipeline/phases/content-cache.js +323 -0
  186. package/dist/pipeline/phases/content-cache.js.map +1 -0
  187. package/dist/pipeline/phases/coverage-parsers/cobertura.d.ts +25 -0
  188. package/dist/pipeline/phases/coverage-parsers/cobertura.d.ts.map +1 -0
  189. package/dist/pipeline/phases/coverage-parsers/cobertura.js +139 -0
  190. package/dist/pipeline/phases/coverage-parsers/cobertura.js.map +1 -0
  191. package/dist/pipeline/phases/coverage-parsers/coverage-py.d.ts +25 -0
  192. package/dist/pipeline/phases/coverage-parsers/coverage-py.d.ts.map +1 -0
  193. package/dist/pipeline/phases/coverage-parsers/coverage-py.js +51 -0
  194. package/dist/pipeline/phases/coverage-parsers/coverage-py.js.map +1 -0
  195. package/dist/pipeline/phases/coverage-parsers/jacoco.d.ts +32 -0
  196. package/dist/pipeline/phases/coverage-parsers/jacoco.d.ts.map +1 -0
  197. package/dist/pipeline/phases/coverage-parsers/jacoco.js +98 -0
  198. package/dist/pipeline/phases/coverage-parsers/jacoco.js.map +1 -0
  199. package/dist/pipeline/phases/coverage-parsers/lcov.d.ts +21 -0
  200. package/dist/pipeline/phases/coverage-parsers/lcov.d.ts.map +1 -0
  201. package/dist/pipeline/phases/coverage-parsers/lcov.js +104 -0
  202. package/dist/pipeline/phases/coverage-parsers/lcov.js.map +1 -0
  203. package/dist/pipeline/phases/coverage-parsers/types.d.ts +27 -0
  204. package/dist/pipeline/phases/coverage-parsers/types.d.ts.map +1 -0
  205. package/dist/pipeline/phases/coverage-parsers/types.js +39 -0
  206. package/dist/pipeline/phases/coverage-parsers/types.js.map +1 -0
  207. package/dist/pipeline/phases/coverage.d.ts +39 -0
  208. package/dist/pipeline/phases/coverage.d.ts.map +1 -0
  209. package/dist/pipeline/phases/coverage.js +154 -0
  210. package/dist/pipeline/phases/coverage.js.map +1 -0
  211. package/dist/pipeline/phases/cross-file.d.ts +40 -0
  212. package/dist/pipeline/phases/cross-file.d.ts.map +1 -0
  213. package/dist/pipeline/phases/cross-file.js +411 -0
  214. package/dist/pipeline/phases/cross-file.js.map +1 -0
  215. package/dist/pipeline/phases/dead-code.d.ts +28 -0
  216. package/dist/pipeline/phases/dead-code.d.ts.map +1 -0
  217. package/dist/pipeline/phases/dead-code.js +157 -0
  218. package/dist/pipeline/phases/dead-code.js.map +1 -0
  219. package/dist/pipeline/phases/default-set.d.ts +24 -0
  220. package/dist/pipeline/phases/default-set.d.ts.map +1 -0
  221. package/dist/pipeline/phases/default-set.js +133 -0
  222. package/dist/pipeline/phases/default-set.js.map +1 -0
  223. package/dist/pipeline/phases/dependencies.d.ts +59 -0
  224. package/dist/pipeline/phases/dependencies.d.ts.map +1 -0
  225. package/dist/pipeline/phases/dependencies.js +281 -0
  226. package/dist/pipeline/phases/dependencies.js.map +1 -0
  227. package/dist/pipeline/phases/embedder-pool.d.ts +31 -0
  228. package/dist/pipeline/phases/embedder-pool.d.ts.map +1 -0
  229. package/dist/pipeline/phases/embedder-pool.js +79 -0
  230. package/dist/pipeline/phases/embedder-pool.js.map +1 -0
  231. package/dist/pipeline/phases/embedder-worker.d.ts +28 -0
  232. package/dist/pipeline/phases/embedder-worker.d.ts.map +1 -0
  233. package/dist/pipeline/phases/embedder-worker.js +43 -0
  234. package/dist/pipeline/phases/embedder-worker.js.map +1 -0
  235. package/dist/pipeline/phases/embeddings.d.ts +117 -0
  236. package/dist/pipeline/phases/embeddings.d.ts.map +1 -0
  237. package/dist/pipeline/phases/embeddings.js +697 -0
  238. package/dist/pipeline/phases/embeddings.js.map +1 -0
  239. package/dist/pipeline/phases/fetches.d.ts +47 -0
  240. package/dist/pipeline/phases/fetches.d.ts.map +1 -0
  241. package/dist/pipeline/phases/fetches.js +207 -0
  242. package/dist/pipeline/phases/fetches.js.map +1 -0
  243. package/dist/pipeline/phases/incremental-helper.d.ts +96 -0
  244. package/dist/pipeline/phases/incremental-helper.d.ts.map +1 -0
  245. package/dist/pipeline/phases/incremental-helper.js +125 -0
  246. package/dist/pipeline/phases/incremental-helper.js.map +1 -0
  247. package/dist/pipeline/phases/incremental-scope.d.ts +67 -0
  248. package/dist/pipeline/phases/incremental-scope.d.ts.map +1 -0
  249. package/dist/pipeline/phases/incremental-scope.js +225 -0
  250. package/dist/pipeline/phases/incremental-scope.js.map +1 -0
  251. package/dist/pipeline/phases/markdown.d.ts +29 -0
  252. package/dist/pipeline/phases/markdown.d.ts.map +1 -0
  253. package/dist/pipeline/phases/markdown.js +298 -0
  254. package/dist/pipeline/phases/markdown.js.map +1 -0
  255. package/dist/pipeline/phases/mro.d.ts +24 -0
  256. package/dist/pipeline/phases/mro.d.ts.map +1 -0
  257. package/dist/pipeline/phases/mro.js +303 -0
  258. package/dist/pipeline/phases/mro.js.map +1 -0
  259. package/dist/pipeline/phases/openapi.d.ts +52 -0
  260. package/dist/pipeline/phases/openapi.d.ts.map +1 -0
  261. package/dist/pipeline/phases/openapi.js +285 -0
  262. package/dist/pipeline/phases/openapi.js.map +1 -0
  263. package/dist/pipeline/phases/orm.d.ts +26 -0
  264. package/dist/pipeline/phases/orm.d.ts.map +1 -0
  265. package/dist/pipeline/phases/orm.js +183 -0
  266. package/dist/pipeline/phases/orm.js.map +1 -0
  267. package/dist/pipeline/phases/ownership.d.ts +88 -0
  268. package/dist/pipeline/phases/ownership.d.ts.map +1 -0
  269. package/dist/pipeline/phases/ownership.js +479 -0
  270. package/dist/pipeline/phases/ownership.js.map +1 -0
  271. package/dist/pipeline/phases/parse.d.ts +63 -0
  272. package/dist/pipeline/phases/parse.d.ts.map +1 -0
  273. package/dist/pipeline/phases/parse.js +994 -0
  274. package/dist/pipeline/phases/parse.js.map +1 -0
  275. package/dist/pipeline/phases/processes.d.ts +47 -0
  276. package/dist/pipeline/phases/processes.d.ts.map +1 -0
  277. package/dist/pipeline/phases/processes.js +620 -0
  278. package/dist/pipeline/phases/processes.js.map +1 -0
  279. package/dist/pipeline/phases/profile.d.ts +33 -0
  280. package/dist/pipeline/phases/profile.d.ts.map +1 -0
  281. package/dist/pipeline/phases/profile.js +91 -0
  282. package/dist/pipeline/phases/profile.js.map +1 -0
  283. package/dist/pipeline/phases/repo-node.d.ts +112 -0
  284. package/dist/pipeline/phases/repo-node.d.ts.map +1 -0
  285. package/dist/pipeline/phases/repo-node.js +272 -0
  286. package/dist/pipeline/phases/repo-node.js.map +1 -0
  287. package/dist/pipeline/phases/risk-snapshot.d.ts +34 -0
  288. package/dist/pipeline/phases/risk-snapshot.d.ts.map +1 -0
  289. package/dist/pipeline/phases/risk-snapshot.js +63 -0
  290. package/dist/pipeline/phases/risk-snapshot.js.map +1 -0
  291. package/dist/pipeline/phases/routes.d.ts +31 -0
  292. package/dist/pipeline/phases/routes.d.ts.map +1 -0
  293. package/dist/pipeline/phases/routes.js +262 -0
  294. package/dist/pipeline/phases/routes.js.map +1 -0
  295. package/dist/pipeline/phases/sbom.d.ts +45 -0
  296. package/dist/pipeline/phases/sbom.d.ts.map +1 -0
  297. package/dist/pipeline/phases/sbom.js +289 -0
  298. package/dist/pipeline/phases/sbom.js.map +1 -0
  299. package/dist/pipeline/phases/scan.d.ts +54 -0
  300. package/dist/pipeline/phases/scan.d.ts.map +1 -0
  301. package/dist/pipeline/phases/scan.js +340 -0
  302. package/dist/pipeline/phases/scan.js.map +1 -0
  303. package/dist/pipeline/phases/scip-index.d.ts +54 -0
  304. package/dist/pipeline/phases/scip-index.d.ts.map +1 -0
  305. package/dist/pipeline/phases/scip-index.js +469 -0
  306. package/dist/pipeline/phases/scip-index.js.map +1 -0
  307. package/dist/pipeline/phases/structure.d.ts +21 -0
  308. package/dist/pipeline/phases/structure.d.ts.map +1 -0
  309. package/dist/pipeline/phases/structure.js +115 -0
  310. package/dist/pipeline/phases/structure.js.map +1 -0
  311. package/dist/pipeline/phases/summarize.d.ts +126 -0
  312. package/dist/pipeline/phases/summarize.d.ts.map +1 -0
  313. package/dist/pipeline/phases/summarize.js +401 -0
  314. package/dist/pipeline/phases/summarize.js.map +1 -0
  315. package/dist/pipeline/phases/temporal-helpers/branch-divergence.d.ts +42 -0
  316. package/dist/pipeline/phases/temporal-helpers/branch-divergence.d.ts.map +1 -0
  317. package/dist/pipeline/phases/temporal-helpers/branch-divergence.js +96 -0
  318. package/dist/pipeline/phases/temporal-helpers/branch-divergence.js.map +1 -0
  319. package/dist/pipeline/phases/temporal-helpers/churn-decay.d.ts +22 -0
  320. package/dist/pipeline/phases/temporal-helpers/churn-decay.d.ts.map +1 -0
  321. package/dist/pipeline/phases/temporal-helpers/churn-decay.js +32 -0
  322. package/dist/pipeline/phases/temporal-helpers/churn-decay.js.map +1 -0
  323. package/dist/pipeline/phases/temporal-helpers/conventional-commits.d.ts +21 -0
  324. package/dist/pipeline/phases/temporal-helpers/conventional-commits.d.ts.map +1 -0
  325. package/dist/pipeline/phases/temporal-helpers/conventional-commits.js +37 -0
  326. package/dist/pipeline/phases/temporal-helpers/conventional-commits.js.map +1 -0
  327. package/dist/pipeline/phases/temporal-helpers/gini.d.ts +32 -0
  328. package/dist/pipeline/phases/temporal-helpers/gini.d.ts.map +1 -0
  329. package/dist/pipeline/phases/temporal-helpers/gini.js +78 -0
  330. package/dist/pipeline/phases/temporal-helpers/gini.js.map +1 -0
  331. package/dist/pipeline/phases/temporal-helpers/revert-detect.d.ts +14 -0
  332. package/dist/pipeline/phases/temporal-helpers/revert-detect.d.ts.map +1 -0
  333. package/dist/pipeline/phases/temporal-helpers/revert-detect.js +25 -0
  334. package/dist/pipeline/phases/temporal-helpers/revert-detect.js.map +1 -0
  335. package/dist/pipeline/phases/temporal-helpers/test-pair.d.ts +18 -0
  336. package/dist/pipeline/phases/temporal-helpers/test-pair.d.ts.map +1 -0
  337. package/dist/pipeline/phases/temporal-helpers/test-pair.js +119 -0
  338. package/dist/pipeline/phases/temporal-helpers/test-pair.js.map +1 -0
  339. package/dist/pipeline/phases/temporal.d.ts +65 -0
  340. package/dist/pipeline/phases/temporal.d.ts.map +1 -0
  341. package/dist/pipeline/phases/temporal.js +621 -0
  342. package/dist/pipeline/phases/temporal.js.map +1 -0
  343. package/dist/pipeline/phases/tools.d.ts +21 -0
  344. package/dist/pipeline/phases/tools.d.ts.map +1 -0
  345. package/dist/pipeline/phases/tools.js +118 -0
  346. package/dist/pipeline/phases/tools.js.map +1 -0
  347. package/dist/pipeline/profile-detectors/api-contracts.d.ts +18 -0
  348. package/dist/pipeline/profile-detectors/api-contracts.d.ts.map +1 -0
  349. package/dist/pipeline/profile-detectors/api-contracts.js +78 -0
  350. package/dist/pipeline/profile-detectors/api-contracts.js.map +1 -0
  351. package/dist/pipeline/profile-detectors/framework-detector.d.ts +11 -0
  352. package/dist/pipeline/profile-detectors/framework-detector.d.ts.map +1 -0
  353. package/dist/pipeline/profile-detectors/framework-detector.js +11 -0
  354. package/dist/pipeline/profile-detectors/framework-detector.js.map +1 -0
  355. package/dist/pipeline/profile-detectors/frameworks-catalog.d.ts +7 -0
  356. package/dist/pipeline/profile-detectors/frameworks-catalog.d.ts.map +1 -0
  357. package/dist/pipeline/profile-detectors/frameworks-catalog.js +7 -0
  358. package/dist/pipeline/profile-detectors/frameworks-catalog.js.map +1 -0
  359. package/dist/pipeline/profile-detectors/frameworks.d.ts +7 -0
  360. package/dist/pipeline/profile-detectors/frameworks.d.ts.map +1 -0
  361. package/dist/pipeline/profile-detectors/frameworks.js +7 -0
  362. package/dist/pipeline/profile-detectors/frameworks.js.map +1 -0
  363. package/dist/pipeline/profile-detectors/iac.d.ts +22 -0
  364. package/dist/pipeline/profile-detectors/iac.d.ts.map +1 -0
  365. package/dist/pipeline/profile-detectors/iac.js +97 -0
  366. package/dist/pipeline/profile-detectors/iac.js.map +1 -0
  367. package/dist/pipeline/profile-detectors/languages.d.ts +18 -0
  368. package/dist/pipeline/profile-detectors/languages.d.ts.map +1 -0
  369. package/dist/pipeline/profile-detectors/languages.js +60 -0
  370. package/dist/pipeline/profile-detectors/languages.js.map +1 -0
  371. package/dist/pipeline/profile-detectors/manifests.d.ts +7 -0
  372. package/dist/pipeline/profile-detectors/manifests.d.ts.map +1 -0
  373. package/dist/pipeline/profile-detectors/manifests.js +7 -0
  374. package/dist/pipeline/profile-detectors/manifests.js.map +1 -0
  375. package/dist/pipeline/profile-detectors/src-dirs.d.ts +17 -0
  376. package/dist/pipeline/profile-detectors/src-dirs.d.ts.map +1 -0
  377. package/dist/pipeline/profile-detectors/src-dirs.js +89 -0
  378. package/dist/pipeline/profile-detectors/src-dirs.js.map +1 -0
  379. package/dist/pipeline/profile-detectors/variant-detectors.d.ts +7 -0
  380. package/dist/pipeline/profile-detectors/variant-detectors.d.ts.map +1 -0
  381. package/dist/pipeline/profile-detectors/variant-detectors.js +7 -0
  382. package/dist/pipeline/profile-detectors/variant-detectors.js.map +1 -0
  383. package/dist/pipeline/runner.d.ts +54 -0
  384. package/dist/pipeline/runner.d.ts.map +1 -0
  385. package/dist/pipeline/runner.js +247 -0
  386. package/dist/pipeline/runner.js.map +1 -0
  387. package/dist/pipeline/types.d.ts +235 -0
  388. package/dist/pipeline/types.d.ts.map +1 -0
  389. package/dist/pipeline/types.js +15 -0
  390. package/dist/pipeline/types.js.map +1 -0
  391. package/dist/providers/c.d.ts +3 -0
  392. package/dist/providers/c.d.ts.map +1 -0
  393. package/dist/providers/c.js +162 -0
  394. package/dist/providers/c.js.map +1 -0
  395. package/dist/providers/cobol.d.ts +19 -0
  396. package/dist/providers/cobol.d.ts.map +1 -0
  397. package/dist/providers/cobol.js +44 -0
  398. package/dist/providers/cobol.js.map +1 -0
  399. package/dist/providers/cpp.d.ts +3 -0
  400. package/dist/providers/cpp.d.ts.map +1 -0
  401. package/dist/providers/cpp.js +200 -0
  402. package/dist/providers/cpp.js.map +1 -0
  403. package/dist/providers/csharp.d.ts +3 -0
  404. package/dist/providers/csharp.d.ts.map +1 -0
  405. package/dist/providers/csharp.js +292 -0
  406. package/dist/providers/csharp.js.map +1 -0
  407. package/dist/providers/dart.d.ts +3 -0
  408. package/dist/providers/dart.d.ts.map +1 -0
  409. package/dist/providers/dart.js +214 -0
  410. package/dist/providers/dart.js.map +1 -0
  411. package/dist/providers/definition-ids.d.ts +18 -0
  412. package/dist/providers/definition-ids.d.ts.map +1 -0
  413. package/dist/providers/definition-ids.js +23 -0
  414. package/dist/providers/definition-ids.js.map +1 -0
  415. package/dist/providers/extract-helpers.d.ts +60 -0
  416. package/dist/providers/extract-helpers.d.ts.map +1 -0
  417. package/dist/providers/extract-helpers.js +296 -0
  418. package/dist/providers/extract-helpers.js.map +1 -0
  419. package/dist/providers/extraction-types.d.ts +85 -0
  420. package/dist/providers/extraction-types.d.ts.map +1 -0
  421. package/dist/providers/extraction-types.js +13 -0
  422. package/dist/providers/extraction-types.js.map +1 -0
  423. package/dist/providers/go.d.ts +3 -0
  424. package/dist/providers/go.d.ts.map +1 -0
  425. package/dist/providers/go.js +359 -0
  426. package/dist/providers/go.js.map +1 -0
  427. package/dist/providers/http-detect.d.ts +44 -0
  428. package/dist/providers/http-detect.d.ts.map +1 -0
  429. package/dist/providers/http-detect.js +307 -0
  430. package/dist/providers/http-detect.js.map +1 -0
  431. package/dist/providers/index.d.ts +38 -0
  432. package/dist/providers/index.d.ts.map +1 -0
  433. package/dist/providers/index.js +33 -0
  434. package/dist/providers/index.js.map +1 -0
  435. package/dist/providers/java.d.ts +3 -0
  436. package/dist/providers/java.d.ts.map +1 -0
  437. package/dist/providers/java.js +259 -0
  438. package/dist/providers/java.js.map +1 -0
  439. package/dist/providers/javascript.d.ts +3 -0
  440. package/dist/providers/javascript.d.ts.map +1 -0
  441. package/dist/providers/javascript.js +139 -0
  442. package/dist/providers/javascript.js.map +1 -0
  443. package/dist/providers/kotlin.d.ts +3 -0
  444. package/dist/providers/kotlin.d.ts.map +1 -0
  445. package/dist/providers/kotlin.js +175 -0
  446. package/dist/providers/kotlin.js.map +1 -0
  447. package/dist/providers/php.d.ts +3 -0
  448. package/dist/providers/php.d.ts.map +1 -0
  449. package/dist/providers/php.js +218 -0
  450. package/dist/providers/php.js.map +1 -0
  451. package/dist/providers/python-accesses.d.ts +9 -0
  452. package/dist/providers/python-accesses.d.ts.map +1 -0
  453. package/dist/providers/python-accesses.js +22 -0
  454. package/dist/providers/python-accesses.js.map +1 -0
  455. package/dist/providers/python.d.ts +3 -0
  456. package/dist/providers/python.d.ts.map +1 -0
  457. package/dist/providers/python.js +323 -0
  458. package/dist/providers/python.js.map +1 -0
  459. package/dist/providers/registry.d.ts +4 -0
  460. package/dist/providers/registry.d.ts.map +1 -0
  461. package/dist/providers/registry.js +46 -0
  462. package/dist/providers/registry.js.map +1 -0
  463. package/dist/providers/resolution/c3.d.ts +6 -0
  464. package/dist/providers/resolution/c3.d.ts.map +1 -0
  465. package/dist/providers/resolution/c3.js +76 -0
  466. package/dist/providers/resolution/c3.js.map +1 -0
  467. package/dist/providers/resolution/context.d.ts +38 -0
  468. package/dist/providers/resolution/context.d.ts.map +1 -0
  469. package/dist/providers/resolution/context.js +45 -0
  470. package/dist/providers/resolution/context.js.map +1 -0
  471. package/dist/providers/resolution/first-wins.d.ts +3 -0
  472. package/dist/providers/resolution/first-wins.d.ts.map +1 -0
  473. package/dist/providers/resolution/first-wins.js +27 -0
  474. package/dist/providers/resolution/first-wins.js.map +1 -0
  475. package/dist/providers/resolution/mro.d.ts +16 -0
  476. package/dist/providers/resolution/mro.d.ts.map +1 -0
  477. package/dist/providers/resolution/mro.js +14 -0
  478. package/dist/providers/resolution/mro.js.map +1 -0
  479. package/dist/providers/resolution/none.d.ts +3 -0
  480. package/dist/providers/resolution/none.d.ts.map +1 -0
  481. package/dist/providers/resolution/none.js +11 -0
  482. package/dist/providers/resolution/none.js.map +1 -0
  483. package/dist/providers/resolution/python-all-filter.d.ts +25 -0
  484. package/dist/providers/resolution/python-all-filter.d.ts.map +1 -0
  485. package/dist/providers/resolution/python-all-filter.js +64 -0
  486. package/dist/providers/resolution/python-all-filter.js.map +1 -0
  487. package/dist/providers/resolution/resolver-strategy.d.ts +42 -0
  488. package/dist/providers/resolution/resolver-strategy.d.ts.map +1 -0
  489. package/dist/providers/resolution/resolver-strategy.js +50 -0
  490. package/dist/providers/resolution/resolver-strategy.js.map +1 -0
  491. package/dist/providers/resolution/single-inheritance.d.ts +3 -0
  492. package/dist/providers/resolution/single-inheritance.d.ts.map +1 -0
  493. package/dist/providers/resolution/single-inheritance.js +21 -0
  494. package/dist/providers/resolution/single-inheritance.js.map +1 -0
  495. package/dist/providers/resolution/stack-graphs/__fixtures__/mock-tree.d.ts +16 -0
  496. package/dist/providers/resolution/stack-graphs/__fixtures__/mock-tree.d.ts.map +1 -0
  497. package/dist/providers/resolution/stack-graphs/__fixtures__/mock-tree.js +50 -0
  498. package/dist/providers/resolution/stack-graphs/__fixtures__/mock-tree.js.map +1 -0
  499. package/dist/providers/resolution/stack-graphs/glue.d.ts +15 -0
  500. package/dist/providers/resolution/stack-graphs/glue.d.ts.map +1 -0
  501. package/dist/providers/resolution/stack-graphs/glue.js +44 -0
  502. package/dist/providers/resolution/stack-graphs/glue.js.map +1 -0
  503. package/dist/providers/resolution/stack-graphs/node-edge-builder.d.ts +30 -0
  504. package/dist/providers/resolution/stack-graphs/node-edge-builder.d.ts.map +1 -0
  505. package/dist/providers/resolution/stack-graphs/node-edge-builder.js +366 -0
  506. package/dist/providers/resolution/stack-graphs/node-edge-builder.js.map +1 -0
  507. package/dist/providers/resolution/stack-graphs/partial-path-engine.d.ts +9 -0
  508. package/dist/providers/resolution/stack-graphs/partial-path-engine.d.ts.map +1 -0
  509. package/dist/providers/resolution/stack-graphs/partial-path-engine.js +152 -0
  510. package/dist/providers/resolution/stack-graphs/partial-path-engine.js.map +1 -0
  511. package/dist/providers/resolution/stack-graphs/rule-parser.d.ts +11 -0
  512. package/dist/providers/resolution/stack-graphs/rule-parser.d.ts.map +1 -0
  513. package/dist/providers/resolution/stack-graphs/rule-parser.js +247 -0
  514. package/dist/providers/resolution/stack-graphs/rule-parser.js.map +1 -0
  515. package/dist/providers/resolution/stack-graphs/types.d.ts +93 -0
  516. package/dist/providers/resolution/stack-graphs/types.d.ts.map +1 -0
  517. package/dist/providers/resolution/stack-graphs/types.js +11 -0
  518. package/dist/providers/resolution/stack-graphs/types.js.map +1 -0
  519. package/dist/providers/resolution/stack-graphs-python.d.ts +27 -0
  520. package/dist/providers/resolution/stack-graphs-python.d.ts.map +1 -0
  521. package/dist/providers/resolution/stack-graphs-python.js +104 -0
  522. package/dist/providers/resolution/stack-graphs-python.js.map +1 -0
  523. package/dist/providers/resolution/stack-graphs-ts.d.ts +134 -0
  524. package/dist/providers/resolution/stack-graphs-ts.d.ts.map +1 -0
  525. package/dist/providers/resolution/stack-graphs-ts.js +372 -0
  526. package/dist/providers/resolution/stack-graphs-ts.js.map +1 -0
  527. package/dist/providers/ruby.d.ts +3 -0
  528. package/dist/providers/ruby.d.ts.map +1 -0
  529. package/dist/providers/ruby.js +259 -0
  530. package/dist/providers/ruby.js.map +1 -0
  531. package/dist/providers/rust.d.ts +3 -0
  532. package/dist/providers/rust.d.ts.map +1 -0
  533. package/dist/providers/rust.js +318 -0
  534. package/dist/providers/rust.js.map +1 -0
  535. package/dist/providers/swift.d.ts +3 -0
  536. package/dist/providers/swift.d.ts.map +1 -0
  537. package/dist/providers/swift.js +177 -0
  538. package/dist/providers/swift.js.map +1 -0
  539. package/dist/providers/test-helpers.d.ts +24 -0
  540. package/dist/providers/test-helpers.d.ts.map +1 -0
  541. package/dist/providers/test-helpers.js +33 -0
  542. package/dist/providers/test-helpers.js.map +1 -0
  543. package/dist/providers/ts-shared.d.ts +30 -0
  544. package/dist/providers/ts-shared.d.ts.map +1 -0
  545. package/dist/providers/ts-shared.js +328 -0
  546. package/dist/providers/ts-shared.js.map +1 -0
  547. package/dist/providers/tsx.d.ts +7 -0
  548. package/dist/providers/tsx.d.ts.map +1 -0
  549. package/dist/providers/tsx.js +79 -0
  550. package/dist/providers/tsx.js.map +1 -0
  551. package/dist/providers/types.d.ts +166 -0
  552. package/dist/providers/types.d.ts.map +1 -0
  553. package/dist/providers/types.js +7 -0
  554. package/dist/providers/types.js.map +1 -0
  555. package/dist/providers/typescript-family-accesses.d.ts +14 -0
  556. package/dist/providers/typescript-family-accesses.d.ts.map +1 -0
  557. package/dist/providers/typescript-family-accesses.js +27 -0
  558. package/dist/providers/typescript-family-accesses.js.map +1 -0
  559. package/dist/providers/typescript.d.ts +9 -0
  560. package/dist/providers/typescript.d.ts.map +1 -0
  561. package/dist/providers/typescript.js +84 -0
  562. package/dist/providers/typescript.js.map +1 -0
  563. package/package.json +108 -0
@@ -0,0 +1,697 @@
1
+ /**
2
+ * Embeddings phase — generates 768-dim vectors across one or more
3
+ * hierarchical tiers and materialises them into the phase output as an
4
+ * array of `EmbeddingRow`s the CLI upserts into DuckDB.
5
+ *
6
+ * Granularity tiers (P03):
7
+ * - `"symbol"` — one vector per callable/declaration symbol. When a
8
+ * `SymbolSummaryRow` exists for the node the text is fused
9
+ * `signature\nsummary\nbody`; otherwise we fall back to the raw
10
+ * signature/description pair.
11
+ * - `"file"` — one vector per scanned file. Coarse tier used by the
12
+ * `--zoom` retrieval path. Files larger than ~8192 tokens are
13
+ * truncated to the first `N` chars so a single outlier never blows
14
+ * up batch latency.
15
+ * - `"community"` — one vector per Community node. Architectural tier
16
+ * used to answer "which subsystem handles X?" queries. Text is
17
+ * `inferredLabel\nkeywords…\ntop_symbols…`.
18
+ *
19
+ * Contract:
20
+ * - `options.embeddings !== true` → phase is a silent no-op.
21
+ * - Weights missing (EMBEDDER_NOT_SETUP) → emit a warning via the
22
+ * progress callback and return zeroes. NEVER aborts the pipeline.
23
+ * - Default `granularity = ["symbol"]` preserves v1.0 behaviour; callers
24
+ * opt in to hierarchical tiers explicitly.
25
+ *
26
+ * Determinism:
27
+ * - Rows are sorted by (granularity, node_id, chunk_index).
28
+ * `embeddingsHash` hashes the canonical representation so downstream
29
+ * callers can assert byte-level stability across runs. The hash is
30
+ * returned in the phase output but is intentionally not folded into
31
+ * graphHash.
32
+ */
33
+ import { createHash } from "node:crypto";
34
+ import { readFileSync } from "node:fs";
35
+ import path from "node:path";
36
+ import { EmbedderNotSetupError, openOnnxEmbedder, tryOpenHttpEmbedder, } from "@opencodehub/embedder";
37
+ import { ANNOTATE_PHASE_NAME } from "./annotate.js";
38
+ import { COMMUNITIES_PHASE_NAME } from "./communities.js";
39
+ import { openOnnxEmbedderPool } from "./embedder-pool.js";
40
+ import { SCAN_PHASE_NAME } from "./scan.js";
41
+ import { SUMMARIZE_PHASE_NAME } from "./summarize.js";
42
/**
 * Fallback number of texts handed to a single `embedBatch()` call when the
 * caller does not supply `options.embeddingsBatchSize`. The value is a
 * throughput/memory trade-off picked by the original authors for laptop-class
 * hosts; it has no effect on the produced vectors.
 */
const DEFAULT_EMBEDDING_BATCH_SIZE = 32;

/** Name under which this phase registers and reports progress events. */
export const EMBEDDER_PHASE_NAME = "embeddings";
52
/**
 * Options key under which an embedding hash-cache adapter may be attached to
 * `ctx.options`. Exported as a constant so the writer and the reader of the
 * key cannot drift apart through a typo.
 */
export const EMBEDDING_HASH_CACHE_OPTIONS_KEY = "__embeddingHashCache";

/**
 * Fetch the hash-cache adapter from the phase context, if one is installed.
 *
 * @param ctx phase context; only `ctx.options` is read.
 * @returns the value stored under {@link EMBEDDING_HASH_CACHE_OPTIONS_KEY}
 *   when it is a non-null object exposing a `list` function, otherwise
 *   `undefined`.
 */
function resolveEmbeddingHashCacheAdapter(ctx) {
  const candidate = ctx.options[EMBEDDING_HASH_CACHE_OPTIONS_KEY];
  const isObject = typeof candidate === "object" && candidate !== null;
  if (!isObject) {
    return undefined;
  }
  return typeof candidate.list === "function" ? candidate : undefined;
}
69
/**
 * Build the lookup key for the prior-hash map: the three coordinates joined
 * with NUL bytes. NUL is used as the separator because `:` can occur inside
 * node ids.
 *
 * @param granularity tier name (e.g. "symbol", "file", "community").
 * @param nodeId graph node id.
 * @param chunkIndex zero-based chunk position within the node.
 * @returns `<granularity>\0<nodeId>\0<chunkIndex>`.
 */
function priorHashKey(granularity, nodeId, chunkIndex) {
  const SEP = "\0";
  return `${granularity}${SEP}${nodeId}${SEP}${chunkIndex}`;
}
77
/**
 * Graph node kinds that qualify for a symbol-tier embedding. Membership is
 * checked by `isEmbeddableSymbol`.
 */
const EMBEDDABLE_KINDS = new Set(
  // Callables first, then type-like declarations.
  ["Function", "Method", "Constructor", "Route", "Tool"].concat(["Class", "Interface"]),
);
87
/**
 * Upper bound, in characters, on the source body appended to a
 * summary-fused symbol text (see `symbolText`). Longer bodies are cut to
 * this many leading characters before the text reaches the chunker.
 */
const SYMBOL_BODY_CHAR_CAP = 1200;

/**
 * Upper bound, in characters, on the file content considered for a
 * file-tier embedding: 8192 tokens at an assumed ~4 characters per token.
 * Content beyond the cap is discarded before chunking.
 */
const FILE_CHAR_CAP = 4 * 8192;
102
/**
 * Lower-cased file extensions (including the leading dot) that are eligible
 * for a file-tier embedding. Files with any other extension are ignored by
 * the file tier, which keeps binary assets and similar artifacts out.
 */
const EMBEDDABLE_FILE_EXTS = new Set([
  // TypeScript / JavaScript
  ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
  // Other programming languages
  ".py", ".go", ".rs", ".java", ".kt", ".rb", ".php", ".cs", ".swift",
  // Documentation
  ".md", ".mdx",
]);
128
/**
 * Build the phase result used when no embedding work was performed (phase
 * disabled, or embedder weights not installed): all counters are zero, the
 * row list is empty, `ranEmbedder` is `false`, and `embeddingsHash` is the
 * digest `hashRows` yields for an empty row list.
 *
 * @returns a fresh output object on every call.
 */
function emptyOutput() {
  const zeroPerTier = { symbol: 0, file: 0, community: 0 };
  const digestOfNothing = hashRows([]);
  return {
    embeddingsInserted: 0,
    symbolsSkipped: 0,
    chunksTotal: 0,
    embeddingsModelId: "",
    embeddingsHash: digestOfNothing,
    rows: [],
    ranEmbedder: false,
    byGranularity: zeroPerTier,
    summaryFused: false,
    chunksSkipped: 0,
  };
}
142
/**
 * Compose the text that is embedded for one symbol.
 *
 * Without a summary the result is `head` alone, or `head\ndescription` when
 * the node has a non-empty description; `head` is the node's signature when
 * present and non-empty, else its name.
 *
 * With a summary the result is up to three newline-joined parts: the
 * summary's `signatureSummary` (falling back to `head` when absent or
 * empty), the summary text, and — when `body` is non-empty — the body cut to
 * at most `SYMBOL_BODY_CHAR_CAP` characters.
 *
 * @param node symbol node (`name`, optional `signature` / `description`).
 * @param summary optional `{ summaryText, signatureSummary? }`.
 * @param body optional source text of the symbol.
 * @returns the fused text.
 */
function symbolText(node, summary, body) {
  const hasSignature = node.signature !== undefined && node.signature.length > 0;
  const head = hasSignature ? node.signature : node.name;

  if (summary === undefined) {
    const description = node.description ?? "";
    if (description.length > 0) {
      return `${head}\n${description}`;
    }
    return head;
  }

  const hasSignatureSummary =
    summary.signatureSummary !== undefined && summary.signatureSummary.length > 0;
  const pieces = [hasSignatureSummary ? summary.signatureSummary : head, summary.summaryText];
  if (body !== undefined && body.length > 0) {
    const capped = body.length > SYMBOL_BODY_CHAR_CAP ? body.slice(0, SYMBOL_BODY_CHAR_CAP) : body;
    pieces.push(capped);
  }
  return pieces.join("\n");
}
168
/**
 * Greedily split `text` into chunks of at most `maxChars` characters, where
 * `maxChars = max(tokens * 4, 64)` (a ~4 chars/token approximation).
 *
 * - Text that already fits is returned as a single chunk, unchanged.
 * - Otherwise the text is split on `\n`; consecutive lines are packed into a
 *   chunk (re-joined with `\n`) while the packed length stays within budget.
 * - A single line longer than the budget is emitted as fixed-width slices.
 *
 * Chunks produced by the packing path are never empty: an empty pending
 * buffer is not flushed, and the one-character separator is only counted
 * when there is something to join onto. (Previously a line of exactly
 * `maxChars` characters arriving on an empty buffer emitted a spurious `""`
 * chunk, which could end up being embedded.)
 *
 * @param {string} text input text.
 * @param {number} tokens token budget per chunk.
 * @returns {string[]} chunks in source order; may be empty when the
 *   over-budget text consists solely of newlines.
 */
function splitIntoChunks(text, tokens) {
  const maxChars = Math.max(tokens * 4, 64);
  if (text.length <= maxChars) {
    return [text];
  }
  const chunks = [];
  let buf = "";
  for (const line of text.split("\n")) {
    if (line.length > maxChars) {
      // Flush whatever was pending, then hard-slice the oversized line.
      if (buf.length > 0) {
        chunks.push(buf);
        buf = "";
      }
      for (let i = 0; i < line.length; i += maxChars) {
        chunks.push(line.slice(i, i + maxChars));
      }
      continue;
    }
    if (buf.length === 0) {
      // Nothing pending: the line starts a new chunk and no separator is
      // needed, so it fits by construction (line.length <= maxChars).
      buf = line;
      continue;
    }
    if (buf.length + 1 + line.length > maxChars) {
      chunks.push(buf);
      buf = line;
    } else {
      buf = `${buf}\n${line}`;
    }
  }
  if (buf.length > 0) {
    chunks.push(buf);
  }
  return chunks;
}
210
/**
 * Compute a SHA-256 hex digest over a canonical serialisation of embedding
 * rows.
 *
 * Rows are ordered by (granularity, nodeId, chunkIndex) on a copy — the
 * input array is not mutated — with a missing granularity treated as
 * `"symbol"`. Each row then contributes
 * `<granularity>\0<nodeId>\0<chunkIndex>\0<vector bytes>\0<contentHash>\0`,
 * where the vector bytes are the raw bytes backing `row.vector` in the
 * host's native byte order.
 *
 * @param rows rows with `nodeId`, `chunkIndex`, `vector` (typed array),
 *   `contentHash` and optional `granularity`.
 * @returns lowercase hex digest.
 */
function hashRows(rows) {
  const tierOf = (row) => row.granularity ?? "symbol";
  const byCanonicalOrder = (left, right) => {
    const leftTier = tierOf(left);
    const rightTier = tierOf(right);
    if (leftTier !== rightTier) {
      return leftTier < rightTier ? -1 : 1;
    }
    if (left.nodeId !== right.nodeId) {
      return left.nodeId < right.nodeId ? -1 : 1;
    }
    return left.chunkIndex - right.chunkIndex;
  };

  const digest = createHash("sha256");
  const NUL = "\0";
  for (const row of Array.from(rows).sort(byCanonicalOrder)) {
    digest.update(tierOf(row), "utf8");
    digest.update(NUL);
    digest.update(row.nodeId, "utf8");
    digest.update(NUL);
    digest.update(String(row.chunkIndex));
    digest.update(NUL);
    // Copy exactly the bytes the typed array views, so a vector that is a
    // window onto a larger buffer hashes only its own elements.
    const { buffer, byteOffset, byteLength } = row.vector;
    digest.update(new Uint8Array(buffer.slice(byteOffset, byteOffset + byteLength)));
    digest.update(NUL);
    digest.update(row.contentHash, "utf8");
    digest.update(NUL);
  }
  return digest.digest("hex");
}
246
/**
 * Content hash of one chunk: SHA-256 over `<granularity>\0<text>`. Including
 * the tier means identical text embedded at two different tiers produces
 * two different hashes.
 *
 * @param granularity tier name.
 * @param text chunk text.
 * @returns lowercase hex digest.
 */
function hashText(granularity, text) {
  return createHash("sha256")
    .update(granularity, "utf8")
    .update("\0")
    .update(text, "utf8")
    .digest("hex");
}
260
/**
 * Structural check for a symbol-tier candidate: a non-null object whose
 * `id`, `name`, `kind` and `filePath` are all strings and whose `kind` is a
 * member of `EMBEDDABLE_KINDS`.
 *
 * @param node any value.
 * @returns `true` when the value satisfies every condition above.
 */
function isEmbeddableSymbol(node) {
  if (node === null || typeof node !== "object") {
    return false;
  }
  for (const field of ["id", "name", "kind", "filePath"]) {
    if (typeof node[field] !== "string") {
      return false;
    }
  }
  return EMBEDDABLE_KINDS.has(node["kind"]);
}
270
/**
 * Structural check for a File graph node: a non-null object with a string
 * `id`, `kind === "File"`, and string `filePath` and `name`.
 *
 * @param node any value.
 * @returns `true` when the value has that shape.
 */
function isFileNode(node) {
  if (node === null || typeof node !== "object") {
    return false;
  }
  if (typeof node["id"] !== "string") {
    return false;
  }
  if (node["kind"] !== "File") {
    return false;
  }
  return typeof node["filePath"] === "string" && typeof node["name"] === "string";
}
279
/**
 * Structural check for a Community graph node: a non-null object with a
 * string `id`, `kind === "Community"`, and a string `name`.
 *
 * @param node any value.
 * @returns `true` when the value has that shape.
 */
function isCommunityNode(node) {
  if (node === null || typeof node !== "object") {
    return false;
  }
  if (typeof node["id"] !== "string") {
    return false;
  }
  if (node["kind"] !== "Community") {
    return false;
  }
  return typeof node["name"] === "string";
}
285
/**
 * Normalise the requested tier list.
 *
 * - `undefined` or an empty list yields the default `["symbol"]`.
 * - Otherwise duplicates are removed and the caller's first-seen order is
 *   kept.
 *
 * The returned order is the caller's order, not a fixed one.
 *
 * @param requested optional array of tier names.
 * @returns a new array of distinct tier names.
 */
function normalizeGranularities(requested) {
  if (requested === undefined || requested.length === 0) {
    return ["symbol"];
  }
  // A Set iterates in insertion order, which gives first-seen de-duplication.
  return [...new Set(requested)];
}
303
/**
 * Read lines `startLine..endLine` (1-based, inclusive) of a file and return
 * them joined with `\n`.
 *
 * `filePath` is used as-is when absolute, otherwise resolved against
 * `repoPath`. The file is split on `\n` or `\r\n`; the requested range is
 * clamped to the lines that exist. Any failure (missing file, permission
 * error, …) and any empty range yields `undefined` rather than an exception.
 *
 * @param repoPath base directory for relative paths.
 * @param filePath absolute or repo-relative file path.
 * @param startLine first line to include (1-based).
 * @param endLine last line to include (1-based).
 * @returns the selected text, or `undefined`.
 */
function readSourceSpan(repoPath, filePath, startLine, endLine) {
  try {
    const target = path.isAbsolute(filePath) ? filePath : path.join(repoPath, filePath);
    const rows = readFileSync(target, "utf-8").split(/\r?\n/);
    const first = Math.max(0, startLine - 1);
    const last = Math.min(rows.length, endLine);
    if (last <= first) {
      return undefined;
    }
    return rows.slice(first, last).join("\n");
  } catch {
    return undefined;
  }
}
324
/**
 * Read an entire file as UTF-8 text.
 *
 * `relPath` is used as-is when absolute, otherwise resolved against
 * `repoPath`. Any failure yields `undefined` instead of throwing, so an
 * unreadable file is simply skipped by the caller.
 *
 * @param repoPath base directory for relative paths.
 * @param relPath absolute or repo-relative file path.
 * @returns the file contents, or `undefined` when it cannot be read.
 */
function readFileWhole(repoPath, relPath) {
  try {
    const target = path.isAbsolute(relPath) ? relPath : path.join(repoPath, relPath);
    const contents = readFileSync(target, "utf-8");
    return contents;
  } catch {
    return undefined;
  }
}
333
/**
 * Run the embeddings phase for one pipeline context.
 *
 * Flow:
 *  1. Return `emptyOutput()` unless `ctx.options.embeddings === true`.
 *  2. Open an embedder: the HTTP embedder when `tryOpenHttpEmbedder` yields
 *     one, otherwise a local ONNX embedder (a worker pool when more than one
 *     worker is requested). `EmbedderNotSetupError` is reported as a warning
 *     through `ctx.onProgress` and produces `emptyOutput()`; every other
 *     open error propagates.
 *  3. For each requested tier (symbol, file, community — always visited in
 *     that order) build embedding jobs, skipping content whose hash matches
 *     the prior-hash cache (unless `options.force === true` or no cache
 *     adapter is installed).
 *  4. Embed the jobs in waves of `workers` concurrent batches of `batchSize`
 *     texts and collect the resulting rows.
 *
 * The embedder is always closed, including when step 3 or 4 throws.
 *
 * @param ctx phase context (`options`, `graph`, `phaseOutputs`, `repoPath`,
 *   optional `onProgress`).
 * @returns the phase output: rows, counters, model id and `embeddingsHash`.
 */
async function runEmbeddings(ctx) {
  // 1. Flag gate. Silent no-op when disabled.
  if (ctx.options.embeddings !== true) {
    return emptyOutput();
  }
  const tiers = normalizeGranularities(ctx.options.embeddingsGranularity);

  // Numeric knobs. Values are floored and clamped to >= 1. A non-finite
  // value (e.g. NaN from a failed CLI parse) falls back to the default:
  // left as NaN it would make every dispatch-loop comparison false and the
  // phase would embed nothing without any signal.
  const positiveInt = (value, fallback) => {
    const floored = Math.floor(value ?? fallback);
    return Number.isFinite(floored) ? Math.max(1, floored) : fallback;
  };
  // `workers <= 1` keeps the in-process embedder; `>= 2` uses the ONNX
  // worker pool. The HTTP backend does not use the pool.
  const workers = positiveInt(ctx.options.embeddingsWorkers, 1);
  const batchSize = positiveInt(ctx.options.embeddingsBatchSize, DEFAULT_EMBEDDING_BATCH_SIZE);

  // 2. Open the embedder. The HTTP path is tried first and receives the
  //    offline flag; when it yields `null` we fall back to local ONNX.
  let embedder;
  try {
    const httpEmbedder = await tryOpenHttpEmbedder({ offline: ctx.options.offline === true });
    if (httpEmbedder !== null) {
      embedder = httpEmbedder;
    } else {
      const variant = ctx.options.embeddingsVariant ?? "fp32";
      const cfg = { variant };
      if (ctx.options.embeddingsModelDir !== undefined) {
        cfg.modelDir = ctx.options.embeddingsModelDir;
      }
      if (workers > 1) {
        // Weight canary: open (and immediately close) a main-thread
        // embedder so a missing-weights error is raised here, with its
        // class identity intact for the `instanceof` check below, rather
        // than from inside a pool worker.
        const canary = await openOnnxEmbedder(cfg);
        await canary.close();
        embedder = openOnnxEmbedderPool({ workers, ...cfg });
      } else {
        embedder = await openOnnxEmbedder(cfg);
      }
    }
  } catch (err) {
    if (err instanceof EmbedderNotSetupError) {
      ctx.onProgress?.({
        phase: EMBEDDER_PHASE_NAME,
        kind: "warn",
        message: "embeddings phase skipped: weights not installed. " +
          "Run `codehub setup --embeddings` while online, or set " +
          "CODEHUB_EMBEDDING_URL to use a remote OpenAI-compatible endpoint.",
      });
      return emptyOutput();
    }
    throw err;
  }

  try {
    const rows = [];
    let skipped = 0;
    let chunksTotal = 0;
    let chunksSkipped = 0;
    let summaryFused = false;
    const byGranularity = {
      symbol: 0,
      file: 0,
      community: 0,
    };

    // Prior-hash cache: maps `priorHashKey(...)` to the content hash stored
    // by an earlier run. Empty under `force` or without an adapter, in which
    // case nothing is skipped.
    const forceFlag = ctx.options.force === true;
    const hashCache = resolveEmbeddingHashCacheAdapter(ctx);
    const priorHashes = forceFlag || hashCache === undefined ? new Map() : await hashCache.list();

    // Token budget handed to the chunker (kept slightly below the
    // embedder's input limit).
    const maxUserTokens = 500;
    // Chunk a text and drop empty chunks so an empty string is never queued
    // for embedding, whatever the splitter returns.
    const chunkText = (text) => splitIntoChunks(text, maxUserTokens).filter((chunk) => chunk.length > 0);

    // Summaries by node id, taken from the summarize phase output. When the
    // same node appears more than once, the row seen last wins.
    const summarizeOut = ctx.phaseOutputs.get(SUMMARIZE_PHASE_NAME);
    const summaryByNode = new Map();
    if (summarizeOut !== undefined && summarizeOut.rows.length > 0) {
      for (const s of summarizeOut.rows) {
        const entry = {
          summaryText: s.summaryText,
        };
        if (s.signatureSummary !== undefined) {
          entry.signatureSummary = s.signatureSummary;
        }
        summaryByNode.set(s.nodeId, entry);
      }
    }

    const jobs = [];

    // ---- Symbol tier ---------------------------------------------------
    if (tiers.includes("symbol")) {
      const eligible = [];
      for (const n of ctx.graph.nodes()) {
        if (isEmbeddableSymbol(n)) {
          eligible.push(n);
        }
      }
      eligible.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0));
      for (const node of eligible) {
        const summary = summaryByNode.get(node.id);
        // The source body is only read when a summary exists, because only
        // the summary-fused layout includes it.
        let body;
        if (summary !== undefined &&
          node.startLine !== undefined &&
          node.endLine !== undefined &&
          node.filePath.length > 0) {
          body = readSourceSpan(ctx.repoPath, node.filePath, node.startLine, node.endLine);
        }
        const text = symbolText(node, summary, body);
        if (text.length === 0) {
          skipped += 1;
          continue;
        }
        if (summary !== undefined) {
          summaryFused = true;
        }
        const chunks = chunkText(text);
        if (chunks.length === 0) {
          skipped += 1;
          continue;
        }
        chunksTotal += chunks.length;

        // Content-hash skip. The node is skipped only when every fresh
        // chunk matches its prior row AND the prior run did not store a
        // chunk beyond the fresh count. Without the second condition a
        // symbol that shrank from N+1 to N chunks with an unchanged prefix
        // would be skipped, leaving its stale tail row untouched.
        // NOTE(review): re-embedding emits the fresh rows; whether the
        // consumer deletes the surplus prior row is outside this function.
        const freshHashes = chunks.map((ch) => hashText("symbol", ch));
        const priorHasExtraChunk = priorHashes.get(priorHashKey("symbol", node.id, chunks.length)) !== undefined;
        const allMatch = priorHashes.size > 0 &&
          !priorHasExtraChunk &&
          freshHashes.every((fresh, i) => priorHashes.get(priorHashKey("symbol", node.id, i)) === fresh);
        if (allMatch) {
          chunksSkipped += chunks.length;
          continue;
        }
        for (let i = 0; i < chunks.length; i++) {
          const chunkIndex = i;
          const contentHash = freshHashes[i];
          jobs.push({
            granularity: "symbol",
            text: chunks[i],
            emitRow: (vector) => ({
              nodeId: node.id,
              granularity: "symbol",
              chunkIndex,
              ...(node.startLine !== undefined ? { startLine: node.startLine } : {}),
              ...(node.endLine !== undefined ? { endLine: node.endLine } : {}),
              vector,
              contentHash,
            }),
          });
        }
      }
    }

    // ---- File tier -----------------------------------------------------
    if (tiers.includes("file")) {
      const scan = ctx.phaseOutputs.get(SCAN_PHASE_NAME);
      const fileNodeByPath = new Map();
      for (const n of ctx.graph.nodes()) {
        if (isFileNode(n)) {
          fileNodeByPath.set(n.filePath, n);
        }
      }
      const scanFiles = scan ? [...scan.files] : [];
      scanFiles.sort((a, b) => (a.relPath < b.relPath ? -1 : a.relPath > b.relPath ? 1 : 0));
      for (const f of scanFiles) {
        const ext = path.extname(f.relPath).toLowerCase();
        if (!EMBEDDABLE_FILE_EXTS.has(ext)) {
          continue;
        }
        const fileNode = fileNodeByPath.get(f.relPath);
        if (fileNode === undefined) {
          continue;
        }
        const raw = readFileWhole(ctx.repoPath, f.relPath);
        if (raw === undefined || raw.length === 0) {
          skipped += 1;
          continue;
        }
        const truncated = raw.length > FILE_CHAR_CAP ? raw.slice(0, FILE_CHAR_CAP) : raw;
        // Single-chunk tier: only the first (non-empty) chunk is embedded.
        const firstChunk = chunkText(truncated)[0];
        if (firstChunk === undefined) {
          skipped += 1;
          continue;
        }
        chunksTotal += 1;
        const contentHash = hashText("file", firstChunk);
        if (priorHashes.size > 0 &&
          priorHashes.get(priorHashKey("file", fileNode.id, 0)) === contentHash) {
          chunksSkipped += 1;
          continue;
        }
        jobs.push({
          granularity: "file",
          text: firstChunk,
          emitRow: (vector) => ({
            nodeId: fileNode.id,
            granularity: "file",
            chunkIndex: 0,
            vector,
            contentHash,
          }),
        });
      }
    }

    // ---- Community tier -----------------------------------------------
    if (tiers.includes("community")) {
      const membersByCommunity = new Map();
      const nameById = new Map();
      for (const n of ctx.graph.nodes()) {
        if (typeof n.id === "string" && typeof n.name === "string") {
          nameById.set(n.id, n.name);
        }
      }
      for (const e of ctx.graph.edges()) {
        if (e.type !== "MEMBER_OF") {
          continue;
        }
        const members = membersByCommunity.get(e.to);
        if (members !== undefined) {
          members.push(e.from);
        } else {
          membersByCommunity.set(e.to, [e.from]);
        }
      }
      const communities = [];
      for (const n of ctx.graph.nodes()) {
        if (isCommunityNode(n)) {
          communities.push(n);
        }
      }
      communities.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0));
      for (const c of communities) {
        const members = membersByCommunity.get(c.id) ?? [];
        const memberNames = members
          .map((m) => nameById.get(m))
          .filter((x) => x !== undefined)
          .sort();
        // Text layout: label, up to 5 keywords, up to 10 member names
        // (alphabetically first), one part per line.
        const topNames = memberNames.slice(0, 10);
        const label = c.inferredLabel ?? c.name;
        const keywords = (c.keywords ?? []).slice(0, 5).join(" ");
        const parts = [label];
        if (keywords.length > 0) {
          parts.push(keywords);
        }
        if (topNames.length > 0) {
          parts.push(topNames.join(" "));
        }
        const text = parts.join("\n");
        if (text.length === 0) {
          skipped += 1;
          continue;
        }
        // Single-chunk tier, as for files.
        const firstChunk = chunkText(text)[0];
        if (firstChunk === undefined) {
          skipped += 1;
          continue;
        }
        chunksTotal += 1;
        const contentHash = hashText("community", firstChunk);
        if (priorHashes.size > 0 &&
          priorHashes.get(priorHashKey("community", c.id, 0)) === contentHash) {
          chunksSkipped += 1;
          continue;
        }
        jobs.push({
          granularity: "community",
          text: firstChunk,
          emitRow: (vector) => ({
            nodeId: c.id,
            granularity: "community",
            chunkIndex: 0,
            vector,
            contentHash,
          }),
        });
      }
    }

    // ---- Dispatch ------------------------------------------------------
    // Jobs are grouped into batches of `batchSize` texts; up to `workers`
    // batches are started together and awaited as one wave before the next
    // wave begins.
    const waveSize = batchSize * workers;
    for (let i = 0; i < jobs.length; i += waveSize) {
      const waveEnd = Math.min(jobs.length, i + waveSize);
      const waveBatches = [];
      const waveJobSlices = [];
      for (let b = i; b < waveEnd; b += batchSize) {
        const slice = jobs.slice(b, Math.min(waveEnd, b + batchSize));
        waveJobSlices.push(slice);
        waveBatches.push(embedder.embedBatch(slice.map((j) => j.text)));
      }
      const waveResults = await Promise.all(waveBatches);
      for (let w = 0; w < waveResults.length; w++) {
        const vectors = waveResults[w] ?? [];
        const slice = waveJobSlices[w] ?? [];
        if (vectors.length !== slice.length) {
          // Jobs without a vector are dropped below; say so instead of
          // losing rows silently.
          ctx.onProgress?.({
            phase: EMBEDDER_PHASE_NAME,
            kind: "warn",
            message: `embedder returned ${vectors.length} vectors for a batch of ${slice.length} texts; ` +
              "rows without a vector were not emitted.",
          });
        }
        for (let k = 0; k < slice.length; k++) {
          const job = slice[k];
          const vec = vectors[k];
          if (job === undefined || vec === undefined) {
            continue;
          }
          rows.push(job.emitRow(vec));
          byGranularity[job.granularity] = (byGranularity[job.granularity] ?? 0) + 1;
        }
      }
    }

    return {
      embeddingsInserted: rows.length,
      symbolsSkipped: skipped,
      chunksTotal,
      embeddingsModelId: embedder.modelId,
      embeddingsHash: hashRows(rows),
      rows,
      ranEmbedder: true,
      byGranularity,
      summaryFused,
      chunksSkipped,
    };
  } finally {
    await embedder.close();
  }
}
686
/**
 * Phase descriptor for the embeddings phase.
 *
 * `deps` lists the phases that must have run first: `summarize` (summary
 * text fused into symbol embeddings), `communities` (Community nodes and
 * MEMBER_OF edges read by the community tier) and `annotate`. The file tier
 * reads the scan phase output; per the original authors' note, scan is
 * reached transitively through annotate.
 */
export const embeddingsPhase = {
  name: EMBEDDER_PHASE_NAME,
  deps: [ANNOTATE_PHASE_NAME, SUMMARIZE_PHASE_NAME, COMMUNITIES_PHASE_NAME],
  run: async (ctx) => runEmbeddings(ctx),
};
697
+ //# sourceMappingURL=embeddings.js.map