@mirnoorata/codexa 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (364) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +634 -0
  3. package/dist/artifacts.d.ts +2 -0
  4. package/dist/artifacts.js +375 -0
  5. package/dist/artifacts.js.map +1 -0
  6. package/dist/autonomy.d.ts +17 -0
  7. package/dist/autonomy.js +124 -0
  8. package/dist/autonomy.js.map +1 -0
  9. package/dist/autoverify/policy.d.ts +5 -0
  10. package/dist/autoverify/policy.js +18 -0
  11. package/dist/autoverify/policy.js.map +1 -0
  12. package/dist/autoverify.d.ts +45 -0
  13. package/dist/autoverify.js +1041 -0
  14. package/dist/autoverify.js.map +1 -0
  15. package/dist/cache-lock.d.ts +16 -0
  16. package/dist/cache-lock.js +181 -0
  17. package/dist/cache-lock.js.map +1 -0
  18. package/dist/cli/hooks.d.ts +5 -0
  19. package/dist/cli/hooks.js +264 -0
  20. package/dist/cli/hooks.js.map +1 -0
  21. package/dist/cli.d.ts +2 -0
  22. package/dist/cli.js +1034 -0
  23. package/dist/cli.js.map +1 -0
  24. package/dist/codex-contract.d.ts +2 -0
  25. package/dist/codex-contract.js +78 -0
  26. package/dist/codex-contract.js.map +1 -0
  27. package/dist/command.d.ts +34 -0
  28. package/dist/command.js +162 -0
  29. package/dist/command.js.map +1 -0
  30. package/dist/doctor.d.ts +112 -0
  31. package/dist/doctor.js +518 -0
  32. package/dist/doctor.js.map +1 -0
  33. package/dist/eval/baseline.d.ts +7 -0
  34. package/dist/eval/baseline.js +146 -0
  35. package/dist/eval/baseline.js.map +1 -0
  36. package/dist/eval/historical.d.ts +4 -0
  37. package/dist/eval/historical.js +663 -0
  38. package/dist/eval/historical.js.map +1 -0
  39. package/dist/eval/render.d.ts +2 -0
  40. package/dist/eval/render.js +53 -0
  41. package/dist/eval/render.js.map +1 -0
  42. package/dist/eval/scoring.d.ts +21 -0
  43. package/dist/eval/scoring.js +618 -0
  44. package/dist/eval/scoring.js.map +1 -0
  45. package/dist/eval/synthetic.d.ts +36 -0
  46. package/dist/eval/synthetic.js +107 -0
  47. package/dist/eval/synthetic.js.map +1 -0
  48. package/dist/eval/types.d.ts +36 -0
  49. package/dist/eval/types.js +2 -0
  50. package/dist/eval/types.js.map +1 -0
  51. package/dist/eval.d.ts +140 -0
  52. package/dist/eval.js +551 -0
  53. package/dist/eval.js.map +1 -0
  54. package/dist/git.d.ts +17 -0
  55. package/dist/git.js +189 -0
  56. package/dist/git.js.map +1 -0
  57. package/dist/github-release.d.ts +47 -0
  58. package/dist/github-release.js +610 -0
  59. package/dist/github-release.js.map +1 -0
  60. package/dist/github-sync.d.ts +68 -0
  61. package/dist/github-sync.js +345 -0
  62. package/dist/github-sync.js.map +1 -0
  63. package/dist/graph.d.ts +10 -0
  64. package/dist/graph.js +665 -0
  65. package/dist/graph.js.map +1 -0
  66. package/dist/indexer/aliases.d.ts +2 -0
  67. package/dist/indexer/aliases.js +190 -0
  68. package/dist/indexer/aliases.js.map +1 -0
  69. package/dist/indexer/artifact-writing.d.ts +3 -0
  70. package/dist/indexer/artifact-writing.js +79 -0
  71. package/dist/indexer/artifact-writing.js.map +1 -0
  72. package/dist/indexer/discovery.d.ts +2 -0
  73. package/dist/indexer/discovery.js +5 -0
  74. package/dist/indexer/discovery.js.map +1 -0
  75. package/dist/indexer/external-facts.d.ts +6 -0
  76. package/dist/indexer/external-facts.js +45 -0
  77. package/dist/indexer/external-facts.js.map +1 -0
  78. package/dist/indexer/freshness.d.ts +8 -0
  79. package/dist/indexer/freshness.js +56 -0
  80. package/dist/indexer/freshness.js.map +1 -0
  81. package/dist/indexer/graph-stage.d.ts +2 -0
  82. package/dist/indexer/graph-stage.js +21 -0
  83. package/dist/indexer/graph-stage.js.map +1 -0
  84. package/dist/indexer/parsing.d.ts +30 -0
  85. package/dist/indexer/parsing.js +177 -0
  86. package/dist/indexer/parsing.js.map +1 -0
  87. package/dist/indexer/pipeline.d.ts +5 -0
  88. package/dist/indexer/pipeline.js +8 -0
  89. package/dist/indexer/pipeline.js.map +1 -0
  90. package/dist/indexer/ranking.d.ts +4 -0
  91. package/dist/indexer/ranking.js +134 -0
  92. package/dist/indexer/ranking.js.map +1 -0
  93. package/dist/indexer.d.ts +13 -0
  94. package/dist/indexer.js +395 -0
  95. package/dist/indexer.js.map +1 -0
  96. package/dist/init.d.ts +24 -0
  97. package/dist/init.js +566 -0
  98. package/dist/init.js.map +1 -0
  99. package/dist/language.d.ts +8 -0
  100. package/dist/language.js +123 -0
  101. package/dist/language.js.map +1 -0
  102. package/dist/live-index.d.ts +68 -0
  103. package/dist/live-index.js +215 -0
  104. package/dist/live-index.js.map +1 -0
  105. package/dist/lsp/assist.d.ts +44 -0
  106. package/dist/lsp/assist.js +331 -0
  107. package/dist/lsp/assist.js.map +1 -0
  108. package/dist/lsp/client.d.ts +59 -0
  109. package/dist/lsp/client.js +208 -0
  110. package/dist/lsp/client.js.map +1 -0
  111. package/dist/mcp/compaction.d.ts +15 -0
  112. package/dist/mcp/compaction.js +1249 -0
  113. package/dist/mcp/compaction.js.map +1 -0
  114. package/dist/mcp/envelope.d.ts +44 -0
  115. package/dist/mcp/envelope.js +425 -0
  116. package/dist/mcp/envelope.js.map +1 -0
  117. package/dist/mcp/prompts.d.ts +2 -0
  118. package/dist/mcp/prompts.js +109 -0
  119. package/dist/mcp/prompts.js.map +1 -0
  120. package/dist/mcp/resources.d.ts +2 -0
  121. package/dist/mcp/resources.js +132 -0
  122. package/dist/mcp/resources.js.map +1 -0
  123. package/dist/mcp/runtime.d.ts +15 -0
  124. package/dist/mcp/runtime.js +122 -0
  125. package/dist/mcp/runtime.js.map +1 -0
  126. package/dist/mcp/session-memory.d.ts +3 -0
  127. package/dist/mcp/session-memory.js +61 -0
  128. package/dist/mcp/session-memory.js.map +1 -0
  129. package/dist/mcp/tool-registry.d.ts +269 -0
  130. package/dist/mcp/tool-registry.js +284 -0
  131. package/dist/mcp/tool-registry.js.map +1 -0
  132. package/dist/mcp/tools.d.ts +53 -0
  133. package/dist/mcp/tools.js +372 -0
  134. package/dist/mcp/tools.js.map +1 -0
  135. package/dist/mcp-repo-root.d.ts +16 -0
  136. package/dist/mcp-repo-root.js +322 -0
  137. package/dist/mcp-repo-root.js.map +1 -0
  138. package/dist/mcp-tool-catalog.d.ts +2 -0
  139. package/dist/mcp-tool-catalog.js +2 -0
  140. package/dist/mcp-tool-catalog.js.map +1 -0
  141. package/dist/mcp.d.ts +11 -0
  142. package/dist/mcp.js +332 -0
  143. package/dist/mcp.js.map +1 -0
  144. package/dist/outcome-ranking.d.ts +5 -0
  145. package/dist/outcome-ranking.js +115 -0
  146. package/dist/outcome-ranking.js.map +1 -0
  147. package/dist/parser/context.d.ts +28 -0
  148. package/dist/parser/context.js +2 -0
  149. package/dist/parser/context.js.map +1 -0
  150. package/dist/parser/ecma.d.ts +5 -0
  151. package/dist/parser/ecma.js +388 -0
  152. package/dist/parser/ecma.js.map +1 -0
  153. package/dist/parser/facts.d.ts +12 -0
  154. package/dist/parser/facts.js +137 -0
  155. package/dist/parser/facts.js.map +1 -0
  156. package/dist/parser/json.d.ts +3 -0
  157. package/dist/parser/json.js +318 -0
  158. package/dist/parser/json.js.map +1 -0
  159. package/dist/parser/markdown.d.ts +3 -0
  160. package/dist/parser/markdown.js +180 -0
  161. package/dist/parser/markdown.js.map +1 -0
  162. package/dist/parser/nodes.d.ts +5 -0
  163. package/dist/parser/nodes.js +75 -0
  164. package/dist/parser/nodes.js.map +1 -0
  165. package/dist/parser/python.d.ts +2 -0
  166. package/dist/parser/python.js +307 -0
  167. package/dist/parser/python.js.map +1 -0
  168. package/dist/parser/references.d.ts +3 -0
  169. package/dist/parser/references.js +204 -0
  170. package/dist/parser/references.js.map +1 -0
  171. package/dist/parser/risks.d.ts +4 -0
  172. package/dist/parser/risks.js +62 -0
  173. package/dist/parser/risks.js.map +1 -0
  174. package/dist/parser/routes.d.ts +5 -0
  175. package/dist/parser/routes.js +97 -0
  176. package/dist/parser/routes.js.map +1 -0
  177. package/dist/parser/shallow.d.ts +3 -0
  178. package/dist/parser/shallow.js +545 -0
  179. package/dist/parser/shallow.js.map +1 -0
  180. package/dist/parser/source.d.ts +4 -0
  181. package/dist/parser/source.js +127 -0
  182. package/dist/parser/source.js.map +1 -0
  183. package/dist/parser.d.ts +2 -0
  184. package/dist/parser.js +2 -0
  185. package/dist/parser.js.map +1 -0
  186. package/dist/placeholder-signals.d.ts +15 -0
  187. package/dist/placeholder-signals.js +511 -0
  188. package/dist/placeholder-signals.js.map +1 -0
  189. package/dist/post-edit-outcomes.d.ts +167 -0
  190. package/dist/post-edit-outcomes.js +484 -0
  191. package/dist/post-edit-outcomes.js.map +1 -0
  192. package/dist/queries.d.ts +12 -0
  193. package/dist/queries.js +13 -0
  194. package/dist/queries.js.map +1 -0
  195. package/dist/query/change-plan.d.ts +48 -0
  196. package/dist/query/change-plan.js +858 -0
  197. package/dist/query/change-plan.js.map +1 -0
  198. package/dist/query/compact-data.d.ts +25 -0
  199. package/dist/query/compact-data.js +74 -0
  200. package/dist/query/compact-data.js.map +1 -0
  201. package/dist/query/context.d.ts +5 -0
  202. package/dist/query/context.js +1162 -0
  203. package/dist/query/context.js.map +1 -0
  204. package/dist/query/diff.d.ts +5 -0
  205. package/dist/query/diff.js +111 -0
  206. package/dist/query/diff.js.map +1 -0
  207. package/dist/query/edge-evidence.d.ts +3 -0
  208. package/dist/query/edge-evidence.js +36 -0
  209. package/dist/query/edge-evidence.js.map +1 -0
  210. package/dist/query/formatting.d.ts +14 -0
  211. package/dist/query/formatting.js +67 -0
  212. package/dist/query/formatting.js.map +1 -0
  213. package/dist/query/graph-traversal.d.ts +22 -0
  214. package/dist/query/graph-traversal.js +218 -0
  215. package/dist/query/graph-traversal.js.map +1 -0
  216. package/dist/query/graph.d.ts +14 -0
  217. package/dist/query/graph.js +102 -0
  218. package/dist/query/graph.js.map +1 -0
  219. package/dist/query/impact.d.ts +28 -0
  220. package/dist/query/impact.js +568 -0
  221. package/dist/query/impact.js.map +1 -0
  222. package/dist/query/inspection.d.ts +9 -0
  223. package/dist/query/inspection.js +290 -0
  224. package/dist/query/inspection.js.map +1 -0
  225. package/dist/query/next-tools.d.ts +3 -0
  226. package/dist/query/next-tools.js +25 -0
  227. package/dist/query/next-tools.js.map +1 -0
  228. package/dist/query/placeholders.d.ts +24 -0
  229. package/dist/query/placeholders.js +121 -0
  230. package/dist/query/placeholders.js.map +1 -0
  231. package/dist/query/post-edit/decision.d.ts +49 -0
  232. package/dist/query/post-edit/decision.js +130 -0
  233. package/dist/query/post-edit/decision.js.map +1 -0
  234. package/dist/query/post-edit/dirty-scope.d.ts +16 -0
  235. package/dist/query/post-edit/dirty-scope.js +21 -0
  236. package/dist/query/post-edit/dirty-scope.js.map +1 -0
  237. package/dist/query/post-edit/next-actions.d.ts +22 -0
  238. package/dist/query/post-edit/next-actions.js +44 -0
  239. package/dist/query/post-edit/next-actions.js.map +1 -0
  240. package/dist/query/post-edit/snapshot-contract.d.ts +8 -0
  241. package/dist/query/post-edit/snapshot-contract.js +111 -0
  242. package/dist/query/post-edit/snapshot-contract.js.map +1 -0
  243. package/dist/query/post-edit.d.ts +5 -0
  244. package/dist/query/post-edit.js +1108 -0
  245. package/dist/query/post-edit.js.map +1 -0
  246. package/dist/query/quality.d.ts +43 -0
  247. package/dist/query/quality.js +134 -0
  248. package/dist/query/quality.js.map +1 -0
  249. package/dist/query/raw-search.d.ts +23 -0
  250. package/dist/query/raw-search.js +147 -0
  251. package/dist/query/raw-search.js.map +1 -0
  252. package/dist/query/runtime.d.ts +11 -0
  253. package/dist/query/runtime.js +79 -0
  254. package/dist/query/runtime.js.map +1 -0
  255. package/dist/query/search.d.ts +25 -0
  256. package/dist/query/search.js +429 -0
  257. package/dist/query/search.js.map +1 -0
  258. package/dist/query/session-memory.d.ts +3 -0
  259. package/dist/query/session-memory.js +108 -0
  260. package/dist/query/session-memory.js.map +1 -0
  261. package/dist/query/session.d.ts +41 -0
  262. package/dist/query/session.js +90 -0
  263. package/dist/query/session.js.map +1 -0
  264. package/dist/query/targets.d.ts +25 -0
  265. package/dist/query/targets.js +97 -0
  266. package/dist/query/targets.js.map +1 -0
  267. package/dist/query/test-commands.d.ts +10 -0
  268. package/dist/query/test-commands.js +110 -0
  269. package/dist/query/test-commands.js.map +1 -0
  270. package/dist/query/test-plan.d.ts +6 -0
  271. package/dist/query/test-plan.js +104 -0
  272. package/dist/query/test-plan.js.map +1 -0
  273. package/dist/query/tests.d.ts +48 -0
  274. package/dist/query/tests.js +444 -0
  275. package/dist/query/tests.js.map +1 -0
  276. package/dist/query/verification/shell.d.ts +20 -0
  277. package/dist/query/verification/shell.js +164 -0
  278. package/dist/query/verification/shell.js.map +1 -0
  279. package/dist/query/verification.d.ts +47 -0
  280. package/dist/query/verification.js +1123 -0
  281. package/dist/query/verification.js.map +1 -0
  282. package/dist/query/workflow.d.ts +17 -0
  283. package/dist/query/workflow.js +252 -0
  284. package/dist/query/workflow.js.map +1 -0
  285. package/dist/query/workspace-guidance.d.ts +26 -0
  286. package/dist/query/workspace-guidance.js +214 -0
  287. package/dist/query/workspace-guidance.js.map +1 -0
  288. package/dist/query/worktree-state.d.ts +22 -0
  289. package/dist/query/worktree-state.js +32 -0
  290. package/dist/query/worktree-state.js.map +1 -0
  291. package/dist/query/worktree.d.ts +16 -0
  292. package/dist/query/worktree.js +194 -0
  293. package/dist/query/worktree.js.map +1 -0
  294. package/dist/query-data.d.ts +4 -0
  295. package/dist/query-data.js +112 -0
  296. package/dist/query-data.js.map +1 -0
  297. package/dist/repo-files.d.ts +24 -0
  298. package/dist/repo-files.js +105 -0
  299. package/dist/repo-files.js.map +1 -0
  300. package/dist/resolver.d.ts +9 -0
  301. package/dist/resolver.js +555 -0
  302. package/dist/resolver.js.map +1 -0
  303. package/dist/retrieval.d.ts +46 -0
  304. package/dist/retrieval.js +783 -0
  305. package/dist/retrieval.js.map +1 -0
  306. package/dist/risk-ingest.d.ts +16 -0
  307. package/dist/risk-ingest.js +458 -0
  308. package/dist/risk-ingest.js.map +1 -0
  309. package/dist/rules.d.ts +10 -0
  310. package/dist/rules.js +107 -0
  311. package/dist/rules.js.map +1 -0
  312. package/dist/semantic/python.d.ts +9 -0
  313. package/dist/semantic/python.js +817 -0
  314. package/dist/semantic/python.js.map +1 -0
  315. package/dist/semantic/typescript.d.ts +10 -0
  316. package/dist/semantic/typescript.js +714 -0
  317. package/dist/semantic/typescript.js.map +1 -0
  318. package/dist/semantic-retrieval.d.ts +53 -0
  319. package/dist/semantic-retrieval.js +673 -0
  320. package/dist/semantic-retrieval.js.map +1 -0
  321. package/dist/session-memory/derivation.d.ts +6 -0
  322. package/dist/session-memory/derivation.js +400 -0
  323. package/dist/session-memory/derivation.js.map +1 -0
  324. package/dist/session-memory/event-log.d.ts +23 -0
  325. package/dist/session-memory/event-log.js +126 -0
  326. package/dist/session-memory/event-log.js.map +1 -0
  327. package/dist/session-memory/formatting.d.ts +7 -0
  328. package/dist/session-memory/formatting.js +86 -0
  329. package/dist/session-memory/formatting.js.map +1 -0
  330. package/dist/session-memory/model.d.ts +94 -0
  331. package/dist/session-memory/model.js +17 -0
  332. package/dist/session-memory/model.js.map +1 -0
  333. package/dist/session-memory/runtime.d.ts +24 -0
  334. package/dist/session-memory/runtime.js +289 -0
  335. package/dist/session-memory/runtime.js.map +1 -0
  336. package/dist/session-memory/store.d.ts +27 -0
  337. package/dist/session-memory/store.js +447 -0
  338. package/dist/session-memory/store.js.map +1 -0
  339. package/dist/session-memory.d.ts +1 -0
  340. package/dist/session-memory.js +2 -0
  341. package/dist/session-memory.js.map +1 -0
  342. package/dist/static-analysis.d.ts +36 -0
  343. package/dist/static-analysis.js +505 -0
  344. package/dist/static-analysis.js.map +1 -0
  345. package/dist/symbol-report-ingest.d.ts +8 -0
  346. package/dist/symbol-report-ingest.js +504 -0
  347. package/dist/symbol-report-ingest.js.map +1 -0
  348. package/dist/task-snapshots.d.ts +41 -0
  349. package/dist/task-snapshots.js +430 -0
  350. package/dist/task-snapshots.js.map +1 -0
  351. package/dist/types.d.ts +848 -0
  352. package/dist/types.js +12 -0
  353. package/dist/types.js.map +1 -0
  354. package/dist/util.d.ts +11 -0
  355. package/dist/util.js +63 -0
  356. package/dist/util.js.map +1 -0
  357. package/dist/version.d.ts +1 -0
  358. package/dist/version.js +5 -0
  359. package/dist/version.js.map +1 -0
  360. package/package.json +81 -0
  361. package/plugins/codexa/.codex-plugin/plugin.json +38 -0
  362. package/plugins/codexa/.mcp.json +20 -0
  363. package/plugins/codexa/scripts/codexa-mcp.js +100 -0
  364. package/plugins/codexa/skills/codexa/SKILL.md +48 -0
@@ -0,0 +1,673 @@
1
+ import { createHash, randomUUID } from "node:crypto";
2
+ import { promises as fs } from "node:fs";
3
+ import fsSync from "node:fs";
4
+ import path from "node:path";
5
+ import { runCommand } from "./command.js";
6
+ import { stableId, uniqueSorted } from "./util.js";
7
+ const SEMANTIC_CACHE_VERSION = 1; // Written into the cache manifest as `schemaVersion`.
8
+ const SEMANTIC_CACHE_DIR = ".codex/cache/codexa-semantic-v1"; // Joined onto the repo root to locate the semantic cache directory.
9
+ const MANIFEST_FILE = "manifest.json"; // Manifest file name inside the cache directory.
10
+ const VECTORS_FILE = "vectors.jsonl"; // NOTE(review): presumably the default vectors file name; its use is not visible in this chunk.
11
+ const DEFAULT_OPENAI_EMBEDDING_MODEL = "text-embedding-3-small"; // NOTE(review): presumably the fallback model for the openai provider; confirm in providerModel.
12
+ const DEFAULT_TIMEOUT_MS = 60_000; // Fallback timeout for embedding requests and local commands when none is configured.
13
+ const DEFAULT_BATCH_SIZE = 64; // Fallback maximum number of texts per embedding batch.
14
+ const DEFAULT_OPENAI_BATCH_CHAR_BUDGET = 120_000; // Maximum combined text length (string length) per batch when the provider is "openai".
15
+ const DEFAULT_MAX_FILES = 750; // Fallback for `options.maxFiles` when building the semantic index.
16
+ const MAX_SOURCE_CHARS_PER_FILE = 16_000; // File source is sliced to this many characters before being embedded.
17
+ const MAX_PREVIEW_CHARS = 280; // NOTE(review): presumably the preview length cap used by compactPreview; confirm.
18
+ const MAX_SEMANTIC_MANIFEST_BYTES = 128 * 1024; // NOTE(review): looks like a size limit for reading the manifest; usage not visible here.
19
+ const MAX_SEMANTIC_VECTOR_BYTES = 64 * 1024 * 1024; // NOTE(review): looks like a size limit for reading the vectors file; usage not visible here.
20
+ const MAX_SEMANTIC_VECTOR_RECORDS = 100_000; // NOTE(review): looks like a record-count limit for the vectors file; usage not visible here.
21
+ const MAX_LOCAL_COMMAND_OUTPUT_BYTES = 16 * 1024 * 1024; // Passed as `maxBufferBytes` when running the local embedding command.
22
/**
 * Resolve the semantic-retrieval settings for a query against `repoRoot`.
 *
 * Each setting is taken from the explicit query option first, then the
 * matching `CODEXA_SEMANTIC_*` environment variable, then (where applicable)
 * the cached semantic manifest or a built-in default.
 *
 * @param {string} repoRoot - Repository root; resolved to an absolute path.
 * @param {object} [options] - Query options (`semantic`, `semanticProvider`,
 *   `semanticModel`, `semanticDimensions`, `semanticCommand`, `semanticArgs`,
 *   `semanticTimeoutMs`, `semanticBatchSize`).
 * @returns {object} `{ enabled, repoRoot, forced, provider, model, dimensions,
 *   command, args, timeoutMs, batchSize }`.
 */
export function semanticOptionsFromQueryOptions(repoRoot, options = {}) {
    const absoluteRoot = path.resolve(repoRoot);
    const override = semanticEnabledOverride(options.semantic);
    // The manifest is not consulted at all when semantics are explicitly disabled.
    let manifest;
    if (override !== false) {
        manifest = readSemanticManifest(absoluteRoot);
    }
    const requestedProvider = semanticProviderFromValue(options.semanticProvider ?? process.env.CODEXA_SEMANTIC_PROVIDER);
    const provider = requestedProvider ?? manifest?.provider ?? inferProviderFromEnvironment(override === true);
    const command = options.semanticCommand ?? process.env.CODEXA_SEMANTIC_COMMAND;
    const resolved = {
        provider,
        model: options.semanticModel ?? process.env.CODEXA_SEMANTIC_MODEL ?? manifest?.model,
        dimensions: positiveInt(options.semanticDimensions) ?? positiveIntFromEnv("CODEXA_SEMANTIC_DIMENSIONS") ?? manifest?.dimensions,
        command,
        args: options.semanticArgs ?? semanticArgsFromEnv(),
        timeoutMs: positiveInt(options.semanticTimeoutMs) ?? positiveIntFromEnv("CODEXA_SEMANTIC_TIMEOUT_MS") ?? DEFAULT_TIMEOUT_MS,
        batchSize: positiveInt(options.semanticBatchSize) ?? positiveIntFromEnv("CODEXA_SEMANTIC_BATCH_SIZE") ?? DEFAULT_BATCH_SIZE
    };
    // Without an explicit override, semantics are on only when a cache manifest
    // exists and the resolved provider is runnable with these settings.
    const enabled = override != null
        ? override
        : Boolean(manifest && provider && semanticProviderRunnable(provider, resolved));
    return {
        enabled,
        repoRoot: absoluteRoot,
        forced: override === true,
        ...resolved
    };
}
45
/**
 * Embed the eligible files of `index` and persist them as the semantic cache.
 *
 * @param {string} repoRootInput - Repository root; resolved to an absolute path.
 * @param {object} index - Codexa index (reads `snapshot.snapshotId`,
 *   `freshness.indexedAt` and the collections used to build chunks).
 * @param {object} options - Provider options; `maxFiles` limits how many files
 *   are embedded (falls back to DEFAULT_MAX_FILES).
 * @returns {Promise<object>} Paths and metadata describing the written cache.
 * @throws {Error} When there is nothing to embed, when the provider omits an
 *   embedding for a chunk, or when embedding lengths are empty/inconsistent.
 */
export async function buildSemanticIndex(repoRootInput, index, options) {
    const repoRoot = path.resolve(repoRootInput);
    const providerOptions = semanticProviderOptionsWithEnvironment(options);
    const provider = requiredProvider(providerOptions);
    const model = providerModel(provider, providerOptions.model);
    const chunks = await semanticChunksForIndex(repoRoot, index, options.maxFiles ?? DEFAULT_MAX_FILES);
    if (chunks.length === 0) {
        throw new Error("semantic index has no eligible chunks to embed");
    }
    const embeddingInputs = chunks.map(({ id, text }) => ({ id, text }));
    const embeddings = await embedTexts(embeddingInputs, { ...providerOptions, provider, model });
    // Pair every chunk with its embedding; a missing embedding is fatal.
    const vectorRecords = [];
    for (const { id, path: chunkPath, title, preview } of chunks) {
        const embedding = embeddings.get(id);
        if (!embedding) {
            throw new Error(`semantic provider did not return an embedding for ${id}`);
        }
        vectorRecords.push({ id, path: chunkPath, title, preview, embedding });
    }
    // The first record defines the expected dimensionality for all others.
    const dimensions = vectorRecords[0]?.embedding.length ?? 0;
    const isUniform = dimensions > 0 && vectorRecords.every((record) => record.embedding.length === dimensions);
    if (!isUniform) {
        throw new Error("semantic provider returned inconsistent embedding dimensions");
    }
    const cacheDir = path.join(repoRoot, SEMANTIC_CACHE_DIR);
    const builtAt = new Date().toISOString();
    const sourceFingerprint = semanticSourceFingerprint(index, chunks);
    const chunkCount = vectorRecords.length;
    const manifest = {
        schemaVersion: SEMANTIC_CACHE_VERSION,
        snapshotId: index.snapshot.snapshotId,
        indexedAt: index.freshness.indexedAt,
        provider,
        model,
        dimensions,
        chunkCount,
        builtAt,
        vectorsFile: semanticVectorFileName({ sourceFingerprint, provider, model, dimensions }),
        sourceFingerprint
    };
    await writeSemanticCache(cacheDir, manifest, vectorRecords);
    return {
        repoRoot,
        cacheDir,
        manifestPath: path.join(cacheDir, MANIFEST_FILE),
        vectorPath: path.join(cacheDir, manifest.vectorsFile),
        provider,
        model,
        dimensions,
        chunkCount,
        sourceFingerprint: manifest.sourceFingerprint
    };
}
94
/**
 * Report whether a semantic query with these options may use the "openai"
 * provider.
 *
 * @param {string} repoRoot - Repository root used to locate the semantic manifest.
 * @param {object} [options] - Query options (`semantic`, `semanticProvider`).
 * @returns {boolean} `false` when semantics are explicitly disabled. For the
 *   "openai" provider: `true` when forced, or when both `OPENAI_API_KEY` and a
 *   manifest exist. With no provider resolved: `true` only when forced and
 *   `OPENAI_API_KEY` is set.
 */
export function semanticMayUseOpenWorldProvider(repoRoot, options = {}) {
    const override = semanticEnabledOverride(options.semantic);
    if (override === false) {
        return false;
    }
    const manifest = readSemanticManifest(path.resolve(repoRoot));
    const requestedProvider = semanticProviderFromValue(options.semanticProvider ?? process.env.CODEXA_SEMANTIC_PROVIDER);
    const provider = requestedProvider ?? manifest?.provider;
    const forced = override === true;
    const hasApiKey = Boolean(process.env.OPENAI_API_KEY);
    if (provider === "openai") {
        return forced || (hasApiKey && Boolean(manifest));
    }
    // Any other explicit provider rules OpenAI out; with none resolved, a
    // forced run with an API key may still use it.
    return forced && hasApiKey && !provider;
}
106
/**
 * Produce semantic-lane ranking entries for `query` from the cached vectors.
 *
 * The query is embedded with the provider/model/dimensions recorded in the
 * cache manifest and compared against every cached vector by dot product of
 * normalized vectors. Vectors whose file is missing from `fileByPath`, whose
 * length differs from the manifest, or whose similarity is not above 0.12 are
 * ignored. Each file keeps its best similarity (scaled by 24) and up to six
 * reasons; at most 80 entries are returned.
 *
 * @param {object} index - Current Codexa index (reads `snapshot.snapshotId`).
 * @param {string} query - Query text to embed.
 * @param {Map<string, object>} fileByPath - Indexed files keyed by path.
 * @param {object} options - Resolved semantic options (`enabled`, `repoRoot`,
 *   `forced`, `provider`, `model`, `dimensions`, ...).
 * @returns {Promise<{entries: object[], summary: object}>} Entries plus a
 *   summary whose `status` is "disabled", "unavailable" or "ok". Configuration
 *   or cache mismatches yield "disabled" unless `options.forced` is set.
 */
export async function semanticLaneEntriesForQuery(index, query, fileByPath, options) {
    const disabledResult = () => ({
        entries: [],
        summary: { enabled: false, status: "disabled", diagnostics: [] }
    });
    if (!options.enabled) {
        return disabledResult();
    }
    const cache = loadSemanticCache(options.repoRoot);
    if (!cache.ok) {
        return unavailable(options, [cache.reason]);
    }
    const { manifest, vectors } = cache;
    const problems = [];
    if (manifest.snapshotId !== index.snapshot.snapshotId) {
        problems.push("semantic cache is stale for the current Codexa snapshot; run `codexa semantic-index <repo>`");
    }
    if (manifest.dimensions <= 0 || vectors.length === 0) {
        problems.push("semantic cache contains no vectors");
    }
    const { provider } = options;
    if (!provider) {
        problems.push("semantic query provider is not configured; set CODEXA_SEMANTIC_PROVIDER or pass query options");
    }
    else if (provider !== manifest.provider) {
        problems.push(`semantic query provider ${provider} does not match cached provider ${manifest.provider}`);
    }
    const model = provider ? providerModel(provider, options.model) : undefined;
    if (provider && model !== manifest.model) {
        problems.push(`semantic query model ${model} does not match cached model ${manifest.model}`);
    }
    if (options.dimensions && options.dimensions !== manifest.dimensions) {
        problems.push(`semantic query dimensions ${options.dimensions} do not match cached dimensions ${manifest.dimensions}`);
    }
    if (problems.length > 0) {
        // Mismatches are reported only when the caller forced semantics on;
        // otherwise the lane is treated as disabled with no diagnostics.
        if (!options.forced) {
            return disabledResult();
        }
        return {
            entries: [],
            summary: {
                enabled: true,
                status: "unavailable",
                provider: manifest.provider,
                model: manifest.model,
                chunkCount: vectors.length,
                diagnostics: problems
            }
        };
    }
    let queryEmbedding;
    try {
        const embedded = await embedTexts([{ id: "query", text: query }], {
            ...options,
            provider: manifest.provider,
            model: manifest.model,
            dimensions: manifest.dimensions
        });
        queryEmbedding = embedded.get("query") ?? [];
    }
    catch (error) {
        return unavailable(options, [`semantic query embedding failed: ${errorMessage(error)}`], manifest);
    }
    if (queryEmbedding.length !== manifest.dimensions) {
        return unavailable(options, [`semantic query embedding dimension ${queryEmbedding.length} does not match cache dimension ${manifest.dimensions}`], manifest);
    }
    const queryVector = normalizeVector(queryEmbedding);
    const bestByPath = new Map();
    for (const record of vectors) {
        const file = fileByPath.get(record.path);
        if (!file || record.embedding.length !== manifest.dimensions) {
            continue;
        }
        const similarity = dot(queryVector, normalizeVector(record.embedding));
        const isRelevant = Number.isFinite(similarity) && similarity > 0.12;
        if (!isRelevant) {
            continue;
        }
        const aggregate = bestByPath.get(file.path) ?? { file, score: 0, reasons: [], matchedTerms: [] };
        aggregate.score = Math.max(aggregate.score, similarity * 24);
        aggregate.reasons.push(`semantic ${record.title} similarity ${similarity.toFixed(3)}: ${record.preview}`);
        bestByPath.set(file.path, aggregate);
    }
    const ranked = [...bestByPath.values()]
        .map((entry) => ({ ...entry, reasons: uniqueSorted(entry.reasons).slice(0, 6) }))
        .sort((a, b) => b.score - a.score || b.file.rank - a.file.rank || a.file.path.localeCompare(b.file.path))
        .slice(0, 80);
    return {
        entries: ranked,
        summary: {
            enabled: true,
            status: "ok",
            provider: manifest.provider,
            model: manifest.model,
            chunkCount: vectors.length,
            diagnostics: manifest.snapshotId === index.snapshot.snapshotId ? [] : ["semantic cache snapshot differs from current index"]
        }
    };
}
200
/**
 * Build one embeddable text chunk per eligible indexed file.
 *
 * Eligible files are not generated, not under `.codex/`, and at most 512 KiB;
 * they are ordered by descending rank (then path) and limited to
 * `max(1, maxFiles)`. Each chunk combines file metadata, symbols, imports,
 * usages, workflows, risks and a prefix of the file source.
 *
 * @param {string} repoRoot - Absolute repository root used to read file sources.
 * @param {object} index - Codexa index (reads `files`, `symbols`, `usageSites`,
 *   `imports`, `risks`, `workflows`, `snapshot.snapshotId`).
 * @param {number} maxFiles - Upper bound on the number of files to chunk.
 * @returns {Promise<Array<{id: string, path: string, title: string, text: string, preview: string}>>}
 */
async function semanticChunksForIndex(repoRoot, index, maxFiles) {
    const symbolsByPath = groupByPath(index.symbols);
    const usagesByPath = groupByPath(index.usageSites);
    const importsByPath = groupByPath(index.imports);
    const risksByPath = groupByPath(index.risks);
    const workflowsByPath = new Map();
    for (const workflow of index.workflows) {
        const description = `${workflow.workflowKind} ${workflow.title}: ${workflow.summary}`;
        for (const relatedPath of workflow.relatedFiles) {
            const descriptions = workflowsByPath.get(relatedPath) ?? [];
            descriptions.push(description);
            workflowsByPath.set(relatedPath, descriptions);
        }
    }
    // An unreadable file still gets a chunk, just without a source section.
    const readSourceOrEmpty = async (absolutePath) => {
        try {
            return await fs.readFile(absolutePath, "utf8");
        }
        catch {
            return "";
        }
    };
    // Render up to `limit` items recorded for a path, one per line.
    const describeItems = (lookup, filePath, limit, describe) => (lookup.get(filePath) ?? [])
        .slice(0, limit)
        .map((item) => describe(item))
        .join("\n");
    const section = (label, body) => (body ? `${label}:\n${body}` : "");
    const isEligible = (file) => !file.generated && !file.path.startsWith(".codex/") && file.sizeBytes <= 512 * 1024;
    const candidates = index.files
        .filter(isEligible)
        .sort((a, b) => b.rank - a.rank || a.path.localeCompare(b.path))
        .slice(0, Math.max(1, maxFiles));
    const chunks = [];
    for (const file of candidates) {
        const source = await readSourceOrEmpty(path.join(repoRoot, file.path));
        const symbolText = describeItems(symbolsByPath, file.path, 80, (symbol) => `${symbol.kind} ${symbol.qualifiedName} exported=${symbol.exported}`);
        const usageText = describeItems(usagesByPath, file.path, 80, (usage) => `${usage.kind} ${usage.name} ${usage.text}`);
        const importText = describeItems(importsByPath, file.path, 60, (imp) => `import ${imp.importedName ?? "*"} from ${imp.specifier} ${imp.resolvedPath ?? ""}`);
        const riskText = describeItems(risksByPath, file.path, 30, (risk) => `${risk.signal}: ${risk.reason}`);
        const workflowText = (workflowsByPath.get(file.path) ?? []).slice(0, 20).join("\n");
        const parts = [
            `file: ${file.path}`,
            `language: ${file.language}`,
            `module: ${moduleNameForPath(file.path)}`,
            section("symbols", symbolText),
            section("imports", importText),
            section("usages", usageText),
            section("workflows", workflowText),
            section("risks", riskText),
            source ? `source:\n${source.slice(0, MAX_SOURCE_CHARS_PER_FILE)}` : ""
        ];
        const text = parts.filter(Boolean).join("\n\n");
        chunks.push({
            id: stableId("semantic-chunk", index.snapshot.snapshotId, file.path),
            path: file.path,
            title: file.path,
            text,
            preview: compactPreview(text)
        });
    }
    return chunks;
}
267
/**
 * Embed `items` in batches and merge the per-batch results.
 *
 * Batches hold at most `max(1, options.batchSize ?? DEFAULT_BATCH_SIZE)`
 * items; for the "openai" provider they are additionally limited by
 * DEFAULT_OPENAI_BATCH_CHAR_BUDGET. Any provider other than "openai" is
 * handled by the local-command embedder.
 *
 * @param {Array<{id: string, text: string}>} items - Texts to embed.
 * @param {object} options - Provider options (`provider`, `batchSize`, ...).
 * @returns {Promise<Map<string, number[]>>} Embeddings keyed by item id.
 */
async function embedTexts(items, options) {
    const usesOpenAi = options.provider === "openai";
    const itemsPerBatch = Math.max(1, options.batchSize ?? DEFAULT_BATCH_SIZE);
    const charBudget = usesOpenAi ? DEFAULT_OPENAI_BATCH_CHAR_BUDGET : Number.POSITIVE_INFINITY;
    const combined = new Map();
    // Batches run one after another; later batches overwrite duplicate ids.
    for (const batch of embeddingBatches(items, itemsPerBatch, charBudget)) {
        let batchEmbeddings;
        if (usesOpenAi) {
            batchEmbeddings = await embedWithOpenAi(batch, { ...options, provider: "openai" });
        }
        else {
            batchEmbeddings = await embedWithLocalCommand(batch, { ...options, provider: "local-command" });
        }
        for (const [id, embedding] of batchEmbeddings) {
            combined.set(id, embedding);
        }
    }
    return combined;
}
280
/**
 * Split `items` into consecutive batches bounded by count and text length.
 *
 * A new batch starts when the current one already holds `maxItems` items or
 * when adding the next item would push the summed `text.length` past
 * `maxChars`. A single item larger than `maxChars` still gets a batch of its
 * own, so no item is ever dropped.
 *
 * @param {Array<{text: string}>} items - Items to batch, in order.
 * @param {number} maxItems - Maximum number of items per batch.
 * @param {number} maxChars - Maximum summed `text.length` per batch.
 * @returns {Array<Array<object>>} Non-empty batches preserving input order.
 */
function embeddingBatches(items, maxItems, maxChars) {
    const grouped = [];
    let pending = [];
    let pendingChars = 0;
    // Close the open batch, if it has any items, and start a new one.
    const flush = () => {
        if (pending.length === 0) {
            return;
        }
        grouped.push(pending);
        pending = [];
        pendingChars = 0;
    };
    for (const entry of items) {
        const size = entry.text.length;
        const wouldOverflow = pending.length >= maxItems || pendingChars + size > maxChars;
        if (wouldOverflow) {
            flush();
        }
        pending.push(entry);
        pendingChars += size;
    }
    flush();
    return grouped;
}
299
/**
 * Embed one batch of texts with the OpenAI embeddings endpoint.
 *
 * @param {Array<{id: string, text: string}>} items - Batch to embed; response
 *   entries are matched back to items by their `index` field.
 * @param {object} options - Uses `model`, optional `dimensions`, and
 *   `timeoutMs` (falls back to DEFAULT_TIMEOUT_MS).
 * @returns {Promise<Map<string, number[]>>} Embeddings keyed by item id; entries
 *   with an unknown index or a non-numeric embedding are skipped.
 * @throws {Error} When `OPENAI_API_KEY` is unset, when the request times out,
 *   or when the endpoint answers with a non-2xx status.
 */
async function embedWithOpenAi(items, options) {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
        throw new Error("OPENAI_API_KEY is required for OpenAI semantic embeddings");
    }
    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const body = {
            model: options.model,
            input: items.map((item) => item.text),
            encoding_format: "float"
        };
        if (options.dimensions) {
            body.dimensions = options.dimensions;
        }
        const response = await fetch("https://api.openai.com/v1/embeddings", {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${apiKey}`
            },
            body: JSON.stringify(body),
            signal: controller.signal
        });
        if (!response.ok) {
            const text = await response.text().catch(() => "");
            throw new Error(`OpenAI embeddings HTTP ${response.status}: ${text.slice(0, 300)}`);
        }
        const parsed = (await response.json());
        const output = new Map();
        for (const entry of parsed.data ?? []) {
            const item = typeof entry.index === "number" ? items[entry.index] : undefined;
            const embedding = numberArray(entry.embedding);
            if (item && embedding) {
                output.set(item.id, embedding);
            }
        }
        return output;
    }
    catch (error) {
        // Only the timer above aborts this controller, so an aborted signal
        // means the request (or reading its body) exceeded the timeout. Report
        // that explicitly instead of surfacing a bare AbortError.
        if (controller.signal.aborted) {
            throw new Error(`OpenAI embeddings request timed out after ${timeoutMs}ms`, { cause: error });
        }
        throw error;
    }
    finally {
        clearTimeout(timeout);
    }
}
343
/**
 * Embeds `items` by piping them to a user-configured local command.
 *
 * Each item is written to the command's stdin as one JSON line carrying
 * `id`, `text`, `model` and `dimensions`; stdout is parsed by `parseLocalEmbeddingOutput`.
 *
 * @param {Array<{id: string, text: string}>} items - Inputs to embed.
 * @param {{command?: string, args?: string[], model?: string, dimensions?: number, timeoutMs?: number}} options
 * @returns {Promise<Map<string, number[]>>} Embeddings keyed by the ids the command reported.
 * @throws {Error} When no command is configured, or when running/parsing fails.
 */
async function embedWithLocalCommand(items, options) {
    const { command, model, dimensions } = options;
    if (!command) {
        throw new Error("semantic local-command provider requires a command");
    }
    const lines = [];
    for (const item of items) {
        const payload = { id: item.id, text: item.text, model, dimensions };
        lines.push(`${JSON.stringify(payload)}\n`);
    }
    const commandArgs = options.args ?? [];
    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const stdout = await runLocalEmbeddingCommand(command, commandArgs, lines.join(""), timeoutMs);
    return parseLocalEmbeddingOutput(stdout);
}
351
/**
 * Runs the local embedding command and returns its stdout.
 *
 * The command receives `input` on stdin, the environment built by
 * `localEmbeddingCommandEnv()`, an output cap of MAX_LOCAL_COMMAND_OUTPUT_BYTES
 * and the given timeout.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {string} input - Text written to the command's stdin.
 * @param {number} timeoutMs - Deadline handed to `runCommand`.
 * @returns {Promise<string>} The command's stdout on success.
 * @throws {Error} On timeout, output truncation, or a failing exit/signal.
 */
async function runLocalEmbeddingCommand(command, args, input, timeoutMs) {
    const outcome = await runCommand(command, args, {
        env: localEmbeddingCommandEnv(),
        input,
        killProcessGroup: true,
        maxBufferBytes: MAX_LOCAL_COMMAND_OUTPUT_BYTES,
        timeoutMs
    });
    if (outcome.ok) {
        return outcome.stdout;
    }
    if (outcome.timedOut) {
        throw new Error(`semantic local-command timed out after ${timeoutMs}ms`);
    }
    if (outcome.truncated) {
        throw new Error(`semantic local-command exceeded Codexa's ${MAX_LOCAL_COMMAND_OUTPUT_BYTES} byte output cap`);
    }
    // A null exit code means the process ended via a signal rather than exiting.
    let status;
    if (outcome.exitCode === null) {
        status = `signal ${outcome.signal ?? "unknown"}`;
    }
    else {
        status = `exit ${outcome.exitCode}`;
    }
    throw new Error(`semantic local-command failed with ${status}: ${outcome.stderr.slice(0, 600)}`);
}
371
/**
 * Builds the environment handed to the local embedding command.
 *
 * Only the allow-listed variables below are copied from `process.env`, and only
 * when they are set; everything else in the parent environment is left out.
 *
 * @returns {Record<string, string>} The filtered environment.
 */
function localEmbeddingCommandEnv() {
    const passthrough = [
        "PATH",
        "TMPDIR",
        "TMP",
        "TEMP",
        "LANG",
        "LC_ALL",
        "SystemRoot",
        "WINDIR",
        "ComSpec",
        "PATHEXT"
    ];
    const pairs = passthrough
        .filter((name) => process.env[name] !== undefined)
        .map((name) => [name, process.env[name]]);
    return Object.fromEntries(pairs);
}
392
/**
 * Parses the stdout of the local embedding command into a map of embeddings.
 *
 * Accepted shapes: a JSON array of records, a JSON object with an `embeddings`
 * array, a single JSON record, or JSON Lines (one record per line). Records that
 * are not objects, lack a string `id`, or whose `embedding` is rejected by
 * `numberArray` are ignored.
 *
 * @param {string} output - Raw stdout from the command.
 * @returns {Map<string, number[]>} Embeddings keyed by record id.
 * @throws {Error} When the output is empty or yields no usable record.
 * @throws {SyntaxError} When the output is neither valid JSON nor valid JSON Lines.
 */
function parseLocalEmbeddingOutput(output) {
    const trimmed = output.trim();
    if (!trimmed) {
        throw new Error("semantic local-command produced no output");
    }
    let records;
    try {
        const parsed = JSON.parse(trimmed);
        if (Array.isArray(parsed)) {
            records = parsed;
        }
        else if (parsed && typeof parsed === "object" && Array.isArray(parsed.embeddings)) {
            records = parsed.embeddings;
        }
        else {
            records = [parsed];
        }
    }
    catch {
        // Not a single JSON document: fall back to JSON Lines. Blank lines between
        // records are skipped so they do not abort the parse.
        records = trimmed
            .split(/\r?\n/u)
            .filter((line) => line.trim().length > 0)
            .map((line) => JSON.parse(line));
    }
    const embeddings = new Map();
    for (const record of records) {
        if (!record || typeof record !== "object") {
            continue;
        }
        const id = record.id;
        const embedding = numberArray(record.embedding);
        if (typeof id === "string" && embedding) {
            embeddings.set(id, embedding);
        }
    }
    if (embeddings.size === 0) {
        throw new Error("semantic local-command output did not contain {id, embedding} records");
    }
    return embeddings;
}
429
/**
 * Loads the semantic cache (manifest plus vector records) stored under `repoRoot`.
 *
 * Never throws for read/parse problems: every failure is reported as
 * `{ ok: false, reason }`. Vector lines that do not pass
 * `isVectorRecordForManifest` are dropped, and the surviving count must equal
 * the manifest's `chunkCount`.
 *
 * @param {string} repoRoot - Repository root that contains SEMANTIC_CACHE_DIR.
 * @returns {{ok: true, manifest: object, vectors: object[]} | {ok: false, reason: string}}
 */
function loadSemanticCache(repoRoot) {
    const cacheDir = path.join(repoRoot, SEMANTIC_CACHE_DIR);
    const manifestPath = path.join(cacheDir, MANIFEST_FILE);
    try {
        const manifestText = readSizedTextSync(manifestPath, MAX_SEMANTIC_MANIFEST_BYTES);
        const manifest = JSON.parse(manifestText);
        if (!isManifest(manifest)) {
            return { ok: false, reason: "semantic cache manifest is invalid" };
        }
        const vectorPath = path.join(cacheDir, manifest.vectorsFile);
        const vectorText = readSizedTextSync(vectorPath, MAX_SEMANTIC_VECTOR_BYTES);
        // The vector file is JSON Lines; blank lines carry no record.
        const vectorLines = [];
        for (const line of vectorText.split(/\r?\n/u)) {
            if (line.trim().length > 0) {
                vectorLines.push(line);
            }
        }
        if (vectorLines.length > MAX_SEMANTIC_VECTOR_RECORDS) {
            return { ok: false, reason: `semantic cache vector file has too many records: ${vectorLines.length}` };
        }
        const vectors = [];
        for (const line of vectorLines) {
            const record = JSON.parse(line);
            if (isVectorRecordForManifest(record, manifest)) {
                vectors.push(record);
            }
        }
        if (vectors.length !== manifest.chunkCount) {
            return { ok: false, reason: `semantic cache vector count mismatch: manifest ${manifest.chunkCount}, vectors ${vectors.length}` };
        }
        return { ok: true, manifest, vectors };
    }
    catch (error) {
        return { ok: false, reason: `semantic cache unavailable: ${errorMessage(error)}` };
    }
}
454
/**
 * Reads the semantic cache manifest for `repoRoot`, if a valid one exists.
 *
 * @param {string} repoRoot - Repository root that contains SEMANTIC_CACHE_DIR.
 * @returns {object | undefined} The parsed manifest when it passes `isManifest`;
 *   `undefined` when the file cannot be read, is not JSON, or fails validation.
 */
function readSemanticManifest(repoRoot) {
    let candidate;
    try {
        const manifestPath = path.join(repoRoot, SEMANTIC_CACHE_DIR, MANIFEST_FILE);
        candidate = JSON.parse(readSizedTextSync(manifestPath, MAX_SEMANTIC_MANIFEST_BYTES));
        if (!isManifest(candidate)) {
            return undefined;
        }
    }
    catch {
        return undefined;
    }
    return candidate;
}
463
/**
 * Synchronously reads a UTF-8 text file, refusing files larger than `maxBytes`.
 *
 * The size is taken from `statSync` before the file is read.
 *
 * @param {string} filePath - File to read.
 * @param {number} maxBytes - Largest accepted size in bytes.
 * @returns {string} The file contents decoded as UTF-8.
 * @throws {Error} When the file exceeds `maxBytes` or cannot be stat'ed/read.
 */
function readSizedTextSync(filePath, maxBytes) {
    const { size } = fsSync.statSync(filePath);
    if (size > maxBytes) {
        const fileName = path.basename(filePath);
        throw new Error(`${fileName} exceeds ${maxBytes} bytes`);
    }
    return fsSync.readFileSync(filePath, "utf8");
}
470
/**
 * Checks that `record` is a well-formed vector record whose embedding length
 * equals the manifest's `dimensions`.
 *
 * @param {unknown} record - Candidate parsed from the vector file.
 * @param {{dimensions: number}} manifest - Manifest the record must agree with.
 * @returns {boolean} True when the record is usable with this manifest.
 */
function isVectorRecordForManifest(record, manifest) {
    if (!isVectorRecord(record)) {
        return false;
    }
    return record.embedding.length === manifest.dimensions;
}
473
/**
 * Writes the vector file and manifest into `cacheDir`.
 *
 * Both files are first written under unique temporary names and then renamed
 * into place, vectors before manifest. If any step fails, the temporary files
 * are removed before the error is rethrown so failed writes do not accumulate
 * in the cache directory.
 *
 * @param {string} cacheDir - Destination directory; created if missing.
 * @param {{vectorsFile: string}} manifest - Manifest to serialize; names the vector file.
 * @param {object[]} vectors - Records written as JSON Lines.
 * @returns {Promise<void>}
 * @throws {Error} Any filesystem error from mkdir, writeFile or rename.
 */
async function writeSemanticCache(cacheDir, manifest, vectors) {
    await fs.mkdir(cacheDir, { recursive: true });
    const tempSuffix = `.tmp-${process.pid}-${Date.now()}-${randomUUID()}`;
    const manifestTemp = path.join(cacheDir, `${MANIFEST_FILE}${tempSuffix}`);
    const vectorsTemp = path.join(cacheDir, `${manifest.vectorsFile}${tempSuffix}`);
    try {
        await fs.writeFile(vectorsTemp, vectors.map((record) => JSON.stringify(record)).join("\n") + "\n", "utf8");
        await fs.writeFile(manifestTemp, JSON.stringify(manifest, null, 2) + "\n", "utf8");
        // Publish vectors first so a visible manifest always has its vector file.
        await fs.rename(vectorsTemp, path.join(cacheDir, manifest.vectorsFile));
        await fs.rename(manifestTemp, path.join(cacheDir, MANIFEST_FILE));
    }
    catch (error) {
        // Best-effort cleanup; `force` ignores temp files that were never
        // created or were already renamed, and the original error still wins.
        await Promise.allSettled([
            fs.rm(vectorsTemp, { force: true }),
            fs.rm(manifestTemp, { force: true })
        ]);
        throw error;
    }
}
483
/**
 * Builds the result returned when semantic retrieval cannot be used.
 *
 * Provider and model come from the manifest when present, otherwise from `options`.
 *
 * @param {{provider?: string, model?: string}} options - Requested provider settings.
 * @param {unknown} diagnostics - Diagnostics attached verbatim to the summary.
 * @param {{provider?: string, model?: string, chunkCount?: number}} [manifest] - Cache manifest, if any.
 * @returns {{entries: [], summary: object}} Empty entries plus an "unavailable" summary.
 */
function unavailable(options, diagnostics, manifest) {
    const provider = manifest?.provider ?? options.provider;
    const model = manifest?.model ?? options.model;
    const summary = {
        enabled: true,
        status: "unavailable",
        provider,
        model,
        chunkCount: manifest?.chunkCount,
        diagnostics
    };
    return { entries: [], summary };
}
496
/**
 * Resolves the semantic provider, failing when none can be determined.
 *
 * Resolution order: `options.provider`, then CODEXA_SEMANTIC_PROVIDER, then
 * `inferProviderFromEnvironment(true)`.
 *
 * @param {{provider?: string, command?: string}} options - Caller-supplied settings.
 * @returns {string} The resolved provider.
 * @throws {Error} When no provider resolves, or when "local-command" is chosen
 *   without `options.command` or CODEXA_SEMANTIC_COMMAND.
 */
function requiredProvider(options) {
    const requested = options.provider ?? process.env.CODEXA_SEMANTIC_PROVIDER;
    const provider = semanticProviderFromValue(requested) ?? inferProviderFromEnvironment(true);
    if (!provider) {
        throw new Error("semantic provider is required; use --provider openai or --provider local-command");
    }
    if (provider === "local-command") {
        const hasCommand = Boolean(options.command) || Boolean(process.env.CODEXA_SEMANTIC_COMMAND);
        if (!hasCommand) {
            throw new Error("local-command semantic provider requires --command or CODEXA_SEMANTIC_COMMAND");
        }
    }
    return provider;
}
506
/**
 * Merges caller options with their CODEXA_SEMANTIC_* environment fallbacks.
 *
 * Explicit options win; environment variables fill the gaps; `timeoutMs` and
 * `batchSize` finally fall back to DEFAULT_TIMEOUT_MS and DEFAULT_BATCH_SIZE.
 *
 * @param {object} options - Caller options (provider, model, dimensions, command,
 *   args, timeoutMs, batchSize); every field is optional.
 * @returns {object} Options object with the same fields, resolved.
 */
function semanticProviderOptionsWithEnvironment(options) {
    const env = process.env;
    const provider = semanticProviderFromValue(options.provider ?? env.CODEXA_SEMANTIC_PROVIDER);
    const model = options.model ?? env.CODEXA_SEMANTIC_MODEL;
    const dimensions = positiveInt(options.dimensions) ?? positiveIntFromEnv("CODEXA_SEMANTIC_DIMENSIONS");
    const command = options.command ?? env.CODEXA_SEMANTIC_COMMAND;
    const args = options.args ?? semanticArgsFromEnv();
    const timeoutMs = positiveInt(options.timeoutMs)
        ?? positiveIntFromEnv("CODEXA_SEMANTIC_TIMEOUT_MS")
        ?? DEFAULT_TIMEOUT_MS;
    const batchSize = positiveInt(options.batchSize)
        ?? positiveIntFromEnv("CODEXA_SEMANTIC_BATCH_SIZE")
        ?? DEFAULT_BATCH_SIZE;
    return { provider, model, dimensions, command, args, timeoutMs, batchSize };
}
517
/**
 * Reports whether the chosen provider has what it needs to run.
 *
 * "openai" needs OPENAI_API_KEY in the environment; any other provider needs
 * `options.command`.
 *
 * @param {string} provider - Provider identifier.
 * @param {{command?: string}} options - Resolved provider options.
 * @returns {boolean} True when the prerequisite is present.
 */
function semanticProviderRunnable(provider, options) {
    const prerequisite = provider === "openai" ? process.env.OPENAI_API_KEY : options.command;
    return Boolean(prerequisite);
}
523
/**
 * Picks the model name to record for a provider.
 *
 * @param {string} provider - Provider identifier.
 * @param {string} [model] - Explicit model; returned as-is unless null/undefined.
 * @returns {string} `model`, else DEFAULT_OPENAI_EMBEDDING_MODEL for "openai",
 *   else the literal "local-command".
 */
function providerModel(provider, model) {
    if (model !== undefined && model !== null) {
        return model;
    }
    if (provider === "openai") {
        return DEFAULT_OPENAI_EMBEDDING_MODEL;
    }
    return "local-command";
}
526
/**
 * Guesses the provider from environment variables.
 *
 * CODEXA_SEMANTIC_COMMAND takes precedence over OPENAI_API_KEY.
 *
 * @param {boolean} enabled - When falsy, no inference is attempted.
 * @returns {"local-command" | "openai" | undefined} The inferred provider, if any.
 */
function inferProviderFromEnvironment(enabled) {
    if (enabled) {
        if (process.env.CODEXA_SEMANTIC_COMMAND) {
            return "local-command";
        }
        if (process.env.OPENAI_API_KEY) {
            return "openai";
        }
    }
    return undefined;
}
538
/**
 * Resolves an explicit on/off override for semantic retrieval.
 *
 * A boolean `value` wins. Otherwise CODEXA_SEMANTIC is read (trimmed,
 * case-insensitive): 1/true/yes/on enable, 0/false/no/off disable, and
 * anything else (including unset, empty and "auto") means no override.
 *
 * @param {unknown} value - Caller-supplied flag.
 * @returns {boolean | undefined} The override, or `undefined` when there is none.
 */
function semanticEnabledOverride(value) {
    if (typeof value === "boolean") {
        return value;
    }
    const setting = process.env.CODEXA_SEMANTIC?.trim().toLowerCase();
    switch (setting) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        case "0":
        case "false":
        case "no":
        case "off":
            return false;
        default:
            return undefined;
    }
}
554
/**
 * Validates a provider identifier.
 *
 * @param {unknown} value - Candidate provider.
 * @returns {"openai" | "local-command" | undefined} `value` when it is one of
 *   the two supported providers, otherwise `undefined`.
 */
export function semanticProviderFromValue(value) {
    switch (value) {
        case "openai":
        case "local-command":
            return value;
        default:
            return undefined;
    }
}
560
/**
 * Reads extra command arguments from CODEXA_SEMANTIC_ARGS_JSON.
 *
 * @returns {string[] | undefined} The parsed value when it is a JSON array of
 *   strings; `undefined` when the variable is unset/empty, is not valid JSON,
 *   or holds any other shape.
 */
function semanticArgsFromEnv() {
    const raw = process.env.CODEXA_SEMANTIC_ARGS_JSON;
    if (!raw) {
        return undefined;
    }
    let candidate;
    try {
        candidate = JSON.parse(raw);
    }
    catch {
        return undefined;
    }
    if (!Array.isArray(candidate)) {
        return undefined;
    }
    const allStrings = candidate.every((entry) => typeof entry === "string");
    return allStrings ? candidate : undefined;
}
573
/**
 * Computes a SHA-256 fingerprint of the indexed snapshot and its chunks.
 *
 * The digest covers the snapshot id, a newline, and one `path:hash(text)` line
 * per chunk, so it changes whenever the snapshot id, a chunk path, a chunk's
 * text or the chunk order changes.
 *
 * @param {{snapshot: {snapshotId: string}}} index - Index whose snapshot is fingerprinted.
 * @param {Array<{path: string, text: string}>} chunks - Chunks in cache order.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function semanticSourceFingerprint(index, chunks) {
    const hasher = createHash("sha256");
    hasher.update(index.snapshot.snapshotId);
    hasher.update("\n");
    const chunkLines = chunks.map((chunk) => {
        const textHash = hashText(chunk.text);
        return `${chunk.path}:${textHash}`;
    });
    hasher.update(chunkLines.join("\n"));
    return hasher.digest("hex");
}
580
/**
 * Derives the vector file name for a cache configuration.
 *
 * The name embeds the first 24 hex characters of a SHA-256 digest over the
 * source fingerprint, provider, model and dimensions (newline-separated).
 *
 * @param {{sourceFingerprint: string, provider: string, model: string, dimensions: number}} input
 * @returns {string} A name of the form `vectors-<24 hex chars>.jsonl`.
 */
function semanticVectorFileName(input) {
    const hasher = createHash("sha256");
    const parts = [input.sourceFingerprint, input.provider, input.model, String(input.dimensions)];
    parts.forEach((part, position) => {
        if (position > 0) {
            hasher.update("\n");
        }
        hasher.update(part);
    });
    const prefix = hasher.digest("hex").slice(0, 24);
    return `vectors-${prefix}.jsonl`;
}
592
/**
 * Hashes a value with SHA-256.
 *
 * @param {string} value - Data to hash.
 * @returns {string} Hex-encoded digest.
 */
function hashText(value) {
    const hasher = createHash("sha256");
    hasher.update(value);
    return hasher.digest("hex");
}
595
/**
 * Produces a short single-line preview of `text`.
 *
 * Runs of whitespace collapse to one space, the ends are trimmed, and the
 * result is cut to MAX_PREVIEW_CHARS characters.
 *
 * @param {string} text - Source text.
 * @returns {string} The compacted preview.
 */
function compactPreview(text) {
    const collapsed = text.replace(/\s+/gu, " ");
    const trimmed = collapsed.trim();
    return trimmed.slice(0, MAX_PREVIEW_CHARS);
}
598
/**
 * Groups items by their `path` property.
 *
 * @param {Iterable<{path: string}>} items - Items to group.
 * @returns {Map<string, object[]>} Map from path to its items; keys appear in
 *   first-seen order and each list keeps the input order.
 */
function groupByPath(items) {
    const buckets = new Map();
    for (const entry of items) {
        const bucket = buckets.get(entry.path);
        if (bucket === undefined || bucket === null) {
            buckets.set(entry.path, [entry]);
        }
        else {
            bucket.push(entry);
        }
    }
    return buckets;
}
607
/**
 * Maps a "/"-separated file path to a coarse module name.
 *
 * Files without a directory belong to ".". Paths at least two directories deep
 * under "src" map to "src/<dir>"; everything else maps to its first segment.
 *
 * @param {string} filePath - Path using "/" separators.
 * @returns {string} The module name.
 */
function moduleNameForPath(filePath) {
    const segments = filePath.split("/");
    const [top, second] = segments;
    if (segments.length <= 1) {
        return ".";
    }
    const isNestedUnderSrc = top === "src" && segments.length > 2;
    if (isNestedUnderSrc) {
        return `${top}/${second}`;
    }
    return top ?? ".";
}
617
/**
 * Scales a vector to unit Euclidean length.
 *
 * @param {number[]} vector - Input components.
 * @returns {number[]} A new normalized array, or the original array itself
 *   when its norm is not greater than zero.
 */
function normalizeVector(vector) {
    let sumOfSquares = 0;
    vector.forEach((component) => {
        sumOfSquares += component * component;
    });
    const norm = Math.sqrt(sumOfSquares);
    if (norm > 0) {
        return vector.map((component) => component / norm);
    }
    return vector;
}
621
/**
 * Computes the dot product of two vectors over their common prefix.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of `a[i] * b[i]` for indices present in both arrays.
 */
function dot(a, b) {
    const sharedLength = Math.min(a.length, b.length);
    let total = 0;
    let position = 0;
    while (position < sharedLength) {
        total += a[position] * b[position];
        position += 1;
    }
    return total;
}
628
/**
 * Validates that `value` is a non-empty array of finite numbers.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number[] | null} A copy of the array when every entry is a finite
 *   number; `null` for non-arrays, empty arrays, or arrays holding anything else.
 */
function numberArray(value) {
    const isCandidate = Array.isArray(value) && value.length !== 0;
    if (!isCandidate) {
        return null;
    }
    // Number.isFinite is false for every non-number, so no separate typeof check is needed.
    const allFinite = value.every((entry) => Number.isFinite(entry));
    return allFinite ? value.slice() : null;
}
635
/**
 * Type guard for a semantic cache manifest.
 *
 * Requires the current SEMANTIC_CACHE_VERSION, string `snapshotId`,
 * `indexedAt`, `model` and `sourceFingerprint`, a supported `provider`,
 * numeric `dimensions` and `chunkCount`, and a `vectorsFile` accepted by
 * `isSemanticVectorFileName`.
 *
 * @param {unknown} value - Parsed manifest candidate.
 * @returns {boolean} True when every field has the expected shape.
 */
function isManifest(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    const candidate = value;
    if (candidate.schemaVersion !== SEMANTIC_CACHE_VERSION) {
        return false;
    }
    if (typeof candidate.snapshotId !== "string" || typeof candidate.indexedAt !== "string") {
        return false;
    }
    if (candidate.provider !== "openai" && candidate.provider !== "local-command") {
        return false;
    }
    if (typeof candidate.model !== "string") {
        return false;
    }
    if (typeof candidate.dimensions !== "number" || typeof candidate.chunkCount !== "number") {
        return false;
    }
    if (!isSemanticVectorFileName(candidate.vectorsFile)) {
        return false;
    }
    return typeof candidate.sourceFingerprint === "string";
}
650
/**
 * Checks whether `value` is an acceptable vector file name.
 *
 * Accepts the VECTORS_FILE name or a name of the form
 * `vectors-<24 lowercase hex chars>.jsonl`.
 *
 * @param {unknown} value - Candidate file name.
 * @returns {boolean} True when the name is accepted.
 */
function isSemanticVectorFileName(value) {
    if (value === VECTORS_FILE) {
        return true;
    }
    if (typeof value !== "string") {
        return false;
    }
    return /^vectors-[a-f0-9]{24}\.jsonl$/u.test(value);
}
653
/**
 * Type guard for a cached vector record.
 *
 * @param {unknown} value - Candidate parsed from the vector file.
 * @returns {boolean} True when `id`, `path`, `title` and `preview` are strings
 *   and `embedding` is accepted by `numberArray`.
 */
function isVectorRecord(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    const candidate = value;
    const stringFields = ["id", "path", "title", "preview"];
    if (!stringFields.every((field) => typeof candidate[field] === "string")) {
        return false;
    }
    return Boolean(numberArray(candidate.embedding));
}
660
/**
 * Coerces a number to a positive integer by truncation.
 *
 * The value is truncated first and only then checked, so fractions in (0, 1)
 * yield `undefined` instead of the non-positive result 0.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | undefined} The truncated integer when it is at least 1;
 *   `undefined` for non-numbers, NaN, infinities and anything truncating to <= 0.
 */
function positiveInt(value) {
    // Number.isFinite rejects non-numbers (including undefined) as well as NaN/Infinity.
    if (!Number.isFinite(value)) {
        return undefined;
    }
    const truncated = Math.trunc(value);
    return truncated > 0 ? truncated : undefined;
}
663
/**
 * Reads a positive integer from an environment variable.
 *
 * Only values made up entirely of decimal digits are considered; the parsed
 * number is then passed through `positiveInt`.
 *
 * @param {string} name - Environment variable name.
 * @returns {number | undefined} The parsed value, or `undefined` when the
 *   variable is unset, empty, not all digits, or rejected by `positiveInt`.
 */
function positiveIntFromEnv(name) {
    const raw = process.env[name];
    const isDigitsOnly = Boolean(raw) && /^\d+$/u.test(raw);
    if (!isDigitsOnly) {
        return undefined;
    }
    const parsed = Number.parseInt(raw, 10);
    return positiveInt(parsed);
}
670
/**
 * Extracts a printable message from a caught value.
 *
 * @param {unknown} error - Anything that was thrown.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
673
+ //# sourceMappingURL=semantic-retrieval.js.map