@mirnoorata/codexa 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (364) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +634 -0
  3. package/dist/artifacts.d.ts +2 -0
  4. package/dist/artifacts.js +375 -0
  5. package/dist/artifacts.js.map +1 -0
  6. package/dist/autonomy.d.ts +17 -0
  7. package/dist/autonomy.js +124 -0
  8. package/dist/autonomy.js.map +1 -0
  9. package/dist/autoverify/policy.d.ts +5 -0
  10. package/dist/autoverify/policy.js +18 -0
  11. package/dist/autoverify/policy.js.map +1 -0
  12. package/dist/autoverify.d.ts +45 -0
  13. package/dist/autoverify.js +1041 -0
  14. package/dist/autoverify.js.map +1 -0
  15. package/dist/cache-lock.d.ts +16 -0
  16. package/dist/cache-lock.js +181 -0
  17. package/dist/cache-lock.js.map +1 -0
  18. package/dist/cli/hooks.d.ts +5 -0
  19. package/dist/cli/hooks.js +264 -0
  20. package/dist/cli/hooks.js.map +1 -0
  21. package/dist/cli.d.ts +2 -0
  22. package/dist/cli.js +1034 -0
  23. package/dist/cli.js.map +1 -0
  24. package/dist/codex-contract.d.ts +2 -0
  25. package/dist/codex-contract.js +78 -0
  26. package/dist/codex-contract.js.map +1 -0
  27. package/dist/command.d.ts +34 -0
  28. package/dist/command.js +162 -0
  29. package/dist/command.js.map +1 -0
  30. package/dist/doctor.d.ts +112 -0
  31. package/dist/doctor.js +518 -0
  32. package/dist/doctor.js.map +1 -0
  33. package/dist/eval/baseline.d.ts +7 -0
  34. package/dist/eval/baseline.js +146 -0
  35. package/dist/eval/baseline.js.map +1 -0
  36. package/dist/eval/historical.d.ts +4 -0
  37. package/dist/eval/historical.js +663 -0
  38. package/dist/eval/historical.js.map +1 -0
  39. package/dist/eval/render.d.ts +2 -0
  40. package/dist/eval/render.js +53 -0
  41. package/dist/eval/render.js.map +1 -0
  42. package/dist/eval/scoring.d.ts +21 -0
  43. package/dist/eval/scoring.js +618 -0
  44. package/dist/eval/scoring.js.map +1 -0
  45. package/dist/eval/synthetic.d.ts +36 -0
  46. package/dist/eval/synthetic.js +107 -0
  47. package/dist/eval/synthetic.js.map +1 -0
  48. package/dist/eval/types.d.ts +36 -0
  49. package/dist/eval/types.js +2 -0
  50. package/dist/eval/types.js.map +1 -0
  51. package/dist/eval.d.ts +140 -0
  52. package/dist/eval.js +551 -0
  53. package/dist/eval.js.map +1 -0
  54. package/dist/git.d.ts +17 -0
  55. package/dist/git.js +189 -0
  56. package/dist/git.js.map +1 -0
  57. package/dist/github-release.d.ts +47 -0
  58. package/dist/github-release.js +610 -0
  59. package/dist/github-release.js.map +1 -0
  60. package/dist/github-sync.d.ts +68 -0
  61. package/dist/github-sync.js +345 -0
  62. package/dist/github-sync.js.map +1 -0
  63. package/dist/graph.d.ts +10 -0
  64. package/dist/graph.js +665 -0
  65. package/dist/graph.js.map +1 -0
  66. package/dist/indexer/aliases.d.ts +2 -0
  67. package/dist/indexer/aliases.js +190 -0
  68. package/dist/indexer/aliases.js.map +1 -0
  69. package/dist/indexer/artifact-writing.d.ts +3 -0
  70. package/dist/indexer/artifact-writing.js +79 -0
  71. package/dist/indexer/artifact-writing.js.map +1 -0
  72. package/dist/indexer/discovery.d.ts +2 -0
  73. package/dist/indexer/discovery.js +5 -0
  74. package/dist/indexer/discovery.js.map +1 -0
  75. package/dist/indexer/external-facts.d.ts +6 -0
  76. package/dist/indexer/external-facts.js +45 -0
  77. package/dist/indexer/external-facts.js.map +1 -0
  78. package/dist/indexer/freshness.d.ts +8 -0
  79. package/dist/indexer/freshness.js +56 -0
  80. package/dist/indexer/freshness.js.map +1 -0
  81. package/dist/indexer/graph-stage.d.ts +2 -0
  82. package/dist/indexer/graph-stage.js +21 -0
  83. package/dist/indexer/graph-stage.js.map +1 -0
  84. package/dist/indexer/parsing.d.ts +30 -0
  85. package/dist/indexer/parsing.js +177 -0
  86. package/dist/indexer/parsing.js.map +1 -0
  87. package/dist/indexer/pipeline.d.ts +5 -0
  88. package/dist/indexer/pipeline.js +8 -0
  89. package/dist/indexer/pipeline.js.map +1 -0
  90. package/dist/indexer/ranking.d.ts +4 -0
  91. package/dist/indexer/ranking.js +134 -0
  92. package/dist/indexer/ranking.js.map +1 -0
  93. package/dist/indexer.d.ts +13 -0
  94. package/dist/indexer.js +395 -0
  95. package/dist/indexer.js.map +1 -0
  96. package/dist/init.d.ts +24 -0
  97. package/dist/init.js +566 -0
  98. package/dist/init.js.map +1 -0
  99. package/dist/language.d.ts +8 -0
  100. package/dist/language.js +123 -0
  101. package/dist/language.js.map +1 -0
  102. package/dist/live-index.d.ts +68 -0
  103. package/dist/live-index.js +215 -0
  104. package/dist/live-index.js.map +1 -0
  105. package/dist/lsp/assist.d.ts +44 -0
  106. package/dist/lsp/assist.js +331 -0
  107. package/dist/lsp/assist.js.map +1 -0
  108. package/dist/lsp/client.d.ts +59 -0
  109. package/dist/lsp/client.js +208 -0
  110. package/dist/lsp/client.js.map +1 -0
  111. package/dist/mcp/compaction.d.ts +15 -0
  112. package/dist/mcp/compaction.js +1249 -0
  113. package/dist/mcp/compaction.js.map +1 -0
  114. package/dist/mcp/envelope.d.ts +44 -0
  115. package/dist/mcp/envelope.js +425 -0
  116. package/dist/mcp/envelope.js.map +1 -0
  117. package/dist/mcp/prompts.d.ts +2 -0
  118. package/dist/mcp/prompts.js +109 -0
  119. package/dist/mcp/prompts.js.map +1 -0
  120. package/dist/mcp/resources.d.ts +2 -0
  121. package/dist/mcp/resources.js +132 -0
  122. package/dist/mcp/resources.js.map +1 -0
  123. package/dist/mcp/runtime.d.ts +15 -0
  124. package/dist/mcp/runtime.js +122 -0
  125. package/dist/mcp/runtime.js.map +1 -0
  126. package/dist/mcp/session-memory.d.ts +3 -0
  127. package/dist/mcp/session-memory.js +61 -0
  128. package/dist/mcp/session-memory.js.map +1 -0
  129. package/dist/mcp/tool-registry.d.ts +269 -0
  130. package/dist/mcp/tool-registry.js +284 -0
  131. package/dist/mcp/tool-registry.js.map +1 -0
  132. package/dist/mcp/tools.d.ts +53 -0
  133. package/dist/mcp/tools.js +372 -0
  134. package/dist/mcp/tools.js.map +1 -0
  135. package/dist/mcp-repo-root.d.ts +16 -0
  136. package/dist/mcp-repo-root.js +322 -0
  137. package/dist/mcp-repo-root.js.map +1 -0
  138. package/dist/mcp-tool-catalog.d.ts +2 -0
  139. package/dist/mcp-tool-catalog.js +2 -0
  140. package/dist/mcp-tool-catalog.js.map +1 -0
  141. package/dist/mcp.d.ts +11 -0
  142. package/dist/mcp.js +332 -0
  143. package/dist/mcp.js.map +1 -0
  144. package/dist/outcome-ranking.d.ts +5 -0
  145. package/dist/outcome-ranking.js +115 -0
  146. package/dist/outcome-ranking.js.map +1 -0
  147. package/dist/parser/context.d.ts +28 -0
  148. package/dist/parser/context.js +2 -0
  149. package/dist/parser/context.js.map +1 -0
  150. package/dist/parser/ecma.d.ts +5 -0
  151. package/dist/parser/ecma.js +388 -0
  152. package/dist/parser/ecma.js.map +1 -0
  153. package/dist/parser/facts.d.ts +12 -0
  154. package/dist/parser/facts.js +137 -0
  155. package/dist/parser/facts.js.map +1 -0
  156. package/dist/parser/json.d.ts +3 -0
  157. package/dist/parser/json.js +318 -0
  158. package/dist/parser/json.js.map +1 -0
  159. package/dist/parser/markdown.d.ts +3 -0
  160. package/dist/parser/markdown.js +180 -0
  161. package/dist/parser/markdown.js.map +1 -0
  162. package/dist/parser/nodes.d.ts +5 -0
  163. package/dist/parser/nodes.js +75 -0
  164. package/dist/parser/nodes.js.map +1 -0
  165. package/dist/parser/python.d.ts +2 -0
  166. package/dist/parser/python.js +307 -0
  167. package/dist/parser/python.js.map +1 -0
  168. package/dist/parser/references.d.ts +3 -0
  169. package/dist/parser/references.js +204 -0
  170. package/dist/parser/references.js.map +1 -0
  171. package/dist/parser/risks.d.ts +4 -0
  172. package/dist/parser/risks.js +62 -0
  173. package/dist/parser/risks.js.map +1 -0
  174. package/dist/parser/routes.d.ts +5 -0
  175. package/dist/parser/routes.js +97 -0
  176. package/dist/parser/routes.js.map +1 -0
  177. package/dist/parser/shallow.d.ts +3 -0
  178. package/dist/parser/shallow.js +545 -0
  179. package/dist/parser/shallow.js.map +1 -0
  180. package/dist/parser/source.d.ts +4 -0
  181. package/dist/parser/source.js +127 -0
  182. package/dist/parser/source.js.map +1 -0
  183. package/dist/parser.d.ts +2 -0
  184. package/dist/parser.js +2 -0
  185. package/dist/parser.js.map +1 -0
  186. package/dist/placeholder-signals.d.ts +15 -0
  187. package/dist/placeholder-signals.js +511 -0
  188. package/dist/placeholder-signals.js.map +1 -0
  189. package/dist/post-edit-outcomes.d.ts +167 -0
  190. package/dist/post-edit-outcomes.js +484 -0
  191. package/dist/post-edit-outcomes.js.map +1 -0
  192. package/dist/queries.d.ts +12 -0
  193. package/dist/queries.js +13 -0
  194. package/dist/queries.js.map +1 -0
  195. package/dist/query/change-plan.d.ts +48 -0
  196. package/dist/query/change-plan.js +858 -0
  197. package/dist/query/change-plan.js.map +1 -0
  198. package/dist/query/compact-data.d.ts +25 -0
  199. package/dist/query/compact-data.js +74 -0
  200. package/dist/query/compact-data.js.map +1 -0
  201. package/dist/query/context.d.ts +5 -0
  202. package/dist/query/context.js +1162 -0
  203. package/dist/query/context.js.map +1 -0
  204. package/dist/query/diff.d.ts +5 -0
  205. package/dist/query/diff.js +111 -0
  206. package/dist/query/diff.js.map +1 -0
  207. package/dist/query/edge-evidence.d.ts +3 -0
  208. package/dist/query/edge-evidence.js +36 -0
  209. package/dist/query/edge-evidence.js.map +1 -0
  210. package/dist/query/formatting.d.ts +14 -0
  211. package/dist/query/formatting.js +67 -0
  212. package/dist/query/formatting.js.map +1 -0
  213. package/dist/query/graph-traversal.d.ts +22 -0
  214. package/dist/query/graph-traversal.js +218 -0
  215. package/dist/query/graph-traversal.js.map +1 -0
  216. package/dist/query/graph.d.ts +14 -0
  217. package/dist/query/graph.js +102 -0
  218. package/dist/query/graph.js.map +1 -0
  219. package/dist/query/impact.d.ts +28 -0
  220. package/dist/query/impact.js +568 -0
  221. package/dist/query/impact.js.map +1 -0
  222. package/dist/query/inspection.d.ts +9 -0
  223. package/dist/query/inspection.js +290 -0
  224. package/dist/query/inspection.js.map +1 -0
  225. package/dist/query/next-tools.d.ts +3 -0
  226. package/dist/query/next-tools.js +25 -0
  227. package/dist/query/next-tools.js.map +1 -0
  228. package/dist/query/placeholders.d.ts +24 -0
  229. package/dist/query/placeholders.js +121 -0
  230. package/dist/query/placeholders.js.map +1 -0
  231. package/dist/query/post-edit/decision.d.ts +49 -0
  232. package/dist/query/post-edit/decision.js +130 -0
  233. package/dist/query/post-edit/decision.js.map +1 -0
  234. package/dist/query/post-edit/dirty-scope.d.ts +16 -0
  235. package/dist/query/post-edit/dirty-scope.js +21 -0
  236. package/dist/query/post-edit/dirty-scope.js.map +1 -0
  237. package/dist/query/post-edit/next-actions.d.ts +22 -0
  238. package/dist/query/post-edit/next-actions.js +44 -0
  239. package/dist/query/post-edit/next-actions.js.map +1 -0
  240. package/dist/query/post-edit/snapshot-contract.d.ts +8 -0
  241. package/dist/query/post-edit/snapshot-contract.js +111 -0
  242. package/dist/query/post-edit/snapshot-contract.js.map +1 -0
  243. package/dist/query/post-edit.d.ts +5 -0
  244. package/dist/query/post-edit.js +1108 -0
  245. package/dist/query/post-edit.js.map +1 -0
  246. package/dist/query/quality.d.ts +43 -0
  247. package/dist/query/quality.js +134 -0
  248. package/dist/query/quality.js.map +1 -0
  249. package/dist/query/raw-search.d.ts +23 -0
  250. package/dist/query/raw-search.js +147 -0
  251. package/dist/query/raw-search.js.map +1 -0
  252. package/dist/query/runtime.d.ts +11 -0
  253. package/dist/query/runtime.js +79 -0
  254. package/dist/query/runtime.js.map +1 -0
  255. package/dist/query/search.d.ts +25 -0
  256. package/dist/query/search.js +429 -0
  257. package/dist/query/search.js.map +1 -0
  258. package/dist/query/session-memory.d.ts +3 -0
  259. package/dist/query/session-memory.js +108 -0
  260. package/dist/query/session-memory.js.map +1 -0
  261. package/dist/query/session.d.ts +41 -0
  262. package/dist/query/session.js +90 -0
  263. package/dist/query/session.js.map +1 -0
  264. package/dist/query/targets.d.ts +25 -0
  265. package/dist/query/targets.js +97 -0
  266. package/dist/query/targets.js.map +1 -0
  267. package/dist/query/test-commands.d.ts +10 -0
  268. package/dist/query/test-commands.js +110 -0
  269. package/dist/query/test-commands.js.map +1 -0
  270. package/dist/query/test-plan.d.ts +6 -0
  271. package/dist/query/test-plan.js +104 -0
  272. package/dist/query/test-plan.js.map +1 -0
  273. package/dist/query/tests.d.ts +48 -0
  274. package/dist/query/tests.js +444 -0
  275. package/dist/query/tests.js.map +1 -0
  276. package/dist/query/verification/shell.d.ts +20 -0
  277. package/dist/query/verification/shell.js +164 -0
  278. package/dist/query/verification/shell.js.map +1 -0
  279. package/dist/query/verification.d.ts +47 -0
  280. package/dist/query/verification.js +1123 -0
  281. package/dist/query/verification.js.map +1 -0
  282. package/dist/query/workflow.d.ts +17 -0
  283. package/dist/query/workflow.js +252 -0
  284. package/dist/query/workflow.js.map +1 -0
  285. package/dist/query/workspace-guidance.d.ts +26 -0
  286. package/dist/query/workspace-guidance.js +214 -0
  287. package/dist/query/workspace-guidance.js.map +1 -0
  288. package/dist/query/worktree-state.d.ts +22 -0
  289. package/dist/query/worktree-state.js +32 -0
  290. package/dist/query/worktree-state.js.map +1 -0
  291. package/dist/query/worktree.d.ts +16 -0
  292. package/dist/query/worktree.js +194 -0
  293. package/dist/query/worktree.js.map +1 -0
  294. package/dist/query-data.d.ts +4 -0
  295. package/dist/query-data.js +112 -0
  296. package/dist/query-data.js.map +1 -0
  297. package/dist/repo-files.d.ts +24 -0
  298. package/dist/repo-files.js +105 -0
  299. package/dist/repo-files.js.map +1 -0
  300. package/dist/resolver.d.ts +9 -0
  301. package/dist/resolver.js +555 -0
  302. package/dist/resolver.js.map +1 -0
  303. package/dist/retrieval.d.ts +46 -0
  304. package/dist/retrieval.js +783 -0
  305. package/dist/retrieval.js.map +1 -0
  306. package/dist/risk-ingest.d.ts +16 -0
  307. package/dist/risk-ingest.js +458 -0
  308. package/dist/risk-ingest.js.map +1 -0
  309. package/dist/rules.d.ts +10 -0
  310. package/dist/rules.js +107 -0
  311. package/dist/rules.js.map +1 -0
  312. package/dist/semantic/python.d.ts +9 -0
  313. package/dist/semantic/python.js +817 -0
  314. package/dist/semantic/python.js.map +1 -0
  315. package/dist/semantic/typescript.d.ts +10 -0
  316. package/dist/semantic/typescript.js +714 -0
  317. package/dist/semantic/typescript.js.map +1 -0
  318. package/dist/semantic-retrieval.d.ts +53 -0
  319. package/dist/semantic-retrieval.js +673 -0
  320. package/dist/semantic-retrieval.js.map +1 -0
  321. package/dist/session-memory/derivation.d.ts +6 -0
  322. package/dist/session-memory/derivation.js +400 -0
  323. package/dist/session-memory/derivation.js.map +1 -0
  324. package/dist/session-memory/event-log.d.ts +23 -0
  325. package/dist/session-memory/event-log.js +126 -0
  326. package/dist/session-memory/event-log.js.map +1 -0
  327. package/dist/session-memory/formatting.d.ts +7 -0
  328. package/dist/session-memory/formatting.js +86 -0
  329. package/dist/session-memory/formatting.js.map +1 -0
  330. package/dist/session-memory/model.d.ts +94 -0
  331. package/dist/session-memory/model.js +17 -0
  332. package/dist/session-memory/model.js.map +1 -0
  333. package/dist/session-memory/runtime.d.ts +24 -0
  334. package/dist/session-memory/runtime.js +289 -0
  335. package/dist/session-memory/runtime.js.map +1 -0
  336. package/dist/session-memory/store.d.ts +27 -0
  337. package/dist/session-memory/store.js +447 -0
  338. package/dist/session-memory/store.js.map +1 -0
  339. package/dist/session-memory.d.ts +1 -0
  340. package/dist/session-memory.js +2 -0
  341. package/dist/session-memory.js.map +1 -0
  342. package/dist/static-analysis.d.ts +36 -0
  343. package/dist/static-analysis.js +505 -0
  344. package/dist/static-analysis.js.map +1 -0
  345. package/dist/symbol-report-ingest.d.ts +8 -0
  346. package/dist/symbol-report-ingest.js +504 -0
  347. package/dist/symbol-report-ingest.js.map +1 -0
  348. package/dist/task-snapshots.d.ts +41 -0
  349. package/dist/task-snapshots.js +430 -0
  350. package/dist/task-snapshots.js.map +1 -0
  351. package/dist/types.d.ts +848 -0
  352. package/dist/types.js +12 -0
  353. package/dist/types.js.map +1 -0
  354. package/dist/util.d.ts +11 -0
  355. package/dist/util.js +63 -0
  356. package/dist/util.js.map +1 -0
  357. package/dist/version.d.ts +1 -0
  358. package/dist/version.js +5 -0
  359. package/dist/version.js.map +1 -0
  360. package/package.json +81 -0
  361. package/plugins/codexa/.codex-plugin/plugin.json +38 -0
  362. package/plugins/codexa/.mcp.json +20 -0
  363. package/plugins/codexa/scripts/codexa-mcp.js +100 -0
  364. package/plugins/codexa/skills/codexa/SKILL.md +48 -0
@@ -0,0 +1,618 @@
1
+ import { isTestPath } from "../language.js";
2
+ import { formatBaselineCommands } from "./baseline.js";
3
/**
 * Scores one evaluation scenario: compares the files, planned edit targets and
 * tests extracted from a query result against the scenario's oracle and, when
 * baseline command output is supplied, against the files found in that output.
 *
 * @param {object} scenario - Scenario definition; `id`, `suite`, `description`,
 *   `scored` and `oracle` are read.
 * @param {object} result - Query result; `data`, `text` and `refresh` are read.
 * @param {Array<{command: string, output: string}>|null} baseline - Baseline
 *   command runs, or `null` when no baseline was executed.
 * @param {boolean} failOnRefresh - When truthy, a result that reports an
 *   auto-refresh is recorded as a failure.
 * @param {boolean} [redactSample=false] - When truthy, the description and the
 *   text sample in the returned record are replaced with placeholders.
 * @returns {object} Scenario record with `passed`, `score`, `metrics`,
 *   `comparison`, `calibration`, `failures` and a short `sample`.
 */
export function scoreScenario(scenario, result, baseline, failOnRefresh, redactSample = false) {
    // ---- What the query result reported.
    const payload = result.data;
    const structuredDataBytes = serializedByteLength(payload);
    const files = filesFromData(payload);
    const plannedFiles = plannedFilesFromData(payload);
    const tests = testsFromData(payload);

    // ---- What the baseline commands surfaced (empty when there is no baseline).
    const hasBaseline = baseline !== null;
    const baselineFiles = hasBaseline
        ? uniqueInOrder(baseline.flatMap((entry) => baselineFilesFromOutput(entry.command, entry.output)))
        : [];
    const baselineTests = baselineFiles.filter(isTestPath);

    // ---- Oracle expectations.
    const oracle = scenario.oracle;
    const expectedFiles = oracle.expectedFiles ?? [];
    const expectedChangedFiles = oracle.expectedChangedFiles ?? [];
    const expectedTests = oracle.expectedTests ?? [];
    const forbiddenFiles = oracle.forbiddenFiles ?? [];
    const expectedReadFirstFiles = uniqueInOrder([...expectedFiles, ...expectedTests]);
    const failures = [];
    const scored = scenario.scored ?? true;

    // A metric is null ("not measured") whenever the oracle lists nothing for it.
    const recallAgainst = (found, expected) => (expected.length > 0 ? recall(found, expected) : null);
    const fileRecall = recallAgainst(files, expectedFiles);
    const changedFileRecall = recallAgainst(plannedFiles, expectedChangedFiles);
    const testRecall = recallAgainst(tests, expectedTests);
    const precisionK = Math.max(1, expectedReadFirstFiles.length || oracle.topFiles?.length || 5);
    const precisionWithinTopK = (ranked) => (expectedReadFirstFiles.length > 0
        ? precision(ranked.slice(0, precisionK), expectedReadFirstFiles)
        : null);
    const precisionAtK = precisionWithinTopK(files);
    const baselineLines = hasBaseline
        ? baseline.reduce((sum, entry) => sum + entry.output.split(/\r?\n/).filter(Boolean).length, 0)
        : null;
    const selectedToBaselineRatio = baselineLines !== null && baselineLines > 0 ? files.length / baselineLines : null;
    const baselineFileRecall = recallAgainst(baselineFiles, expectedFiles);
    const baselineTestRecall = recallAgainst(baselineTests, expectedTests);
    const baselinePrecisionAtK = precisionWithinTopK(baselineFiles);
    const codexaToBaselineFileRatio = baselineFiles.length > 0 ? files.length / baselineFiles.length : null;

    // ---- Pass thresholds: recalls default to 1, precision defaults to 0.
    const minFileRecall = oracle.minFileRecall ?? 1;
    const minChangedFileRecall = oracle.minChangedFileRecall ?? 1;
    const minTestRecall = oracle.minTestRecall ?? 1;
    const minPrecision = oracle.minFilePrecisionAtK ?? 0;
    const refreshed = Boolean(result.refresh?.refreshed);
    const quality = qualityFromData(payload);

    // ---- Calibration signals.
    const falsePositiveFiles = expectedFiles.length > 0
        ? files.filter((file) => !(expectedFiles.includes(file) || expectedTests.includes(file)))
        : [];
    const missingExpectedFiles = expectedFiles.filter((file) => !files.includes(file));
    const missingExpectedChangedFiles = expectedChangedFiles.filter((file) => !plannedFiles.includes(file));
    const missingExpectedTests = expectedTests.filter((test) => !tests.includes(test));
    const actualCallTrace = callTraceFromData(payload);
    const counts = quality ? quality.counts : null;
    const heuristicHeavy = counts !== null && counts.heuristic > counts.authoritative + counts.derived && counts.heuristic > 0;
    const broadRetrievalFailure = quality?.level === "low" && /broad|natural|session|workflow/i.test(scenario.description);

    // The baseline "wins" on precision only when its file recall is at least as good.
    const baselineWinsFileRecall = baselineFileRecall !== null && fileRecall !== null && baselineFileRecall > fileRecall;
    const baselineWinsTestRecall = baselineTestRecall !== null && testRecall !== null && baselineTestRecall > testRecall;
    const baselineWinsPrecision = baselinePrecisionAtK !== null && precisionAtK !== null && baselinePrecisionAtK > precisionAtK;
    const rawRgBetter = baselineWinsFileRecall ||
        baselineWinsTestRecall ||
        (baselineWinsPrecision && fileRecall !== null && baselineFileRecall !== null && baselineFileRecall >= fileRecall);
    let rawRgBetterReason;
    if (rawRgBetter) {
        const reasons = [];
        if (baselineWinsFileRecall) {
            reasons.push(`file recall baseline ${baselineFileRecall.toFixed(2)} > Codexa ${fileRecall.toFixed(2)}`);
        }
        if (baselineWinsTestRecall) {
            reasons.push(`test recall baseline ${baselineTestRecall.toFixed(2)} > Codexa ${testRecall.toFixed(2)}`);
        }
        if (baselineWinsPrecision) {
            reasons.push(`precision baseline ${baselinePrecisionAtK.toFixed(2)} > Codexa ${precisionAtK.toFixed(2)}`);
        }
        rawRgBetterReason = reasons.join("; ");
    }

    // ---- Output budgets; the structured-data budget never drops below 128 000.
    const maxTextChars = oracle.maxTextChars;
    const overBudgetedOutput = Boolean(maxTextChars && result.text.length > maxTextChars);
    const structuredDataBudget = oracle.maxDataBytes ?? (maxTextChars ? Math.max(128_000, maxTextChars * 8) : 128_000);
    const overBudgetedStructuredData = structuredDataBytes > structuredDataBudget;
    const postEditOutcome = postEditOutcomeFromData(payload);
    const toolHopsToEditReady = toolHopsToEditReadyFromData(payload);
    const verificationProvenancePresent = Boolean(postEditOutcome?.verificationProvenance || verificationProvenanceFromData(payload));

    // ---- Failure collection; the order of pushes is part of the output.
    if (fileRecall !== null && fileRecall < minFileRecall) {
        failures.push(`file recall ${fileRecall.toFixed(2)} < ${minFileRecall.toFixed(2)}`);
    }
    if (changedFileRecall !== null && changedFileRecall < minChangedFileRecall) {
        failures.push(`planned changed-file recall ${changedFileRecall.toFixed(2)} < ${minChangedFileRecall.toFixed(2)}`);
    }
    if (testRecall !== null && testRecall < minTestRecall) {
        failures.push(`test recall ${testRecall.toFixed(2)} < ${minTestRecall.toFixed(2)}`);
    }
    if (precisionAtK !== null && precisionAtK < minPrecision) {
        failures.push(`precision@${precisionK} ${precisionAtK.toFixed(2)} < ${minPrecision.toFixed(2)}`);
    }
    const maxRatio = oracle.maxSelectedToBaselineRatio;
    if (maxRatio !== undefined && selectedToBaselineRatio !== null && selectedToBaselineRatio > maxRatio) {
        failures.push(`selected/baseline ratio ${selectedToBaselineRatio.toFixed(2)} > ${maxRatio.toFixed(2)}`);
    }
    for (const file of forbiddenFiles) {
        if (files.includes(file)) {
            failures.push(`forbidden file returned: ${file}`);
        }
    }
    const maxFalsePositives = oracle.maxFalsePositiveFiles;
    if (maxFalsePositives !== undefined && falsePositiveFiles.length > maxFalsePositives) {
        failures.push(`false-positive files ${falsePositiveFiles.length} > ${maxFalsePositives}`);
    }
    for (const expectedCall of oracle.expectedCodexaCalls ?? []) {
        if (!actualCallTrace.includes(expectedCall)) {
            failures.push(`expected Codexa call missing from trace: ${expectedCall}`);
        }
    }
    const topWindow = files.slice(0, precisionK);
    for (const file of oracle.topFiles ?? []) {
        if (!topWindow.includes(file)) {
            failures.push(`expected top-${precisionK} file missing: ${file}`);
        }
    }
    if (overBudgetedOutput && maxTextChars) {
        failures.push(`text length ${result.text.length} > ${maxTextChars}`);
    }
    if (overBudgetedStructuredData) {
        failures.push(`structured data size ${structuredDataBytes} > ${structuredDataBudget}`);
    }
    if (failOnRefresh && refreshed) {
        failures.push(`query auto-refreshed from ${result.refresh?.reason ?? "unknown"}`);
    }

    // ---- Score: mean of the measured metrics, minus 0.1 per failure (capped at 0.5).
    const measured = [fileRecall, changedFileRecall, testRecall, precisionAtK].filter((value) => value !== null);
    let baseScore;
    if (measured.length > 0) {
        let total = 0;
        for (const value of measured) {
            total += value;
        }
        baseScore = total / measured.length;
    }
    else {
        baseScore = failures.length === 0 ? 1 : 0;
    }
    const penalty = Math.min(0.5, failures.length * 0.1);
    const score = scored ? Math.max(0, baseScore - penalty) : 0;

    const metrics = {
        fileRecall,
        changedFileRecall,
        testRecall,
        precisionAtK,
        selectedToBaselineRatio,
        textChars: result.text.length,
        dataBytes: structuredDataBytes,
        refreshed,
        structuredBytes: structuredDataBytes,
        toolHopsToEditReady,
        verificationProvenancePresent
    };
    const comparison = {
        baselineFileRecall,
        baselineTestRecall,
        baselinePrecisionAtK,
        fileRecallDelta: delta(fileRecall, baselineFileRecall),
        testRecallDelta: delta(testRecall, baselineTestRecall),
        precisionDelta: delta(precisionAtK, baselinePrecisionAtK),
        codexaFileCount: files.length,
        baselineFileCount: baselineFiles.length,
        codexaTestCount: tests.length,
        baselineTestCount: baselineTests.length,
        codexaToBaselineFileRatio
    };
    const calibration = {
        falsePositiveFiles,
        missingExpectedFiles,
        missingExpectedChangedFiles,
        missingExpectedTests,
        heuristicHeavy,
        broadRetrievalFailure,
        rawRgBetter,
        rawRgBetterReason,
        overBudgetedOutput,
        overBudgetedStructuredData,
        postEditOutcome
    };
    return {
        id: scenario.id,
        suite: scenario.suite,
        description: redactSample ? `External historical task: ${scenario.id}` : scenario.description,
        passed: failures.length === 0,
        score,
        scored,
        baselineLines,
        baselineFiles,
        baselineTests,
        files,
        plannedFiles,
        tests,
        metrics,
        comparison,
        calibration,
        failures,
        // Only the first 14 lines of the text are kept as a sample.
        sample: redactSample ? "[redacted for external historical task pack]" : result.text.split(/\r?\n/).slice(0, 14).join("\n")
    };
}
162
/**
 * Aggregates the per-scenario `calibration` records into one summary.
 *
 * List-valued fields are concatenated across scenarios and de-duplicated in
 * first-seen order; flag-valued fields become lists of the scenario ids where
 * the flag is truthy. Post-edit verdicts are tallied per verdict string.
 *
 * @param {Array<object>} scenarios - Scenario records; `id` and `calibration`
 *   are read from each.
 * @returns {object} Summary of calibration findings across all scenarios.
 */
export function calibrationSummary(scenarios) {
    const postEditVerdicts = {};
    const outcomeRecords = [];
    const postEditRequiredChecksMissingScenarios = [];
    for (const { id, calibration } of scenarios) {
        const outcome = calibration.postEditOutcome;
        const verdict = outcome?.verdict;
        if (verdict) {
            postEditVerdicts[verdict] = (postEditVerdicts[verdict] ?? 0) + 1;
        }
        const outcomePath = outcome?.path;
        if (outcomePath) {
            outcomeRecords.push(outcomePath);
        }
        if ((outcome?.requiredChecksMissing ?? 0) > 0) {
            postEditRequiredChecksMissingScenarios.push(id);
        }
    }
    // Concatenate one list per scenario, then drop repeats.
    const mergeLists = (pick) => uniqueInOrder(scenarios.flatMap((scenario) => pick(scenario.calibration)));
    // Ids of the scenarios whose calibration record satisfies the predicate.
    const idsWhere = (matches) => scenarios
        .filter((scenario) => matches(scenario.calibration))
        .map((scenario) => scenario.id);
    return {
        falsePositiveFiles: mergeLists((calibration) => calibration.falsePositiveFiles),
        missingExpectedChangedFiles: mergeLists((calibration) => calibration.missingExpectedChangedFiles),
        missingExpectedTests: mergeLists((calibration) => calibration.missingExpectedTests),
        heuristicHeavyScenarios: idsWhere((calibration) => calibration.heuristicHeavy),
        broadRetrievalFailures: idsWhere((calibration) => calibration.broadRetrievalFailure),
        rawRgBetterScenarios: idsWhere((calibration) => calibration.rawRgBetter),
        overBudgetedOutputScenarios: idsWhere((calibration) => calibration.overBudgetedOutput),
        overBudgetedStructuredDataScenarios: idsWhere((calibration) => calibration.overBudgetedStructuredData),
        postEditMissedTests: mergeLists((calibration) => calibration.postEditOutcome?.missedLikelyTests ?? []),
        postEditModifiedPublicSymbols: mergeLists((calibration) => calibration.postEditOutcome?.modifiedPublicSymbols ?? []),
        postEditCalibrationLabels: mergeLists((calibration) => calibration.postEditOutcome?.calibrationLabels ?? []),
        postEditRequiredChecksMissingScenarios: uniqueInOrder(postEditRequiredChecksMissingScenarios),
        postEditAggregateCoverageScenarios: idsWhere((calibration) => {
            const outcome = calibration.postEditOutcome;
            if (!outcome?.calibrationLabels.includes("aggregate-command-coverage")) {
                return false;
            }
            // The label only counts when at least one command or envelope was recorded.
            return outcome.ranCommands.length > 0 || outcome.commandEnvelopes.length > 0;
        }),
        postEditVerificationMissingScenarios: idsWhere((calibration) => (calibration.postEditOutcome?.verificationMissing ?? 0) > 0),
        postEditVerdicts,
        outcomeRecords: uniqueInOrder(outcomeRecords)
    };
}
203
/**
 * Test helper: scores an already-produced result against an oracle by wrapping
 * it in a minimal synthetic scenario and delegating to `scoreScenario` with
 * `failOnRefresh` set to `true`.
 *
 * @param {object} result - Query result to score.
 * @param {object} oracle - Oracle to score against.
 * @param {{command: string, output: string}} [baseline] - Optional single
 *   baseline run; when absent, `null` is passed as the baseline list.
 * @returns {object} Whatever `scoreScenario` returns for the synthetic scenario.
 */
export function scoreStructuredOutputForTest(result, oracle, baseline) {
    const syntheticScenario = {
        id: "test",
        suite: "synthetic",
        description: "test",
        repoRoot: "",
        // The scenario's query simply resolves to the result supplied by the caller.
        codexa: async () => result,
        baselineCommand: baseline?.command,
        oracle
    };
    const baselineRuns = baseline ? [baseline] : null;
    return scoreScenario(syntheticScenario, result, baselineRuns, true);
}
214
/**
 * Narrows an arbitrary value to a plain (non-array) object.
 *
 * @param {unknown} data - Value to inspect.
 * @returns {object|undefined} `data` itself when it is a non-null, non-array
 *   object; otherwise `undefined`.
 */
function evalScoringData(data) {
    // `typeof null` is "object", so null has to be excluded explicitly.
    if (data === null || typeof data !== "object" || Array.isArray(data)) {
        return undefined;
    }
    return data;
}
217
/**
 * Extracts the ordered, de-duplicated list of file paths from a result payload.
 *
 * The first array-valued source wins, checked in this order: `selectedFiles`,
 * `readFirstFiles`, `files`, `fanout.readFirst`, `affectedFiles`, `focusFiles`,
 * `nextReads`, `changedFiles`. When none of them is an array, the `diff` and
 * `plan` sub-objects are searched recursively and their results are merged.
 *
 * Entries of `fanout.readFirst`, `affectedFiles` and `focusFiles` may wrap the
 * path under a `file` property. A `null`/`undefined` entry in those lists is
 * handed to `filePathFromUnknown` as-is instead of raising a TypeError, so one
 * malformed entry no longer aborts scoring of the whole scenario.
 *
 * @param {unknown} data - Result payload of unknown shape.
 * @returns {string[]} File paths in first-seen order; empty when `data` is not
 *   a plain object or no source is present.
 */
export function filesFromData(data) {
    const record = evalScoringData(data);
    if (!record) {
        return [];
    }
    // Lists whose entries are handed to filePathFromUnknown directly.
    const direct = (list) => list.flatMap(filePathFromUnknown);
    // Lists whose entries may wrap the path under `file`; `?.` tolerates null entries.
    const wrapped = (list) => list.flatMap((entry) => filePathFromUnknown(entry?.file ?? entry));
    const fanout = record.fanout;
    const fanoutReadFirst = fanout && typeof fanout === "object" ? fanout.readFirst : undefined;
    // Ordered by priority: the first source that is an array decides the result.
    const sources = [
        [record.selectedFiles, direct],
        [record.readFirstFiles, direct],
        [record.files, direct],
        [fanoutReadFirst, wrapped],
        [record.affectedFiles, wrapped],
        [record.focusFiles, wrapped],
        [record.nextReads, direct],
        [record.changedFiles, direct]
    ];
    for (const [list, extract] of sources) {
        if (Array.isArray(list)) {
            return uniqueInOrder(extract(list));
        }
    }
    const nested = ["diff", "plan"].flatMap((key) => filesFromData(record[key]));
    return uniqueInOrder(nested);
}
252
/**
 * Extracts the ordered, de-duplicated list of files a result plans to edit.
 *
 * The first array found wins, checked in this order: `plannedEditTargets`,
 * `snapshot.plannedEditTargets`, `reviewTargets`. Otherwise the `focus`,
 * `context`, `diff` and `plan` sub-objects are searched recursively and merged.
 *
 * @param {unknown} data - Result payload of unknown shape.
 * @returns {string[]} Planned file paths in first-seen order; empty when `data`
 *   is not a plain object or nothing is found.
 */
function plannedFilesFromData(data) {
    const record = evalScoringData(data);
    if (!record) {
        return [];
    }
    const snapshot = record.snapshot;
    const snapshotTargets = snapshot && typeof snapshot === "object" ? snapshot.plannedEditTargets : undefined;
    for (const targets of [record.plannedEditTargets, snapshotTargets, record.reviewTargets]) {
        if (Array.isArray(targets)) {
            return uniqueInOrder(targets.flatMap(filePathFromUnknown));
        }
    }
    // Nothing at this level: gather from the known sub-objects, in a fixed order.
    const gathered = [];
    for (const key of ["focus", "context", "diff", "plan"]) {
        for (const file of plannedFilesFromData(record[key])) {
            gathered.push(file);
        }
    }
    return uniqueInOrder(gathered);
}
271
/**
 * Reads the `callTrace` list from a result payload.
 *
 * @param {unknown} data - Payload to inspect.
 * @returns {string[]} The string entries of `callTrace`, or `[]` when the
 *   payload is not a record or has no array-valued `callTrace`.
 */
function callTraceFromData(data) {
    const trace = evalScoringData(data)?.callTrace;
    if (!Array.isArray(trace)) {
        return [];
    }
    return trace.filter((step) => typeof step === "string");
}
281
/**
 * Collects test paths from a result payload, de-duplicated in first-seen
 * order.
 *
 * Sources, concatenated in this order: `tests` (strings or `{ path }`
 * objects), the string entries of every `workflows[].tests` array, and the
 * recursive results for the `diff` and `plan` sub-records.
 *
 * @param {unknown} data - Payload to inspect.
 * @returns {string[]} Unique test paths.
 */
export function testsFromData(data) {
    const record = evalScoringData(data);
    if (!record) {
        return [];
    }
    const collected = [];
    if (Array.isArray(record.tests)) {
        for (const entry of record.tests) {
            if (typeof entry === "string") {
                collected.push(entry);
            }
            else if (entry && typeof entry === "object" && typeof entry.path === "string") {
                collected.push(entry.path);
            }
        }
    }
    if (Array.isArray(record.workflows)) {
        for (const workflow of record.workflows) {
            if (!workflow || typeof workflow !== "object" || !Array.isArray(workflow.tests)) {
                continue;
            }
            for (const entry of workflow.tests) {
                if (typeof entry === "string") {
                    collected.push(entry);
                }
            }
        }
    }
    for (const key of ["diff", "plan"]) {
        for (const testPath of testsFromData(record[key])) {
            collected.push(testPath);
        }
    }
    return uniqueInOrder(collected);
}
309
/**
 * Finds the quality summary of a result payload.
 *
 * A truthy `quality` property on the record is normalised into
 * `{ level, counts }`: a non-string level becomes "unknown" and every count
 * goes through `numericCount`. Otherwise the `focus`, `context`, `diff` and
 * `plan` sub-records are searched in that order and the first hit is returned.
 *
 * @param {unknown} data - Payload to inspect.
 * @returns {{level: string, counts: object}|null} Normalised quality, or
 *   `null` when none is found.
 */
function qualityFromData(data) {
    const record = evalScoringData(data);
    if (!record) {
        return null;
    }
    const quality = record.quality;
    if (!quality) {
        for (const key of ["focus", "context", "diff", "plan"]) {
            const nested = qualityFromData(record[key]);
            if (nested) {
                return nested;
            }
        }
        return null;
    }
    const rawCounts = quality.counts ?? {};
    const level = typeof quality.level === "string" ? quality.level : "unknown";
    const counts = {};
    for (const tier of ["authoritative", "derived", "heuristic", "fallback"]) {
        counts[tier] = numericCount(rawCounts[tier]);
    }
    return { level, counts };
}
335
/**
 * Extracts a normalised post-edit outcome from a result payload.
 *
 * Fields are read from `record.outcome` when that is a plain record, falling
 * back to the record itself field by field. An outcome is produced when an
 * `outcome` sub-record exists or when any outcome-related field carries data.
 * Otherwise the `review`, `postEdit`, `post_edit` and `plan` sub-records are
 * searched in that order.
 *
 * @param {unknown} data - Payload to inspect.
 * @returns {object|undefined} The normalised outcome, or `undefined` when
 *   nothing outcome-like is found.
 */
function postEditOutcomeFromData(data) {
    const record = evalScoringData(data);
    if (!record) {
        return undefined;
    }
    const candidate = evalScoringData(record.outcome) ?? record;
    // Prefer the outcome sub-record, fall back to the enclosing record.
    const pick = (key) => candidate[key] ?? record[key];
    const textOrUndefined = (value) => (typeof value === "string" ? value : undefined);
    const hasStatus = (status) => (entry) => entry && typeof entry === "object" && entry.status === status;

    const testsNotRun = extractPaths(pick("testsNotRun"));
    const missedLikelyTests = extractPaths(pick("missedLikelyTests"));
    const modifiedPublicSymbols = extractStringArray(pick("modifiedPublicSymbols"));
    const workflowChecks = extractArray(pick("workflowChecks"));
    const dependencyChecks = extractArray(pick("dependencyChecks"));
    const requiredChecksMissing = workflowChecks.concat(dependencyChecks).filter(hasStatus("missing")).length;
    const ranCommands = extractStringArray(pick("ranCommands"));
    const commandEnvelopes = extractCommandEnvelopes(pick("commandEnvelopes"));
    const verificationProvenance = extractVerificationProvenance(pick("verificationProvenance"));
    const verificationLedger = extractArray(pick("verificationLedger"));
    const ledgerCount = (status) => verificationLedger.filter(hasStatus(status)).length;

    const populatedLists = [
        testsNotRun,
        missedLikelyTests,
        modifiedPublicSymbols,
        workflowChecks,
        dependencyChecks,
        ranCommands,
        commandEnvelopes,
        verificationLedger
    ];
    const looksLikeOutcome = candidate !== record
        || populatedLists.some((list) => list.length > 0)
        || Boolean(verificationProvenance)
        || ["verdict", "outcomeId", "path"].some((key) => typeof candidate[key] === "string")
        || ["driftReasons", "calibrationLabels"].some((key) => Array.isArray(candidate[key]));

    if (looksLikeOutcome) {
        return {
            verdict: textOrUndefined(candidate.verdict),
            outcomeId: textOrUndefined(candidate.outcomeId),
            path: textOrUndefined(candidate.path),
            driftReasons: extractStringArray(candidate.driftReasons),
            calibrationLabels: extractStringArray(candidate.calibrationLabels),
            testsNotRun,
            missedLikelyTests,
            modifiedPublicSymbols,
            requiredChecksMissing,
            ranCommands,
            commandEnvelopes,
            verificationProvenance,
            verificationCovered: ledgerCount("covered"),
            verificationMissing: ledgerCount("missing"),
            verificationWaived: ledgerCount("waived"),
            verificationNotApplicable: ledgerCount("not_applicable")
        };
    }
    for (const key of ["review", "postEdit", "post_edit", "plan"]) {
        const nested = postEditOutcomeFromData(record[key]);
        if (nested) {
            return nested;
        }
    }
    return undefined;
}
394
/**
 * Derives the "tool hops to edit-ready" figure from a result payload.
 *
 * A record counts as edit-ready when `actionability` is "edit_ready",
 * `packetVerdict` is "edit-ready", or `editReadiness.editable` is `true`.
 * For an edit-ready record the value depends on `mode`: 2 for
 * "session_context"/"focus_brief", 1 for "task_brief"/"context_pack", else 0.
 * Records that are not edit-ready defer to their `focus`, `context`, `diff`,
 * `plan` and `data` sub-records, in that order.
 *
 * @param {unknown} data - Payload to inspect.
 * @returns {number|null} Hop count, or `null` when no edit-ready record is found.
 */
function toolHopsToEditReadyFromData(data) {
    const record = evalScoringData(data);
    if (!record) {
        return null;
    }
    const readiness = record.editReadiness;
    const editReady = record.actionability === "edit_ready"
        || record.packetVerdict === "edit-ready"
        || Boolean(readiness && typeof readiness === "object" && readiness.editable === true);
    if (!editReady) {
        for (const key of ["focus", "context", "diff", "plan", "data"]) {
            const nested = toolHopsToEditReadyFromData(record[key]);
            if (nested !== null) {
                return nested;
            }
        }
        return null;
    }
    switch (record.mode) {
        case "session_context":
        case "focus_brief":
            return 2;
        case "task_brief":
        case "context_pack":
            return 1;
        default:
            return 0;
    }
}
417
/**
 * Finds verification provenance on a payload or, failing that, on its nested
 * `data` chain.
 *
 * @param {unknown} data - Payload to inspect.
 * @returns {object|undefined} The normalised provenance from the first level
 *   that has one, or `undefined`.
 */
function verificationProvenanceFromData(data) {
    const record = evalScoringData(data);
    if (!record) {
        return undefined;
    }
    const own = extractVerificationProvenance(record.verificationProvenance);
    if (own !== undefined && own !== null) {
        return own;
    }
    return verificationProvenanceFromData(record.data);
}
424
/**
 * Normalises a verification-provenance record.
 *
 * @param {unknown} value - Candidate provenance value.
 * @returns {object|undefined} An object with the five known provenance
 *   fields, each kept only when it has the expected primitive type, or
 *   `undefined` when `value` is not a plain record.
 */
function extractVerificationProvenance(value) {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
        return undefined;
    }
    const textField = (key) => (typeof value[key] === "string" ? value[key] : undefined);
    return {
        schemaVersion: typeof value.schemaVersion === "number" ? value.schemaVersion : undefined,
        commandCoverageClassifier: textField("commandCoverageClassifier"),
        commandCoverageClassifierVersion: textField("commandCoverageClassifierVersion"),
        commandEnvelopeRulesetVersion: textField("commandEnvelopeRulesetVersion"),
        verificationLedgerVersion: textField("verificationLedgerVersion")
    };
}
437
/**
 * Coerces an unknown value to a usable count.
 *
 * @param {unknown} value - Candidate count.
 * @returns {number} `value` when it is a finite number, otherwise 0.
 */
function numericCount(value) {
    // Number.isFinite never coerces, so it is already false for non-numbers.
    if (Number.isFinite(value)) {
        return value;
    }
    return 0;
}
440
/**
 * Turns an unknown entry into zero or one file path, shaped for `flatMap`.
 *
 * @param {unknown} value - A path string, an object with a string `path`, or
 *   anything else.
 * @returns {string[]} `[path]` when a path can be read, otherwise `[]`.
 */
function filePathFromUnknown(value) {
    if (typeof value === "string") {
        return [value];
    }
    const nestedPath = value && typeof value === "object" ? value.path : undefined;
    return typeof nestedPath === "string" ? [nestedPath] : [];
}
449
/**
 * Keeps only the string entries of an array.
 *
 * @param {unknown} value - Candidate array.
 * @returns {string[]} A new array of the string entries, or `[]` when
 *   `value` is not an array.
 */
function extractStringArray(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const strings = [];
    for (const entry of value) {
        if (typeof entry === "string") {
            strings.push(entry);
        }
    }
    return strings;
}
452
/**
 * Extracts path strings from a list whose entries are either strings or
 * objects carrying a string `path`.
 *
 * @param {unknown} value - Candidate list.
 * @returns {string[]} The paths in input order; unrecognised entries are
 *   skipped and a non-array input yields `[]`.
 */
function extractPaths(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const paths = [];
    for (const entry of value) {
        if (typeof entry === "string") {
            paths.push(entry);
            continue;
        }
        if (entry && typeof entry === "object" && typeof entry.path === "string") {
            paths.push(entry.path);
        }
    }
    return paths;
}
466
/**
 * Returns the value when it is an array, otherwise an empty array.
 *
 * @param {unknown} value - Candidate array.
 * @returns {Array} `value` itself (not a copy) or a fresh `[]`.
 */
function extractArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return [];
}
469
/**
 * Normalises a list of command envelopes.
 *
 * Entries that are not plain records are dropped. For the rest, each known
 * field is kept only when it has the expected primitive type (`undefined`
 * otherwise) and `args` is reduced to its string entries.
 *
 * @param {unknown} value - Candidate envelope list.
 * @returns {object[]} Normalised envelopes; `[]` for a non-array input.
 */
function extractCommandEnvelopes(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const ofType = (raw, type) => (typeof raw === type ? raw : undefined);
    const envelopes = [];
    for (const entry of value) {
        if (!entry || typeof entry !== "object" || Array.isArray(entry)) {
            continue;
        }
        envelopes.push({
            command: ofType(entry.command, "string"),
            cwd: ofType(entry.cwd, "string"),
            packageManager: ofType(entry.packageManager, "string"),
            workspace: ofType(entry.workspace, "string"),
            packageRoot: ofType(entry.packageRoot, "string"),
            packageName: ofType(entry.packageName, "string"),
            scriptName: ofType(entry.scriptName, "string"),
            args: extractStringArray(entry.args),
            exitCode: ofType(entry.exitCode, "number"),
            durationMs: ofType(entry.durationMs, "number"),
            source: ofType(entry.source, "string"),
            scopeStatus: ofType(entry.scopeStatus, "string"),
            classifierVersion: ofType(entry.classifierVersion, "string")
        });
    }
    return envelopes;
}
491
/**
 * Fraction of expected items that appear in the actual list.
 *
 * @param {Array} actual - Items that were produced.
 * @param {Array} expected - Items that should have been produced.
 * @returns {number} Hits divided by `expected.length`; 1 when nothing is expected.
 */
function recall(actual, expected) {
    const total = expected.length;
    if (total === 0) {
        return 1;
    }
    const produced = new Set(actual);
    const hits = expected.reduce((count, item) => (produced.has(item) ? count + 1 : count), 0);
    return hits / total;
}
498
/**
 * Fraction of actual items that were expected.
 *
 * @param {Array} actual - Items that were produced.
 * @param {Array} expected - Items that should have been produced.
 * @returns {number} Hits divided by `actual.length`. With no actual items the
 *   result is 1 when nothing was expected and 0 otherwise.
 */
function precision(actual, expected) {
    const produced = actual.length;
    if (produced === 0) {
        return expected.length === 0 ? 1 : 0;
    }
    const wanted = new Set(expected);
    const hits = actual.reduce((count, item) => (wanted.has(item) ? count + 1 : count), 0);
    return hits / produced;
}
505
/**
 * Difference between a current metric and its baseline.
 *
 * @param {number|null} current - Current value.
 * @param {number|null} baseline - Baseline value.
 * @returns {number|null} `current - baseline`, or `null` when either side is `null`.
 */
function delta(current, baseline) {
    const comparable = current !== null && baseline !== null;
    return comparable ? current - baseline : null;
}
511
/**
 * Removes duplicates while keeping the first occurrence of each value in its
 * original position.
 *
 * @param {Iterable} values - Values to de-duplicate.
 * @returns {Array} A new array of the distinct values.
 */
export function uniqueInOrder(values) {
    // A Set iterates in insertion order, so spreading it keeps first-seen order.
    return [...new Set(values)];
}
522
/**
 * Size in UTF-8 bytes of a value's JSON serialisation.
 *
 * @param {unknown} value - Value to measure.
 * @returns {number} The byte length; 0 when `JSON.stringify` yields no text
 *   (for example `undefined`); `Number.MAX_SAFE_INTEGER` when serialisation throws.
 */
function serializedByteLength(value) {
    let json;
    try {
        json = JSON.stringify(value);
    }
    catch {
        return Number.MAX_SAFE_INTEGER;
    }
    if (!json) {
        return 0;
    }
    return Buffer.byteLength(json, "utf8");
}
531
/**
 * Builds the failed-scenario result reported when a baseline command fails.
 *
 * The result is a zero-score, not-passed record with empty file/test lists,
 * null ratio metrics, and every expected file/test from the oracle listed as
 * missing. When `scenario.privatePack` is set, the description, failure text
 * and sample are replaced with redacted placeholders.
 *
 * @param {object} scenario - Scenario definition; reads `id`, `suite`,
 *   `description`, `scored`, `privatePack` and `oracle`.
 * @param {unknown} error - The failure; an `Error`'s message is used,
 *   anything else is stringified.
 * @returns {object} The scenario result record.
 */
export function baselineFailureScenario(scenario, error) {
    const message = error instanceof Error ? error.message : String(error);
    const redactPrivate = scenario.privatePack ?? false;
    const description = redactPrivate
        ? `External historical task: ${scenario.id}`
        : scenario.description;
    const metrics = {
        fileRecall: null,
        changedFileRecall: null,
        testRecall: null,
        precisionAtK: null,
        selectedToBaselineRatio: null,
        textChars: 0,
        dataBytes: 0,
        refreshed: false,
        structuredBytes: 0,
        toolHopsToEditReady: null,
        verificationProvenancePresent: false
    };
    const comparison = {
        baselineFileRecall: null,
        baselineTestRecall: null,
        baselinePrecisionAtK: null,
        fileRecallDelta: null,
        testRecallDelta: null,
        precisionDelta: null,
        codexaFileCount: 0,
        baselineFileCount: 0,
        codexaTestCount: 0,
        baselineTestCount: 0,
        codexaToBaselineFileRatio: null
    };
    const calibration = {
        falsePositiveFiles: [],
        missingExpectedFiles: scenario.oracle.expectedFiles ?? [],
        missingExpectedChangedFiles: scenario.oracle.expectedChangedFiles ?? [],
        missingExpectedTests: scenario.oracle.expectedTests ?? [],
        heuristicHeavy: false,
        broadRetrievalFailure: false,
        rawRgBetter: false,
        overBudgetedOutput: false,
        overBudgetedStructuredData: false
    };
    // The command text and error message are only exposed for non-private packs.
    let failureText;
    if (redactPrivate) {
        failureText = "baseline command failed for external historical task pack: details redacted";
    }
    else {
        failureText = `baseline command failed: ${formatBaselineCommands(scenario)}; ${message}`;
    }
    return {
        id: scenario.id,
        suite: scenario.suite,
        description,
        passed: false,
        score: 0,
        scored: scenario.scored ?? true,
        baselineLines: null,
        baselineFiles: [],
        baselineTests: [],
        files: [],
        plannedFiles: [],
        tests: [],
        metrics,
        comparison,
        calibration,
        failures: [failureText],
        sample: redactPrivate ? "[redacted for external historical task pack]" : ""
    };
}
592
/**
 * Extracts file paths from the text output of a baseline command.
 *
 * For a `git ... status` command, each non-blank line is trimmed, its first
 * two characters are dropped as the status code, a rename `old -> new` keeps
 * the new side, and one surrounding double quote on each end is stripped.
 * For any other command, lines are expected to look like `path:line` or
 * `path:line:...` and the part before the line number is taken.
 *
 * @param {string[]} command - The command and its arguments.
 * @param {string} output - Captured command output.
 * @returns {string[]} Unique, normalised paths in order of first appearance.
 */
function baselineFilesFromOutput(command, output) {
    const isGitStatus = command[0] === "git" && command.includes("status");
    const paths = [];
    for (const line of output.split(/\r?\n/)) {
        let candidate = "";
        if (isGitStatus) {
            const trimmed = line.trim();
            if (!trimmed) {
                continue;
            }
            const rawPath = trimmed.slice(2).trim();
            const target = rawPath.includes(" -> ")
                ? rawPath.split(" -> ").at(-1) ?? rawPath
                : rawPath;
            candidate = normalizeBaselinePath(target.replace(/^"|"$/g, ""));
        }
        else {
            const match = /^(.+?):\d+(?::|$)/.exec(line);
            if (match?.[1]) {
                candidate = normalizeBaselinePath(match[1]);
            }
        }
        // Empty results are discarded before de-duplication.
        if (candidate) {
            paths.push(candidate);
        }
    }
    return uniqueInOrder(paths);
}
615
/**
 * Strips a single leading "./" from a path.
 *
 * @param {string} filePath - Path as printed by a baseline command.
 * @returns {string} The path without the leading "./"; unchanged otherwise.
 */
function normalizeBaselinePath(filePath) {
    if (filePath.startsWith("./")) {
        return filePath.slice(2);
    }
    return filePath;
}
618
+ //# sourceMappingURL=scoring.js.map