voratiq 0.1.0-beta.2 → 0.1.0-beta.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. package/README.md +41 -29
  2. package/dist/agents/launch/chat.d.ts +23 -0
  3. package/dist/agents/launch/chat.js +44 -0
  4. package/dist/agents/launch/environment.d.ts +8 -0
  5. package/dist/{commands/run/agents/workspace-prep.js → agents/launch/environment.js} +5 -27
  6. package/dist/agents/launch/prompt.d.ts +6 -0
  7. package/dist/agents/launch/prompt.js +12 -0
  8. package/dist/agents/launch/provider-state.d.ts +39 -0
  9. package/dist/agents/launch/provider-state.js +103 -0
  10. package/dist/agents/runtime/auth.d.ts +27 -0
  11. package/dist/agents/runtime/auth.js +72 -0
  12. package/dist/agents/runtime/chat.d.ts +5 -0
  13. package/dist/agents/runtime/chat.js +7 -0
  14. package/dist/agents/runtime/errors.d.ts +27 -0
  15. package/dist/agents/runtime/errors.js +51 -0
  16. package/dist/{commands/run/agents → agents/runtime}/failures.d.ts +0 -1
  17. package/dist/agents/runtime/failures.js +136 -0
  18. package/dist/agents/runtime/harness.d.ts +2 -0
  19. package/dist/agents/runtime/harness.js +119 -0
  20. package/dist/{commands/run/agents/sandbox-launcher.d.ts → agents/runtime/launcher.d.ts} +18 -6
  21. package/dist/{commands/run/agents/sandbox-launcher.js → agents/runtime/launcher.js} +17 -39
  22. package/dist/{commands/run/agents/workspace-prep.d.ts → agents/runtime/manifest.d.ts} +6 -6
  23. package/dist/agents/runtime/manifest.js +34 -0
  24. package/dist/agents/runtime/policy.d.ts +32 -0
  25. package/dist/agents/runtime/policy.js +240 -0
  26. package/dist/agents/runtime/registry.d.ts +4 -0
  27. package/dist/agents/runtime/registry.js +54 -0
  28. package/dist/{commands/run → agents/runtime}/sandbox.d.ts +8 -2
  29. package/dist/{commands/run → agents/runtime}/sandbox.js +28 -67
  30. package/dist/agents/runtime/shim/run-agent-shim.d.ts +1 -0
  31. package/dist/agents/runtime/shim/run-agent-shim.js +276 -0
  32. package/dist/agents/runtime/types.d.ts +91 -0
  33. package/dist/{commands/run/agents → agents/runtime}/watchdog.d.ts +4 -3
  34. package/dist/{commands/run/agents → agents/runtime}/watchdog.js +155 -26
  35. package/dist/auth/providers/codex.js +7 -2
  36. package/dist/auth/providers/gemini.js +14 -6
  37. package/dist/auth/providers/types.d.ts +1 -0
  38. package/dist/auth/providers/utils.d.ts +0 -1
  39. package/dist/auth/providers/utils.js +1 -49
  40. package/dist/bin.js +369 -71
  41. package/dist/cli/apply.d.ts +4 -0
  42. package/dist/cli/apply.js +28 -9
  43. package/dist/cli/auto.d.ts +32 -0
  44. package/dist/cli/auto.js +232 -0
  45. package/dist/cli/contract.d.ts +328 -0
  46. package/dist/cli/contract.js +480 -0
  47. package/dist/cli/errors.d.ts +3 -0
  48. package/dist/cli/errors.js +21 -3
  49. package/dist/cli/init.d.ts +5 -0
  50. package/dist/cli/init.js +34 -6
  51. package/dist/cli/list.d.ts +6 -4
  52. package/dist/cli/list.js +39 -16
  53. package/dist/cli/mcp.d.ts +2 -0
  54. package/dist/cli/mcp.js +16 -0
  55. package/dist/cli/message.d.ts +28 -0
  56. package/dist/cli/message.js +147 -0
  57. package/dist/cli/operator-envelope.d.ts +180 -0
  58. package/dist/cli/operator-envelope.js +425 -0
  59. package/dist/cli/output.d.ts +15 -1
  60. package/dist/cli/output.js +153 -5
  61. package/dist/cli/prune.d.ts +7 -3
  62. package/dist/cli/prune.js +57 -12
  63. package/dist/cli/reduce.d.ts +29 -0
  64. package/dist/cli/reduce.js +211 -0
  65. package/dist/cli/root-launcher.d.ts +4 -0
  66. package/dist/cli/root-launcher.js +15 -0
  67. package/dist/cli/run.d.ts +27 -1
  68. package/dist/cli/run.js +108 -16
  69. package/dist/cli/spec.d.ts +31 -0
  70. package/dist/cli/spec.js +180 -0
  71. package/dist/cli/verify.d.ts +35 -0
  72. package/dist/cli/verify.js +297 -0
  73. package/dist/commands/apply/command.d.ts +2 -0
  74. package/dist/commands/apply/command.js +145 -6
  75. package/dist/commands/apply/errors.d.ts +43 -4
  76. package/dist/commands/apply/errors.js +100 -22
  77. package/dist/commands/apply/types.d.ts +2 -1
  78. package/dist/commands/auto/command.d.ts +145 -0
  79. package/dist/commands/auto/command.js +433 -0
  80. package/dist/commands/auto/errors.d.ts +19 -0
  81. package/dist/commands/auto/errors.js +19 -0
  82. package/dist/commands/auto/validation.d.ts +14 -0
  83. package/dist/commands/auto/validation.js +90 -0
  84. package/dist/commands/fetch.d.ts +2 -2
  85. package/dist/commands/fetch.js +4 -4
  86. package/dist/commands/init/agents.d.ts +2 -1
  87. package/dist/commands/init/agents.js +66 -63
  88. package/dist/commands/init/command.js +300 -16
  89. package/dist/commands/init/types.d.ts +18 -7
  90. package/dist/commands/interactive/lifecycle.d.ts +15 -0
  91. package/dist/commands/interactive/lifecycle.js +141 -0
  92. package/dist/commands/list/command.d.ts +10 -3
  93. package/dist/commands/list/command.js +597 -40
  94. package/dist/commands/message/command.d.ts +23 -0
  95. package/dist/commands/message/command.js +215 -0
  96. package/dist/commands/message/errors.d.ts +9 -0
  97. package/dist/commands/message/errors.js +20 -0
  98. package/dist/commands/message/lifecycle.d.ts +14 -0
  99. package/dist/commands/message/lifecycle.js +128 -0
  100. package/dist/commands/prune/command.d.ts +2 -1
  101. package/dist/commands/prune/command.js +61 -10
  102. package/dist/commands/prune/errors.d.ts +1 -1
  103. package/dist/commands/prune/errors.js +5 -5
  104. package/dist/commands/prune/types.d.ts +21 -0
  105. package/dist/commands/reduce/command.d.ts +26 -0
  106. package/dist/commands/reduce/command.js +145 -0
  107. package/dist/commands/reduce/errors.d.ts +17 -0
  108. package/dist/commands/reduce/errors.js +32 -0
  109. package/dist/commands/reduce/targets.d.ts +11 -0
  110. package/dist/commands/reduce/targets.js +271 -0
  111. package/dist/commands/root-launcher/command.d.ts +31 -0
  112. package/dist/commands/root-launcher/command.js +233 -0
  113. package/dist/commands/run/command.d.ts +6 -1
  114. package/dist/commands/run/command.js +70 -40
  115. package/dist/commands/run/lifecycle.d.ts +7 -5
  116. package/dist/commands/run/lifecycle.js +45 -23
  117. package/dist/commands/run/record-init.d.ts +4 -1
  118. package/dist/commands/run/record-init.js +5 -2
  119. package/dist/commands/run/shim/run-agent-shim.d.ts +2 -1
  120. package/dist/commands/run/shim/run-agent-shim.js +4 -219
  121. package/dist/commands/run/validation.d.ts +2 -3
  122. package/dist/commands/run/validation.js +54 -25
  123. package/dist/commands/shared/max-parallel.d.ts +5 -0
  124. package/dist/commands/shared/max-parallel.js +15 -0
  125. package/dist/commands/shared/preview.d.ts +10 -0
  126. package/dist/commands/shared/preview.js +60 -0
  127. package/dist/commands/shared/resolve-reduction-competitors.d.ts +15 -0
  128. package/dist/commands/shared/resolve-reduction-competitors.js +13 -0
  129. package/dist/commands/shared/resolve-stage-competitors.d.ts +19 -0
  130. package/dist/commands/shared/resolve-stage-competitors.js +171 -0
  131. package/dist/commands/shared/session-id.d.ts +1 -0
  132. package/dist/commands/shared/session-id.js +1 -0
  133. package/dist/commands/spec/command.d.ts +22 -0
  134. package/dist/commands/spec/command.js +330 -0
  135. package/dist/commands/spec/errors.d.ts +11 -0
  136. package/dist/commands/spec/errors.js +23 -0
  137. package/dist/commands/verify/agents.d.ts +8 -0
  138. package/dist/commands/verify/agents.js +29 -0
  139. package/dist/commands/verify/command.d.ts +23 -0
  140. package/dist/commands/verify/command.js +168 -0
  141. package/dist/commands/verify/lifecycle.d.ts +14 -0
  142. package/dist/commands/verify/lifecycle.js +229 -0
  143. package/dist/commands/verify/max-parallel.d.ts +7 -0
  144. package/dist/commands/verify/max-parallel.js +15 -0
  145. package/dist/commands/verify/targets.d.ts +18 -0
  146. package/dist/commands/verify/targets.js +420 -0
  147. package/dist/competition/command-adapter.d.ts +35 -0
  148. package/dist/competition/command-adapter.js +20 -0
  149. package/dist/competition/core.d.ts +41 -0
  150. package/dist/competition/core.js +181 -0
  151. package/dist/competition/shared/extra-context.d.ts +14 -0
  152. package/dist/competition/shared/extra-context.js +100 -0
  153. package/dist/competition/shared/preflight.d.ts +11 -0
  154. package/dist/competition/shared/preflight.js +39 -0
  155. package/dist/competition/shared/prompt-helpers.d.ts +16 -0
  156. package/dist/competition/shared/prompt-helpers.js +27 -0
  157. package/dist/competition/shared/prune.d.ts +1 -0
  158. package/dist/competition/shared/prune.js +4 -0
  159. package/dist/competition/shared/sandbox-policy.d.ts +9 -0
  160. package/dist/competition/shared/sandbox-policy.js +7 -0
  161. package/dist/competition/shared/teardown.d.ts +36 -0
  162. package/dist/competition/shared/teardown.js +101 -0
  163. package/dist/configs/agents/defaults.d.ts +31 -2
  164. package/dist/configs/agents/defaults.js +346 -30
  165. package/dist/configs/agents/errors.js +14 -11
  166. package/dist/configs/agents/loader.d.ts +11 -1
  167. package/dist/configs/agents/loader.js +71 -4
  168. package/dist/configs/agents/types.js +2 -2
  169. package/dist/configs/environment/detect.js +9 -4
  170. package/dist/configs/environment/errors.js +4 -4
  171. package/dist/configs/environment/loader.d.ts +1 -1
  172. package/dist/configs/environment/loader.js +3 -3
  173. package/dist/configs/orchestration/bootstrap.d.ts +16 -0
  174. package/dist/configs/orchestration/bootstrap.js +122 -0
  175. package/dist/configs/orchestration/errors.d.ts +15 -0
  176. package/dist/configs/orchestration/errors.js +28 -0
  177. package/dist/configs/orchestration/loader.d.ts +9 -0
  178. package/dist/configs/orchestration/loader.js +148 -0
  179. package/dist/configs/orchestration/types.d.ts +102 -0
  180. package/dist/configs/orchestration/types.js +65 -0
  181. package/dist/configs/sandbox/defaults.js +14 -4
  182. package/dist/configs/sandbox/errors.d.ts +1 -1
  183. package/dist/configs/sandbox/errors.js +1 -1
  184. package/dist/configs/sandbox/loader.js +6 -4
  185. package/dist/configs/sandbox/schemas.js +4 -2
  186. package/dist/configs/settings/loader.d.ts +7 -0
  187. package/dist/configs/settings/loader.js +81 -0
  188. package/dist/configs/settings/types.d.ts +47 -0
  189. package/dist/configs/settings/types.js +23 -0
  190. package/dist/configs/verification/errors.d.ts +11 -0
  191. package/dist/configs/verification/errors.js +21 -0
  192. package/dist/configs/verification/loader.d.ts +8 -0
  193. package/dist/configs/verification/loader.js +43 -0
  194. package/dist/configs/verification/methods.d.ts +35 -0
  195. package/dist/configs/verification/methods.js +41 -0
  196. package/dist/configs/verification/programmatic-defaults.d.ts +10 -0
  197. package/dist/configs/verification/programmatic-defaults.js +42 -0
  198. package/dist/configs/verification/programmatic-detect.d.ts +10 -0
  199. package/dist/configs/{evals/detect.js → verification/programmatic-detect.js} +22 -33
  200. package/dist/configs/verification/types.d.ts +49 -0
  201. package/dist/configs/verification/types.js +45 -0
  202. package/dist/contracts/list.d.ts +207 -0
  203. package/dist/contracts/list.js +154 -0
  204. package/dist/domain/interactive/model/types.d.ts +104 -0
  205. package/dist/domain/interactive/model/types.js +83 -0
  206. package/dist/domain/interactive/persistence/adapter.d.ts +39 -0
  207. package/dist/domain/interactive/persistence/adapter.js +144 -0
  208. package/dist/domain/interactive/prompt.d.ts +3 -0
  209. package/dist/domain/interactive/prompt.js +7 -0
  210. package/dist/domain/message/competition/adapter.d.ts +36 -0
  211. package/dist/domain/message/competition/adapter.js +197 -0
  212. package/dist/domain/message/competition/prompt.d.ts +8 -0
  213. package/dist/domain/message/competition/prompt.js +29 -0
  214. package/dist/domain/message/model/mutators.d.ts +17 -0
  215. package/dist/domain/message/model/mutators.js +107 -0
  216. package/dist/domain/message/model/types.d.ts +100 -0
  217. package/dist/domain/message/model/types.js +87 -0
  218. package/dist/domain/message/persistence/adapter.d.ts +43 -0
  219. package/dist/domain/message/persistence/adapter.js +124 -0
  220. package/dist/domain/reduce/competition/adapter.d.ts +42 -0
  221. package/dist/domain/reduce/competition/adapter.js +826 -0
  222. package/dist/domain/reduce/competition/output-validation.d.ts +4 -0
  223. package/dist/domain/reduce/competition/output-validation.js +18 -0
  224. package/dist/domain/reduce/competition/prompt.d.ts +10 -0
  225. package/dist/domain/reduce/competition/prompt.js +96 -0
  226. package/dist/domain/reduce/competition/reduction.d.ts +9 -0
  227. package/dist/domain/reduce/competition/reduction.js +32 -0
  228. package/dist/domain/reduce/model/types.d.ts +122 -0
  229. package/dist/domain/reduce/model/types.js +84 -0
  230. package/dist/domain/reduce/persistence/adapter.d.ts +43 -0
  231. package/dist/domain/reduce/persistence/adapter.js +126 -0
  232. package/dist/domain/run/competition/adapter.d.ts +30 -0
  233. package/dist/domain/run/competition/adapter.js +39 -0
  234. package/dist/domain/run/competition/agent-execution.d.ts +20 -0
  235. package/dist/domain/run/competition/agent-execution.js +45 -0
  236. package/dist/domain/run/competition/agent-preparation.d.ts +12 -0
  237. package/dist/domain/run/competition/agent-preparation.js +24 -0
  238. package/dist/domain/run/competition/agents/artifacts.d.ts +17 -0
  239. package/dist/domain/run/competition/agents/artifacts.js +173 -0
  240. package/dist/{commands/run → domain/run/competition}/agents/lifecycle.d.ts +3 -3
  241. package/dist/{commands/run → domain/run/competition}/agents/lifecycle.js +84 -64
  242. package/dist/domain/run/competition/agents/post-processing.d.ts +12 -0
  243. package/dist/domain/run/competition/agents/post-processing.js +4 -0
  244. package/dist/domain/run/competition/agents/preparation.js +64 -0
  245. package/dist/{commands/run → domain/run/competition}/agents/run-context.d.ts +9 -16
  246. package/dist/{commands/run → domain/run/competition}/agents/run-context.js +22 -70
  247. package/dist/{commands/run → domain/run/competition}/agents/types.d.ts +10 -13
  248. package/dist/domain/run/competition/agents/workspace.d.ts +21 -0
  249. package/dist/domain/run/competition/agents/workspace.js +47 -0
  250. package/dist/{commands/run → domain/run/competition}/errors.d.ts +8 -1
  251. package/dist/{commands/run → domain/run/competition}/errors.js +39 -9
  252. package/dist/{commands/run → domain/run/competition}/phases.d.ts +1 -2
  253. package/dist/domain/run/competition/phases.js +1 -0
  254. package/dist/domain/run/competition/prompt.d.ts +7 -0
  255. package/dist/domain/run/competition/prompt.js +27 -0
  256. package/dist/{commands/run → domain/run/competition}/reports.d.ts +5 -3
  257. package/dist/{commands/run → domain/run/competition}/reports.js +7 -19
  258. package/dist/domain/run/competition/termination-state.d.ts +4 -0
  259. package/dist/domain/run/competition/termination-state.js +12 -0
  260. package/dist/{records → domain/run/model}/enhanced.d.ts +6 -7
  261. package/dist/{records → domain/run/model}/enhanced.js +11 -11
  262. package/dist/{records → domain/run/model}/errors.d.ts +1 -1
  263. package/dist/{records → domain/run/model}/errors.js +5 -5
  264. package/dist/{records → domain/run/model}/mutators.d.ts +4 -3
  265. package/dist/{records → domain/run/model}/mutators.js +58 -36
  266. package/dist/domain/run/model/types.d.ts +376 -0
  267. package/dist/domain/run/model/types.js +192 -0
  268. package/dist/{records/persistence.d.ts → domain/run/persistence/adapter.d.ts} +9 -3
  269. package/dist/domain/run/persistence/adapter.js +340 -0
  270. package/dist/domain/run/persistence/error-mapping.d.ts +2 -0
  271. package/dist/domain/run/persistence/error-mapping.js +17 -0
  272. package/dist/domain/shared/lifecycle.d.ts +54 -0
  273. package/dist/domain/shared/lifecycle.js +165 -0
  274. package/dist/domain/shared/token-usage.d.ts +21 -0
  275. package/dist/domain/shared/token-usage.js +38 -0
  276. package/dist/domain/spec/competition/adapter.d.ts +31 -0
  277. package/dist/domain/spec/competition/adapter.js +196 -0
  278. package/dist/domain/spec/competition/prompt.d.ts +11 -0
  279. package/dist/domain/spec/competition/prompt.js +44 -0
  280. package/dist/domain/spec/model/output.d.ts +13 -0
  281. package/dist/domain/spec/model/output.js +36 -0
  282. package/dist/domain/spec/model/types.d.ts +98 -0
  283. package/dist/domain/spec/model/types.js +84 -0
  284. package/dist/domain/spec/persistence/adapter.d.ts +51 -0
  285. package/dist/domain/spec/persistence/adapter.js +140 -0
  286. package/dist/domain/verify/blinding/aliases.d.ts +7 -0
  287. package/dist/domain/verify/blinding/aliases.js +23 -0
  288. package/dist/domain/verify/competition/adapter.d.ts +54 -0
  289. package/dist/domain/verify/competition/adapter.js +444 -0
  290. package/dist/domain/verify/competition/artifacts.d.ts +6 -0
  291. package/dist/domain/verify/competition/artifacts.js +7 -0
  292. package/dist/domain/verify/competition/blinding.d.ts +24 -0
  293. package/dist/domain/verify/competition/blinding.js +109 -0
  294. package/dist/domain/verify/competition/finalize.d.ts +11 -0
  295. package/dist/domain/verify/competition/finalize.js +65 -0
  296. package/dist/domain/verify/competition/programmatic.d.ts +15 -0
  297. package/dist/domain/verify/competition/programmatic.js +352 -0
  298. package/dist/domain/verify/competition/prompt.d.ts +19 -0
  299. package/dist/domain/verify/competition/prompt.js +63 -0
  300. package/dist/domain/verify/competition/rubric.d.ts +23 -0
  301. package/dist/domain/verify/competition/rubric.js +77 -0
  302. package/dist/domain/verify/competition/shared-layout.d.ts +121 -0
  303. package/dist/domain/verify/competition/shared-layout.js +365 -0
  304. package/dist/domain/verify/competition/target.d.ts +47 -0
  305. package/dist/domain/verify/competition/target.js +1 -0
  306. package/dist/domain/verify/model/mutators.d.ts +16 -0
  307. package/dist/domain/verify/model/mutators.js +126 -0
  308. package/dist/domain/verify/model/types.d.ts +408 -0
  309. package/dist/domain/verify/model/types.js +289 -0
  310. package/dist/domain/verify/persistence/adapter.d.ts +43 -0
  311. package/dist/domain/verify/persistence/adapter.js +126 -0
  312. package/dist/domain/verify/programmatic/runner.d.ts +22 -0
  313. package/dist/domain/verify/programmatic/runner.js +209 -0
  314. package/dist/domain/verify/rubric-result.d.ts +28 -0
  315. package/dist/domain/verify/rubric-result.js +121 -0
  316. package/dist/extra-context/contract.d.ts +17 -0
  317. package/dist/extra-context/contract.js +60 -0
  318. package/dist/interactive/index.d.ts +2 -0
  319. package/dist/interactive/index.js +1 -0
  320. package/dist/interactive/providers/launch.d.ts +23 -0
  321. package/dist/interactive/providers/launch.js +203 -0
  322. package/dist/interactive/providers/mcp.d.ts +13 -0
  323. package/dist/interactive/providers/mcp.js +547 -0
  324. package/dist/interactive/providers/shared.d.ts +2 -0
  325. package/dist/interactive/providers/shared.js +1 -0
  326. package/dist/interactive/providers.d.ts +3 -0
  327. package/dist/interactive/providers.js +3 -0
  328. package/dist/interactive/records.d.ts +2 -0
  329. package/dist/interactive/records.js +1 -0
  330. package/dist/interactive/substrate.d.ts +21 -0
  331. package/dist/interactive/substrate.js +522 -0
  332. package/dist/interactive/types.d.ts +101 -0
  333. package/dist/interactive/types.js +1 -0
  334. package/dist/mcp/server.d.ts +88 -0
  335. package/dist/mcp/server.js +790 -0
  336. package/dist/persistence/error-mapping.d.ts +19 -0
  337. package/dist/persistence/error-mapping.js +44 -0
  338. package/dist/persistence/errors.d.ts +26 -0
  339. package/dist/persistence/errors.js +49 -0
  340. package/dist/persistence/extra-context.d.ts +9 -0
  341. package/dist/persistence/extra-context.js +60 -0
  342. package/dist/{records → persistence}/history-lock.js +2 -2
  343. package/dist/persistence/record-path-schema.d.ts +3 -0
  344. package/dist/persistence/record-path-schema.js +16 -0
  345. package/dist/persistence/session-store.d.ts +92 -0
  346. package/dist/persistence/session-store.js +412 -0
  347. package/dist/policy/auto.d.ts +13 -0
  348. package/dist/policy/auto.js +22 -0
  349. package/dist/policy/index.d.ts +5 -0
  350. package/dist/policy/index.js +5 -0
  351. package/dist/policy/resolution.d.ts +6 -0
  352. package/dist/policy/resolution.js +23 -0
  353. package/dist/policy/result.d.ts +53 -0
  354. package/dist/policy/result.js +15 -0
  355. package/dist/policy/selector.d.ts +11 -0
  356. package/dist/policy/selector.js +57 -0
  357. package/dist/policy/verification.d.ts +77 -0
  358. package/dist/policy/verification.js +365 -0
  359. package/dist/policy/verifier-selection.d.ts +13 -0
  360. package/dist/policy/verifier-selection.js +78 -0
  361. package/dist/preflight/branch.d.ts +9 -0
  362. package/dist/preflight/branch.js +48 -0
  363. package/dist/preflight/errors.d.ts +3 -0
  364. package/dist/preflight/errors.js +10 -3
  365. package/dist/preflight/index.d.ts +13 -0
  366. package/dist/preflight/index.js +43 -8
  367. package/dist/render/interactions/confirmation.js +4 -2
  368. package/dist/render/transcripts/apply.js +9 -10
  369. package/dist/render/transcripts/auto.d.ts +27 -0
  370. package/dist/render/transcripts/auto.js +21 -0
  371. package/dist/render/transcripts/init.d.ts +4 -15
  372. package/dist/render/transcripts/init.js +71 -72
  373. package/dist/render/transcripts/list.d.ts +10 -1
  374. package/dist/render/transcripts/list.js +121 -15
  375. package/dist/render/transcripts/message.d.ts +72 -0
  376. package/dist/render/transcripts/message.js +362 -0
  377. package/dist/render/transcripts/prune.d.ts +7 -2
  378. package/dist/render/transcripts/prune.js +64 -17
  379. package/dist/render/transcripts/reduce.d.ts +74 -0
  380. package/dist/render/transcripts/reduce.js +395 -0
  381. package/dist/render/transcripts/root-launcher.d.ts +19 -0
  382. package/dist/render/transcripts/root-launcher.js +40 -0
  383. package/dist/render/transcripts/run.d.ts +35 -6
  384. package/dist/render/transcripts/run.js +241 -165
  385. package/dist/render/transcripts/shared.d.ts +2 -0
  386. package/dist/render/transcripts/shared.js +11 -4
  387. package/dist/render/transcripts/spec.d.ts +74 -0
  388. package/dist/render/transcripts/spec.js +394 -0
  389. package/dist/render/transcripts/stage-progress.d.ts +22 -0
  390. package/dist/render/transcripts/stage-progress.js +6 -0
  391. package/dist/render/transcripts/update-check.d.ts +2 -0
  392. package/dist/render/transcripts/update-check.js +22 -0
  393. package/dist/render/transcripts/verify.d.ts +74 -0
  394. package/dist/render/transcripts/verify.js +409 -0
  395. package/dist/render/utils/agents.d.ts +10 -9
  396. package/dist/render/utils/agents.js +30 -82
  397. package/dist/render/utils/badges.d.ts +3 -20
  398. package/dist/render/utils/badges.js +3 -36
  399. package/dist/render/utils/duration.d.ts +12 -0
  400. package/dist/render/utils/duration.js +37 -0
  401. package/dist/render/utils/interactive-frame.d.ts +6 -0
  402. package/dist/render/utils/interactive-frame.js +38 -0
  403. package/dist/render/utils/records.js +4 -4
  404. package/dist/render/utils/runs.d.ts +3 -9
  405. package/dist/render/utils/runs.js +16 -48
  406. package/dist/render/utils/stage-output.d.ts +20 -0
  407. package/dist/render/utils/stage-output.js +44 -0
  408. package/dist/render/utils/timezone.d.ts +2 -0
  409. package/dist/render/utils/timezone.js +42 -0
  410. package/dist/render/utils/transcript-shell.d.ts +66 -0
  411. package/dist/render/utils/transcript-shell.js +155 -0
  412. package/dist/render/utils/transcript.d.ts +7 -1
  413. package/dist/render/utils/transcript.js +12 -2
  414. package/dist/render/utils/wrap.d.ts +1 -0
  415. package/dist/render/utils/wrap.js +20 -0
  416. package/dist/status/colors.d.ts +2 -3
  417. package/dist/status/colors.js +3 -3
  418. package/dist/status/index.d.ts +108 -8
  419. package/dist/status/index.js +164 -5
  420. package/dist/update-check/checker.d.ts +24 -0
  421. package/dist/update-check/checker.js +130 -0
  422. package/dist/update-check/prompt.d.ts +25 -0
  423. package/dist/update-check/prompt.js +62 -0
  424. package/dist/update-check/semver.d.ts +17 -0
  425. package/dist/update-check/semver.js +36 -0
  426. package/dist/update-check/state-path.d.ts +8 -0
  427. package/dist/update-check/state-path.js +18 -0
  428. package/dist/utils/binaries.js +14 -8
  429. package/dist/utils/errors.d.ts +3 -1
  430. package/dist/utils/errors.js +3 -1
  431. package/dist/utils/git.d.ts +10 -0
  432. package/dist/utils/git.js +15 -3
  433. package/dist/utils/output.d.ts +5 -1
  434. package/dist/utils/output.js +4 -2
  435. package/dist/utils/process.d.ts +2 -1
  436. package/dist/utils/process.js +7 -3
  437. package/dist/utils/session-id.d.ts +1 -0
  438. package/dist/utils/session-id.js +22 -0
  439. package/dist/utils/slug.d.ts +2 -0
  440. package/dist/utils/slug.js +15 -0
  441. package/dist/utils/voratiq-cli-target.d.ts +9 -0
  442. package/dist/utils/voratiq-cli-target.js +58 -0
  443. package/dist/workspace/agents.d.ts +13 -16
  444. package/dist/workspace/agents.js +22 -147
  445. package/dist/workspace/chat/artifacts.d.ts +9 -0
  446. package/dist/workspace/chat/artifacts.js +82 -12
  447. package/dist/workspace/chat/native-usage.d.ts +13 -0
  448. package/dist/workspace/chat/native-usage.js +60 -0
  449. package/dist/workspace/chat/sources.d.ts +9 -5
  450. package/dist/workspace/chat/sources.js +89 -23
  451. package/dist/workspace/chat/token-usage-result.d.ts +23 -0
  452. package/dist/workspace/chat/token-usage-result.js +7 -0
  453. package/dist/workspace/chat/usage-extractor.d.ts +30 -0
  454. package/dist/workspace/chat/usage-extractor.js +461 -0
  455. package/dist/workspace/chat/usage-mappings.d.ts +20 -0
  456. package/dist/workspace/chat/usage-mappings.js +136 -0
  457. package/dist/workspace/credential-guard.js +1 -1
  458. package/dist/workspace/dependencies.js +4 -4
  459. package/dist/workspace/errors.d.ts +5 -0
  460. package/dist/workspace/errors.js +13 -3
  461. package/dist/workspace/layout.d.ts +17 -6
  462. package/dist/workspace/layout.js +51 -32
  463. package/dist/workspace/promotion.d.ts +32 -0
  464. package/dist/workspace/promotion.js +34 -0
  465. package/dist/workspace/prune.d.ts +1 -1
  466. package/dist/workspace/run.d.ts +1 -3
  467. package/dist/workspace/run.js +6 -15
  468. package/dist/workspace/setup.d.ts +8 -0
  469. package/dist/workspace/setup.js +359 -56
  470. package/dist/workspace/shim.js +1 -1
  471. package/dist/workspace/structure.d.ts +91 -26
  472. package/dist/workspace/structure.js +227 -43
  473. package/dist/workspace/templates.d.ts +9 -3
  474. package/dist/workspace/templates.js +26 -15
  475. package/dist/workspace/verification-defaults.d.ts +12 -0
  476. package/dist/workspace/verification-defaults.js +1017 -0
  477. package/package.json +30 -24
  478. package/dist/cli/review.d.ts +0 -12
  479. package/dist/cli/review.js +0 -33
  480. package/dist/commands/errors.d.ts +0 -4
  481. package/dist/commands/errors.js +0 -7
  482. package/dist/commands/init/evals.d.ts +0 -4
  483. package/dist/commands/init/evals.js +0 -219
  484. package/dist/commands/review/command.d.ts +0 -10
  485. package/dist/commands/review/command.js +0 -26
  486. package/dist/commands/run/agent-execution.d.ts +0 -19
  487. package/dist/commands/run/agent-execution.js +0 -63
  488. package/dist/commands/run/agents/auth-stage.d.ts +0 -23
  489. package/dist/commands/run/agents/auth-stage.js +0 -108
  490. package/dist/commands/run/agents/chat-preserver.d.ts +0 -9
  491. package/dist/commands/run/agents/chat-preserver.js +0 -35
  492. package/dist/commands/run/agents/eval-runner.d.ts +0 -19
  493. package/dist/commands/run/agents/eval-runner.js +0 -27
  494. package/dist/commands/run/agents/failures.js +0 -32
  495. package/dist/commands/run/agents/preparation.js +0 -123
  496. package/dist/commands/run/agents.d.ts +0 -14
  497. package/dist/commands/run/agents.js +0 -47
  498. package/dist/commands/run/prompts.d.ts +0 -4
  499. package/dist/commands/run/prompts.js +0 -16
  500. package/dist/commands/run/sandbox-registry.d.ts +0 -4
  501. package/dist/commands/run/sandbox-registry.js +0 -54
  502. package/dist/configs/evals/defaults.d.ts +0 -8
  503. package/dist/configs/evals/defaults.js +0 -28
  504. package/dist/configs/evals/detect.d.ts +0 -10
  505. package/dist/configs/evals/errors.d.ts +0 -16
  506. package/dist/configs/evals/errors.js +0 -29
  507. package/dist/configs/evals/loader.d.ts +0 -9
  508. package/dist/configs/evals/loader.js +0 -46
  509. package/dist/configs/evals/types.d.ts +0 -42
  510. package/dist/configs/evals/types.js +0 -74
  511. package/dist/evals/runner.d.ts +0 -16
  512. package/dist/evals/runner.js +0 -132
  513. package/dist/records/persistence.js +0 -469
  514. package/dist/records/types.d.ts +0 -255
  515. package/dist/records/types.js +0 -160
  516. package/dist/render/transcripts/review.d.ts +0 -2
  517. package/dist/render/transcripts/review.js +0 -36
  518. /package/dist/{commands/run → agents/runtime}/shim/agent-manifest.d.ts +0 -0
  519. /package/dist/{commands/run → agents/runtime}/shim/agent-manifest.js +0 -0
  520. /package/dist/{commands/run → agents/runtime/shim}/argv.d.ts +0 -0
  521. /package/dist/{commands/run → agents/runtime/shim}/argv.js +0 -0
  522. /package/dist/{commands/run/agents → agents/runtime}/types.js +0 -0
  523. /package/dist/{commands/run → domain/run/competition}/agents/preparation.d.ts +0 -0
  524. /package/dist/{commands/run/phases.js → domain/run/competition/agents/types.js} +0 -0
  525. /package/dist/{commands/run → domain/run/model}/id.d.ts +0 -0
  526. /package/dist/{commands/run → domain/run/model}/id.js +0 -0
  527. /package/dist/{records → persistence}/history-lock.d.ts +0 -0
@@ -0,0 +1,444 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { join, resolve } from "node:path";
3
+ import { detectAgentProcessFailureDetail } from "../../../agents/runtime/failures.js";
4
+ import { runSandboxedAgent } from "../../../agents/runtime/harness.js";
5
+ import { stageExtraContextFiles, } from "../../../competition/shared/extra-context.js";
6
+ import { composeStageSandboxPolicy } from "../../../competition/shared/sandbox-policy.js";
7
+ import { buildUnavailableTokenUsageResult, resolveTokenUsage, } from "../../../domain/shared/token-usage.js";
8
+ import { emitStageProgressEvent } from "../../../render/transcripts/stage-progress.js";
9
+ import { toErrorMessage } from "../../../utils/errors.js";
10
+ import { pathExists } from "../../../utils/fs.js";
11
+ import { prepareScratchAgentWorkspace } from "../../../workspace/agents.js";
12
+ import { extractProviderNativeTokenUsageForSession } from "../../../workspace/chat/native-usage.js";
13
+ import { ensureWorkspaceDependencies } from "../../../workspace/dependencies.js";
14
+ import { buildScopedAgentWorkspacePaths, } from "../../../workspace/layout.js";
15
+ import { getVerificationRubricExecutionDirectoryPath, getVerificationRubricResultPath, VORATIQ_VERIFICATION_DIR, } from "../../../workspace/structure.js";
16
+ import { verificationResultArtifactSchema, } from "../model/types.js";
17
+ import { writeVerificationArtifact } from "./artifacts.js";
18
+ import { assertNoVerificationIdentityLeak, assertRubricResultSelectorsMatchAliasMap, buildForbiddenVerificationIdentityTokens, parseRubricResultPayload, } from "./blinding.js";
19
+ import { buildRubricPrompt } from "./prompt.js";
20
+ import { attachVerifierWorkspaceMounts, buildStagedVerificationInputs, sharedInputsUseReferenceRepo, } from "./shared-layout.js";
21
/**
 * Build the adapter object that drives rubric verification for one
 * verification session.
 *
 * Each candidate is a (template, agent) pair. The returned object exposes the
 * lifecycle callbacks `prepareCandidates`, `queueCandidate`,
 * `onCandidateRunning`, `executeCandidate`, `onCandidateCompleted`,
 * `captureExecutionFailure`, plus the `sortResults` comparator.
 *
 * @param input Session wiring: `root`, `verificationId`, `resolvedTarget`,
 *   `aliasMap`, `environment`, `extraContextFiles`, `sharedInputs`,
 *   `teardown`, `mutators` and `renderer`.
 * @returns The adapter object described above.
 */
export function createVerifyCompetitionAdapter(input) {
    const { root, verificationId, resolvedTarget, aliasMap, environment, extraContextFiles, sharedInputs, teardown, mutators, renderer, } = input;
    // Per-execution bookkeeping, keyed by `${template}:${agentId}`. Entries are
    // removed once a candidate completes or its failure has been captured.
    const startedAtByExecutionKey = new Map();
    const tokenUsageResultByExecutionKey = new Map();
    function executionKeyForCandidate(candidate) {
        return `${candidate.template.template}:${candidate.agent.id}`;
    }
    // Record a rubric method snapshot; `fields` supplies status and timing keys.
    function recordRubricSnapshot(candidate, fields) {
        return mutators.recordMethodSnapshot({
            method: "rubric",
            template: candidate.template.template,
            verifierId: candidate.agent.id,
            scope: resolvedTarget.target.kind === "run"
                ? { kind: "run" }
                : { kind: "target" },
            ...fields,
        });
    }
    // Emit a "stage.candidate" progress event for the verify stage.
    function emitCandidateProgress(candidate, fields) {
        emitStageProgressEvent(renderer, {
            type: "stage.candidate",
            stage: "verify",
            candidate: {
                methodKey: `rubric:${candidate.template.template}:${candidate.agent.id}`,
                verifierLabel: candidate.template.template,
                agentLabel: candidate.agent.id,
                ...fields,
            },
        });
    }
    // Forbidden identity tokens for leak checks; the verifier's own id and
    // model are passed as the allowlist.
    function forbiddenTokensFor(candidate) {
        return buildForbiddenVerificationIdentityTokens({
            resolvedTarget,
            allowed: [candidate.agent.id, candidate.agent.model],
        });
    }
    // Path of the rubric result artifact for this candidate.
    function rubricResultPathFor(candidate) {
        return getVerificationRubricResultPath({
            sessionId: verificationId,
            verifierId: candidate.agent.id,
            template: candidate.template.template,
        });
    }
    // Write the failed artifact and build the matching "failed" result object.
    async function persistFailure(candidate, context) {
        const { artifactPath, startedAt, completedAt, detail, tokenUsage, tokenUsageResult } = context;
        await writeFailureRubricArtifact({
            root,
            artifactPath,
            verifierId: candidate.agent.id,
            template: candidate.template.template,
            generatedAt: completedAt,
            error: detail,
        });
        return {
            template: candidate.template.template,
            verifierId: candidate.agent.id,
            status: "failed",
            artifactPath,
            startedAt,
            completedAt,
            error: detail,
            tokenUsage,
            tokenUsageResult,
        };
    }
    // Drop the per-execution bookkeeping for a finished candidate.
    function forgetExecution(executionKey) {
        startedAtByExecutionKey.delete(executionKey);
        tokenUsageResultByExecutionKey.delete(executionKey);
    }
    return {
        prepareCandidates: (candidates) => ({
            ready: candidates.map((candidate) => {
                const workspacePaths = buildVerifierRubricWorkspacePaths({
                    root,
                    verificationId,
                    verifierId: candidate.agent.id,
                    template: candidate.template.template,
                });
                registerScratchWorkspaceTeardown(teardown, workspacePaths, candidate.agent.id, candidate.template.template);
                return { candidate, workspacePaths };
            }),
            failures: [],
        }),
        queueCandidate: async (candidate) => {
            await recordRubricSnapshot(candidate, { status: "queued" });
            emitCandidateProgress(candidate, { status: "queued" });
        },
        onCandidateRunning: async (prepared) => {
            const startedAt = new Date().toISOString();
            startedAtByExecutionKey.set(executionKeyForCandidate(prepared.candidate), startedAt);
            await recordRubricSnapshot(prepared.candidate, { status: "running", startedAt });
            emitCandidateProgress(prepared.candidate, { status: "running", startedAt });
        },
        executeCandidate: async (prepared) => {
            const { candidate, workspacePaths } = prepared;
            const executionKey = executionKeyForCandidate(candidate);
            // Fall back to "now" if no running timestamp was recorded.
            const startedAt = startedAtByExecutionKey.get(executionKey) ?? new Date().toISOString();
            await prepareScratchAgentWorkspace({ paths: workspacePaths });
            await attachVerifierWorkspaceMounts({
                workspacePath: workspacePaths.workspacePath,
                contextPath: workspacePaths.contextPath,
                sharedInputs,
            });
            await ensureWorkspaceDependencies({
                root,
                workspacePath: workspacePaths.workspacePath,
                environment,
            });
            await stageExtraContextFiles({
                contextPath: workspacePaths.contextPath,
                files: extraContextFiles,
            });
            const staged = buildStagedVerificationInputs({
                workspacePaths,
                sharedInputs,
            });
            const prompt = buildRubricPrompt({
                template: candidate.template,
                target: resolvedTarget.target,
                staged,
                extraContextFiles,
            });
            // The prompt must not mention any candidate identity token.
            assertNoVerificationIdentityLeak({
                text: prompt,
                forbidden: forbiddenTokensFor(candidate),
            });
            const sandboxPolicy = composeStageSandboxPolicy({
                stageWriteProtectedPaths: [
                    workspacePaths.contextPath,
                    join(workspacePaths.workspacePath, "context"),
                    join(workspacePaths.workspacePath, "inputs"),
                    sharedInputs.sharedInputsAbsolute,
                    ...(sharedInputsUseReferenceRepo(sharedInputs)
                        ? [
                            join(workspacePaths.workspacePath, "reference_repo"),
                            sharedInputs.referenceRepoAbsolute,
                        ]
                        : []),
                ],
            });
            const result = await runSandboxedAgent({
                root,
                sessionId: verificationId,
                sandboxStageId: "verify",
                agent: candidate.agent,
                prompt,
                environment,
                teardownAuthOnExit: false,
                captureChat: true,
                paths: {
                    agentRoot: workspacePaths.agentRoot,
                    workspacePath: workspacePaths.workspacePath,
                    sandboxHomePath: workspacePaths.sandboxHomePath,
                    runtimeManifestPath: workspacePaths.runtimeManifestPath,
                    sandboxSettingsPath: workspacePaths.sandboxSettingsPath,
                    runtimePath: workspacePaths.runtimePath,
                    artifactsPath: workspacePaths.artifactsPath,
                    stdoutPath: workspacePaths.stdoutPath,
                    stderrPath: workspacePaths.stderrPath,
                },
                extraWriteProtectedPaths: sandboxPolicy.extraWriteProtectedPaths,
                extraReadProtectedPaths: sandboxPolicy.extraReadProtectedPaths,
            });
            const tokenUsageResult = await extractProviderNativeTokenUsageForSession({
                root,
                domain: VORATIQ_VERIFICATION_DIR,
                sessionId: verificationId,
                agentId: candidate.agent.id,
                provider: candidate.agent.provider,
                modelId: candidate.agent.model,
                chatCaptured: result.chat?.captured === true,
                format: result.chat?.format,
                artifactPath: result.chat?.artifactPath,
            });
            // Stored so captureExecutionFailure can reuse it if a later step throws.
            tokenUsageResultByExecutionKey.set(executionKey, tokenUsageResult);
            const tokenUsage = resolveTokenUsage(tokenUsageResult);
            const completedAt = new Date().toISOString();
            const artifactPath = rubricResultPathFor(candidate);
            const failWith = (detail) => persistFailure(candidate, {
                artifactPath,
                startedAt,
                completedAt,
                detail,
                tokenUsage,
                tokenUsageResult,
            });
            if (result.exitCode !== 0 || result.errorMessage) {
                // A watchdog-triggered error message is used as-is; otherwise try
                // to detect a failure detail from the captured stdout/stderr.
                const detectedDetail = result.watchdog?.trigger && result.errorMessage
                    ? result.errorMessage
                    : await detectAgentProcessFailureDetail({
                        provider: candidate.agent.provider,
                        stdoutPath: workspacePaths.stdoutPath,
                        stderrPath: workspacePaths.stderrPath,
                    });
                return await failWith(detectedDetail ??
                    result.errorMessage ??
                    `Agent exited with code ${result.exitCode ?? "unknown"}`);
            }
            const outputPath = resolve(workspacePaths.workspacePath, "result.json");
            if (!(await pathExists(outputPath))) {
                return await failWith(`Missing result.json. See stderr: ${workspacePaths.stderrPath}`);
            }
            const raw = await readFile(outputPath, "utf8");
            try {
                // The verifier output must not name any candidate identity either.
                assertNoVerificationIdentityLeak({
                    text: raw,
                    forbidden: forbiddenTokensFor(candidate),
                });
                const resultPayload = parseRubricResultPayload(raw);
                const artifact = verificationResultArtifactSchema.parse({
                    method: "rubric",
                    template: candidate.template.template,
                    verifierId: candidate.agent.id,
                    generatedAt: completedAt,
                    status: "succeeded",
                    result: resultPayload,
                });
                assertRubricResultSelectorsMatchAliasMap({
                    artifactPath,
                    result: artifact.method === "rubric" ? artifact.result : undefined,
                    aliasMap,
                });
                await writeVerificationArtifact({
                    root,
                    artifactPath,
                    artifact,
                });
                return {
                    template: candidate.template.template,
                    verifierId: candidate.agent.id,
                    status: "succeeded",
                    artifactPath,
                    startedAt,
                    completedAt,
                    tokenUsage,
                    tokenUsageResult,
                };
            }
            catch (error) {
                // Validation or persistence problems become a failed result.
                return await failWith(toErrorMessage(error));
            }
        },
        onCandidateCompleted: async (prepared, result) => {
            const executionKey = executionKeyForCandidate(prepared.candidate);
            await recordRubricSnapshot(prepared.candidate, {
                status: result.status,
                artifactPath: result.artifactPath,
                startedAt: result.startedAt,
                completedAt: result.completedAt,
                tokenUsage: result.tokenUsage,
                ...(result.status === "failed" ? { error: result.error } : {}),
            });
            emitCandidateProgress(prepared.candidate, {
                status: result.status,
                startedAt: result.startedAt,
                completedAt: result.completedAt,
                artifactPath: result.artifactPath,
                tokenUsage: result.tokenUsage,
                tokenUsageResult: result.tokenUsageResult,
            });
            forgetExecution(executionKey);
        },
        captureExecutionFailure: async ({ prepared, error }) => {
            const { candidate } = prepared;
            const executionKey = executionKeyForCandidate(candidate);
            const startedAt = startedAtByExecutionKey.get(executionKey) ?? new Date().toISOString();
            const completedAt = new Date().toISOString();
            const artifactPath = rubricResultPathFor(candidate);
            const detail = toErrorMessage(error);
            // Reuse usage captured before the throw; otherwise mark it unavailable.
            const tokenUsageResult = tokenUsageResultByExecutionKey.get(executionKey) ??
                buildUnavailableTokenUsageResult({
                    provider: candidate.agent.provider,
                    modelId: candidate.agent.model,
                    message: detail,
                });
            const tokenUsage = resolveTokenUsage(tokenUsageResult);
            const failed = await persistFailure(candidate, {
                artifactPath,
                startedAt,
                completedAt,
                detail,
                tokenUsage,
                tokenUsageResult,
            });
            await recordRubricSnapshot(candidate, {
                status: "failed",
                artifactPath,
                startedAt,
                completedAt,
                tokenUsage,
                error: detail,
            });
            emitCandidateProgress(candidate, {
                status: "failed",
                startedAt,
                completedAt,
                artifactPath,
                tokenUsage,
                tokenUsageResult,
            });
            forgetExecution(executionKey);
            return failed;
        },
        sortResults: compareVerificationsByTemplateThenVerifierId,
    };
}
408
/**
 * Persist a rubric artifact with status "failed" and an empty result.
 *
 * @param options `root`, `artifactPath`, `verifierId`, `template`,
 *   `generatedAt` and the `error` detail to store.
 * @returns A promise that settles once the artifact writer has finished.
 */
async function writeFailureRubricArtifact(options) {
    const failedArtifact = {
        method: "rubric",
        template: options.template,
        verifierId: options.verifierId,
        generatedAt: options.generatedAt,
        status: "failed",
        result: {},
        error: options.error,
    };
    await writeVerificationArtifact({
        root: options.root,
        artifactPath: options.artifactPath,
        artifact: failedArtifact,
    });
}
424
/**
 * Compute the scoped workspace paths for one verifier/template execution.
 *
 * The execution directory is resolved against `root` and used as the agent
 * root for the scoped path builder.
 *
 * @param options `root`, `verificationId`, `verifierId` and `template`.
 * @returns Whatever `buildScopedAgentWorkspacePaths` returns for that root.
 */
function buildVerifierRubricWorkspacePaths(options) {
    const executionDirectory = getVerificationRubricExecutionDirectoryPath({
        sessionId: options.verificationId,
        verifierId: options.verifierId,
        template: options.template,
    });
    const agentRoot = resolve(options.root, executionDirectory);
    return buildScopedAgentWorkspacePaths({ agentRoot });
}
434
/**
 * Comparator ordering results by `template`, then by `verifierId`, both via
 * `String.prototype.localeCompare`.
 *
 * @returns Negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compareVerificationsByTemplateThenVerifierId(left, right) {
    const templateOrder = left.template.localeCompare(right.template);
    if (templateOrder !== 0) {
        return templateOrder;
    }
    // Same template: fall back to the verifier id.
    return left.verifierId.localeCompare(right.verifierId);
}
438
/**
 * Register the workspace, context, runtime and sandbox paths of a scratch
 * workspace with `teardown`, each labelled `<verifierId>/<template> <kind>`.
 */
function registerScratchWorkspaceTeardown(teardown, workspacePaths, verifierId, template) {
    const scratchEntries = [
        [workspacePaths.workspacePath, "workspace"],
        [workspacePaths.contextPath, "context"],
        [workspacePaths.runtimePath, "runtime"],
        [workspacePaths.sandboxPath, "sandbox"],
    ];
    for (const [path, kind] of scratchEntries) {
        teardown.addPath(path, `${verifierId}/${template} ${kind}`);
    }
}
@@ -0,0 +1,6 @@
1
+ import type { VerificationResultArtifact } from "../model/types.js";
2
+ export declare function writeVerificationArtifact(options: {
3
+ root: string;
4
+ artifactPath: string;
5
+ artifact: VerificationResultArtifact;
6
+ }): Promise<void>;
@@ -0,0 +1,7 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import { dirname, resolve } from "node:path";
3
/**
 * Write a verification artifact as pretty-printed JSON (2-space indent,
 * trailing newline), creating parent directories as needed.
 *
 * @param options `root`, `artifactPath` (resolved against `root`) and the
 *   `artifact` value to serialize.
 */
export async function writeVerificationArtifact(options) {
    const { root, artifactPath, artifact } = options;
    const destination = resolve(root, artifactPath);
    await mkdir(dirname(destination), { recursive: true });
    const serialized = `${JSON.stringify(artifact, null, 2)}\n`;
    await writeFile(destination, serialized, "utf8");
}
@@ -0,0 +1,24 @@
1
+ import { parseRubricResultPayload, type RubricResultPayload } from "../rubric-result.js";
2
+ import type { ResolvedVerificationTarget } from "./target.js";
3
+ export declare function buildBlindedAliasMap(resolvedTarget: ResolvedVerificationTarget): Record<string, string> | undefined;
4
+ export declare function aliasForCandidate(canonicalId: string, aliasMap?: Record<string, string>): string;
5
+ export declare function assertRubricResultSelectorsMatchAliasMap(options: {
6
+ artifactPath: string;
7
+ result?: RubricResultPayload;
8
+ aliasMap?: Record<string, string>;
9
+ }): void;
10
+ export declare function buildForbiddenVerificationIdentityTokens(options: {
11
+ resolvedTarget: ResolvedVerificationTarget;
12
+ allowed?: readonly string[];
13
+ }): string[];
14
+ /**
15
+ * Build an overlap-safe allowlist: verifier self-identity tokens are only
16
+ * permitted when they are not also candidate-identity tokens under
17
+ * verification.
18
+ */
19
+ export declare function overlapSafeAllowlist(allowed: readonly string[], candidateTokens: ReadonlySet<string>): Set<string>;
20
+ export declare function assertNoVerificationIdentityLeak(options: {
21
+ text: string;
22
+ forbidden: readonly string[];
23
+ }): void;
24
+ export { parseRubricResultPayload };
@@ -0,0 +1,109 @@
1
+ import { generateBlindedCandidateAlias } from "../blinding/aliases.js";
2
+ import { parseRubricResultPayload, readRubricResultPreferred, readRubricResultRanking, } from "../rubric-result.js";
3
/**
 * Assign a generated blinded alias to every distinct competitive candidate.
 *
 * Candidate ids are de-duplicated and sorted with `localeCompare` before
 * aliases are generated, so aliases are handed out in sorted-id order.
 *
 * @returns An object mapping alias -> canonical candidate id, or `undefined`
 *   when the target has no competitive candidates.
 */
export function buildBlindedAliasMap(resolvedTarget) {
    const distinctIds = new Set(resolvedTarget.competitiveCandidates.map((candidate) => candidate.canonicalId));
    if (distinctIds.size === 0) {
        return undefined;
    }
    const orderedIds = Array.from(distinctIds).sort((a, b) => a.localeCompare(b));
    const issuedAliases = new Set();
    const aliasMap = {};
    orderedIds.forEach((candidateId) => {
        // The generator receives the aliases issued so far via `seen`.
        const alias = generateBlindedCandidateAlias({ seen: issuedAliases });
        issuedAliases.add(alias);
        aliasMap[alias] = candidateId;
    });
    return aliasMap;
}
17
/**
 * Look up the blinded alias whose mapped value equals `canonicalId`.
 *
 * @returns The first matching alias, or `canonicalId` itself when there is
 *   no alias map or no alias maps to that id.
 */
export function aliasForCandidate(canonicalId, aliasMap) {
    if (aliasMap) {
        for (const [alias, mappedId] of Object.entries(aliasMap)) {
            if (mappedId === canonicalId) {
                return alias;
            }
        }
    }
    return canonicalId;
}
24
/**
 * Verify that every selector named in a rubric result (the preferred
 * selector and each ranking entry) is a key of the blinded alias map.
 *
 * Does nothing when `aliasMap` or `result` is missing.
 *
 * @param options `artifactPath` (used in the error message only), the parsed
 *   `result`, and the `aliasMap` of alias -> canonical id.
 * @throws {Error} Listing the unknown selectors, sorted, when any selector is
 *   not a known alias.
 */
export function assertRubricResultSelectorsMatchAliasMap(options) {
    const { artifactPath, result, aliasMap } = options;
    if (!aliasMap || !result) {
        return;
    }
    // Own-property check: a plain `aliasMap[selector]` lookup also resolves
    // names inherited from Object.prototype ("constructor", "toString", ...),
    // which would let such selectors pass as known aliases.
    const isKnownAlias = (selector) => Object.prototype.hasOwnProperty.call(aliasMap, selector) &&
        Boolean(aliasMap[selector]);
    const unknownSelectors = new Set();
    const preferred = readRubricResultPreferred(result);
    if (preferred && !isKnownAlias(preferred)) {
        unknownSelectors.add(preferred);
    }
    for (const selector of readRubricResultRanking(result) ?? []) {
        if (!isKnownAlias(selector)) {
            unknownSelectors.add(selector);
        }
    }
    if (unknownSelectors.size === 0) {
        return;
    }
    const selectors = Array.from(unknownSelectors)
        .sort((left, right) => left.localeCompare(right))
        .map((selector) => `\`${selector}\``)
        .join(", ");
    throw new Error(`Invalid verifier result.json for \`${artifactPath}\`: unknown blinded selector(s) ${selectors}.`);
}
48
/**
 * Collect the identity tokens that must not appear in blinded verification
 * text.
 *
 * Tokens come from each competitive candidate's `forbiddenIdentityTokens`,
 * lower-cased and trimmed; empty tokens are dropped and duplicates removed
 * in first-seen order.
 *
 * @param options `resolvedTarget` plus an optional `allowed` list of verifier
 *   self-identity tokens (defaults to none).
 * @returns The de-duplicated list of forbidden tokens.
 */
export function buildForbiddenVerificationIdentityTokens(options) {
    const { resolvedTarget, allowed = [] } = options;
    // Normalize every candidate token once, keeping encounter order.
    const normalizedTokens = [];
    for (const candidate of resolvedTarget.competitiveCandidates) {
        for (const rawToken of candidate.forbiddenIdentityTokens) {
            const token = rawToken.toLowerCase().trim();
            if (token !== "") {
                normalizedTokens.push(token);
            }
        }
    }
    const candidateTokens = new Set(normalizedTokens);
    // Verifier self-identity tokens are only allowlisted when they do NOT
    // overlap with candidate identity tokens, so a token shared between the
    // verifier and a candidate stays forbidden.
    const allowedTokens = overlapSafeAllowlist(allowed, candidateTokens);
    const forbidden = new Set();
    for (const token of normalizedTokens) {
        if (!allowedTokens.has(token)) {
            forbidden.add(token);
        }
    }
    return [...forbidden];
}
78
/**
 * Build an overlap-safe allowlist: each allowed token is lower-cased and
 * trimmed, and is kept only when it is non-empty and is not also one of the
 * candidate identity tokens.
 *
 * @param allowed Verifier self-identity tokens.
 * @param candidateTokens Normalized candidate identity tokens.
 * @returns The set of tokens that may safely be allowed.
 */
export function overlapSafeAllowlist(allowed, candidateTokens) {
    const safeTokens = new Set();
    allowed.forEach((rawToken) => {
        const token = rawToken.toLowerCase().trim();
        if (token !== "" && !candidateTokens.has(token)) {
            safeTokens.add(token);
        }
    });
    return safeTokens;
}
88
/**
 * Throw when `text` contains any forbidden identity token.
 *
 * The text is lower-cased before matching; each token is matched with
 * `containsBoundedToken`.
 *
 * @param options `text` to scan and the `forbidden` token list.
 * @throws {Error} Naming up to five leaked tokens (followed by ", ..." when
 *   more were found).
 */
export function assertNoVerificationIdentityLeak(options) {
    const { text, forbidden } = options;
    const loweredText = text.toLowerCase();
    const leakedTokens = forbidden.filter((token) => containsBoundedToken(loweredText, token));
    if (leakedTokens.length > 0) {
        // Only the first five leaks are quoted to keep the message short.
        const quoted = leakedTokens
            .slice(0, 5)
            .map((token) => `\`${token}\``);
        const overflow = leakedTokens.length > 5 ? ", ..." : "";
        throw new Error(`Blinded verification leakage validation failed: forbidden candidate identity token(s) detected: ${quoted.join(", ")}${overflow}.`);
    }
}
101
/**
 * Test whether `token` occurs in `text` as a bounded token, i.e. not directly
 * preceded or followed by a letter, digit, underscore or hyphen. Matching is
 * case-insensitive, and the token is treated literally (regex metacharacters
 * are escaped).
 *
 * @returns `false` for an empty token; otherwise whether a bounded match exists.
 */
function containsBoundedToken(text, token) {
    if (token) {
        const literal = token.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const boundedMatcher = new RegExp(`(?<![a-z0-9_-])${literal}(?![a-z0-9_-])`, "iu");
        return boundedMatcher.test(text);
    }
    return false;
}
109
+ export { parseRubricResultPayload };
@@ -0,0 +1,11 @@
1
+ import type { VerificationMethodResultRef, VerificationStatus } from "../model/types.js";
2
+ import type { ResolvedVerificationTarget } from "./target.js";
3
+ export declare function deriveVerificationStatusFromMethods(methods: readonly VerificationMethodResultRef[]): VerificationStatus;
4
+ export declare function maybePersistSelectedSpecPath(options: {
5
+ root: string;
6
+ verificationsFilePath: string;
7
+ verificationId: string;
8
+ resolvedTarget: ResolvedVerificationTarget;
9
+ aliasMap?: Record<string, string>;
10
+ methods: readonly VerificationMethodResultRef[];
11
+ }): Promise<void>;
@@ -0,0 +1,65 @@
1
+ import { loadVerificationSelectionPolicyOutput } from "../../../policy/index.js";
2
+ import { rewriteVerificationRecord } from "../persistence/adapter.js";
3
/**
 * Reduce per-method statuses to one overall verification status.
 *
 * "failed" takes precedence over "aborted"; if no method has either status
 * (including an empty list) the result is "succeeded".
 */
export function deriveVerificationStatusFromMethods(methods) {
    // Checked in precedence order.
    for (const status of ["failed", "aborted"]) {
        if (methods.some((method) => method.status === status)) {
            return status;
        }
    }
    return "succeeded";
}
12
/**
 * When the verification target carries a `specRecord` and the selection
 * policy resolves to a succeeded agent with an output path, rewrite the
 * stored verification record so its spec target's `specPath` points at that
 * output.
 *
 * Returns without writing when the target has no `specRecord`, the policy
 * decision is not "resolvable", or no matching succeeded agent with an
 * `outputPath` exists.
 *
 * @param options `root`, `verificationsFilePath`, `verificationId`,
 *   `resolvedTarget`, optional `aliasMap`, and the method results.
 */
export async function maybePersistSelectedSpecPath(options) {
    const { root, verificationsFilePath, verificationId, resolvedTarget, aliasMap, methods, } = options;
    if (!("specRecord" in resolvedTarget)) {
        return;
    }
    const { specRecord } = resolvedTarget;
    // Synthetic record handed to the selection policy; optional keys are only
    // added when present.
    const policyRecord = {
        sessionId: verificationId,
        createdAt: new Date(0).toISOString(),
        status: "succeeded",
        target: resolvedTarget.target,
        methods: [...methods],
    };
    if (specRecord.extraContext) {
        policyRecord.extraContext = specRecord.extraContext;
    }
    if (specRecord.extraContextMetadata) {
        policyRecord.extraContextMetadata = specRecord.extraContextMetadata;
    }
    if (aliasMap) {
        policyRecord.blinded = { enabled: true, aliasMap };
    }
    const { decision } = await loadVerificationSelectionPolicyOutput({
        root,
        record: policyRecord,
        canonicalCandidateIds: specRecord.agents.map((agent) => agent.agentId),
    });
    if (decision.state !== "resolvable") {
        return;
    }
    const winner = specRecord.agents.find((agent) => agent.status === "succeeded" &&
        agent.agentId === decision.selectedCanonicalAgentId &&
        agent.outputPath);
    if (!winner?.outputPath) {
        return;
    }
    // Records whose target is not a spec are returned unchanged.
    const applySelectedSpecPath = (existing) => existing.target.kind !== "spec"
        ? existing
        : {
            ...existing,
            target: {
                ...existing.target,
                specPath: winner.outputPath,
            },
        };
    await rewriteVerificationRecord({
        root,
        verificationsFilePath,
        sessionId: verificationId,
        mutate: applySelectedSpecPath,
        forceFlush: true,
    });
}