@jinn-network/client 0.1.7 → 0.1.8-canary.09a3b2f6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (438)
  1. package/README.md +67 -1
  2. package/dist/adapters/mech/adapter.d.ts +39 -2
  3. package/dist/adapters/mech/adapter.js +178 -20
  4. package/dist/adapters/mech/adapter.js.map +1 -1
  5. package/dist/adapters/mech/contracts.d.ts +22 -1
  6. package/dist/adapters/mech/contracts.js +96 -52
  7. package/dist/adapters/mech/contracts.js.map +1 -1
  8. package/dist/adapters/mech/safe-revert.d.ts +4 -0
  9. package/dist/adapters/mech/safe-revert.js +5 -1
  10. package/dist/adapters/mech/safe-revert.js.map +1 -1
  11. package/dist/adapters/mech/safe.d.ts +1 -1
  12. package/dist/adapters/mech/safe.js +10 -4
  13. package/dist/adapters/mech/safe.js.map +1 -1
  14. package/dist/adapters/mech/types.d.ts +6 -1
  15. package/dist/adapters/mech/types.js.map +1 -1
  16. package/dist/agent/operator-claude.js +8 -0
  17. package/dist/agent/operator-claude.js.map +1 -1
  18. package/dist/api/activity-events-endpoint.d.ts +14 -0
  19. package/dist/api/activity-events-endpoint.js +59 -0
  20. package/dist/api/activity-events-endpoint.js.map +1 -0
  21. package/dist/api/bootstrap-endpoint.d.ts +1 -2
  22. package/dist/api/bootstrap-endpoint.js +42 -24
  23. package/dist/api/bootstrap-endpoint.js.map +1 -1
  24. package/dist/api/codex-doctor-endpoint.d.ts +22 -5
  25. package/dist/api/codex-doctor-endpoint.js +136 -17
  26. package/dist/api/codex-doctor-endpoint.js.map +1 -1
  27. package/dist/api/debug-report-endpoint.d.ts +27 -0
  28. package/dist/api/debug-report-endpoint.js +157 -0
  29. package/dist/api/debug-report-endpoint.js.map +1 -0
  30. package/dist/api/discovery-endpoint.d.ts +1 -0
  31. package/dist/api/discovery-endpoint.js +24 -0
  32. package/dist/api/discovery-endpoint.js.map +1 -1
  33. package/dist/api/fleet-build.d.ts +1 -7
  34. package/dist/api/fleet-build.js +0 -7
  35. package/dist/api/fleet-build.js.map +1 -1
  36. package/dist/api/gather-status.d.ts +39 -0
  37. package/dist/api/gather-status.js +181 -84
  38. package/dist/api/gather-status.js.map +1 -1
  39. package/dist/api/hermes-doctor-endpoint.d.ts +15 -7
  40. package/dist/api/hermes-doctor-endpoint.js +56 -19
  41. package/dist/api/hermes-doctor-endpoint.js.map +1 -1
  42. package/dist/api/launcher-status.d.ts +4 -2
  43. package/dist/api/launcher-status.js +11 -10
  44. package/dist/api/launcher-status.js.map +1 -1
  45. package/dist/api/launcher-tasks.d.ts +1 -1
  46. package/dist/api/launcher-tasks.js +12 -8
  47. package/dist/api/launcher-tasks.js.map +1 -1
  48. package/dist/api/loop-completion-build.d.ts +79 -0
  49. package/dist/api/loop-completion-build.js +155 -0
  50. package/dist/api/loop-completion-build.js.map +1 -0
  51. package/dist/api/operator-artifacts-endpoint.js +73 -6
  52. package/dist/api/operator-artifacts-endpoint.js.map +1 -1
  53. package/dist/api/portfolio-v0-build.d.ts +7 -1
  54. package/dist/api/portfolio-v0-build.js +6 -2
  55. package/dist/api/portfolio-v0-build.js.map +1 -1
  56. package/dist/api/prediction-v1-build.d.ts +6 -0
  57. package/dist/api/prediction-v1-build.js +3 -1
  58. package/dist/api/prediction-v1-build.js.map +1 -1
  59. package/dist/api/server.d.ts +17 -0
  60. package/dist/api/server.js +40 -1
  61. package/dist/api/server.js.map +1 -1
  62. package/dist/api/setup-endpoints.d.ts +13 -9
  63. package/dist/api/setup-endpoints.js +50 -173
  64. package/dist/api/setup-endpoints.js.map +1 -1
  65. package/dist/api/solvernets-endpoints.js +33 -63
  66. package/dist/api/solvernets-endpoints.js.map +1 -1
  67. package/dist/api/status-build.d.ts +140 -17
  68. package/dist/api/status-build.js +47 -34
  69. package/dist/api/status-build.js.map +1 -1
  70. package/dist/api/status-harness-rollup.d.ts +35 -0
  71. package/dist/api/status-harness-rollup.js +45 -0
  72. package/dist/api/status-harness-rollup.js.map +1 -0
  73. package/dist/api/status-rollup-build.d.ts +0 -4
  74. package/dist/api/status-rollup-build.js +0 -4
  75. package/dist/api/status-rollup-build.js.map +1 -1
  76. package/dist/api/task-runs-build.d.ts +8 -0
  77. package/dist/api/task-runs-build.js +5 -1
  78. package/dist/api/task-runs-build.js.map +1 -1
  79. package/dist/build-info.json +4 -4
  80. package/dist/build-meta.json +1 -1
  81. package/dist/captures/live-publisher.js +24 -4
  82. package/dist/captures/live-publisher.js.map +1 -1
  83. package/dist/captures/publish.d.ts +1 -1
  84. package/dist/chain-read-errors.d.ts +12 -0
  85. package/dist/chain-read-errors.js +26 -1
  86. package/dist/chain-read-errors.js.map +1 -1
  87. package/dist/cli/commands/codedigest-revert-check.d.ts +33 -0
  88. package/dist/cli/commands/codedigest-revert-check.js +253 -0
  89. package/dist/cli/commands/codedigest-revert-check.js.map +1 -0
  90. package/dist/cli/commands/doctor.d.ts +3 -0
  91. package/dist/cli/commands/doctor.js +35 -0
  92. package/dist/cli/commands/doctor.js.map +1 -1
  93. package/dist/cli/commands/eval.d.ts +76 -0
  94. package/dist/cli/commands/eval.js +401 -0
  95. package/dist/cli/commands/eval.js.map +1 -0
  96. package/dist/cli/commands/rewards.d.ts +2 -0
  97. package/dist/cli/commands/rewards.js +27 -0
  98. package/dist/cli/commands/rewards.js.map +1 -1
  99. package/dist/cli/commands/solver-nets.d.ts +1 -0
  100. package/dist/cli/commands/solver-nets.js +245 -22
  101. package/dist/cli/commands/solver-nets.js.map +1 -1
  102. package/dist/cli/commands/solver-plugins-block.d.ts +33 -0
  103. package/dist/cli/commands/solver-plugins-block.js +118 -0
  104. package/dist/cli/commands/solver-plugins-block.js.map +1 -0
  105. package/dist/cli/commands/solver-plugins-feedback.d.ts +72 -0
  106. package/dist/cli/commands/solver-plugins-feedback.js +262 -0
  107. package/dist/cli/commands/solver-plugins-feedback.js.map +1 -0
  108. package/dist/cli/commands/solver-plugins-read.d.ts +54 -0
  109. package/dist/cli/commands/solver-plugins-read.js +259 -0
  110. package/dist/cli/commands/solver-plugins-read.js.map +1 -0
  111. package/dist/cli/commands/solver-plugins.d.ts +35 -0
  112. package/dist/cli/commands/solver-plugins.js +399 -2
  113. package/dist/cli/commands/solver-plugins.js.map +1 -1
  114. package/dist/cli/commands/status.js +0 -1
  115. package/dist/cli/commands/status.js.map +1 -1
  116. package/dist/cli/commands/tasks.js +15 -2
  117. package/dist/cli/commands/tasks.js.map +1 -1
  118. package/dist/cli/index.js +4 -0
  119. package/dist/cli/index.js.map +1 -1
  120. package/dist/cli/task-native-readiness.d.ts +7 -0
  121. package/dist/cli/task-native-readiness.js +7 -5
  122. package/dist/cli/task-native-readiness.js.map +1 -1
  123. package/dist/config.d.ts +206 -232
  124. package/dist/config.js +289 -107
  125. package/dist/config.js.map +1 -1
  126. package/dist/daemon/ai-units-gate.d.ts +54 -0
  127. package/dist/daemon/ai-units-gate.js +83 -0
  128. package/dist/daemon/ai-units-gate.js.map +1 -0
  129. package/dist/daemon/creator.js +13 -0
  130. package/dist/daemon/creator.js.map +1 -1
  131. package/dist/daemon/daemon.d.ts +10 -0
  132. package/dist/daemon/daemon.js +205 -30
  133. package/dist/daemon/daemon.js.map +1 -1
  134. package/dist/daemon/eviction-loop.d.ts +7 -0
  135. package/dist/daemon/eviction-loop.js +16 -0
  136. package/dist/daemon/eviction-loop.js.map +1 -1
  137. package/dist/daemon/gate-logger.d.ts +9 -0
  138. package/dist/daemon/gate-logger.js +2 -0
  139. package/dist/daemon/gate-logger.js.map +1 -0
  140. package/dist/daemon/jinn-claim-loop.js +22 -4
  141. package/dist/daemon/jinn-claim-loop.js.map +1 -1
  142. package/dist/daemon/readiness-gate.d.ts +1 -4
  143. package/dist/daemon/readiness-gate.js.map +1 -1
  144. package/dist/daemon/spend-cap-gate.d.ts +40 -0
  145. package/dist/daemon/spend-cap-gate.js +46 -0
  146. package/dist/daemon/spend-cap-gate.js.map +1 -0
  147. package/dist/dashboard/assets/index-3quVQqik.js +167 -0
  148. package/dist/dashboard/assets/index-BVAWkLwY.css +1 -0
  149. package/dist/dashboard/index.html +2 -2
  150. package/dist/discovery/http.d.ts +7 -0
  151. package/dist/discovery/http.js +567 -24
  152. package/dist/discovery/http.js.map +1 -1
  153. package/dist/discovery/onchain.js +197 -5
  154. package/dist/discovery/onchain.js.map +1 -1
  155. package/dist/discovery/types.d.ts +235 -0
  156. package/dist/discovery/types.js +40 -0
  157. package/dist/discovery/types.js.map +1 -1
  158. package/dist/discovery/with-fallback.js +41 -0
  159. package/dist/discovery/with-fallback.js.map +1 -1
  160. package/dist/earning/bootstrap.d.ts +31 -3
  161. package/dist/earning/bootstrap.js +94 -22
  162. package/dist/earning/bootstrap.js.map +1 -1
  163. package/dist/earning/faucet.d.ts +1 -1
  164. package/dist/earning/faucet.js +2 -2
  165. package/dist/earning/faucet.js.map +1 -1
  166. package/dist/earning/safe-adapter.js +34 -11
  167. package/dist/earning/safe-adapter.js.map +1 -1
  168. package/dist/earning/types.d.ts +6 -6
  169. package/dist/earning/viem-clients.d.ts +11 -4
  170. package/dist/earning/viem-clients.js +14 -5
  171. package/dist/earning/viem-clients.js.map +1 -1
  172. package/dist/erc8004/identity.d.ts +19 -3
  173. package/dist/erc8004/identity.js +38 -11
  174. package/dist/erc8004/identity.js.map +1 -1
  175. package/dist/erc8004/index.d.ts +1 -1
  176. package/dist/erc8004/index.js.map +1 -1
  177. package/dist/eval/eval-harness-run.d.ts +63 -0
  178. package/dist/eval/eval-harness-run.js +123 -0
  179. package/dist/eval/eval-harness-run.js.map +1 -0
  180. package/dist/eval/orchestrator.d.ts +163 -0
  181. package/dist/eval/orchestrator.js +232 -0
  182. package/dist/eval/orchestrator.js.map +1 -0
  183. package/dist/eval/paired.d.ts +68 -0
  184. package/dist/eval/paired.js +93 -0
  185. package/dist/eval/paired.js.map +1 -0
  186. package/dist/eval/resolve-slate-tasks.d.ts +35 -0
  187. package/dist/eval/resolve-slate-tasks.js +56 -0
  188. package/dist/eval/resolve-slate-tasks.js.map +1 -0
  189. package/dist/eval/screen-discovery.d.ts +22 -0
  190. package/dist/eval/screen-discovery.js +71 -0
  191. package/dist/eval/screen-discovery.js.map +1 -0
  192. package/dist/eval/screen-progress.d.ts +41 -0
  193. package/dist/eval/screen-progress.js +60 -0
  194. package/dist/eval/screen-progress.js.map +1 -0
  195. package/dist/eval/screen-runner.d.ts +30 -0
  196. package/dist/eval/screen-runner.js +289 -0
  197. package/dist/eval/screen-runner.js.map +1 -0
  198. package/dist/eval/screen.d.ts +107 -0
  199. package/dist/eval/screen.js +159 -0
  200. package/dist/eval/screen.js.map +1 -0
  201. package/dist/eval/slope.d.ts +29 -0
  202. package/dist/eval/slope.js +46 -0
  203. package/dist/eval/slope.js.map +1 -0
  204. package/dist/eval/train-sequence.d.ts +35 -0
  205. package/dist/eval/train-sequence.js +59 -0
  206. package/dist/eval/train-sequence.js.map +1 -0
  207. package/dist/eval/wilson.d.ts +45 -0
  208. package/dist/eval/wilson.js +48 -0
  209. package/dist/eval/wilson.js.map +1 -0
  210. package/dist/events/types.d.ts +2 -2
  211. package/dist/harnesses/cost-estimates.d.ts +10 -31
  212. package/dist/harnesses/cost-estimates.js +11 -43
  213. package/dist/harnesses/cost-estimates.js.map +1 -1
  214. package/dist/harnesses/engine/canonical-json.js +5 -3
  215. package/dist/harnesses/engine/canonical-json.js.map +1 -1
  216. package/dist/harnesses/engine/engine.d.ts +37 -4
  217. package/dist/harnesses/engine/engine.js +151 -20
  218. package/dist/harnesses/engine/engine.js.map +1 -1
  219. package/dist/harnesses/engine/persistence.d.ts +38 -4
  220. package/dist/harnesses/engine/persistence.js +71 -6
  221. package/dist/harnesses/engine/persistence.js.map +1 -1
  222. package/dist/harnesses/engine/state.d.ts +9 -0
  223. package/dist/harnesses/engine/state.js +23 -10
  224. package/dist/harnesses/engine/state.js.map +1 -1
  225. package/dist/harnesses/impls/hermes-agent/adapter.d.ts +2 -0
  226. package/dist/harnesses/impls/hermes-agent/adapter.js +8 -5
  227. package/dist/harnesses/impls/hermes-agent/adapter.js.map +1 -1
  228. package/dist/harnesses/impls/hermes-agent/bootstrap.d.ts +1 -0
  229. package/dist/harnesses/impls/hermes-agent/bootstrap.js +10 -3
  230. package/dist/harnesses/impls/hermes-agent/bootstrap.js.map +1 -1
  231. package/dist/harnesses/impls/hermes-agent/config-builder.d.ts +1 -1
  232. package/dist/harnesses/impls/hermes-agent/config-builder.js +4 -2
  233. package/dist/harnesses/impls/hermes-agent/config-builder.js.map +1 -1
  234. package/dist/harnesses/impls/hermes-agent/harness.d.ts +31 -3
  235. package/dist/harnesses/impls/hermes-agent/harness.js +84 -7
  236. package/dist/harnesses/impls/hermes-agent/harness.js.map +1 -1
  237. package/dist/harnesses/impls/hermes-agent/prompt.d.ts +6 -6
  238. package/dist/harnesses/impls/hermes-agent/prompt.js +6 -6
  239. package/dist/harnesses/impls/index.d.ts +2 -0
  240. package/dist/harnesses/impls/index.js +2 -0
  241. package/dist/harnesses/impls/index.js.map +1 -1
  242. package/dist/harnesses/impls/learner/adapters/claude-code.d.ts +17 -0
  243. package/dist/harnesses/impls/learner/adapters/claude-code.js +118 -14
  244. package/dist/harnesses/impls/learner/adapters/claude-code.js.map +1 -1
  245. package/dist/harnesses/impls/learner/adapters/codex-code.d.ts +9 -0
  246. package/dist/harnesses/impls/learner/adapters/codex-code.js +30 -8
  247. package/dist/harnesses/impls/learner/adapters/codex-code.js.map +1 -1
  248. package/dist/harnesses/impls/learner/harness.d.ts +41 -1
  249. package/dist/harnesses/impls/learner/harness.js +78 -4
  250. package/dist/harnesses/impls/learner/harness.js.map +1 -1
  251. package/dist/harnesses/impls/learner/harvest.d.ts +3 -1
  252. package/dist/harnesses/impls/learner/harvest.js +30 -6
  253. package/dist/harnesses/impls/learner/harvest.js.map +1 -1
  254. package/dist/harnesses/impls/learner/plugin-path.js +1 -0
  255. package/dist/harnesses/impls/learner/plugin-path.js.map +1 -1
  256. package/dist/harnesses/impls/learner/restoration-patch.d.ts +2 -2
  257. package/dist/harnesses/impls/learner/restoration-patch.js +25 -6
  258. package/dist/harnesses/impls/learner/restoration-patch.js.map +1 -1
  259. package/dist/harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js +21 -1
  260. package/dist/harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js.map +1 -1
  261. package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js +3 -1
  262. package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js.map +1 -1
  263. package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.d.ts +74 -5
  264. package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js +103 -32
  265. package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js.map +1 -1
  266. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.d.ts +2 -2
  267. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js +3 -1
  268. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js.map +1 -1
  269. package/dist/harnesses/readiness-registry.d.ts +7 -0
  270. package/dist/harnesses/readiness-registry.js +9 -0
  271. package/dist/harnesses/readiness-registry.js.map +1 -1
  272. package/dist/harnesses/types.d.ts +14 -0
  273. package/dist/learner/revert-decision.d.ts +74 -0
  274. package/dist/learner/revert-decision.js +73 -0
  275. package/dist/learner/revert-decision.js.map +1 -0
  276. package/dist/learner/revert-stats.d.ts +38 -0
  277. package/dist/learner/revert-stats.js +86 -0
  278. package/dist/learner/revert-stats.js.map +1 -0
  279. package/dist/local-provider-url.d.ts +3 -0
  280. package/dist/local-provider-url.js +28 -0
  281. package/dist/local-provider-url.js.map +1 -0
  282. package/dist/main.js +199 -104
  283. package/dist/main.js.map +1 -1
  284. package/dist/mcp/get-codedigest-reward.d.ts +13 -0
  285. package/dist/mcp/get-codedigest-reward.js +23 -0
  286. package/dist/mcp/get-codedigest-reward.js.map +1 -0
  287. package/dist/mcp/server.js +23 -0
  288. package/dist/mcp/server.js.map +1 -1
  289. package/dist/observability/debug-report-assemble.d.ts +43 -0
  290. package/dist/observability/debug-report-assemble.js +80 -0
  291. package/dist/observability/debug-report-assemble.js.map +1 -0
  292. package/dist/observability/emit-event.d.ts +9 -2
  293. package/dist/observability/emit-event.js +36 -2
  294. package/dist/observability/emit-event.js.map +1 -1
  295. package/dist/observability/file-logger.d.ts +69 -0
  296. package/dist/observability/file-logger.js +177 -0
  297. package/dist/observability/file-logger.js.map +1 -0
  298. package/dist/observability/redact-secrets.d.ts +65 -0
  299. package/dist/observability/redact-secrets.js +300 -0
  300. package/dist/observability/redact-secrets.js.map +1 -0
  301. package/dist/observability/tar.d.ts +30 -0
  302. package/dist/observability/tar.js +102 -0
  303. package/dist/observability/tar.js.map +1 -0
  304. package/dist/plugins/learner/.claude-plugin/plugin.json +1 -1
  305. package/dist/plugins/learner/.codex-plugin/plugin.json +1 -1
  306. package/dist/plugins/learner/hooks/session-start +30 -1
  307. package/dist/plugins/learner/skills/learn/consolidator-prompt.md +22 -1
  308. package/dist/plugins/learner/skills/learn/promoter-prompt.md +72 -1
  309. package/dist/preflight/deployment-readiness.d.ts +147 -0
  310. package/dist/preflight/deployment-readiness.js +366 -0
  311. package/dist/preflight/deployment-readiness.js.map +1 -0
  312. package/dist/preflight/pidfile-liveness.d.ts +50 -0
  313. package/dist/preflight/pidfile-liveness.js +117 -0
  314. package/dist/preflight/pidfile-liveness.js.map +1 -0
  315. package/dist/preflight/rpc-network.d.ts +40 -0
  316. package/dist/preflight/rpc-network.js +67 -1
  317. package/dist/preflight/rpc-network.js.map +1 -1
  318. package/dist/rpc/transport.d.ts +145 -0
  319. package/dist/rpc/transport.js +319 -0
  320. package/dist/rpc/transport.js.map +1 -0
  321. package/dist/scripts/donation-consumption-acceptance.js +7 -28
  322. package/dist/scripts/donation-consumption-acceptance.js.map +1 -1
  323. package/dist/scripts/swe-rebench-v2-pytest-missing.json +16 -0
  324. package/dist/solver-nets/prediction-operator-ux.d.ts +1 -2
  325. package/dist/solver-nets/prediction-operator-ux.js +56 -53
  326. package/dist/solver-nets/prediction-operator-ux.js.map +1 -1
  327. package/dist/solver-nets/registry.d.ts +19 -1
  328. package/dist/solver-nets/registry.js +37 -24
  329. package/dist/solver-nets/registry.js.map +1 -1
  330. package/dist/solver-types/_swe-rebench-v2-held-out-slate.d.ts +76 -0
  331. package/dist/solver-types/_swe-rebench-v2-held-out-slate.js +156 -0
  332. package/dist/solver-types/_swe-rebench-v2-held-out-slate.js.map +1 -0
  333. package/dist/solver-types/_swe-rebench-v2-pool-recovery.d.ts +81 -0
  334. package/dist/solver-types/_swe-rebench-v2-pool-recovery.js +116 -0
  335. package/dist/solver-types/_swe-rebench-v2-pool-recovery.js.map +1 -0
  336. package/dist/solver-types/_swe-rebench-v2-pool.d.ts +9 -2
  337. package/dist/solver-types/_swe-rebench-v2-pool.js +15 -20
  338. package/dist/solver-types/_swe-rebench-v2-pool.js.map +1 -1
  339. package/dist/solver-types/_swe-rebench-v2-state.d.ts +24 -0
  340. package/dist/solver-types/_swe-rebench-v2-state.js +33 -0
  341. package/dist/solver-types/_swe-rebench-v2-state.js.map +1 -1
  342. package/dist/solver-types/_swe-rebench-v2-validated-pool.d.ts +116 -2
  343. package/dist/solver-types/_swe-rebench-v2-validated-pool.js +296 -21
  344. package/dist/solver-types/_swe-rebench-v2-validated-pool.js.map +1 -1
  345. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json +20 -0
  346. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.json +19 -0
  347. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.screening-report.json +628 -0
  348. package/dist/solver-types/solver-type.d.ts +8 -0
  349. package/dist/solver-types/swe-rebench-v2-auto.d.ts +20 -11
  350. package/dist/solver-types/swe-rebench-v2-auto.js +64 -19
  351. package/dist/solver-types/swe-rebench-v2-auto.js.map +1 -1
  352. package/dist/solver-types/swe-rebench-v2.d.ts +10 -2
  353. package/dist/solver-types/swe-rebench-v2.js +233 -13
  354. package/dist/solver-types/swe-rebench-v2.js.map +1 -1
  355. package/dist/solvernets/daemon-init.d.ts +1 -1
  356. package/dist/solvernets/daemon-init.js +19 -4
  357. package/dist/solvernets/daemon-init.js.map +1 -1
  358. package/dist/solvernets/launched-record-dispatcher.d.ts +7 -0
  359. package/dist/solvernets/launched-record-dispatcher.js +10 -4
  360. package/dist/solvernets/launched-record-dispatcher.js.map +1 -1
  361. package/dist/solvernets/registry-client-erc8004.js +40 -37
  362. package/dist/solvernets/registry-client-erc8004.js.map +1 -1
  363. package/dist/solvernets/registry-client.d.ts +6 -0
  364. package/dist/solvernets/store.d.ts +2 -2
  365. package/dist/solvernets/store.js +7 -2
  366. package/dist/solvernets/store.js.map +1 -1
  367. package/dist/spend/ai-units-config.d.ts +49 -0
  368. package/dist/spend/ai-units-config.js +34 -0
  369. package/dist/spend/ai-units-config.js.map +1 -0
  370. package/dist/spend/ai-units.d.ts +140 -0
  371. package/dist/spend/ai-units.js +229 -0
  372. package/dist/spend/ai-units.js.map +1 -0
  373. package/dist/spend/cost-surface-status.d.ts +12 -0
  374. package/dist/spend/cost-surface-status.js +24 -0
  375. package/dist/spend/cost-surface-status.js.map +1 -0
  376. package/dist/spend/credential.d.ts +39 -0
  377. package/dist/spend/credential.js +71 -0
  378. package/dist/spend/credential.js.map +1 -0
  379. package/dist/spend/daemon-config.d.ts +13 -0
  380. package/dist/spend/daemon-config.js +24 -0
  381. package/dist/spend/daemon-config.js.map +1 -0
  382. package/dist/spend/pricing.d.ts +16 -0
  383. package/dist/spend/pricing.js +26 -0
  384. package/dist/spend/pricing.js.map +1 -0
  385. package/dist/spend/record.d.ts +13 -0
  386. package/dist/spend/record.js +43 -0
  387. package/dist/spend/record.js.map +1 -0
  388. package/dist/spend/usage.d.ts +27 -0
  389. package/dist/spend/usage.js +113 -0
  390. package/dist/spend/usage.js.map +1 -0
  391. package/dist/store/store.d.ts +187 -0
  392. package/dist/store/store.js +467 -4
  393. package/dist/store/store.js.map +1 -1
  394. package/dist/trajectory/transcript-parsers/codex-session.d.ts +12 -6
  395. package/dist/trajectory/transcript-parsers/codex-session.js +114 -13
  396. package/dist/trajectory/transcript-parsers/codex-session.js.map +1 -1
  397. package/dist/trajectory/transcript-parsers/types.d.ts +8 -8
  398. package/dist/trajectory/transcript-session-dirs.d.ts +18 -0
  399. package/dist/trajectory/transcript-session-dirs.js +85 -0
  400. package/dist/trajectory/transcript-session-dirs.js.map +1 -0
  401. package/dist/trajectory/transcript-watcher.d.ts +20 -1
  402. package/dist/trajectory/transcript-watcher.js +108 -32
  403. package/dist/trajectory/transcript-watcher.js.map +1 -1
  404. package/dist/tx-retry.d.ts +25 -0
  405. package/dist/tx-retry.js +95 -7
  406. package/dist/tx-retry.js.map +1 -1
  407. package/dist/types/payloads/portfolio-v0.d.ts +3 -3
  408. package/dist/types/payloads/prediction-apy-v0.d.ts +3 -3
  409. package/dist/types/payloads/prediction-v0.d.ts +12 -12
  410. package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.d.ts +108 -1
  411. package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.js +25 -1
  412. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.d.ts +65 -0
  413. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.js +123 -0
  414. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.d.ts +2 -2
  415. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.js +1 -1
  416. package/dist/vendor/@jinn-network/sdk/package.json +4 -0
  417. package/docker-compose.yml +3 -2
  418. package/package.json +30 -18
  419. package/plugins/learner/.claude-plugin/plugin.json +1 -1
  420. package/plugins/learner/.codex-plugin/plugin.json +1 -1
  421. package/plugins/learner/hooks/session-start +30 -1
  422. package/plugins/learner/skills/learn/consolidator-prompt.md +22 -1
  423. package/plugins/learner/skills/learn/promoter-prompt.md +72 -1
  424. package/plugins/swe-rebench-v2-diffmin/README.md +10 -9
  425. package/plugins/swe-rebench-v2-diffmin/jinn.plugin.json +1 -1
  426. package/plugins/swe-rebench-v2-diffmin/skills/diffmin/SKILL.md +15 -10
  427. package/plugins/swe-rebench-v2-diffmin/skills/test-map/SKILL.md +10 -12
  428. package/plugins/swe-rebench-v2-runtime/.claude-plugin/plugin.json +1 -1
  429. package/plugins/swe-rebench-v2-runtime/.codex-plugin/plugin.json +3 -3
  430. package/plugins/swe-rebench-v2-runtime/README.md +6 -6
  431. package/plugins/swe-rebench-v2-runtime/hooks/hooks.json +16 -0
  432. package/plugins/swe-rebench-v2-runtime/hooks/session-start +74 -0
  433. package/plugins/swe-rebench-v2-runtime/jinn.plugin.json +2 -3
  434. package/plugins/swe-rebench-v2-runtime/skills/task/SKILL.md +81 -0
  435. package/dist/dashboard/assets/index-BUlE8F3Y.js +0 -330
  436. package/dist/dashboard/assets/index-blqc7eqq.css +0 -32
  437. package/plugins/swe-rebench-v2-runtime/skills/orient/SKILL.md +0 -29
  438. package/plugins/swe-rebench-v2-runtime/skills/plan/SKILL.md +0 -53
@@ -0,0 +1,289 @@
1
+ import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'node:fs';
2
+ import { tmpdir } from 'node:os';
3
+ import { join, dirname } from 'node:path';
4
+ import { fileURLToPath } from 'node:url';
5
+ import { loadConfig } from '../config.js';
6
+ import { Store } from '../store/store.js';
7
+ import { hashImplStateDir } from '../harnesses/freeze.js';
8
+ import { LearnerHarness } from '../harnesses/impls/learner/harness.js';
9
+ import { ClaudeCodeHarnessAdapter } from '../harnesses/impls/learner/adapters/claude-code.js';
10
+ import { CodexCodeHarnessAdapter } from '../harnesses/impls/learner/adapters/codex-code.js';
11
+ import { CODEX_HARNESS } from '../harnesses/names.js';
12
+ import { runHarnessForEval, resolveRuntimePluginsForSolverType } from './eval-harness-run.js';
13
+ import { corpusEnvFromConfig } from '../cli/commands/eval.js';
14
+ import { loadSweRebenchV2Pool, defaultStateDir, getSweRebenchV2ValidatedPoolStore, } from '../solver-types/swe-rebench-v2.js';
15
+ import { PoolCacheStore, loadPoolWithCacheFallback } from '../solver-types/_swe-rebench-v2-pool-cache.js';
16
+ import { validatePoolInstances, EVAL_SEMANTICS_VERSION, } from '../solver-types/_swe-rebench-v2-validated-pool.js';
17
+ import { resolveSlateTasks } from './resolve-slate-tasks.js';
18
+ import { loadActiveHeldOutSlateIds, ACTIVE_HELD_OUT_SLATE_VERSIONS, loadHeldOutSlate, } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
19
+ import { GeneratorStateStore } from '../solver-types/_swe-rebench-v2-state.js';
20
+ import { fetchAttemptedInstanceIds } from './screen-discovery.js';
21
+ import { DEFAULT_TESTNET_DISCOVERY_URL } from '../config.js';
22
+ import { solverTypeFromJoinedContract } from '../solver-nets/registry.js';
23
+ import { SweRebenchV2Evaluator } from '../harnesses/impls/swe-rebench-v2-evaluator/index.js';
24
+ import { HttpHfFetcher } from '../harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js';
25
+ import { PythonEvalRunner } from '../harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js';
26
+ import { readEnabledState, defaultSweRebenchV2EvaluatorImplStateDir } from '../harnesses/impls/swe-rebench-v2-evaluator/harness.js';
27
+ import { stratifyByRepo, screenBaseFailures, buildV2SlateFile, } from './screen.js';
28
+ import { ScreenProgressStore, screenSignature } from './screen-progress.js';
29
+ const DISPATCH_SOLVER_TYPE = 'swe-rebench-v2.v1'; // solver-type id used below to resolve runtime plugins, load the active held-out slate ids, and pick the matching joined solver net
30
+ const SLATE_VERSION = 'v2'; // NOTE(review): presumably the version tag of the slate this screen produces — its use is outside the visible part of the file; confirm
31
+ const RUN_BUDGET_MS = 3_600_000; // 3,600,000 ms = 1 hour; NOTE(review): the consumer of this budget is not visible here — confirm what it bounds
32
+ /** Returns the directory holding the shipped slate JSON files: `../solver-types/slates` relative to the directory of this module, resolved from `import.meta.url`. dist/src parity: the shipped slate JSON lives next to the compiled module, so the same relative path is used in both layouts. */
33
+ function slatesDir() {
34
+ return join(dirname(fileURLToPath(import.meta.url)), '..', 'solver-types', 'slates');
35
+ }
36
+ export async function runScreenHeldOut(opts) {
37
+ const log = opts.log ?? (() => { });
38
+ const config = loadConfig(opts.configPath);
39
+ // Precondition: evaluator enabled (upstream repo cloned).
40
+ const enabled = readEnabledState(defaultSweRebenchV2EvaluatorImplStateDir());
41
+ if (!enabled) {
42
+ throw new Error('swe-rebench-v2 evaluator not enabled — run `jinn harnesses enable swe-rebench-v2-evaluator` first');
43
+ }
44
+ const upstreamRepoDir = enabled.upstreamRepoDir;
45
+ const stateDir = process.env['JINN_SWE_REBENCH_V2_STATE_DIR'] ?? defaultStateDir();
46
+ const fetcher = new HttpHfFetcher();
47
+ const evaluator = new SweRebenchV2Evaluator({ fetcher, runner: new PythonEvalRunner({ upstreamRepoDir }) });
48
+ const validatedStore = getSweRebenchV2ValidatedPoolStore();
49
+ const runtimePlugins = await resolveRuntimePluginsForSolverType(DISPATCH_SOLVER_TYPE, config.joinedSolverNets);
50
+ // Common adapter wiring (mirrors buildEvalHarness in cli/commands/eval.ts).
51
+ const daemonApiToken = process.env['DAEMON_API_TOKEN']?.trim();
52
+ const corpusEnv = corpusEnvFromConfig(config);
53
+ const common = {
54
+ claudePath: config.claudePath ?? 'claude',
55
+ storePath: config.dbPath,
56
+ daemonApiUrl: `http://127.0.0.1:${config.apiPort}`,
57
+ ...(daemonApiToken ? { daemonApiToken } : {}),
58
+ ...(corpusEnv ? { corpusEnv } : {}),
59
+ };
60
+ const baseHarness = new LearnerHarness({
61
+ adapter: new ClaudeCodeHarnessAdapter({ ...common, claudeModel: config.claudeModel }),
62
+ claudePath: common.claudePath,
63
+ });
64
+ // Prover harness: codex (default) or claude-code (e.g. an Opus prover via the
65
+ // working Claude auth, sidestepping a codex rate limit; same-family Haiku→Opus
66
+ // is a clean capability ladder for "proven headroom").
67
+ const proverKind = opts.proverHarness ?? 'codex';
68
+ const proverHarness = proverKind === 'claude-code'
69
+ ? new LearnerHarness({
70
+ adapter: new ClaudeCodeHarnessAdapter({ ...common, claudeModel: opts.proverModel ?? 'opus' }),
71
+ claudePath: common.claudePath,
72
+ })
73
+ : new LearnerHarness({
74
+ name: CODEX_HARNESS,
75
+ adapter: new CodexCodeHarnessAdapter({ ...common, ...(opts.proverModel ? { codexModel: opts.proverModel } : {}) }),
76
+ claudePath: common.claudePath,
77
+ ...(config.codexPath !== undefined ? { codexPath: config.codexPath } : {}),
78
+ });
79
+ // Candidate pool (whole gradeable pool by default; scopeable).
80
+ const cacheResult = await loadPoolWithCacheFallback({
81
+ loadPool: loadSweRebenchV2Pool, cache: new PoolCacheStore({ stateDir }), currentPool: [],
82
+ });
83
+ let pool = cacheResult.pool;
84
+ if (pool.length === 0)
85
+ throw new Error(`SWE-rebench v2 pool empty${cacheResult.error ? ` (${cacheResult.error.message})` : ''}`);
86
+ // Held-out discipline (#986): draw the exam from the never-trained, never-held-out
87
+ // remainder. Union three exclusion sources:
88
+ // - active held-out slate (would overlap an existing exam);
89
+ // - already-ATTEMPTED on-network (indexer verdictEnvelopeMeta, any verdict,
90
+ // cross-operator, CURRENT) — the authoritative "the learner trained on it"
91
+ // signal; and
92
+ // - this box's posted ledger (belt — may be STALE when another generator is
93
+ // the active poster, e.g. a hosted operator; hence the indexer is the truth).
94
+ // A trained instance held out later would make a trained-checkpoint pass count
95
+ // as memorization, not generalization.
96
+ const heldOutIds = loadActiveHeldOutSlateIds(DISPATCH_SOLVER_TYPE, ACTIVE_HELD_OUT_SLATE_VERSIONS);
97
+ const postedIds = await new GeneratorStateStore({ stateDir }).postedInstanceIds();
98
+ let attemptedIds = new Set();
99
+ const discoveryUrl = config.discovery?.url?.trim()
100
+ || (config.network === 'testnet' ? DEFAULT_TESTNET_DISCOVERY_URL : undefined);
101
+ const joinedNet = Object.values(config.joinedSolverNets ?? {}).find((n) => solverTypeFromJoinedContract(n) === DISPATCH_SOLVER_TYPE);
102
+ if (discoveryUrl && joinedNet?.manifestCid) {
103
+ attemptedIds = await fetchAttemptedInstanceIds(discoveryUrl, joinedNet.manifestCid);
104
+ log(`[screen] indexer: ${attemptedIds.size} instance(s) already attempted on-network (authoritative, cross-operator) → excluded`);
105
+ }
106
+ else {
107
+ log('[screen] WARNING: no discovery URL / manifestCid — cannot exclude already-attempted instances; relying on the local posted ledger, which may be stale');
108
+ }
109
+ const excludeIds = new Set([...heldOutIds, ...postedIds, ...attemptedIds]);
110
+ if (opts.instanceIds?.length) {
111
+ // Explicit operator override — screen exactly these, but warn if any are
112
+ // already contaminated (attempted/posted/held-out) so an intentional pick is informed.
113
+ const want = new Set(opts.instanceIds);
114
+ pool = pool.filter((t) => want.has(t.instance_id));
115
+ const tainted = pool.filter((t) => excludeIds.has(t.instance_id)).map((t) => t.instance_id);
116
+ if (tainted.length > 0) {
117
+ log(`[screen] WARNING: ${tainted.length} explicitly-named instance(s) are already attempted/posted/held-out (NOT clean held-out candidates): ${tainted.join(', ')}`);
118
+ }
119
+ }
120
+ else {
121
+ if (opts.repo)
122
+ pool = pool.filter((t) => t.instance_id.startsWith(`${opts.repo}__`));
123
+ const before = pool.length;
124
+ pool = pool.filter((t) => !excludeIds.has(t.instance_id));
125
+ // Restrict to ALREADY-VALIDATED-SCORABLE candidates (#986): the never-validated
126
+ // tail is mostly not-gradeable (~90% deeper in the pool), so base-screening it
127
+ // wastes inference. Discovery of gradeability is `validate-pool`'s job; the
128
+ // screen selects held-out FROM the scorable set. Falls back to the full
129
+ // remainder only when no validation data exists yet (and warns).
130
+ const scorableIds = await validatedStore.getScorableIds(EVAL_SEMANTICS_VERSION);
131
+ if (scorableIds) {
132
+ const beforeScorable = pool.length;
133
+ pool = pool.filter((t) => scorableIds.has(t.instance_id));
134
+ log(`[screen] candidate pool ${before} → ${beforeScorable} (excluded ${excludeIds.size}: ${heldOutIds.size} held-out ∪ ${postedIds.size} posted ∪ ${attemptedIds.size} attempted) → ${pool.length} validated-scorable (run validate-pool to grow this)`);
135
+ }
136
+ else {
137
+ log(`[screen] WARNING: no validated-scorable data — screening the full ${pool.length}-task remainder (mostly not-gradeable; run validate-pool first for efficiency)`);
138
+ }
139
+ }
140
+ const candidates = stratifyByRepo(pool);
141
+ log(`[screen] ${candidates.length} candidate(s) after stratification`);
142
+ // Resolve a single instance to the {task,row} the harness + grader need.
143
+ const byId = new Map(pool.map((t) => [t.instance_id, t]));
144
/**
 * Execute one frozen attempt of `harness` against `poolTask` and grade the patch.
 *
 * Resolves to `{ passed }` carrying the grader's `passed_match`, or to
 * `{ passed: null, unscorableReason }` when no gradeable result exists. It does
 * not reject for harness/grader failures: anything thrown inside the `try` is
 * converted into an unscorable outcome labelled with the stage that was in
 * flight (`resolve`, `harness` or `grade`). A fresh scratch impl-state
 * directory is created per call and removed on every exit path.
 */
async function runOnce(harness, poolTask) {
    const scratchDir = mkdtempSync(join(tmpdir(), 'jinn-screen-state-'));
    const unscorable = (unscorableReason) => ({ passed: null, unscorableReason });
    // Stage currently in flight; used to label a thrown failure (#476 keeps
    // infra failures out of the denominator, so they must stay diagnosable).
    let phase = 'resolve';
    try {
        const [entry] = await resolveSlateTasks({
            poolTasks: [poolTask],
            hf_dataset: poolTask.hf_dataset,
            hf_split: poolTask.hf_split,
            fetcher,
        });
        if (!entry) {
            return unscorable('resolve: instance not in pool');
        }
        const evalTask = {
            id: poolTask.instance_id,
            description: entry.task.problem_statement,
            role: 'restoration',
            solverType: DISPATCH_SOLVER_TYPE,
            spec: entry.task,
            window: { startTs: 0, endTs: Date.now() + RUN_BUDGET_MS },
        };
        phase = 'harness';
        const attempt = await runHarnessForEval({
            harness,
            task: evalTask,
            solverType: DISPATCH_SOLVER_TYPE,
            runtimePlugins,
            implStateDir: scratchDir,
            mode: 'frozen',
        });
        if (attempt.violation) {
            return unscorable('harness: freeze-fence violation');
        }
        if (!attempt.solution) {
            return unscorable('harness: no solution produced');
        }
        phase = 'grade';
        const verdict = await evaluator.grade({
            task: entry.task,
            solutionPayload: { schemaVersion: 'swe-rebench-v2-solution.v1', patch: attempt.solution.patch },
            row: entry.row,
        });
        return { passed: verdict.passed_match };
    }
    catch (err) {
        // Any thrown failure is unscorable, never a fail (#476). A message that
        // looks like "the agent produced no patch" gets its own label; everything
        // else is reported as `<stage>-error` with the message capped at 200 chars.
        const message = err instanceof Error ? err.message : String(err);
        const looksLikeNoPatch = /produced no\b|no .*patch/i.test(message);
        return unscorable(looksLikeNoPatch
            ? `${phase}: agent produced no patch`
            : `${phase}-error: ${message.slice(0, 200)}`);
    }
    finally {
        rmSync(scratchDir, { recursive: true, force: true });
    }
}
194
+ const emptyBaseDir = mkdtempSync(join(tmpdir(), 'jinn-screen-base-'));
195
+ const hashOpts = baseHarness.freezeStateHashIgnore?.length
196
+ ? { ignoreRelPaths: [...baseHarness.freezeStateHashIgnore] } : undefined;
197
+ const baseCodeDigest = `sha256:${await hashImplStateDir(emptyBaseDir, hashOpts)}`;
198
+ rmSync(emptyBaseDir, { recursive: true, force: true });
199
+ // Resumability: cache each candidate's measurement under a config signature so
200
+ // an interrupted run resumes (re-run the same command). The base policy is fixed
201
+ // (empty impl-state) so its measurement is stable; the signature invalidates the
202
+ // cache if the base model / prover / R / semantics change.
203
+ const proverModelLabel = opts.proverModel ?? (proverKind === 'claude-code' ? 'opus' : 'codex-default');
204
+ const progress = new ScreenProgressStore({
205
+ stateDir,
206
+ signature: screenSignature({
207
+ baseModel: config.claudeModel, proverHarness: proverKind, proverModel: proverModelLabel,
208
+ R: opts.R, evalSemanticsVersion: EVAL_SEMANTICS_VERSION,
209
+ }),
210
+ });
211
+ if (progress.size > 0)
212
+ log(`[screen] resuming: ${progress.size} candidate(s) already measured (cached) for this config`);
213
+ const deps = {
214
+ log,
215
+ getCachedMeasurement: (id) => progress.get(id),
216
+ recordMeasurement: (id, m) => progress.record(id, m),
217
+ ensureGradeable: async (task) => {
218
+ await validatePoolInstances([task], {
219
+ fetcher, runner: new PythonEvalRunner({ upstreamRepoDir }), store: validatedStore,
220
+ semanticsVersion: EVAL_SEMANTICS_VERSION, upstreamRepoDir,
221
+ }, {});
222
+ return (await validatedStore.getEntry(task.instance_id, EVAL_SEMANTICS_VERSION))?.scorable === true;
223
+ },
224
+ runBaseFrozen: (task) => runOnce(baseHarness, byId.get(task.instance_id)),
225
+ runProverFrozen: (task) => runOnce(proverHarness, byId.get(task.instance_id)),
226
+ };
227
+ const result = await screenBaseFailures(candidates, deps, {
228
+ R: opts.R, heldOutCount: opts.heldOutCount, maxCandidates: opts.maxCandidates, perRepoCap: opts.perRepoCap,
229
+ });
230
+ // Diagnosability: a base-failing candidate routed to the prover that comes
231
+ // back `proverPassed: null` means the prover produced NO gradeable result
232
+ // (errored / no patch), not a clean "the prover can't solve it". That silently
233
+ // routes to `no-headroom` and can yield a misleadingly empty slate when the
234
+ // prover is simply unavailable (e.g. codex CLI < 0.133.0, or auth missing).
235
+ // Surface it loudly rather than swallow it.
236
+ const proverUnscorable = result.screened.filter((s) => s.reason === 'no-headroom' && s.proverPassed === null).length;
237
+ if (proverUnscorable > 0) {
238
+ log(`[screen] WARNING: the prover returned no gradeable result on ${proverUnscorable} base-failing ` +
239
+ `candidate(s) — excluded as no-headroom, but this likely means the prover is UNAVAILABLE rather than ` +
240
+ `unable. Verify the codex CLI (>=0.133.0) + auth, then re-run. See proverPassed=null rows in the report.`);
241
+ }
242
+ // Cumulative v2: union the EXISTING slate with this run's NEW held-out, so
243
+ // re-running to GROW the exam never drops already-reserved instances. (The
244
+ // active-slate exclusion keeps existing held-out OUT of candidates, so
245
+ // result.heldOut is only the new admits.) Growing changes the slate's content
246
+ // hash; re-recording the base arm for the FULL set below upserts the prior rows
247
+ // to the new hash, so the orchestrator's slate-hash-drift guard stays satisfied.
248
+ let existingHeldOut = [];
249
+ try {
250
+ existingHeldOut = [...loadHeldOutSlate(DISPATCH_SOLVER_TYPE, SLATE_VERSION).instanceIds];
251
+ }
252
+ catch {
253
+ /* no v2 slate yet — this is the first cut */
254
+ }
255
+ const newHeldOut = result.heldOut.map((h) => h.instance_id);
256
+ const allHeldOutIds = [...new Set([...existingHeldOut, ...newHeldOut])];
257
+ const generatedAt = new Date().toISOString();
258
+ const slateFile = buildV2SlateFile(allHeldOutIds, generatedAt);
259
+ mkdirSync(slatesDir(), { recursive: true });
260
+ const slatePath = join(slatesDir(), 'held-out-slate.swe-rebench-v2.v2.json');
261
+ writeFileSync(slatePath, `${JSON.stringify(slateFile, null, 2)}\n`);
262
+ const reportPath = join(slatesDir(), 'held-out-slate.swe-rebench-v2.v2.screening-report.json');
263
+ writeFileSync(reportPath, `${JSON.stringify({
264
+ generatedAt, evalSemanticsVersion: EVAL_SEMANTICS_VERSION, baseCodeDigest,
265
+ R: opts.R, proverHarness: proverKind,
266
+ proverModel: opts.proverModel ?? (proverKind === 'claude-code' ? 'opus' : 'codex-default'),
267
+ heldOutTotal: allHeldOutIds.length, newThisRun: newHeldOut, carriedOver: existingHeldOut,
268
+ screened: result.screened,
269
+ }, null, 2)}\n`);
270
+ // Persist the base arm (all-fail) for the FULL held-out set under the (possibly
271
+ // new) slate hash. The upsert is keyed by (checkpoint, slate_version, instance_id),
272
+ // so prior rows get their slate_hash refreshed to match the grown slate — no drift.
273
+ const store = new Store(config.dbPath);
274
+ try {
275
+ const runAtMs = Date.now();
276
+ for (const id of allHeldOutIds) {
277
+ store.recordEvalResult({
278
+ checkpoint_cid: baseCodeDigest, slate_hash: slateFile.hash, slate_version: SLATE_VERSION,
279
+ instance_id: id, passed: false, unscorable: false, code_digest: baseCodeDigest,
280
+ run_at_ms: runAtMs, test_log_excerpt: 'base arm (screening): consistent fail 0/R',
281
+ });
282
+ }
283
+ }
284
+ finally {
285
+ store.close?.();
286
+ }
287
+ return { result, baseCodeDigest, slatePath, reportPath, heldOutCount: allHeldOutIds.length, proverUnscorable };
288
+ }
289
+ //# sourceMappingURL=screen-runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"screen-runner.js","sourceRoot":"","sources":["../../src/eval/screen-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACxE,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAIzC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,uCAAuC,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,MAAM,oDAAoD,CAAC;AAC9F,OAAO,EAAE,uBAAuB,EAAE,MAAM,mDAAmD,CAAC;AAC5F,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,kCAAkC,EAAE,MAAM,uBAAuB,CAAC;AAC9F,OAAO,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAC9D,OAAO,EACL,oBAAoB,EAAE,eAAe,EAAE,iCAAiC,GACzE,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,cAAc,EAAE,yBAAyB,EAAE,MAAM,+CAA+C,CAAC;AAC1G,OAAO,EACL,qBAAqB,EAAE,sBAAsB,GAC9C,MAAM,mDAAmD,CAAC;AAC3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EACL,yBAAyB,EAAE,8BAA8B,EAAE,gBAAgB,GAC5E,MAAM,mDAAmD,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,0CAA0C,CAAC;AAC/E,OAAO,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,EAAE,6BAA6B,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EAAE,4BAA4B,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,EAAE,qBAAqB,EAAE,MAAM,sDAAsD,CAAC;AAC7F,OAAO,EAAE,aAAa,EAAE,MAAM,2DAA2D,CAAC;AAC1F,OAAO,EAAE,gBAAgB,EAAE,MAAM,4DAA4D,CAAC;AAC9F,OAAO,EAAE,gBAAgB,EAAE,wCAAwC,EAAE,MAAM,wDAAwD,CAAC;AACpI,OAAO,EACL,cAAc,EAAE,kBAAkB,EAAE,gBAAgB,GAErD,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAE5E,MAAM,oBAAoB,GAAG,mBAAmB,CAAC;AACjD,MAAM,aAAa,GAAG,IAAI,CAAC;AAC3B,MAAM,aAAa,GAAG,SAAS,CAAC;AAgChC,iFAAiF;AACjF,SAAS,SAAS;IAChB,OAAO,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;AACvF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAsB;IAC3D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAE3C,0DAA0D;IAC1D,MAAM,OAAO,GAAG,gBAAgB,CAAC,wCAA
wC,EAAE,CAAC,CAAC;IAC7E,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IACD,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAEhD,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,IAAI,eAAe,EAAE,CAAC;IACnF,MAAM,OAAO,GAAG,IAAI,aAAa,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,gBAAgB,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC,CAAC;IAC5G,MAAM,cAAc,GAAG,iCAAiC,EAAE,CAAC;IAC3D,MAAM,cAAc,GAAoB,MAAM,kCAAkC,CAC9E,oBAAoB,EAAE,MAAM,CAAC,gBAAgB,CAC9C,CAAC;IAEF,4EAA4E;IAC5E,MAAM,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,EAAE,IAAI,EAAE,CAAC;IAC/D,MAAM,SAAS,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG;QACb,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,QAAQ;QACzC,SAAS,EAAE,MAAM,CAAC,MAAM;QACxB,YAAY,EAAE,oBAAoB,MAAM,CAAC,OAAO,EAAE;QAClD,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7C,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACpC,CAAC;IACF,MAAM,WAAW,GAAY,IAAI,cAAc,CAAC;QAC9C,OAAO,EAAE,IAAI,wBAAwB,CAAC,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,CAAC;QACrF,UAAU,EAAE,MAAM,CAAC,UAAU;KAC9B,CAAC,CAAC;IACH,8EAA8E;IAC9E,+EAA+E;IAC/E,uDAAuD;IACvD,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC;IACjD,MAAM,aAAa,GAAY,UAAU,KAAK,aAAa;QACzD,CAAC,CAAC,IAAI,cAAc,CAAC;YACjB,OAAO,EAAE,IAAI,wBAAwB,CAAC,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,MAAM,EAAE,CAAC;YAC7F,UAAU,EAAE,MAAM,CAAC,UAAU;SAC9B,CAAC;QACJ,CAAC,CAAC,IAAI,cAAc,CAAC;YACjB,IAAI,EAAE,aAAa;YACnB,OAAO,EAAE,IAAI,uBAAuB,CAAC,EAAE,GAAG,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC;YAClH,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,GAAG,CAAC,MAAM,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC3E,CAAC,CAAC;IAEP,+DAA+D;IAC/D,MAAM,WAAW,GAAG,MAAM,yBAAyB,CAAC;QAClD,QAAQ,EAAE,oBAAoB,EAAE,KAAK,EAAE,IAAI,cAAc,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,WAAW,EAAE,EAAE;KACzF,CAAC,CAAC;IACH,IAAI,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC;IAC5B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QA
AE,MAAM,IAAI,KAAK,CAAC,4BAA4B,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAEjI,mFAAmF;IACnF,4CAA4C;IAC5C,6DAA6D;IAC7D,6EAA6E;IAC7E,8EAA8E;IAC9E,iBAAiB;IACjB,6EAA6E;IAC7E,iFAAiF;IACjF,+EAA+E;IAC/E,uCAAuC;IACvC,MAAM,UAAU,GAAG,yBAAyB,CAAC,oBAAoB,EAAE,8BAA8B,CAAC,CAAC;IACnG,MAAM,SAAS,GAAG,MAAM,IAAI,mBAAmB,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,iBAAiB,EAAE,CAAC;IAClF,IAAI,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACrC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,EAAE,GAAG,EAAE,IAAI,EAAE;WAC7C,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,6BAA6B,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAChF,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,IAAI,CACjE,CAAC,CAAC,EAAE,EAAE,CAAC,4BAA4B,CAAC,CAAC,CAAC,KAAK,oBAAoB,CAChE,CAAC;IACF,IAAI,YAAY,IAAI,SAAS,EAAE,WAAW,EAAE,CAAC;QAC3C,YAAY,GAAG,MAAM,yBAAyB,CAAC,YAAY,EAAE,SAAS,CAAC,WAAW,CAAC,CAAC;QACpF,GAAG,CAAC,qBAAqB,YAAY,CAAC,IAAI,sFAAsF,CAAC,CAAC;IACpI,CAAC;SAAM,CAAC;QACN,GAAG,CAAC,uJAAuJ,CAAC,CAAC;IAC/J,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,UAAU,EAAE,GAAG,SAAS,EAAE,GAAG,YAAY,CAAC,CAAC,CAAC;IACnF,IAAI,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE,CAAC;QAC7B,yEAAyE;QACzE,uFAAuF;QACvF,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAC5F,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,GAAG,CAAC,qBAAqB,OAAO,CAAC,MAAM,wGAAwG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvK,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,IAAI,CAAC,IAAI;YAAE,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;QACrF,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;QAC3B,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;QAC1D,gFAAgF;QAChF
,+EAA+E;QAC/E,4EAA4E;QAC5E,wEAAwE;QACxE,iEAAiE;QACjE,MAAM,WAAW,GAAG,MAAM,cAAc,CAAC,cAAc,CAAC,sBAAsB,CAAC,CAAC;QAChF,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC;YACnC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;YAC1D,GAAG,CAAC,2BAA2B,MAAM,MAAM,cAAc,cAAc,UAAU,CAAC,IAAI,KAAK,UAAU,CAAC,IAAI,eAAe,SAAS,CAAC,IAAI,aAAa,YAAY,CAAC,IAAI,iBAAiB,IAAI,CAAC,MAAM,sDAAsD,CAAC,CAAC;QAC3P,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,qEAAqE,IAAI,CAAC,MAAM,gFAAgF,CAAC,CAAC;QACxK,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IACxC,GAAG,CAAC,YAAY,UAAU,CAAC,MAAM,oCAAoC,CAAC,CAAC;IAEvE,yEAAyE;IACzE,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1D,KAAK,UAAU,OAAO,CAAC,OAAgB,EAAE,QAAkB;QACzD,MAAM,YAAY,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,oBAAoB,CAAC,CAAC,CAAC;QACvE,2EAA2E;QAC3E,6EAA6E;QAC7E,4BAA4B;QAC5B,IAAI,KAAK,GAAoC,SAAS,CAAC;QACvD,IAAI,CAAC;YACH,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,iBAAiB,CAAC;gBACzC,SAAS,EAAE,CAAC,QAAQ,CAAC,EAAE,UAAU,EAAE,QAAQ,CAAC,UAAU,EAAE,QAAQ,EAAE,QAAQ,CAAC,QAAQ,EAAE,OAAO;aAC7F,CAAC,CAAC;YACH,IAAI,CAAC,QAAQ;gBAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,+BAA+B,EAAE,CAAC;YAC1F,MAAM,IAAI,GAAS;gBACjB,EAAE,EAAE,QAAQ,CAAC,WAAW;gBACxB,WAAW,EAAE,QAAQ,CAAC,IAAI,CAAC,iBAAiB;gBAC5C,IAAI,EAAE,aAAa;gBACnB,UAAU,EAAE,oBAAoB;gBAChC,IAAI,EAAE,QAAQ,CAAC,IAA0C;gBACzD,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,aAAa,EAAE;aAC1D,CAAC;YACF,KAAK,GAAG,SAAS,CAAC;YAClB,MAAM,GAAG,GAAG,MAAM,iBAAiB,CAAC;gBAClC,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,oBAAoB,EAAE,cAAc,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ;aAC9F,CAAC,CAAC;YACH,IAAI,GAAG,CAAC,SAAS;gBAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,iCAAiC,EAAE,CAAC;YAChG,IAAI,CAAC,GAAG,CAAC,QAAQ;gBAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,+BAA+B,EAAE,CAAC;YAC9F,KAAK,GAAG,OAAO,CAAC;YAChB,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC;gBACpC,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,eAAe,EAAE,EAAE,aAAa,EAAE,
4BAA4B,EAAE,KAAK,EAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,EAAE;gBAC3F,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC,CAAC;YACH,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,YAAY,EAAE,CAAC;QAC1C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,2EAA2E;YAC3E,2EAA2E;YAC3E,qEAAqE;YACrE,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC;gBAClD,CAAC,CAAC,GAAG,KAAK,2BAA2B;gBACrC,CAAC,CAAC,GAAG,KAAK,WAAW,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;YAC3C,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC;QACpD,CAAC;gBAAS,CAAC;YACT,MAAM,CAAC,YAAY,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,mBAAmB,CAAC,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG,WAAW,CAAC,qBAAqB,EAAE,MAAM;QACxD,CAAC,CAAC,EAAE,cAAc,EAAE,CAAC,GAAG,WAAW,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC3E,MAAM,cAAc,GAAG,UAAU,MAAM,gBAAgB,CAAC,YAAY,EAAE,QAAQ,CAAC,EAAE,CAAC;IAClF,MAAM,CAAC,YAAY,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAEvD,+EAA+E;IAC/E,iFAAiF;IACjF,iFAAiF;IACjF,2DAA2D;IAC3D,MAAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,IAAI,CAAC,UAAU,KAAK,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IACvG,MAAM,QAAQ,GAAG,IAAI,mBAAmB,CAAC;QACvC,QAAQ;QACR,SAAS,EAAE,eAAe,CAAC;YACzB,SAAS,EAAE,MAAM,CAAC,WAAW,EAAE,aAAa,EAAE,UAAU,EAAE,WAAW,EAAE,gBAAgB;YACvF,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,oBAAoB,EAAE,sBAAsB;SACxD,CAAC;KACH,CAAC,CAAC;IACH,IAAI,QAAQ,CAAC,IAAI,GAAG,CAAC;QAAE,GAAG,CAAC,sBAAsB,QAAQ,CAAC,IAAI,yDAAyD,CAAC,CAAC;IAEzH,MAAM,IAAI,GAAe;QACvB,GAAG;QACH,oBAAoB,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9C,iBAAiB,EAAE,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC;QACpD,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,qBAAqB,CAAC,CAAC,IAAI,CAAC,EAAE;gBAClC,OAAO,EAAE,MAAM,EAAE,IAAI,gBAAgB,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,KAAK,EAAE,cAAc;gBACjF,gBAAgB,EAAE,sBAAsB,EAAE,eAAe;aAC1D,EAAE,EAAE,CAAC,CAAC;YACP,OAAO,CAAC,MAAM,cAAc,CAAC,QAAQ
,CAAC,IAAI,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC,EAAE,QAAQ,KAAK,IAAI,CAAC;QACtG,CAAC;QACD,aAAa,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAE,CAAC;QAC1E,eAAe,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAE,CAAC;KAC/E,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,IAAI,EAAE;QACxD,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,YAAY,EAAE,IAAI,CAAC,YAAY,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU;KAC3G,CAAC,CAAC;IAEH,2EAA2E;IAC3E,0EAA0E;IAC1E,+EAA+E;IAC/E,4EAA4E;IAC5E,4EAA4E;IAC5E,4CAA4C;IAC5C,MAAM,gBAAgB,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,aAAa,IAAI,CAAC,CAAC,YAAY,KAAK,IAAI,CAC7D,CAAC,MAAM,CAAC;IACT,IAAI,gBAAgB,GAAG,CAAC,EAAE,CAAC;QACzB,GAAG,CACD,gEAAgE,gBAAgB,gBAAgB;YAChG,sGAAsG;YACtG,yGAAyG,CAC1G,CAAC;IACJ,CAAC;IAED,2EAA2E;IAC3E,2EAA2E;IAC3E,uEAAuE;IACvE,8EAA8E;IAC9E,gFAAgF;IAChF,iFAAiF;IACjF,IAAI,eAAe,GAAa,EAAE,CAAC;IACnC,IAAI,CAAC;QACH,eAAe,GAAG,CAAC,GAAG,gBAAgB,CAAC,oBAAoB,EAAE,aAAa,CAAC,CAAC,WAAW,CAAC,CAAC;IAC3F,CAAC;IAAC,MAAM,CAAC;QACP,6CAA6C;IAC/C,CAAC;IACD,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC5D,MAAM,aAAa,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,eAAe,EAAE,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAExE,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC7C,MAAM,SAAS,GAAG,gBAAgB,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC;IAC/D,SAAS,CAAC,SAAS,EAAE,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,EAAE,uCAAuC,CAAC,CAAC;IAC7E,aAAa,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IACpE,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,EAAE,EAAE,wDAAwD,CAAC,CAAC;IAC/F,aAAa,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC;QAC1C,WAAW,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc;QACzE,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU;QACpC,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC,UAAU,KAAK,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC;QAC1F,YAAY,EAAE,aAAa,CAAC,MAAM,EAAE,UAAU,EAAE,UAAU,EAA
E,WAAW,EAAE,eAAe;QACxF,QAAQ,EAAE,MAAM,CAAC,QAAQ;KAC1B,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IAEjB,gFAAgF;IAChF,oFAAoF;IACpF,oFAAoF;IACpF,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACvC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;YAC/B,KAAK,CAAC,gBAAgB,CAAC;gBACrB,cAAc,EAAE,cAAc,EAAE,UAAU,EAAE,SAAS,CAAC,IAAI,EAAE,aAAa,EAAE,aAAa;gBACxF,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,WAAW,EAAE,cAAc;gBAC9E,SAAS,EAAE,OAAO,EAAE,gBAAgB,EAAE,2CAA2C;aAClF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;YAAS,CAAC;QACT,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC;IAClB,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,CAAC,MAAM,EAAE,gBAAgB,EAAE,CAAC;AACjH,CAAC"}
@@ -0,0 +1,107 @@
1
+ import type { PoolTask } from '../solver-types/_swe-rebench-v2-pool.js';
2
+ import { type HeldOutSlateArtifact } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
3
+ /** Stratification / diversity key: the org prefix of an instance_id
4
+ * (`tobymao__sqlglot-4661` → `tobymao`). Derivable without an HF fetch. */
5
+ export declare function repoOf(task: PoolTask): string;
6
+ /**
7
+ * Order candidates round-robin across repos so the first N base-fails span
8
+ * repos rather than clumping in alphabetically-early ones. Deterministic:
9
+ * instances sort by instance_id within each repo group; repo groups iterate in
10
+ * sorted repo order.
11
+ */
12
+ export declare function stratifyByRepo(pool: PoolTask[]): PoolTask[];
13
+ /** One frozen run's grade outcome. `passed: null` = unscorable (no gradeable result: harness/grader/infra failure). */
14
+ export interface ScreenCandidateRun {
15
+ passed: boolean | null;
16
+ /** When `passed === null`, the stage + error that made it unscorable
17
+ * (e.g. `resolve: …`, `harness: no solution produced`, `grade-error: …`) — surfaced so an
18
+ * exclusion is never an opaque black box. */
19
+ unscorableReason?: string;
20
+ }
21
+ /**
22
+ * The expensive, cacheable per-candidate outcome — gradeability + the base R-run
23
+ * loop + (if base is 0/R) the prover. Decoupled from the selection decision so a
24
+ * resumed run replays it from cache without re-spending inference. The decision
25
+ * (held-out / no-headroom / caps) is always recomputed fresh from this.
26
+ */
27
+ export interface ScreenMeasurement {
28
+ gradeable: boolean;
29
+ basePasses: number;
30
+ baseRuns: number;
31
+ baseUnscorable: boolean;
32
+ baseUnscorableReason?: string;
33
+ /** Whether the prover was reached (only when base is gradeable + 0/R). */
34
+ proverRan: boolean;
35
+ /** Meaningful only when `proverRan`; `null` = prover unscorable. */
36
+ proverPassed: boolean | null;
37
+ proverUnscorableReason?: string;
38
+ }
39
+ export interface ScreenDeps {
40
+ /** Confirm gradeable at the current semantics version (idempotent; cheap/cached). */
41
+ ensureGradeable(task: PoolTask): Promise<boolean>;
42
+ /** Base Haiku, frozen, empty impl-state. `passed: null` = unscorable. */
43
+ runBaseFrozen(task: PoolTask): Promise<ScreenCandidateRun>;
44
+ /** Prover (Codex by default; the screen runner can substitute a Claude Code prover), frozen, empty impl-state. `passed: null` = unscorable. */
45
+ runProverFrozen(task: PoolTask): Promise<ScreenCandidateRun>;
46
+ /** Resumability (optional): return a cached measurement for this instance, or
47
+ * undefined to measure live. A cache hit replays for free (no inference) and
48
+ * does NOT consume the maxCandidates budget. */
49
+ getCachedMeasurement?(instance_id: string): ScreenMeasurement | undefined;
50
+ /** Resumability (optional): persist a freshly-measured candidate so a later
51
+ * re-run of the same command resumes instead of restarting. */
52
+ recordMeasurement?(instance_id: string, m: ScreenMeasurement): void;
53
+ log?: (msg: string) => void; // Optional progress logger; screenBaseFailures falls back to a no-op when omitted.
54
+ }
55
+ export interface ScreenOpts {
56
+ /** Base runs per candidate (≥3). A candidate is a reliable fail iff 0/R passed. */
57
+ R: number;
58
+ /** Exam cap N. */
59
+ heldOutCount: number;
60
+ /** Budget: stop after this many candidates reach the base-run stage. */
61
+ maxCandidates: number;
62
+ /** Max held-out instances per repo (diversity). */
63
+ perRepoCap: number;
64
+ }
65
+ export type ScreenReason = 'held-out' | 'not-gradeable' | 'base-passes' | 'base-unscorable' | 'no-headroom' | 'per-repo-cap';
66
+ export interface ScreenedCandidate {
67
+ instance_id: string;
68
+ repo: string;
69
+ gradeable: boolean;
70
+ baseRuns: number;
71
+ basePasses: number;
72
+ proverPassed: boolean | null;
73
+ heldOut: boolean;
74
+ reason: ScreenReason;
75
+ /** For `base-unscorable` / `no-headroom`-via-unscorable: the stage + error
76
+ * the run reported, so the exclusion is diagnosable without transcript digs. */
77
+ unscorableReason?: string;
78
+ }
79
+ export interface ScreenResult {
80
+ heldOut: {
81
+ instance_id: string;
82
+ repo: string;
83
+ baseRuns: number;
84
+ }[];
85
+ screened: ScreenedCandidate[];
86
+ }
87
+ /**
88
+ * Partition a candidate stream into the held-out exam vs the rest, applying the
89
+ * three filter layers cheapest-first. `candidates` MUST already be ordered (use
90
+ * {@link stratifyByRepo}); selection order is the iteration order and is frozen.
91
+ *
92
+ * Resumable: if `deps.getCachedMeasurement` is provided, an already-measured
93
+ * candidate replays from cache (no inference, no budget cost), so re-running the
94
+ * same command resumes — the `maxCandidates` budget bounds only NEW measurements
95
+ * per invocation, letting a long screen proceed in budget-sized chunks. The
96
+ * selection decision (caps, held-out) is always recomputed fresh, so the cached
97
+ * measurements stay valid even if `heldOutCount`/`perRepoCap` change.
98
+ */
99
+ export declare function screenBaseFailures(candidates: PoolTask[], deps: ScreenDeps, opts: ScreenOpts): Promise<ScreenResult>;
100
+ /** The on-disk v2 slate file = the hashed artifact + a provenance `comment`
101
+ * (the comment is outside the canonical hash). solverType matches the
102
+ * `${solverType}.v1` key `jinn eval` loads with. */
103
+ export interface V2SlateFile extends HeldOutSlateArtifact {
104
+ comment: string;
105
+ hash: `sha256:${string}`;
106
+ }
107
+ export declare function buildV2SlateFile(instanceIds: string[], generatedAt: string): V2SlateFile;
@@ -0,0 +1,159 @@
1
+ import { HELD_OUT_SLATE_SCHEMA_VERSION, hashHeldOutSlateArtifact, } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
2
/**
 * Stratification / diversity key for a pool task: everything in its
 * `instance_id` before the first `__` (`tobymao__sqlglot-4661` → `tobymao`).
 * An id without a `__` separator is returned whole. Only the id is read.
 */
export function repoOf(task) {
    const id = task.instance_id;
    const cut = id.indexOf('__');
    if (cut === -1) {
        return id;
    }
    return id.slice(0, cut);
}
8
/**
 * Order candidates round-robin across repo groups so the first N base-fails
 * span repos rather than clumping in alphabetically-early ones.
 *
 * Deterministic: repo keys (see `repoOf`) iterate in sorted order and
 * instances sort by `instance_id` within each group. Both sorts use one
 * collator pinned to the `en` locale. A bare `localeCompare` collates with the
 * host's default locale, so the same pool could come out in a different order
 * on a differently-configured machine; pinning the locale removes that
 * dependency while matching the order an English-locale host produced before.
 *
 * The input array is not modified; a new array holding the same task objects
 * is returned.
 */
export function stratifyByRepo(pool) {
    // One shared collator: host-independent ordering, and cheaper than calling
    // localeCompare for every comparison on a large pool.
    const collator = new Intl.Collator('en');
    const groups = new Map();
    for (const task of pool) {
        const repo = repoOf(task);
        const bucket = groups.get(repo);
        if (bucket) {
            bucket.push(task);
        }
        else {
            groups.set(repo, [task]);
        }
    }
    // Sorted repo order, each group sorted by instance_id.
    const columns = [...groups.keys()]
        .sort(collator.compare)
        .map((repo) => groups.get(repo).sort((a, b) => collator.compare(a.instance_id, b.instance_id)));
    const depth = columns.reduce((max, group) => Math.max(max, group.length), 0);
    const out = [];
    // Round i takes the i-th instance of every group that still has one.
    for (let i = 0; i < depth; i++) {
        for (const group of columns) {
            if (i < group.length) {
                out.push(group[i]);
            }
        }
    }
    return out;
}
38
/**
 * Measure one candidate; this is the expensive, cacheable part of screening.
 *
 * Steps, cheapest first:
 *  1. `deps.ensureGradeable` — a non-gradeable task returns immediately with
 *     zero runs.
 *  2. Up to `R` base runs via `deps.runBaseFrozen`, stopping at the first
 *     unscorable run (`passed === null`) or the first pass.
 *  3. Only when every base run failed (0/R): one `deps.runProverFrozen` run.
 *
 * Pure measurement — no selection decision and no caps are applied here.
 * `baseRuns` counts the base runs actually executed.
 */
async function measureCandidate(task, deps, R) {
    const gradeable = await deps.ensureGradeable(task);
    if (!gradeable) {
        return { gradeable: false, basePasses: 0, baseRuns: 0, baseUnscorable: false, proverRan: false, proverPassed: null };
    }
    let baseRuns = 0;
    while (baseRuns < R) {
        const run = await deps.runBaseFrozen(task);
        baseRuns += 1;
        if (run.passed === null) {
            // One unscorable base run ends the measurement; the prover is not consulted.
            const reason = run.unscorableReason;
            return {
                gradeable: true, basePasses: 0, baseRuns, baseUnscorable: true,
                ...(reason ? { baseUnscorableReason: reason } : {}),
                proverRan: false, proverPassed: null,
            };
        }
        if (run.passed) {
            // Early stop: a single pass already disqualifies the candidate as a reliable fail.
            return { gradeable: true, basePasses: 1, baseRuns, baseUnscorable: false, proverRan: false, proverPassed: null };
        }
    }
    // Base reliably fails (0/R) → layer 3: prover (existence proof of headroom).
    const prover = await deps.runProverFrozen(task);
    const proverReason = prover.passed === null && prover.unscorableReason
        ? { proverUnscorableReason: prover.unscorableReason }
        : {};
    return {
        gradeable: true, basePasses: 0, baseRuns, baseUnscorable: false, proverRan: true, proverPassed: prover.passed,
        ...proverReason,
    };
}
77
+ /**
78
+ * Partition a candidate stream into the held-out exam vs the rest, applying the
79
+ * three filter layers cheapest-first. `candidates` MUST already be ordered (use
80
+ * {@link stratifyByRepo}); selection order is the iteration order and is frozen.
81
+ *
82
+ * Resumable: if `deps.getCachedMeasurement` is provided, an already-measured
83
+ * candidate replays from cache (no inference, no budget cost), so re-running the
84
+ * same command resumes — the `maxCandidates` budget bounds only NEW measurements
85
+ * per invocation, letting a long screen proceed in budget-sized chunks. The
86
+ * selection decision (caps, held-out) is always recomputed fresh, so the cached
87
+ * measurements stay valid even if `heldOutCount`/`perRepoCap` change.
88
+ */
89
+ export async function screenBaseFailures(candidates, deps, opts) {
90
+ const log = deps.log ?? (() => { });
91
+ const heldOut = [];
92
+ const screened = [];
93
+ const perRepo = new Map();
94
+ let liveMeasured = 0;
95
+ for (const task of candidates) {
96
+ if (heldOut.length >= opts.heldOutCount)
97
+ break;
98
+ const repo = repoOf(task);
99
+ const base = { instance_id: task.instance_id, repo, basePasses: 0, proverPassed: null };
100
+ // Measure (from cache, or live — bounded by the per-invocation budget).
101
+ let m = deps.getCachedMeasurement?.(task.instance_id);
102
+ if (!m) {
103
+ if (liveMeasured >= opts.maxCandidates)
104
+ break; // budget bounds NEW (inference-spending) measurements
105
+ m = await measureCandidate(task, deps, opts.R);
106
+ liveMeasured += 1;
107
+ deps.recordMeasurement?.(task.instance_id, m);
108
+ }
109
+ // Decide from the measurement (always fresh; cap/diversity not cached).
110
+ if (!m.gradeable) {
111
+ screened.push({ ...base, baseRuns: 0, gradeable: false, heldOut: false, reason: 'not-gradeable' });
112
+ continue;
113
+ }
114
+ if (m.baseUnscorable) {
115
+ if (m.baseUnscorableReason)
116
+ log(`[screen] ${task.instance_id} base-unscorable: ${m.baseUnscorableReason}`);
117
+ screened.push({ ...base, baseRuns: m.baseRuns, gradeable: true, heldOut: false, reason: 'base-unscorable', ...(m.baseUnscorableReason ? { unscorableReason: m.baseUnscorableReason } : {}) });
118
+ continue;
119
+ }
120
+ if (m.basePasses > 0) {
121
+ screened.push({ ...base, baseRuns: m.baseRuns, basePasses: m.basePasses, gradeable: true, heldOut: false, reason: 'base-passes' });
122
+ continue;
123
+ }
124
+ // Base 0/R → prover outcome (layer 3).
125
+ if (m.proverPassed !== true) {
126
+ if (m.proverPassed === null && m.proverUnscorableReason)
127
+ log(`[screen] ${task.instance_id} prover-unscorable: ${m.proverUnscorableReason}`);
128
+ screened.push({
129
+ ...base, baseRuns: m.baseRuns, gradeable: true, proverPassed: m.proverPassed, heldOut: false, reason: 'no-headroom',
130
+ ...(m.proverPassed === null && m.proverUnscorableReason ? { unscorableReason: m.proverUnscorableReason } : {}),
131
+ });
132
+ continue;
133
+ }
134
+ if ((perRepo.get(repo) ?? 0) >= opts.perRepoCap) {
135
+ screened.push({ ...base, baseRuns: m.baseRuns, gradeable: true, proverPassed: true, heldOut: false, reason: 'per-repo-cap' });
136
+ continue;
137
+ }
138
+ perRepo.set(repo, (perRepo.get(repo) ?? 0) + 1);
139
+ heldOut.push({ instance_id: task.instance_id, repo, baseRuns: m.baseRuns });
140
+ screened.push({ ...base, baseRuns: m.baseRuns, gradeable: true, proverPassed: true, heldOut: true, reason: 'held-out' });
141
+ log(`[screen] held out ${task.instance_id} (${heldOut.length}/${opts.heldOutCount})`);
142
+ }
143
+ return { heldOut, screened };
144
+ }
145
+ const V2_SLATE_COMMENT = 'BASELINE-FAILURE REGRESSION BENCHMARK (issue #986). Screened: gradeable at the current ' +
146
+ 'evalSemanticsVersion AND base claude-code/Haiku frozen fails 0/R (R≥3) AND a stronger Codex/GPT-5.5 ' +
147
+ 'prover passes ≥1 (proven headroom). Baseline 0% by construction. Held out from the generator train ' +
148
+ 'stream via the active-slate-version union. Content-addressed; scores comparable WITHIN this version only.';
149
+ export function buildV2SlateFile(instanceIds, generatedAt) {
150
+ const artifact = {
151
+ schemaVersion: HELD_OUT_SLATE_SCHEMA_VERSION,
152
+ solverType: 'swe-rebench-v2.v1',
153
+ version: 'v2',
154
+ generatedAt,
155
+ instanceIds: [...instanceIds].sort((a, b) => a.localeCompare(b)),
156
+ };
157
+ return { comment: V2_SLATE_COMMENT, ...artifact, hash: hashHeldOutSlateArtifact(artifact) };
158
+ }
159
+ //# sourceMappingURL=screen.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"screen.js","sourceRoot":"","sources":["../../src/eval/screen.ts"],"names":[],"mappings":"AACA,OAAO,EACL,6BAA6B,EAC7B,wBAAwB,GAEzB,MAAM,mDAAmD,CAAC;AAE3D;4EAC4E;AAC5E,MAAM,UAAU,MAAM,CAAC,IAAc;IACnC,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3C,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACxE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,IAAgB;IAC7C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAsB,CAAC;IAC7C,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnE,CAAC;IACD,MAAM,KAAK,GAAG,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IACpE,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;IAC/E,CAAC;IACD,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,IAAI,KAAK,GAAG,IAAI,CAAC;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,KAAK,GAAG,KAAK,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;YAC5B,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC;gBACjB,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;gBAChB,KAAK,GAAG,IAAI,CAAC;YACf,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAgFD;;;;GAIG;AACH,KAAK,UAAU,gBAAgB,CAAC,IAAc,EAAE,IAAgB,EAAE,CAAS;IACzE,MAAM,IAAI,GAAsB,EAAE,SAAS,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAC9I,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAErD,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,cAAc,GAAG,KAAK,CAAC;IAC3B,IAAI,oBAAwC,CAAC;IAC7C,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC
;QAClB,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;YAAC,cAAc,GAAG,IAAI,CAAC;YAAC,oBAAoB,GAAG,GAAG,CAAC,gBAAgB,CAAC;YAAC,MAAM;QAAC,CAAC;QACvG,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;YAAC,UAAU,EAAE,CAAC;YAAC,MAAM;QAAC,CAAC;IAC1C,CAAC;IACD,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,cAAc,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAChE,IAAI,cAAc,EAAE,CAAC;QACnB,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,GAAG,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,oBAAoB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAC7K,CAAC;IACD,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAChH,CAAC;IACD,6EAA6E;IAC7E,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;IAChD,OAAO;QACL,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM;QAC7G,GAAG,CAAC,MAAM,CAAC,MAAM,KAAK,IAAI,IAAI,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAC,EAAE,sBAAsB,EAAE,MAAM,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAClH,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,UAAsB,EACtB,IAAgB,EAChB,IAAgB;IAEhB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACnC,MAAM,OAAO,GAA4B,EAAE,CAAC;IAC5C,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC1C,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,YAAY;YAAE,MAAM;QAC/C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,YAAY,EAAE,IAAsB,EAAE,CAAC;QAE1G,wEAAwE;QACxE,IAAI,CAAC,GAAG,IAAI,CAAC,oBAAoB,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtD,IAAI,CAAC,CAAC,EAAE,CAAC;YACP,IAAI,YAAY,IAAI,IAAI,CAAC,aAAa;gBAAE,MAAM,CAAC,sDAAsD;YACrG,CAAC,GAAG,MAAM,gBAAgB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC;YAC/C,YAAY,IAAI,CAAC,
CAAC;YAClB,IAAI,CAAC,iBAAiB,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAChD,CAAC;QAED,wEAAwE;QACxE,IAAI,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;YACjB,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,QAAQ,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC;YACnG,SAAS;QACX,CAAC;QACD,IAAI,CAAC,CAAC,cAAc,EAAE,CAAC;YACrB,IAAI,CAAC,CAAC,oBAAoB;gBAAE,GAAG,CAAC,YAAY,IAAI,CAAC,WAAW,qBAAqB,CAAC,CAAC,oBAAoB,EAAE,CAAC,CAAC;YAC3G,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,iBAAiB,EAAE,GAAG,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,gBAAgB,EAAE,CAAC,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YAC9L,SAAS;QACX,CAAC;QACD,IAAI,CAAC,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YACrB,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;YACnI,SAAS;QACX,CAAC;QACD,uCAAuC;QACvC,IAAI,CAAC,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,CAAC,YAAY,KAAK,IAAI,IAAI,CAAC,CAAC,sBAAsB;gBAAE,GAAG,CAAC,YAAY,IAAI,CAAC,WAAW,uBAAuB,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC;YAC5I,QAAQ,CAAC,IAAI,CAAC;gBACZ,GAAG,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC,YAAY,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa;gBACnH,GAAG,CAAC,CAAC,CAAC,YAAY,KAAK,IAAI,IAAI,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,EAAE,gBAAgB,EAAE,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC/G,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YAChD,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;YAC9H,SAAS;QACX,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC5E,QAAQ,CAAC,IAAI,
CAAC,EAAE,GAAG,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QACzH,GAAG,CAAC,qBAAqB,IAAI,CAAC,WAAW,KAAK,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IACxF,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;AAC/B,CAAC;AAUD,MAAM,gBAAgB,GACpB,yFAAyF;IACzF,sGAAsG;IACtG,qGAAqG;IACrG,2GAA2G,CAAC;AAE9G,MAAM,UAAU,gBAAgB,CAAC,WAAqB,EAAE,WAAmB;IACzE,MAAM,QAAQ,GAAyB;QACrC,aAAa,EAAE,6BAA6B;QAC5C,UAAU,EAAE,mBAAmB;QAC/B,OAAO,EAAE,IAAI;QACb,WAAW;QACX,WAAW,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;KACjE,CAAC;IACF,OAAO,EAAE,OAAO,EAAE,gBAAgB,EAAE,GAAG,QAAQ,EAAE,IAAI,EAAE,wBAAwB,CAAC,QAAQ,CAAC,EAAE,CAAC;AAC9F,CAAC"}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Ordinary least-squares slope of resolved-rate vs cycle index for the
3
+ * train-arm slope measurement (issue #822, AC#1).
4
+ *
5
+ * The train-arm e2e evaluates a checkpoint against the held-out slate (#817)
6
+ * at intervals via the eval orchestrator (#818), collecting one
7
+ * `{ cycleIndex, rate }` point per interval (`rate` = passed / scorable, the
8
+ * Wilson point estimate). The slope of the least-squares fit is the headline
9
+ * "is the learner improving across the training sequence" number.
10
+ *
11
+ * It is deliberately a thin helper over the closed-form OLS slope
12
+ * (`cov(x,y) / var(x)`); the per-point confidence intervals come from
13
+ * `wilson.ts` — this module does NOT reimplement them. The slope sign alone is
14
+ * never a verdict at small N: a flat or slightly negative slope is "within
15
+ * noise", which the e2e surfaces via the §4.1 honesty caveat.
16
+ */
17
+ export interface RatePoint {
18
+ /** Training cycle index the eval ran at (0 = baseline, before any training). */
19
+ cycleIndex: number;
20
+ /** Observed resolved rate at that interval (passed / scorable, in [0, 1]). */
21
+ rate: number;
22
+ }
23
+ /**
24
+ * Least-squares slope of `rate` regressed on `cycleIndex`. Returns 0 for fewer
25
+ * than two points (no line to fit) and for a degenerate fit where every x is
26
+ * identical (zero variance — division would be NaN). A flat sequence yields
27
+ * exactly 0.
28
+ */
29
+ export declare function leastSquaresSlope(points: RatePoint[]): number;