@jinn-network/client 0.1.8 → 0.1.9-canary.144d87d2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397) hide show
  1. package/README.md +6 -0
  2. package/dist/adapters/mech/adapter.d.ts +21 -1
  3. package/dist/adapters/mech/adapter.js +77 -10
  4. package/dist/adapters/mech/adapter.js.map +1 -1
  5. package/dist/adapters/mech/contracts.js +62 -28
  6. package/dist/adapters/mech/contracts.js.map +1 -1
  7. package/dist/adapters/mech/safe-revert.d.ts +4 -0
  8. package/dist/adapters/mech/safe-revert.js +5 -1
  9. package/dist/adapters/mech/safe-revert.js.map +1 -1
  10. package/dist/adapters/mech/safe.js +5 -1
  11. package/dist/adapters/mech/safe.js.map +1 -1
  12. package/dist/adapters/mech/verdict-code.js +1 -1
  13. package/dist/adapters/mech/verdict-code.js.map +1 -1
  14. package/dist/api/bootstrap-endpoint.d.ts +1 -0
  15. package/dist/api/bootstrap-endpoint.js +1 -0
  16. package/dist/api/bootstrap-endpoint.js.map +1 -1
  17. package/dist/api/discovery-endpoint.d.ts +1 -0
  18. package/dist/api/discovery-endpoint.js +24 -0
  19. package/dist/api/discovery-endpoint.js.map +1 -1
  20. package/dist/api/fleet-build.d.ts +1 -7
  21. package/dist/api/fleet-build.js +0 -7
  22. package/dist/api/fleet-build.js.map +1 -1
  23. package/dist/api/gather-status.d.ts +8 -2
  24. package/dist/api/gather-status.js +29 -117
  25. package/dist/api/gather-status.js.map +1 -1
  26. package/dist/api/loop-completion-build.d.ts +79 -0
  27. package/dist/api/loop-completion-build.js +155 -0
  28. package/dist/api/loop-completion-build.js.map +1 -0
  29. package/dist/api/operator-artifacts-endpoint.js +1 -1
  30. package/dist/api/operator-artifacts-endpoint.js.map +1 -1
  31. package/dist/api/peers.js +2 -0
  32. package/dist/api/peers.js.map +1 -1
  33. package/dist/api/setup-endpoints.d.ts +32 -0
  34. package/dist/api/setup-endpoints.js +94 -24
  35. package/dist/api/setup-endpoints.js.map +1 -1
  36. package/dist/api/solvernets-endpoints.js +4 -1
  37. package/dist/api/solvernets-endpoints.js.map +1 -1
  38. package/dist/api/status-build.d.ts +43 -33
  39. package/dist/api/status-build.js +3 -26
  40. package/dist/api/status-build.js.map +1 -1
  41. package/dist/api/status-rollup-build.d.ts +0 -4
  42. package/dist/api/status-rollup-build.js +0 -4
  43. package/dist/api/status-rollup-build.js.map +1 -1
  44. package/dist/api/stop-hook.d.ts +1 -1
  45. package/dist/api/stop-hook.js +1 -1
  46. package/dist/api/stop-hook.js.map +1 -1
  47. package/dist/build-info.json +4 -4
  48. package/dist/build-meta.json +1 -1
  49. package/dist/cli/commands/codedigest-revert-check.js +6 -2
  50. package/dist/cli/commands/codedigest-revert-check.js.map +1 -1
  51. package/dist/cli/commands/doctor.d.ts +3 -0
  52. package/dist/cli/commands/doctor.js +37 -2
  53. package/dist/cli/commands/doctor.js.map +1 -1
  54. package/dist/cli/commands/eval.d.ts +87 -0
  55. package/dist/cli/commands/eval.js +481 -0
  56. package/dist/cli/commands/eval.js.map +1 -0
  57. package/dist/cli/commands/rewards.d.ts +2 -0
  58. package/dist/cli/commands/rewards.js +30 -3
  59. package/dist/cli/commands/rewards.js.map +1 -1
  60. package/dist/cli/commands/solver-nets.js +68 -0
  61. package/dist/cli/commands/solver-nets.js.map +1 -1
  62. package/dist/cli/commands/status.js +0 -1
  63. package/dist/cli/commands/status.js.map +1 -1
  64. package/dist/cli/index.js +2 -0
  65. package/dist/cli/index.js.map +1 -1
  66. package/dist/config.d.ts +102 -15
  67. package/dist/config.js +166 -19
  68. package/dist/config.js.map +1 -1
  69. package/dist/daemon/ai-units-gate.d.ts +6 -6
  70. package/dist/daemon/ai-units-gate.js +11 -10
  71. package/dist/daemon/ai-units-gate.js.map +1 -1
  72. package/dist/daemon/balance-topup-loop.js +3 -0
  73. package/dist/daemon/balance-topup-loop.js.map +1 -1
  74. package/dist/daemon/checkpoint-loop.js +2 -2
  75. package/dist/daemon/creator.d.ts +1 -0
  76. package/dist/daemon/creator.js +26 -14
  77. package/dist/daemon/creator.js.map +1 -1
  78. package/dist/daemon/daemon.d.ts +15 -0
  79. package/dist/daemon/daemon.js +78 -22
  80. package/dist/daemon/daemon.js.map +1 -1
  81. package/dist/daemon/eviction-loop.d.ts +7 -0
  82. package/dist/daemon/eviction-loop.js +19 -3
  83. package/dist/daemon/eviction-loop.js.map +1 -1
  84. package/dist/daemon/jinn-claim-loop.js +3 -0
  85. package/dist/daemon/jinn-claim-loop.js.map +1 -1
  86. package/dist/daemon/join-applier.d.ts +35 -0
  87. package/dist/daemon/join-applier.js +49 -0
  88. package/dist/daemon/join-applier.js.map +1 -0
  89. package/dist/daemon/loop-heartbeat.d.ts +34 -0
  90. package/dist/daemon/loop-heartbeat.js +39 -0
  91. package/dist/daemon/loop-heartbeat.js.map +1 -0
  92. package/dist/daemon/reward-claim-loop.js +4 -1
  93. package/dist/daemon/reward-claim-loop.js.map +1 -1
  94. package/dist/daemon/watchdog-loop.d.ts +84 -0
  95. package/dist/daemon/watchdog-loop.js +91 -0
  96. package/dist/daemon/watchdog-loop.js.map +1 -0
  97. package/dist/dashboard/assets/index-8tAiMbUV.css +1 -0
  98. package/dist/dashboard/assets/index-D6a-DfaM.js +171 -0
  99. package/dist/dashboard/index.html +2 -2
  100. package/dist/discovery/http.d.ts +17 -0
  101. package/dist/discovery/http.js +295 -25
  102. package/dist/discovery/http.js.map +1 -1
  103. package/dist/discovery/onchain.js +155 -1
  104. package/dist/discovery/onchain.js.map +1 -1
  105. package/dist/discovery/types.d.ts +106 -0
  106. package/dist/discovery/types.js +40 -0
  107. package/dist/discovery/types.js.map +1 -1
  108. package/dist/discovery/with-fallback.js +14 -0
  109. package/dist/discovery/with-fallback.js.map +1 -1
  110. package/dist/earning/bootstrap.d.ts +25 -0
  111. package/dist/earning/bootstrap.js +79 -28
  112. package/dist/earning/bootstrap.js.map +1 -1
  113. package/dist/earning/faucet.d.ts +1 -1
  114. package/dist/earning/faucet.js +2 -2
  115. package/dist/earning/faucet.js.map +1 -1
  116. package/dist/earning/safe-adapter.js +11 -0
  117. package/dist/earning/safe-adapter.js.map +1 -1
  118. package/dist/earning/stolas-claim.js +5 -5
  119. package/dist/earning/types.d.ts +1 -1
  120. package/dist/earning/types.js +1 -1
  121. package/dist/earning/types.js.map +1 -1
  122. package/dist/eval/eval-harness-run.d.ts +63 -0
  123. package/dist/eval/eval-harness-run.js +123 -0
  124. package/dist/eval/eval-harness-run.js.map +1 -0
  125. package/dist/eval/orchestrator.d.ts +224 -0
  126. package/dist/eval/orchestrator.js +250 -0
  127. package/dist/eval/orchestrator.js.map +1 -0
  128. package/dist/eval/paired.d.ts +68 -0
  129. package/dist/eval/paired.js +93 -0
  130. package/dist/eval/paired.js.map +1 -0
  131. package/dist/eval/resolve-slate-tasks.d.ts +35 -0
  132. package/dist/eval/resolve-slate-tasks.js +56 -0
  133. package/dist/eval/resolve-slate-tasks.js.map +1 -0
  134. package/dist/eval/screen-discovery.d.ts +22 -0
  135. package/dist/eval/screen-discovery.js +71 -0
  136. package/dist/eval/screen-discovery.js.map +1 -0
  137. package/dist/eval/screen-progress.d.ts +41 -0
  138. package/dist/eval/screen-progress.js +60 -0
  139. package/dist/eval/screen-progress.js.map +1 -0
  140. package/dist/eval/screen-runner.d.ts +30 -0
  141. package/dist/eval/screen-runner.js +289 -0
  142. package/dist/eval/screen-runner.js.map +1 -0
  143. package/dist/eval/screen.d.ts +107 -0
  144. package/dist/eval/screen.js +159 -0
  145. package/dist/eval/screen.js.map +1 -0
  146. package/dist/eval/slope.d.ts +29 -0
  147. package/dist/eval/slope.js +46 -0
  148. package/dist/eval/slope.js.map +1 -0
  149. package/dist/eval/train-sequence.d.ts +35 -0
  150. package/dist/eval/train-sequence.js +59 -0
  151. package/dist/eval/train-sequence.js.map +1 -0
  152. package/dist/eval/wilson.d.ts +45 -0
  153. package/dist/eval/wilson.js +48 -0
  154. package/dist/eval/wilson.js.map +1 -0
  155. package/dist/events/types.d.ts +1 -1
  156. package/dist/events/types.js +1 -1
  157. package/dist/events/types.js.map +1 -1
  158. package/dist/harnesses/engine/canonical-json.js +5 -3
  159. package/dist/harnesses/engine/canonical-json.js.map +1 -1
  160. package/dist/harnesses/engine/engine.d.ts +24 -0
  161. package/dist/harnesses/engine/engine.js +72 -9
  162. package/dist/harnesses/engine/engine.js.map +1 -1
  163. package/dist/harnesses/engine/packaging.js +1 -1
  164. package/dist/harnesses/engine/packaging.js.map +1 -1
  165. package/dist/harnesses/engine/persistence.d.ts +17 -0
  166. package/dist/harnesses/engine/persistence.js +28 -0
  167. package/dist/harnesses/engine/persistence.js.map +1 -1
  168. package/dist/harnesses/impls/claude-mcp-hyperliquid/mcp-tools.d.ts +1 -1
  169. package/dist/harnesses/impls/claude-mcp-hyperliquid/mcp-tools.js +1 -1
  170. package/dist/harnesses/impls/claude-mcp-hyperliquid/mcp-tools.js.map +1 -1
  171. package/dist/harnesses/impls/hermes-agent/adapter.d.ts +2 -0
  172. package/dist/harnesses/impls/hermes-agent/adapter.js +8 -5
  173. package/dist/harnesses/impls/hermes-agent/adapter.js.map +1 -1
  174. package/dist/harnesses/impls/hermes-agent/bootstrap.d.ts +1 -0
  175. package/dist/harnesses/impls/hermes-agent/bootstrap.js +6 -1
  176. package/dist/harnesses/impls/hermes-agent/bootstrap.js.map +1 -1
  177. package/dist/harnesses/impls/hermes-agent/harness.d.ts +17 -3
  178. package/dist/harnesses/impls/hermes-agent/harness.js +68 -5
  179. package/dist/harnesses/impls/hermes-agent/harness.js.map +1 -1
  180. package/dist/harnesses/impls/index.d.ts +2 -0
  181. package/dist/harnesses/impls/index.js +9 -0
  182. package/dist/harnesses/impls/index.js.map +1 -1
  183. package/dist/harnesses/impls/jinn-repo-evaluator/eval-runner.d.ts +34 -0
  184. package/dist/harnesses/impls/jinn-repo-evaluator/eval-runner.js +111 -0
  185. package/dist/harnesses/impls/jinn-repo-evaluator/eval-runner.js.map +1 -0
  186. package/dist/harnesses/impls/jinn-repo-evaluator/evaluator.d.ts +24 -0
  187. package/dist/harnesses/impls/jinn-repo-evaluator/evaluator.js +19 -0
  188. package/dist/harnesses/impls/jinn-repo-evaluator/evaluator.js.map +1 -0
  189. package/dist/harnesses/impls/jinn-repo-evaluator/harness.d.ts +64 -0
  190. package/dist/harnesses/impls/jinn-repo-evaluator/harness.js +125 -0
  191. package/dist/harnesses/impls/jinn-repo-evaluator/harness.js.map +1 -0
  192. package/dist/harnesses/impls/jinn-repo-evaluator/repro.d.ts +32 -0
  193. package/dist/harnesses/impls/jinn-repo-evaluator/repro.js +73 -0
  194. package/dist/harnesses/impls/jinn-repo-evaluator/repro.js.map +1 -0
  195. package/dist/harnesses/impls/learner/adapters/claude-code.js +5 -0
  196. package/dist/harnesses/impls/learner/adapters/claude-code.js.map +1 -1
  197. package/dist/harnesses/impls/learner/harness.d.ts +17 -1
  198. package/dist/harnesses/impls/learner/harness.js +51 -1
  199. package/dist/harnesses/impls/learner/harness.js.map +1 -1
  200. package/dist/harnesses/impls/learner/harvest.d.ts +2 -0
  201. package/dist/harnesses/impls/learner/harvest.js +51 -1
  202. package/dist/harnesses/impls/learner/harvest.js.map +1 -1
  203. package/dist/harnesses/impls/learner/plugin-path.js +1 -0
  204. package/dist/harnesses/impls/learner/plugin-path.js.map +1 -1
  205. package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js +3 -1
  206. package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js.map +1 -1
  207. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.d.ts +2 -2
  208. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js +3 -1
  209. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js.map +1 -1
  210. package/dist/harnesses/readiness-registry.d.ts +10 -0
  211. package/dist/harnesses/readiness-registry.js +13 -0
  212. package/dist/harnesses/readiness-registry.js.map +1 -1
  213. package/dist/harnesses/types.d.ts +14 -0
  214. package/dist/learner/revert-decision.d.ts +16 -1
  215. package/dist/learner/revert-decision.js +38 -18
  216. package/dist/learner/revert-decision.js.map +1 -1
  217. package/dist/learner/revert-stats.d.ts +14 -0
  218. package/dist/learner/revert-stats.js +42 -0
  219. package/dist/learner/revert-stats.js.map +1 -1
  220. package/dist/local-provider-url.d.ts +3 -0
  221. package/dist/local-provider-url.js +28 -0
  222. package/dist/local-provider-url.js.map +1 -0
  223. package/dist/main.js +94 -25
  224. package/dist/main.js.map +1 -1
  225. package/dist/mcp/operator-server.js +1 -1
  226. package/dist/mcp/operator-server.js.map +1 -1
  227. package/dist/mcp/server.js +1 -1
  228. package/dist/mcp/server.js.map +1 -1
  229. package/dist/plugins/learner/.claude-plugin/plugin.json +1 -1
  230. package/dist/plugins/learner/.codex-plugin/plugin.json +1 -1
  231. package/dist/plugins/learner/hooks/session-start +30 -1
  232. package/dist/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
  233. package/dist/preflight/deployment-readiness.d.ts +147 -0
  234. package/dist/preflight/deployment-readiness.js +366 -0
  235. package/dist/preflight/deployment-readiness.js.map +1 -0
  236. package/dist/preflight/pidfile-liveness.d.ts +7 -1
  237. package/dist/preflight/pidfile-liveness.js +14 -0
  238. package/dist/preflight/pidfile-liveness.js.map +1 -1
  239. package/dist/rpc/transport.d.ts +43 -5
  240. package/dist/rpc/transport.js +131 -30
  241. package/dist/rpc/transport.js.map +1 -1
  242. package/dist/scripts/swe-rebench-v2-seed-pool.json +2 -1
  243. package/dist/solver-nets/registry.d.ts +19 -0
  244. package/dist/solver-nets/registry.js +95 -66
  245. package/dist/solver-nets/registry.js.map +1 -1
  246. package/dist/solver-types/_jinn-repo-pool.d.ts +27 -0
  247. package/dist/solver-types/_jinn-repo-pool.js +27 -0
  248. package/dist/solver-types/_jinn-repo-pool.js.map +1 -0
  249. package/dist/solver-types/_swe-rebench-v2-held-out-slate.d.ts +76 -0
  250. package/dist/solver-types/_swe-rebench-v2-held-out-slate.js +156 -0
  251. package/dist/solver-types/_swe-rebench-v2-held-out-slate.js.map +1 -0
  252. package/dist/solver-types/_swe-rebench-v2-pool-recovery.d.ts +81 -0
  253. package/dist/solver-types/_swe-rebench-v2-pool-recovery.js +116 -0
  254. package/dist/solver-types/_swe-rebench-v2-pool-recovery.js.map +1 -0
  255. package/dist/solver-types/_swe-rebench-v2-state.d.ts +9 -0
  256. package/dist/solver-types/_swe-rebench-v2-state.js +14 -0
  257. package/dist/solver-types/_swe-rebench-v2-state.js.map +1 -1
  258. package/dist/solver-types/_swe-rebench-v2-validated-pool.d.ts +30 -0
  259. package/dist/solver-types/_swe-rebench-v2-validated-pool.js +40 -0
  260. package/dist/solver-types/_swe-rebench-v2-validated-pool.js.map +1 -1
  261. package/dist/solver-types/index.js +2 -0
  262. package/dist/solver-types/index.js.map +1 -1
  263. package/dist/solver-types/jinn-repo-admit.d.ts +17 -0
  264. package/dist/solver-types/jinn-repo-admit.js +16 -0
  265. package/dist/solver-types/jinn-repo-admit.js.map +1 -0
  266. package/dist/solver-types/jinn-repo-auto.d.ts +60 -0
  267. package/dist/solver-types/jinn-repo-auto.js +163 -0
  268. package/dist/solver-types/jinn-repo-auto.js.map +1 -0
  269. package/dist/solver-types/jinn-repo-definition.d.ts +15 -0
  270. package/dist/solver-types/jinn-repo-definition.js +34 -0
  271. package/dist/solver-types/jinn-repo-definition.js.map +1 -0
  272. package/dist/solver-types/jinn-repo-extract.d.ts +16 -0
  273. package/dist/solver-types/jinn-repo-extract.js +32 -0
  274. package/dist/solver-types/jinn-repo-extract.js.map +1 -0
  275. package/dist/solver-types/jinn-repo.d.ts +21 -0
  276. package/dist/solver-types/jinn-repo.js +23 -0
  277. package/dist/solver-types/jinn-repo.js.map +1 -0
  278. package/dist/solver-types/learner-loop-test.js +1 -1
  279. package/dist/solver-types/learner-loop-test.js.map +1 -1
  280. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json +20 -0
  281. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.json +19 -0
  282. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.screening-report.json +628 -0
  283. package/dist/solver-types/solver-type.d.ts +8 -0
  284. package/dist/solver-types/swe-rebench-v2.d.ts +2 -0
  285. package/dist/solver-types/swe-rebench-v2.js +115 -10
  286. package/dist/solver-types/swe-rebench-v2.js.map +1 -1
  287. package/dist/solvernets/launched-record-dispatcher.d.ts +5 -0
  288. package/dist/solvernets/launched-record-dispatcher.js +8 -1
  289. package/dist/solvernets/launched-record-dispatcher.js.map +1 -1
  290. package/dist/solvernets/registry-client-erc8004.js +29 -37
  291. package/dist/solvernets/registry-client-erc8004.js.map +1 -1
  292. package/dist/solvernets/registry-client.d.ts +6 -0
  293. package/dist/solvernets/store.d.ts +1 -1
  294. package/dist/solvernets/store.js +8 -3
  295. package/dist/solvernets/store.js.map +1 -1
  296. package/dist/spend/ai-units-config.d.ts +10 -0
  297. package/dist/spend/ai-units-config.js +7 -1
  298. package/dist/spend/ai-units-config.js.map +1 -1
  299. package/dist/spend/ai-units.d.ts +51 -0
  300. package/dist/spend/ai-units.js +73 -0
  301. package/dist/spend/ai-units.js.map +1 -1
  302. package/dist/spend/record.js +12 -5
  303. package/dist/spend/record.js.map +1 -1
  304. package/dist/store/store.d.ts +91 -5
  305. package/dist/store/store.js +170 -7
  306. package/dist/store/store.js.map +1 -1
  307. package/dist/trajectory/harness-bundle-schema.d.ts +1 -1
  308. package/dist/trajectory/harness-bundle-schema.js +1 -1
  309. package/dist/trajectory/harness-bundle-schema.js.map +1 -1
  310. package/dist/trajectory/schema.d.ts +1 -1
  311. package/dist/trajectory/schema.js +1 -1
  312. package/dist/trajectory/schema.js.map +1 -1
  313. package/dist/trajectory/transcript-parsers/types.d.ts +1 -1
  314. package/dist/trajectory/transcript-parsers/types.js +1 -1
  315. package/dist/trajectory/transcript-parsers/types.js.map +1 -1
  316. package/dist/types/envelope.d.ts +1 -1
  317. package/dist/types/envelope.js +1 -1
  318. package/dist/types/envelope.js.map +1 -1
  319. package/dist/types/payloads/index.d.ts +1 -1
  320. package/dist/types/payloads/index.js +7 -1
  321. package/dist/types/payloads/index.js.map +1 -1
  322. package/dist/types/payloads/portfolio-v0.d.ts +1 -1
  323. package/dist/types/payloads/portfolio-v0.js +1 -1
  324. package/dist/types/payloads/portfolio-v0.js.map +1 -1
  325. package/dist/types/payloads/prediction-apy-v0.d.ts +1 -1
  326. package/dist/types/payloads/prediction-apy-v0.js +1 -1
  327. package/dist/types/payloads/prediction-apy-v0.js.map +1 -1
  328. package/dist/types/payloads/prediction-v0.d.ts +1 -1
  329. package/dist/types/payloads/prediction-v0.js +1 -1
  330. package/dist/types/payloads/prediction-v0.js.map +1 -1
  331. package/dist/types/portfolio.d.ts +1 -1
  332. package/dist/types/portfolio.js +1 -1
  333. package/dist/types/portfolio.js.map +1 -1
  334. package/dist/types/prediction-apy.d.ts +1 -1
  335. package/dist/types/prediction-apy.js +1 -1
  336. package/dist/types/prediction-apy.js.map +1 -1
  337. package/dist/types/prediction.d.ts +1 -1
  338. package/dist/types/prediction.js +1 -1
  339. package/dist/types/prediction.js.map +1 -1
  340. package/dist/types/session-provenance.d.ts +1 -1
  341. package/dist/types/session-provenance.js +1 -1
  342. package/dist/types/session-provenance.js.map +1 -1
  343. package/dist/types/task-document.d.ts +1 -1
  344. package/dist/types/task-document.js +1 -1
  345. package/dist/types/task-document.js.map +1 -1
  346. package/dist/types/task.d.ts +1 -1
  347. package/dist/types/task.js +1 -1
  348. package/dist/types/task.js.map +1 -1
  349. package/dist/types/window.d.ts +1 -1
  350. package/dist/types/window.js +1 -1
  351. package/dist/types/window.js.map +1 -1
  352. package/dist/vendor/@jinn-network/sdk/dist/checkpoint.d.ts +1 -1
  353. package/dist/vendor/@jinn-network/sdk/dist/checkpoint.js +1 -1
  354. package/dist/vendor/@jinn-network/sdk/dist/contracts.d.ts +3 -2
  355. package/dist/vendor/@jinn-network/sdk/dist/contracts.js +49 -0
  356. package/dist/vendor/@jinn-network/sdk/dist/jinn-repo.d.ts +44 -0
  357. package/dist/vendor/@jinn-network/sdk/dist/jinn-repo.js +25 -0
  358. package/dist/vendor/@jinn-network/sdk/dist/json-schema.d.ts +1 -1
  359. package/dist/vendor/@jinn-network/sdk/dist/json-schema.js +1 -1
  360. package/dist/vendor/@jinn-network/sdk/dist/payloads/jinn-repo.d.ts +38 -0
  361. package/dist/vendor/@jinn-network/sdk/dist/payloads/jinn-repo.js +22 -0
  362. package/dist/vendor/@jinn-network/sdk/dist/payloads/prediction-v1.d.ts +1 -1
  363. package/dist/vendor/@jinn-network/sdk/dist/payloads/prediction-v1.js +1 -1
  364. package/dist/vendor/@jinn-network/sdk/dist/payloads/session-derived.d.ts +1 -1
  365. package/dist/vendor/@jinn-network/sdk/dist/payloads/session-derived.js +1 -1
  366. package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.d.ts +109 -2
  367. package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.js +26 -2
  368. package/dist/vendor/@jinn-network/sdk/dist/prediction-v1.d.ts +1 -1
  369. package/dist/vendor/@jinn-network/sdk/dist/prediction-v1.js +1 -1
  370. package/dist/vendor/@jinn-network/sdk/dist/solvernets/jinn-repo.d.ts +4 -0
  371. package/dist/vendor/@jinn-network/sdk/dist/solvernets/jinn-repo.js +2 -0
  372. package/dist/vendor/@jinn-network/sdk/dist/solvernets/manifest-schema.d.ts +1 -1
  373. package/dist/vendor/@jinn-network/sdk/dist/solvernets/manifest-schema.js +1 -1
  374. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.d.ts +65 -0
  375. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.js +123 -0
  376. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.d.ts +2 -2
  377. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.js +1 -1
  378. package/dist/vendor/@jinn-network/sdk/dist/swe-rebench-v2.d.ts +1 -1
  379. package/dist/vendor/@jinn-network/sdk/dist/swe-rebench-v2.js +1 -1
  380. package/dist/vendor/@jinn-network/sdk/package.json +9 -1
  381. package/docker-compose.yml +3 -2
  382. package/package.json +23 -20
  383. package/plugins/jinn-repo-runtime/.claude-plugin/plugin.json +5 -0
  384. package/plugins/jinn-repo-runtime/.codex-plugin/plugin.json +39 -0
  385. package/plugins/jinn-repo-runtime/README.md +27 -0
  386. package/plugins/jinn-repo-runtime/hooks/hooks.json +16 -0
  387. package/plugins/jinn-repo-runtime/hooks/session-start +73 -0
  388. package/plugins/jinn-repo-runtime/jinn.plugin.json +11 -0
  389. package/plugins/jinn-repo-runtime/skills/task/SKILL.md +92 -0
  390. package/plugins/learner/.claude-plugin/plugin.json +1 -1
  391. package/plugins/learner/.codex-plugin/plugin.json +1 -1
  392. package/plugins/learner/hooks/session-start +30 -1
  393. package/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
  394. package/plugins/swe-rebench-v2-runtime/hooks/hooks.json +16 -0
  395. package/plugins/swe-rebench-v2-runtime/hooks/session-start +74 -0
  396. package/dist/dashboard/assets/index-CzKxvMcU.css +0 -32
  397. package/dist/dashboard/assets/index-yVemxHot.js +0 -351
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Resumability store for held-out screening (#986).
3
+ *
4
+ * A real exam cut runs base R-runs + a prover over dozens of candidates — many
5
+ * hours of inference. This store persists each candidate's {@link ScreenMeasurement}
6
+ * so an interrupted run (rate limit, crash, disk) resumes instead of restarting:
7
+ * re-running the same command replays cached candidates for free and the
8
+ * `maxCandidates` budget bounds only the NEW work, so a long screen proceeds in
9
+ * budget-sized chunks.
10
+ *
11
+ * Keyed by a `signature` of the measurement-determining config (base model,
12
+ * prover, R, eval-semantics version). A signature mismatch on load → fresh start:
13
+ * cached measurements are only valid for the exact config that produced them
14
+ * (e.g. changing R changes the 0/R determination; a stronger base model changes
15
+ * pass/fail). Stored at `<stateDir>/held-out-screen-progress.json`.
16
+ */
17
+ import type { ScreenMeasurement } from './screen.js';
18
/**
 * Declaration of the per-candidate measurement cache used by held-out
 * screening. Entries are looked up and stored by instance id and are scoped to
 * the signature the store was constructed with.
 */
export declare class ScreenProgressStore {
    private readonly file;
    private readonly signature;
    private data;
    /** `stateDir` and `signature` are both required; see the file header for how they are used. */
    constructor(opts: { stateDir: string; signature: string });
    private load;
    /** Look up the measurement cached for `instance_id` under the current signature; `undefined` when none is cached. */
    get(instance_id: string): ScreenMeasurement | undefined;
    /** Store a newly measured candidate and persist it by rewriting the whole progress file. */
    record(instance_id: string, m: ScreenMeasurement): void;
    /** How many candidates have a measurement cached under the current signature. */
    get size(): number;
}
34
/**
 * Derive the cache signature from the configuration values that determine a
 * measurement: base model, prover harness and model, R, and the eval-semantics
 * version.
 */
export declare function screenSignature(args: {
    baseModel: string;
    proverHarness: string;
    proverModel: string;
    R: number;
    evalSemanticsVersion: string;
}): string;
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Resumability store for held-out screening (#986).
3
+ *
4
+ * A real exam cut runs base R-runs + a prover over dozens of candidates — many
5
+ * hours of inference. This store persists each candidate's {@link ScreenMeasurement}
6
+ * so an interrupted run (rate limit, crash, disk) resumes instead of restarting:
7
+ * re-running the same command replays cached candidates for free and the
8
+ * `maxCandidates` budget bounds only the NEW work, so a long screen proceeds in
9
+ * budget-sized chunks.
10
+ *
11
+ * Keyed by a `signature` of the measurement-determining config (base model,
12
+ * prover, R, eval-semantics version). A signature mismatch on load → fresh start:
13
+ * cached measurements are only valid for the exact config that produced them
14
+ * (e.g. changing R changes the 0/R determination; a stronger base model changes
15
+ * pass/fail). Stored at `<stateDir>/held-out-screen-progress.json`.
16
+ */
17
import { readFileSync, writeFileSync, mkdirSync, renameSync } from 'node:fs';
import { dirname, join } from 'node:path';
19
/** Schema tag stored in the progress file; a file carrying any other tag is ignored on load. */
const SCHEMA_VERSION = 'held-out-screen-progress.v1';
/**
 * File-backed cache of per-candidate screening measurements.
 *
 * The whole cache lives in `<stateDir>/held-out-screen-progress.json` and is
 * only reused when both the schema tag and the caller's signature match what
 * is on disk; otherwise the store starts empty.
 */
export class ScreenProgressStore {
    file;
    signature;
    data;
    /**
     * @param opts.stateDir  Directory that holds `held-out-screen-progress.json`.
     * @param opts.signature Signature the on-disk cache must carry to be reused.
     */
    constructor(opts) {
        this.file = join(opts.stateDir, 'held-out-screen-progress.json');
        this.signature = opts.signature;
        this.data = this.load();
    }
    /**
     * Read the progress file, falling back to an empty cache when the file is
     * absent, unparsable, tagged with another schema/signature, or its
     * `measurements` field is not a plain (non-array) object.
     */
    load() {
        try {
            const raw = JSON.parse(readFileSync(this.file, 'utf8'));
            const cached = raw?.measurements;
            // An array (or any primitive) here would accept writes in memory but
            // serialise without them, silently discarding every recorded result.
            const usable = raw?.schemaVersion === SCHEMA_VERSION &&
                raw.signature === this.signature &&
                typeof cached === 'object' &&
                cached !== null &&
                !Array.isArray(cached);
            if (usable) {
                // Copy into a prototype-less map so lookups such as `constructor`
                // or `__proto__` behave like ordinary, absent keys.
                return { ...raw, measurements: Object.assign(Object.create(null), cached) };
            }
        }
        catch {
            /* absent or corrupt → fresh */
        }
        return { schemaVersion: SCHEMA_VERSION, signature: this.signature, measurements: Object.create(null) };
    }
    /** Cached measurement for this instance under the current signature, or undefined. */
    get(instance_id) {
        return this.data.measurements[instance_id];
    }
    /**
     * Persist a freshly-measured candidate.
     *
     * The full cache is serialised to a sibling `.tmp` file and then renamed
     * over the progress file, so an interruption during the write leaves the
     * previously saved file untouched instead of truncated.
     * NOTE(review): the fixed `.tmp` name assumes a single writer per state
     * directory — confirm against callers.
     */
    record(instance_id, m) {
        this.data.measurements[instance_id] = m;
        mkdirSync(dirname(this.file), { recursive: true });
        const tmp = `${this.file}.tmp`;
        writeFileSync(tmp, `${JSON.stringify(this.data, null, 2)}\n`);
        renameSync(tmp, this.file);
    }
    /** Number of candidates already measured under the current signature. */
    get size() {
        return Object.keys(this.data.measurements).length;
    }
}
56
/**
 * Compose the cache signature string from the config values that determine a
 * measurement. Fields are rendered as `key=value` segments separated by `|`,
 * with the prover rendered as `harness:model`.
 */
export function screenSignature(args) {
    const { baseModel, proverHarness, proverModel, R, evalSemanticsVersion } = args;
    const segments = [
        `base=${baseModel}`,
        `prover=${proverHarness}:${proverModel}`,
        `R=${R}`,
        `sem=${evalSemanticsVersion}`,
    ];
    return segments.join('|');
}
60
+ //# sourceMappingURL=screen-progress.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"screen-progress.js","sourceRoot":"","sources":["../../src/eval/screen-progress.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAG1C,MAAM,cAAc,GAAG,6BAAsC,CAAC;AAS9D,MAAM,OAAO,mBAAmB;IACb,IAAI,CAAS;IACb,SAAS,CAAS;IAC3B,IAAI,CAAe;IAE3B,YAAY,IAA6C;QACvD,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,+BAA+B,CAAC,CAAC;QACjE,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAEO,IAAI;QACV,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAiB,CAAC;YACxE,IAAI,GAAG,EAAE,aAAa,KAAK,cAAc,IAAI,GAAG,CAAC,SAAS,KAAK,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC,YAAY,EAAE,CAAC;gBAClG,OAAO,GAAG,CAAC;YACb,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,+BAA+B;QACjC,CAAC;QACD,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;IACxF,CAAC;IAED,sFAAsF;IACtF,GAAG,CAAC,WAAmB;QACrB,OAAO,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;IAC7C,CAAC;IAED,gFAAgF;IAChF,MAAM,CAAC,WAAmB,EAAE,CAAoB;QAC9C,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QACxC,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACnD,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IACtE,CAAC;IAED,yEAAyE;IACzE,IAAI,IAAI;QACN,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,MAAM,CAAC;IACpD,CAAC;CACF;AAED,yEAAyE;AACzE,MAAM,UAAU,eAAe,CAAC,IAM/B;IACC,OAAO,QAAQ,IAAI,CAAC,SAAS,WAAW,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,WAAW,MAAM,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,oBAAoB,EAAE,CAAC;AAChI,CAAC"}
@@ -0,0 +1,30 @@
1
+ import { type ScreenResult } from './screen.js';
2
/** Options accepted by `runScreenHeldOut`. */
export interface ScreenRunOptions {
    R: number;
    heldOutCount: number;
    maxCandidates: number;
    perRepoCap: number;
    /**
     * Agent harness used for the prover. `codex` is the default; `claude-code`
     * allows e.g. an Opus prover through the working Claude auth, which helps
     * when codex is rate-limited.
     */
    proverHarness?: 'claude-code' | 'codex';
    /**
     * Model handed to the prover: the `codexModel` for the codex harness, or
     * the `claudeModel` for the claude-code harness (which defaults to `opus`).
     */
    proverModel?: string;
    /** When set, only these instance ids are candidates; otherwise the whole gradeable pool is. */
    instanceIds?: Array<string>;
    /** When set, only candidates from this repo (org prefix, e.g. `tobymao`) are considered. */
    repo?: string;
    configPath?: string;
    log?: (msg: string) => void;
}
20
/** Value that `runScreenHeldOut` resolves to. */
export interface ScreenRunSummary {
    result: ScreenResult;
    baseCodeDigest: string;
    slatePath: string;
    reportPath: string;
    heldOutCount: number;
    /**
     * Count of base-failing candidates for which the prover run produced no
     * gradeable result. These are excluded as no-headroom, and a non-zero
     * count hints that the prover may be unavailable.
     */
    proverUnscorable: number;
}
30
/**
 * Ambient declaration of the held-out screening entry point: accepts a
 * `ScreenRunOptions` and resolves to a `ScreenRunSummary`. The implementation
 * is not part of this declaration.
 */
export declare function runScreenHeldOut(
    opts: ScreenRunOptions,
): Promise<ScreenRunSummary>;
@@ -0,0 +1,289 @@
1
+ import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'node:fs';
2
+ import { tmpdir } from 'node:os';
3
+ import { join, dirname } from 'node:path';
4
+ import { fileURLToPath } from 'node:url';
5
+ import { loadConfig } from '../config.js';
6
+ import { Store } from '../store/store.js';
7
+ import { hashImplStateDir } from '../harnesses/freeze.js';
8
+ import { LearnerHarness } from '../harnesses/impls/learner/harness.js';
9
+ import { ClaudeCodeHarnessAdapter } from '../harnesses/impls/learner/adapters/claude-code.js';
10
+ import { CodexCodeHarnessAdapter } from '../harnesses/impls/learner/adapters/codex-code.js';
11
+ import { CODEX_HARNESS } from '../harnesses/names.js';
12
+ import { runHarnessForEval, resolveRuntimePluginsForSolverType } from './eval-harness-run.js';
13
+ import { corpusEnvFromConfig } from '../cli/commands/eval.js';
14
+ import { loadSweRebenchV2Pool, defaultStateDir, getSweRebenchV2ValidatedPoolStore, } from '../solver-types/swe-rebench-v2.js';
15
+ import { PoolCacheStore, loadPoolWithCacheFallback } from '../solver-types/_swe-rebench-v2-pool-cache.js';
16
+ import { validatePoolInstances, EVAL_SEMANTICS_VERSION, } from '../solver-types/_swe-rebench-v2-validated-pool.js';
17
+ import { resolveSlateTasks } from './resolve-slate-tasks.js';
18
+ import { loadActiveHeldOutSlateIds, ACTIVE_HELD_OUT_SLATE_VERSIONS, loadHeldOutSlate, } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
19
+ import { GeneratorStateStore } from '../solver-types/_swe-rebench-v2-state.js';
20
+ import { fetchAttemptedInstanceIds } from './screen-discovery.js';
21
+ import { DEFAULT_TESTNET_DISCOVERY_URL } from '../config.js';
22
+ import { solverTypeFromJoinedContract } from '../solver-nets/registry.js';
23
+ import { SweRebenchV2Evaluator } from '../harnesses/impls/swe-rebench-v2-evaluator/index.js';
24
+ import { HttpHfFetcher } from '../harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js';
25
+ import { PythonEvalRunner } from '../harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js';
26
+ import { readEnabledState, defaultSweRebenchV2EvaluatorImplStateDir } from '../harnesses/impls/swe-rebench-v2-evaluator/harness.js';
27
+ import { stratifyByRepo, screenBaseFailures, buildV2SlateFile, } from './screen.js';
28
+ import { ScreenProgressStore, screenSignature } from './screen-progress.js';
29
/** Solver type this screen dispatches to and looks slates up under. */
const DISPATCH_SOLVER_TYPE = 'swe-rebench-v2.v1';
/** Version tag of the slate this module reads, grows and records results for. */
const SLATE_VERSION = 'v2';
/** Per-run time budget added to "now" for a task window's end: one hour in milliseconds. */
const RUN_BUDGET_MS = 60 * 60 * 1000;
32
/**
 * Directory holding the shipped slate JSON files.
 *
 * Resolved relative to this module's own location (`../solver-types/slates`),
 * so the lookup behaves the same from `src` and from the compiled `dist` tree.
 */
function slatesDir() {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    return join(moduleDir, '..', 'solver-types', 'slates');
}
36
/**
 * Turn an exception caught during a frozen run into a short unscorable reason.
 *
 * An "agent produced no patch" style failure is labelled distinctly from a
 * genuine infra/grader error; every other message is truncated to 200
 * characters and prefixed with the stage it happened in.
 *
 * @param stage - Stage that was executing (`resolve`, `harness` or `grade`).
 * @param err - Whatever was thrown.
 * @returns The reason string stored on the unscorable run.
 */
function describeRunFailure(stage, err) {
    const msg = err instanceof Error ? err.message : String(err);
    return /produced no\b|no .*patch/i.test(msg)
        ? `${stage}: agent produced no patch`
        : `${stage}-error: ${msg.slice(0, 200)}`;
}

/**
 * Remediation hint matching the prover harness actually in use, so the
 * "prover unavailable" warning does not send a claude-code user to the codex CLI.
 *
 * @param proverKind - `'codex'` or `'claude-code'`.
 * @returns One sentence telling the operator what to verify before re-running.
 */
function proverAvailabilityHint(proverKind) {
    return proverKind === 'claude-code'
        ? 'Verify the claude CLI + auth, then re-run.'
        : 'Verify the codex CLI (>=0.133.0) + auth, then re-run.';
}

/**
 * Screen the SWE-rebench v2 pool for new held-out instances and grow the v2 slate.
 *
 * Steps: load config and check the evaluator is enabled; build the base and
 * prover harnesses; load the pool and remove held-out / posted / attempted
 * instances (or honour an explicit `instanceIds` override); screen candidates
 * with cached, resumable measurements; write the cumulative slate and the
 * screening report; and record the base arm (all-fail) for the full slate in
 * the store.
 *
 * @param opts - See `ScreenRunOptions` (R, heldOutCount, maxCandidates,
 *   perRepoCap, optional prover harness/model, instanceIds, repo, configPath, log).
 * @returns `{ result, baseCodeDigest, slatePath, reportPath, heldOutCount, proverUnscorable }`.
 * @throws Error when the evaluator is not enabled or the pool is empty.
 */
export async function runScreenHeldOut(opts) {
    const log = opts.log ?? (() => { });
    const config = loadConfig(opts.configPath);
    // Precondition: evaluator enabled (upstream repo cloned).
    const enabled = readEnabledState(defaultSweRebenchV2EvaluatorImplStateDir());
    if (!enabled) {
        throw new Error('swe-rebench-v2 evaluator not enabled — run `jinn harnesses enable swe-rebench-v2-evaluator` first');
    }
    const upstreamRepoDir = enabled.upstreamRepoDir;
    const stateDir = process.env['JINN_SWE_REBENCH_V2_STATE_DIR'] ?? defaultStateDir();
    const fetcher = new HttpHfFetcher();
    const evaluator = new SweRebenchV2Evaluator({ fetcher, runner: new PythonEvalRunner({ upstreamRepoDir }) });
    const validatedStore = getSweRebenchV2ValidatedPoolStore();
    const runtimePlugins = await resolveRuntimePluginsForSolverType(DISPATCH_SOLVER_TYPE, config.joinedSolverNets);
    // Common adapter wiring (mirrors buildEvalHarness in cli/commands/eval.ts).
    const daemonApiToken = process.env['DAEMON_API_TOKEN']?.trim();
    const corpusEnv = corpusEnvFromConfig(config);
    const common = {
        claudePath: config.claudePath ?? 'claude',
        storePath: config.dbPath,
        daemonApiUrl: `http://127.0.0.1:${config.apiPort}`,
        ...(daemonApiToken ? { daemonApiToken } : {}),
        ...(corpusEnv ? { corpusEnv } : {}),
    };
    const baseHarness = new LearnerHarness({
        adapter: new ClaudeCodeHarnessAdapter({ ...common, claudeModel: config.claudeModel }),
        claudePath: common.claudePath,
    });
    // Prover harness: codex (default) or claude-code (e.g. an Opus prover via the
    // working Claude auth, sidestepping a codex rate limit; same-family Haiku→Opus
    // is a clean capability ladder for "proven headroom").
    const proverKind = opts.proverHarness ?? 'codex';
    const proverHarness = proverKind === 'claude-code'
        ? new LearnerHarness({
            adapter: new ClaudeCodeHarnessAdapter({ ...common, claudeModel: opts.proverModel ?? 'opus' }),
            claudePath: common.claudePath,
        })
        : new LearnerHarness({
            name: CODEX_HARNESS,
            adapter: new CodexCodeHarnessAdapter({ ...common, ...(opts.proverModel ? { codexModel: opts.proverModel } : {}) }),
            claudePath: common.claudePath,
            ...(config.codexPath !== undefined ? { codexPath: config.codexPath } : {}),
        });
    // Single source of truth for the prover-model label: it feeds both the
    // resume signature and the screening report, so the two can never drift.
    const proverModelLabel = opts.proverModel ?? (proverKind === 'claude-code' ? 'opus' : 'codex-default');
    // Candidate pool (whole gradeable pool by default; scopeable).
    const cacheResult = await loadPoolWithCacheFallback({
        loadPool: loadSweRebenchV2Pool, cache: new PoolCacheStore({ stateDir }), currentPool: [],
    });
    let pool = cacheResult.pool;
    if (pool.length === 0) {
        throw new Error(`SWE-rebench v2 pool empty${cacheResult.error ? ` (${cacheResult.error.message})` : ''}`);
    }
    // Held-out discipline (#986): draw the exam from the never-trained, never-held-out
    // remainder. Union three exclusion sources:
    //   - active held-out slate (would overlap an existing exam);
    //   - already-ATTEMPTED on-network (indexer verdictEnvelopeMeta, any verdict,
    //     cross-operator, CURRENT) — the authoritative "the learner trained on it"
    //     signal; and
    //   - this box's posted ledger (belt — may be STALE when another generator is
    //     the active poster, e.g. a hosted operator; hence the indexer is the truth).
    // A trained instance held out later would make a trained-checkpoint pass count
    // as memorization, not generalization.
    const heldOutIds = loadActiveHeldOutSlateIds(DISPATCH_SOLVER_TYPE, ACTIVE_HELD_OUT_SLATE_VERSIONS);
    const postedIds = await new GeneratorStateStore({ stateDir }).postedInstanceIds();
    let attemptedIds = new Set();
    const discoveryUrl = config.discovery?.url?.trim()
        || (config.network === 'testnet' ? DEFAULT_TESTNET_DISCOVERY_URL : undefined);
    const joinedNet = Object.values(config.joinedSolverNets ?? {}).find((n) => solverTypeFromJoinedContract(n) === DISPATCH_SOLVER_TYPE);
    if (discoveryUrl && joinedNet?.manifestCid) {
        attemptedIds = await fetchAttemptedInstanceIds(discoveryUrl, joinedNet.manifestCid);
        log(`[screen] indexer: ${attemptedIds.size} instance(s) already attempted on-network (authoritative, cross-operator) → excluded`);
    }
    else {
        log('[screen] WARNING: no discovery URL / manifestCid — cannot exclude already-attempted instances; relying on the local posted ledger, which may be stale');
    }
    const excludeIds = new Set([...heldOutIds, ...postedIds, ...attemptedIds]);
    if (opts.instanceIds?.length) {
        // Explicit operator override — screen exactly these, but warn if any are
        // already contaminated (attempted/posted/held-out) so an intentional pick is informed.
        const want = new Set(opts.instanceIds);
        pool = pool.filter((t) => want.has(t.instance_id));
        // A named id that is not in the pool would otherwise vanish without a trace.
        const found = new Set(pool.map((t) => t.instance_id));
        const missing = [...want].filter((id) => !found.has(id));
        if (missing.length > 0) {
            log(`[screen] WARNING: ${missing.length} explicitly-named instance(s) not found in the pool (skipped): ${missing.join(', ')}`);
        }
        const tainted = pool.filter((t) => excludeIds.has(t.instance_id)).map((t) => t.instance_id);
        if (tainted.length > 0) {
            log(`[screen] WARNING: ${tainted.length} explicitly-named instance(s) are already attempted/posted/held-out (NOT clean held-out candidates): ${tainted.join(', ')}`);
        }
    }
    else {
        if (opts.repo) {
            pool = pool.filter((t) => t.instance_id.startsWith(`${opts.repo}__`));
        }
        const before = pool.length;
        pool = pool.filter((t) => !excludeIds.has(t.instance_id));
        // Restrict to ALREADY-VALIDATED-SCORABLE candidates (#986): the never-validated
        // tail is mostly not-gradeable (~90% deeper in the pool), so base-screening it
        // wastes inference. Discovery of gradeability is `validate-pool`'s job; the
        // screen selects held-out FROM the scorable set. Falls back to the full
        // remainder only when no validation data exists yet (and warns).
        const scorableIds = await validatedStore.getScorableIds(EVAL_SEMANTICS_VERSION);
        if (scorableIds) {
            const beforeScorable = pool.length;
            pool = pool.filter((t) => scorableIds.has(t.instance_id));
            log(`[screen] candidate pool ${before} → ${beforeScorable} (excluded ${excludeIds.size}: ${heldOutIds.size} held-out ∪ ${postedIds.size} posted ∪ ${attemptedIds.size} attempted) → ${pool.length} validated-scorable (run validate-pool to grow this)`);
        }
        else {
            log(`[screen] WARNING: no validated-scorable data — screening the full ${pool.length}-task remainder (mostly not-gradeable; run validate-pool first for efficiency)`);
        }
    }
    const candidates = stratifyByRepo(pool);
    log(`[screen] ${candidates.length} candidate(s) after stratification`);
    // Resolve a single instance to the {task,row} the harness + grader need.
    const byId = new Map(pool.map((t) => [t.instance_id, t]));
    async function runOnce(harness, poolTask) {
        // A lookup miss in `byId` must read as a named unscorable result, not as a
        // "cannot read properties of undefined" message from deeper in the run.
        if (!poolTask) {
            return { passed: null, unscorableReason: 'resolve: instance not in pool' };
        }
        const implStateDir = mkdtempSync(join(tmpdir(), 'jinn-screen-state-'));
        // Track which stage we're in so an unscorable result names its cause (#476
        // excludes infra failures from the denominator — but it must be diagnosable,
        // not an opaque black box).
        let stage = 'resolve';
        try {
            const [resolved] = await resolveSlateTasks({
                poolTasks: [poolTask], hf_dataset: poolTask.hf_dataset, hf_split: poolTask.hf_split, fetcher,
            });
            if (!resolved) {
                return { passed: null, unscorableReason: 'resolve: instance not in pool' };
            }
            const task = {
                id: poolTask.instance_id,
                description: resolved.task.problem_statement,
                role: 'restoration',
                solverType: DISPATCH_SOLVER_TYPE,
                spec: resolved.task,
                window: { startTs: 0, endTs: Date.now() + RUN_BUDGET_MS },
            };
            stage = 'harness';
            const run = await runHarnessForEval({
                harness, task, solverType: DISPATCH_SOLVER_TYPE, runtimePlugins, implStateDir, mode: 'frozen',
            });
            if (run.violation) {
                return { passed: null, unscorableReason: 'harness: freeze-fence violation' };
            }
            if (!run.solution) {
                return { passed: null, unscorableReason: 'harness: no solution produced' };
            }
            stage = 'grade';
            const verdict = await evaluator.grade({
                task: resolved.task,
                solutionPayload: { schemaVersion: 'swe-rebench-v2-solution.v1', patch: run.solution.patch },
                row: resolved.row,
            });
            return { passed: verdict.passed_match };
        }
        catch (err) {
            // Any harness/grader/infra failure ⇒ unscorable, never a fail (#476).
            return { passed: null, unscorableReason: describeRunFailure(stage, err) };
        }
        finally {
            rmSync(implStateDir, { recursive: true, force: true });
        }
    }
    // The base policy is an empty impl-state dir; its digest identifies the base arm.
    // Cleanup sits in `finally` so a hashing failure cannot leak the temp dir.
    const emptyBaseDir = mkdtempSync(join(tmpdir(), 'jinn-screen-base-'));
    let baseCodeDigest;
    try {
        const hashOpts = baseHarness.freezeStateHashIgnore?.length
            ? { ignoreRelPaths: [...baseHarness.freezeStateHashIgnore] } : undefined;
        baseCodeDigest = `sha256:${await hashImplStateDir(emptyBaseDir, hashOpts)}`;
    }
    finally {
        rmSync(emptyBaseDir, { recursive: true, force: true });
    }
    // Resumability: cache each candidate's measurement under a config signature so
    // an interrupted run resumes (re-run the same command). The base policy is fixed
    // (empty impl-state) so its measurement is stable; the signature invalidates the
    // cache if the base model / prover / R / semantics change.
    const progress = new ScreenProgressStore({
        stateDir,
        signature: screenSignature({
            baseModel: config.claudeModel, proverHarness: proverKind, proverModel: proverModelLabel,
            R: opts.R, evalSemanticsVersion: EVAL_SEMANTICS_VERSION,
        }),
    });
    if (progress.size > 0) {
        log(`[screen] resuming: ${progress.size} candidate(s) already measured (cached) for this config`);
    }
    const deps = {
        log,
        getCachedMeasurement: (id) => progress.get(id),
        recordMeasurement: (id, m) => progress.record(id, m),
        ensureGradeable: async (task) => {
            await validatePoolInstances([task], {
                fetcher, runner: new PythonEvalRunner({ upstreamRepoDir }), store: validatedStore,
                semanticsVersion: EVAL_SEMANTICS_VERSION, upstreamRepoDir,
            }, {});
            return (await validatedStore.getEntry(task.instance_id, EVAL_SEMANTICS_VERSION))?.scorable === true;
        },
        runBaseFrozen: (task) => runOnce(baseHarness, byId.get(task.instance_id)),
        runProverFrozen: (task) => runOnce(proverHarness, byId.get(task.instance_id)),
    };
    const result = await screenBaseFailures(candidates, deps, {
        R: opts.R, heldOutCount: opts.heldOutCount, maxCandidates: opts.maxCandidates, perRepoCap: opts.perRepoCap,
    });
    // Diagnosability: a base-failing candidate routed to the prover that comes
    // back `proverPassed: null` means the prover produced NO gradeable result
    // (errored / no patch), not a clean "the prover can't solve it". That silently
    // routes to `no-headroom` and can yield a misleadingly empty slate when the
    // prover is simply unavailable (e.g. CLI too old, or auth missing).
    // Surface it loudly rather than swallow it.
    const proverUnscorable = result.screened.filter((s) => s.reason === 'no-headroom' && s.proverPassed === null).length;
    if (proverUnscorable > 0) {
        log(`[screen] WARNING: the prover returned no gradeable result on ${proverUnscorable} base-failing ` +
            `candidate(s) — excluded as no-headroom, but this likely means the prover is UNAVAILABLE rather than ` +
            `unable. ${proverAvailabilityHint(proverKind)} See proverPassed=null rows in the report.`);
    }
    // Cumulative v2: union the EXISTING slate with this run's NEW held-out, so
    // re-running to GROW the exam never drops already-reserved instances. (The
    // active-slate exclusion keeps existing held-out OUT of candidates, so
    // result.heldOut is only the new admits.) Growing changes the slate's content
    // hash; re-recording the base arm for the FULL set below upserts the prior rows
    // to the new hash, so the orchestrator's slate-hash-drift guard stays satisfied.
    let existingHeldOut = [];
    try {
        existingHeldOut = [...loadHeldOutSlate(DISPATCH_SOLVER_TYPE, SLATE_VERSION).instanceIds];
    }
    catch {
        /* no v2 slate yet — this is the first cut */
    }
    const newHeldOut = result.heldOut.map((h) => h.instance_id);
    const allHeldOutIds = [...new Set([...existingHeldOut, ...newHeldOut])];
    const generatedAt = new Date().toISOString();
    const slateFile = buildV2SlateFile(allHeldOutIds, generatedAt);
    const outDir = slatesDir();
    mkdirSync(outDir, { recursive: true });
    const slatePath = join(outDir, 'held-out-slate.swe-rebench-v2.v2.json');
    writeFileSync(slatePath, `${JSON.stringify(slateFile, null, 2)}\n`);
    const reportPath = join(outDir, 'held-out-slate.swe-rebench-v2.v2.screening-report.json');
    writeFileSync(reportPath, `${JSON.stringify({
        generatedAt, evalSemanticsVersion: EVAL_SEMANTICS_VERSION, baseCodeDigest,
        R: opts.R, proverHarness: proverKind,
        proverModel: proverModelLabel,
        heldOutTotal: allHeldOutIds.length, newThisRun: newHeldOut, carriedOver: existingHeldOut,
        screened: result.screened,
    }, null, 2)}\n`);
    // Persist the base arm (all-fail) for the FULL held-out set under the (possibly
    // new) slate hash. The upsert is keyed by (checkpoint, slate_version, instance_id),
    // so prior rows get their slate_hash refreshed to match the grown slate — no drift.
    const store = new Store(config.dbPath);
    try {
        const runAtMs = Date.now();
        for (const id of allHeldOutIds) {
            store.recordEvalResult({
                checkpoint_cid: baseCodeDigest, slate_hash: slateFile.hash, slate_version: SLATE_VERSION,
                instance_id: id, passed: false, unscorable: false, code_digest: baseCodeDigest,
                run_at_ms: runAtMs, test_log_excerpt: 'base arm (screening): consistent fail 0/R',
            });
        }
    }
    finally {
        store.close?.();
    }
    return { result, baseCodeDigest, slatePath, reportPath, heldOutCount: allHeldOutIds.length, proverUnscorable };
}
289
+ //# sourceMappingURL=screen-runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"screen-runner.js","sourceRoot":"","sources":["../../src/eval/screen-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACxE,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAIzC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,uCAAuC,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,MAAM,oDAAoD,CAAC;AAC9F,OAAO,EAAE,uBAAuB,EAAE,MAAM,mDAAmD,CAAC;AAC5F,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,kCAAkC,EAAE,MAAM,uBAAuB,CAAC;AAC9F,OAAO,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAC9D,OAAO,EACL,oBAAoB,EAAE,eAAe,EAAE,iCAAiC,GACzE,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,cAAc,EAAE,yBAAyB,EAAE,MAAM,+CAA+C,CAAC;AAC1G,OAAO,EACL,qBAAqB,EAAE,sBAAsB,GAC9C,MAAM,mDAAmD,CAAC;AAC3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EACL,yBAAyB,EAAE,8BAA8B,EAAE,gBAAgB,GAC5E,MAAM,mDAAmD,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,0CAA0C,CAAC;AAC/E,OAAO,EAAE,yBAAyB,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,EAAE,6BAA6B,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EAAE,4BAA4B,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,EAAE,qBAAqB,EAAE,MAAM,sDAAsD,CAAC;AAC7F,OAAO,EAAE,aAAa,EAAE,MAAM,2DAA2D,CAAC;AAC1F,OAAO,EAAE,gBAAgB,EAAE,MAAM,4DAA4D,CAAC;AAC9F,OAAO,EAAE,gBAAgB,EAAE,wCAAwC,EAAE,MAAM,wDAAwD,CAAC;AACpI,OAAO,EACL,cAAc,EAAE,kBAAkB,EAAE,gBAAgB,GAErD,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAE5E,MAAM,oBAAoB,GAAG,mBAAmB,CAAC;AACjD,MAAM,aAAa,GAAG,IAAI,CAAC;AAC3B,MAAM,aAAa,GAAG,SAAS,CAAC;AAgChC,iFAAiF;AACjF,SAAS,SAAS;IAChB,OAAO,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;AACvF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAsB;IAC3D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAE3C,0DAA0D;IAC1D,MAAM,OAAO,GAAG,gBAAgB,CAAC,wCAA
wC,EAAE,CAAC,CAAC;IAC7E,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IACD,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAEhD,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,IAAI,eAAe,EAAE,CAAC;IACnF,MAAM,OAAO,GAAG,IAAI,aAAa,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,gBAAgB,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC,CAAC;IAC5G,MAAM,cAAc,GAAG,iCAAiC,EAAE,CAAC;IAC3D,MAAM,cAAc,GAAoB,MAAM,kCAAkC,CAC9E,oBAAoB,EAAE,MAAM,CAAC,gBAAgB,CAC9C,CAAC;IAEF,4EAA4E;IAC5E,MAAM,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,EAAE,IAAI,EAAE,CAAC;IAC/D,MAAM,SAAS,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG;QACb,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,QAAQ;QACzC,SAAS,EAAE,MAAM,CAAC,MAAM;QACxB,YAAY,EAAE,oBAAoB,MAAM,CAAC,OAAO,EAAE;QAClD,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7C,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACpC,CAAC;IACF,MAAM,WAAW,GAAY,IAAI,cAAc,CAAC;QAC9C,OAAO,EAAE,IAAI,wBAAwB,CAAC,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,CAAC;QACrF,UAAU,EAAE,MAAM,CAAC,UAAU;KAC9B,CAAC,CAAC;IACH,8EAA8E;IAC9E,+EAA+E;IAC/E,uDAAuD;IACvD,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC;IACjD,MAAM,aAAa,GAAY,UAAU,KAAK,aAAa;QACzD,CAAC,CAAC,IAAI,cAAc,CAAC;YACjB,OAAO,EAAE,IAAI,wBAAwB,CAAC,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,MAAM,EAAE,CAAC;YAC7F,UAAU,EAAE,MAAM,CAAC,UAAU;SAC9B,CAAC;QACJ,CAAC,CAAC,IAAI,cAAc,CAAC;YACjB,IAAI,EAAE,aAAa;YACnB,OAAO,EAAE,IAAI,uBAAuB,CAAC,EAAE,GAAG,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC;YAClH,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,GAAG,CAAC,MAAM,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC3E,CAAC,CAAC;IAEP,+DAA+D;IAC/D,MAAM,WAAW,GAAG,MAAM,yBAAyB,CAAC;QAClD,QAAQ,EAAE,oBAAoB,EAAE,KAAK,EAAE,IAAI,cAAc,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,WAAW,EAAE,EAAE;KACzF,CAAC,CAAC;IACH,IAAI,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC;IAC5B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QA
AE,MAAM,IAAI,KAAK,CAAC,4BAA4B,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAEjI,mFAAmF;IACnF,4CAA4C;IAC5C,6DAA6D;IAC7D,6EAA6E;IAC7E,8EAA8E;IAC9E,iBAAiB;IACjB,6EAA6E;IAC7E,iFAAiF;IACjF,+EAA+E;IAC/E,uCAAuC;IACvC,MAAM,UAAU,GAAG,yBAAyB,CAAC,oBAAoB,EAAE,8BAA8B,CAAC,CAAC;IACnG,MAAM,SAAS,GAAG,MAAM,IAAI,mBAAmB,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,iBAAiB,EAAE,CAAC;IAClF,IAAI,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACrC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,EAAE,GAAG,EAAE,IAAI,EAAE;WAC7C,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,6BAA6B,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAChF,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,IAAI,CACjE,CAAC,CAAC,EAAE,EAAE,CAAC,4BAA4B,CAAC,CAAC,CAAC,KAAK,oBAAoB,CAChE,CAAC;IACF,IAAI,YAAY,IAAI,SAAS,EAAE,WAAW,EAAE,CAAC;QAC3C,YAAY,GAAG,MAAM,yBAAyB,CAAC,YAAY,EAAE,SAAS,CAAC,WAAW,CAAC,CAAC;QACpF,GAAG,CAAC,qBAAqB,YAAY,CAAC,IAAI,sFAAsF,CAAC,CAAC;IACpI,CAAC;SAAM,CAAC;QACN,GAAG,CAAC,uJAAuJ,CAAC,CAAC;IAC/J,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,UAAU,EAAE,GAAG,SAAS,EAAE,GAAG,YAAY,CAAC,CAAC,CAAC;IACnF,IAAI,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE,CAAC;QAC7B,yEAAyE;QACzE,uFAAuF;QACvF,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;QACnD,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAC5F,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,GAAG,CAAC,qBAAqB,OAAO,CAAC,MAAM,wGAAwG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvK,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,IAAI,CAAC,IAAI;YAAE,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;QACrF,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;QAC3B,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;QAC1D,gFAAgF;QAChF
,+EAA+E;QAC/E,4EAA4E;QAC5E,wEAAwE;QACxE,iEAAiE;QACjE,MAAM,WAAW,GAAG,MAAM,cAAc,CAAC,cAAc,CAAC,sBAAsB,CAAC,CAAC;QAChF,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC;YACnC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;YAC1D,GAAG,CAAC,2BAA2B,MAAM,MAAM,cAAc,cAAc,UAAU,CAAC,IAAI,KAAK,UAAU,CAAC,IAAI,eAAe,SAAS,CAAC,IAAI,aAAa,YAAY,CAAC,IAAI,iBAAiB,IAAI,CAAC,MAAM,sDAAsD,CAAC,CAAC;QAC3P,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,qEAAqE,IAAI,CAAC,MAAM,gFAAgF,CAAC,CAAC;QACxK,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IACxC,GAAG,CAAC,YAAY,UAAU,CAAC,MAAM,oCAAoC,CAAC,CAAC;IAEvE,yEAAyE;IACzE,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1D,KAAK,UAAU,OAAO,CAAC,OAAgB,EAAE,QAAkB;QACzD,MAAM,YAAY,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,oBAAoB,CAAC,CAAC,CAAC;QACvE,2EAA2E;QAC3E,6EAA6E;QAC7E,4BAA4B;QAC5B,IAAI,KAAK,GAAoC,SAAS,CAAC;QACvD,IAAI,CAAC;YACH,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,iBAAiB,CAAC;gBACzC,SAAS,EAAE,CAAC,QAAQ,CAAC,EAAE,UAAU,EAAE,QAAQ,CAAC,UAAU,EAAE,QAAQ,EAAE,QAAQ,CAAC,QAAQ,EAAE,OAAO;aAC7F,CAAC,CAAC;YACH,IAAI,CAAC,QAAQ;gBAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,+BAA+B,EAAE,CAAC;YAC1F,MAAM,IAAI,GAAS;gBACjB,EAAE,EAAE,QAAQ,CAAC,WAAW;gBACxB,WAAW,EAAE,QAAQ,CAAC,IAAI,CAAC,iBAAiB;gBAC5C,IAAI,EAAE,aAAa;gBACnB,UAAU,EAAE,oBAAoB;gBAChC,IAAI,EAAE,QAAQ,CAAC,IAA0C;gBACzD,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,aAAa,EAAE;aAC1D,CAAC;YACF,KAAK,GAAG,SAAS,CAAC;YAClB,MAAM,GAAG,GAAG,MAAM,iBAAiB,CAAC;gBAClC,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,oBAAoB,EAAE,cAAc,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ;aAC9F,CAAC,CAAC;YACH,IAAI,GAAG,CAAC,SAAS;gBAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,iCAAiC,EAAE,CAAC;YAChG,IAAI,CAAC,GAAG,CAAC,QAAQ;gBAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,+BAA+B,EAAE,CAAC;YAC9F,KAAK,GAAG,OAAO,CAAC;YAChB,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC;gBACpC,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,eAAe,EAAE,EAAE,aAAa,EAAE,
4BAA4B,EAAE,KAAK,EAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,EAAE;gBAC3F,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC,CAAC;YACH,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,YAAY,EAAE,CAAC;QAC1C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,2EAA2E;YAC3E,2EAA2E;YAC3E,qEAAqE;YACrE,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC;gBAClD,CAAC,CAAC,GAAG,KAAK,2BAA2B;gBACrC,CAAC,CAAC,GAAG,KAAK,WAAW,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;YAC3C,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC;QACpD,CAAC;gBAAS,CAAC;YACT,MAAM,CAAC,YAAY,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,mBAAmB,CAAC,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG,WAAW,CAAC,qBAAqB,EAAE,MAAM;QACxD,CAAC,CAAC,EAAE,cAAc,EAAE,CAAC,GAAG,WAAW,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC3E,MAAM,cAAc,GAAG,UAAU,MAAM,gBAAgB,CAAC,YAAY,EAAE,QAAQ,CAAC,EAAE,CAAC;IAClF,MAAM,CAAC,YAAY,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAEvD,+EAA+E;IAC/E,iFAAiF;IACjF,iFAAiF;IACjF,2DAA2D;IAC3D,MAAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,IAAI,CAAC,UAAU,KAAK,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IACvG,MAAM,QAAQ,GAAG,IAAI,mBAAmB,CAAC;QACvC,QAAQ;QACR,SAAS,EAAE,eAAe,CAAC;YACzB,SAAS,EAAE,MAAM,CAAC,WAAW,EAAE,aAAa,EAAE,UAAU,EAAE,WAAW,EAAE,gBAAgB;YACvF,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,oBAAoB,EAAE,sBAAsB;SACxD,CAAC;KACH,CAAC,CAAC;IACH,IAAI,QAAQ,CAAC,IAAI,GAAG,CAAC;QAAE,GAAG,CAAC,sBAAsB,QAAQ,CAAC,IAAI,yDAAyD,CAAC,CAAC;IAEzH,MAAM,IAAI,GAAe;QACvB,GAAG;QACH,oBAAoB,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9C,iBAAiB,EAAE,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC;QACpD,eAAe,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,qBAAqB,CAAC,CAAC,IAAI,CAAC,EAAE;gBAClC,OAAO,EAAE,MAAM,EAAE,IAAI,gBAAgB,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,KAAK,EAAE,cAAc;gBACjF,gBAAgB,EAAE,sBAAsB,EAAE,eAAe;aAC1D,EAAE,EAAE,CAAC,CAAC;YACP,OAAO,CAAC,MAAM,cAAc,CAAC,QAAQ
,CAAC,IAAI,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC,EAAE,QAAQ,KAAK,IAAI,CAAC;QACtG,CAAC;QACD,aAAa,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAE,CAAC;QAC1E,eAAe,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAE,CAAC;KAC/E,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,UAAU,EAAE,IAAI,EAAE;QACxD,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,YAAY,EAAE,IAAI,CAAC,YAAY,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU;KAC3G,CAAC,CAAC;IAEH,2EAA2E;IAC3E,0EAA0E;IAC1E,+EAA+E;IAC/E,4EAA4E;IAC5E,4EAA4E;IAC5E,4CAA4C;IAC5C,MAAM,gBAAgB,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,aAAa,IAAI,CAAC,CAAC,YAAY,KAAK,IAAI,CAC7D,CAAC,MAAM,CAAC;IACT,IAAI,gBAAgB,GAAG,CAAC,EAAE,CAAC;QACzB,GAAG,CACD,gEAAgE,gBAAgB,gBAAgB;YAChG,sGAAsG;YACtG,yGAAyG,CAC1G,CAAC;IACJ,CAAC;IAED,2EAA2E;IAC3E,2EAA2E;IAC3E,uEAAuE;IACvE,8EAA8E;IAC9E,gFAAgF;IAChF,iFAAiF;IACjF,IAAI,eAAe,GAAa,EAAE,CAAC;IACnC,IAAI,CAAC;QACH,eAAe,GAAG,CAAC,GAAG,gBAAgB,CAAC,oBAAoB,EAAE,aAAa,CAAC,CAAC,WAAW,CAAC,CAAC;IAC3F,CAAC;IAAC,MAAM,CAAC;QACP,6CAA6C;IAC/C,CAAC;IACD,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC5D,MAAM,aAAa,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,eAAe,EAAE,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAExE,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC7C,MAAM,SAAS,GAAG,gBAAgB,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC;IAC/D,SAAS,CAAC,SAAS,EAAE,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,EAAE,uCAAuC,CAAC,CAAC;IAC7E,aAAa,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IACpE,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,EAAE,EAAE,wDAAwD,CAAC,CAAC;IAC/F,aAAa,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC;QAC1C,WAAW,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc;QACzE,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU;QACpC,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC,UAAU,KAAK,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC;QAC1F,YAAY,EAAE,aAAa,CAAC,MAAM,EAAE,UAAU,EAAE,UAAU,EAA
E,WAAW,EAAE,eAAe;QACxF,QAAQ,EAAE,MAAM,CAAC,QAAQ;KAC1B,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;IAEjB,gFAAgF;IAChF,oFAAoF;IACpF,oFAAoF;IACpF,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACvC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;YAC/B,KAAK,CAAC,gBAAgB,CAAC;gBACrB,cAAc,EAAE,cAAc,EAAE,UAAU,EAAE,SAAS,CAAC,IAAI,EAAE,aAAa,EAAE,aAAa;gBACxF,WAAW,EAAE,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,WAAW,EAAE,cAAc;gBAC9E,SAAS,EAAE,OAAO,EAAE,gBAAgB,EAAE,2CAA2C;aAClF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;YAAS,CAAC;QACT,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC;IAClB,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,CAAC,MAAM,EAAE,gBAAgB,EAAE,CAAC;AACjH,CAAC"}
@@ -0,0 +1,107 @@
1
+ import type { PoolTask } from '../solver-types/_swe-rebench-v2-pool.js';
2
+ import { type HeldOutSlateArtifact } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
3
+ /** Stratification / diversity key: the org prefix of an instance_id
4
+ * (`tobymao__sqlglot-4661` → `tobymao`). Derivable without an HF fetch. */
5
+ export declare function repoOf(task: PoolTask): string;
6
+ /**
7
+ * Order candidates round-robin across repos so the first N base-fails span
8
+ * repos rather than clumping in alphabetically-early ones. Deterministic:
9
+ * instances sort by instance_id within each repo group; repo groups iterate in
10
+ * sorted repo order.
11
+ */
12
+ export declare function stratifyByRepo(pool: PoolTask[]): PoolTask[];
13
+ /** One frozen run's grade outcome. `passed === null` means unscorable (Docker/grader/infra failure). */
14
+ export interface ScreenCandidateRun {
15
+ passed: boolean | null;
16
+ /** When `passed === null`, the stage + error that made it unscorable
17
+ * (e.g. `resolve: …`, `harness: no patch`, `grade-error: …`) — surfaced so an
18
+ * exclusion is never an opaque black box. */
19
+ unscorableReason?: string;
20
+ }
21
+ /**
22
+ * The expensive, cacheable per-candidate outcome — gradeability + the base R-run
23
+ * loop + (if base is 0/R) the prover. Decoupled from the selection decision so a
24
+ * resumed run replays it from cache without re-spending inference. The decision
25
+ * (held-out / no-headroom / caps) is always recomputed fresh from this.
26
+ */
27
+ export interface ScreenMeasurement {
28
+ gradeable: boolean;
29
+ basePasses: number;
30
+ baseRuns: number;
31
+ baseUnscorable: boolean;
32
+ baseUnscorableReason?: string;
33
+ /** Whether the prover was reached (only when base is gradeable + 0/R). */
34
+ proverRan: boolean;
35
+ /** Meaningful only when `proverRan`; `null` = prover unscorable. */
36
+ proverPassed: boolean | null;
37
+ proverUnscorableReason?: string;
38
+ }
39
+ export interface ScreenDeps {
40
+ /** Confirm gradeable at the current semantics version (idempotent; cheap/cached). */
41
+ ensureGradeable(task: PoolTask): Promise<boolean>;
42
+ /** Base Haiku, frozen, empty impl-state. `passed: null` = unscorable. */
43
+ runBaseFrozen(task: PoolTask): Promise<ScreenCandidateRun>;
44
+ /** Prover (Codex/GPT-5.5), frozen, empty impl-state. `passed: null` = unscorable. */
45
+ runProverFrozen(task: PoolTask): Promise<ScreenCandidateRun>;
46
+ /** Resumability (optional): return a cached measurement for this instance, or
47
+ * undefined to measure live. A cache hit replays for free (no inference) and
48
+ * does NOT consume the maxCandidates budget. */
49
+ getCachedMeasurement?(instance_id: string): ScreenMeasurement | undefined;
50
+ /** Resumability (optional): persist a freshly-measured candidate so a later
51
+ * re-run of the same command resumes instead of restarting. */
52
+ recordMeasurement?(instance_id: string, m: ScreenMeasurement): void;
53
+ log?: (msg: string) => void;
54
+ }
55
+ export interface ScreenOpts {
56
+ /** Base runs per candidate (≥3). A candidate is a reliable fail iff 0/R passed. */
57
+ R: number;
58
+ /** Exam cap N. */
59
+ heldOutCount: number;
60
+ /** Budget: stop after this many newly measured (non-cached) candidates reach the base-run stage. */
61
+ maxCandidates: number;
62
+ /** Max held-out instances per repo (diversity). */
63
+ perRepoCap: number;
64
+ }
65
+ export type ScreenReason = 'held-out' | 'not-gradeable' | 'base-passes' | 'base-unscorable' | 'no-headroom' | 'per-repo-cap';
66
+ export interface ScreenedCandidate {
67
+ instance_id: string;
68
+ repo: string;
69
+ gradeable: boolean;
70
+ baseRuns: number;
71
+ basePasses: number;
72
+ proverPassed: boolean | null;
73
+ heldOut: boolean;
74
+ reason: ScreenReason;
75
+ /** For `base-unscorable` / `no-headroom`-via-unscorable: the stage + error
76
+ * the run reported, so the exclusion is diagnosable without digging through transcripts. */
77
+ unscorableReason?: string;
78
+ }
79
+ export interface ScreenResult {
80
+ heldOut: {
81
+ instance_id: string;
82
+ repo: string;
83
+ baseRuns: number;
84
+ }[];
85
+ screened: ScreenedCandidate[];
86
+ }
87
+ /**
88
+ * Partition a candidate stream into the held-out exam vs the rest, applying the
89
+ * three filter layers cheapest-first. `candidates` MUST already be ordered (use
90
+ * {@link stratifyByRepo}); selection order is the iteration order and is frozen.
91
+ *
92
+ * Resumable: if `deps.getCachedMeasurement` is provided, an already-measured
93
+ * candidate replays from cache (no inference, no budget cost), so re-running the
94
+ * same command resumes — the `maxCandidates` budget bounds only NEW measurements
95
+ * per invocation, letting a long screen proceed in budget-sized chunks. The
96
+ * selection decision (caps, held-out) is always recomputed fresh, so the cached
97
+ * measurements stay valid even if `heldOutCount`/`perRepoCap` change.
98
+ */
99
+ export declare function screenBaseFailures(candidates: PoolTask[], deps: ScreenDeps, opts: ScreenOpts): Promise<ScreenResult>;
100
+ /** The on-disk v2 slate file = the hashed artifact + a provenance `comment`
101
+ * (the comment is outside the canonical hash). solverType matches the
102
+ * `${solverType}.v1` key `jinn eval` loads with. */
103
+ export interface V2SlateFile extends HeldOutSlateArtifact {
104
+ comment: string;
105
+ hash: `sha256:${string}`;
106
+ }
107
+ export declare function buildV2SlateFile(instanceIds: string[], generatedAt: string): V2SlateFile;