@jinn-network/client 0.1.8 → 0.1.9-canary.050a41b1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. package/dist/adapters/mech/adapter.d.ts +21 -1
  2. package/dist/adapters/mech/adapter.js +77 -10
  3. package/dist/adapters/mech/adapter.js.map +1 -1
  4. package/dist/adapters/mech/contracts.js +62 -28
  5. package/dist/adapters/mech/contracts.js.map +1 -1
  6. package/dist/adapters/mech/safe-revert.d.ts +4 -0
  7. package/dist/adapters/mech/safe-revert.js +5 -1
  8. package/dist/adapters/mech/safe-revert.js.map +1 -1
  9. package/dist/adapters/mech/safe.js +5 -1
  10. package/dist/adapters/mech/safe.js.map +1 -1
  11. package/dist/adapters/mech/verdict-code.js +1 -1
  12. package/dist/adapters/mech/verdict-code.js.map +1 -1
  13. package/dist/api/bootstrap-endpoint.d.ts +1 -0
  14. package/dist/api/bootstrap-endpoint.js +1 -0
  15. package/dist/api/bootstrap-endpoint.js.map +1 -1
  16. package/dist/api/discovery-endpoint.d.ts +1 -0
  17. package/dist/api/discovery-endpoint.js +24 -0
  18. package/dist/api/discovery-endpoint.js.map +1 -1
  19. package/dist/api/fleet-build.d.ts +1 -7
  20. package/dist/api/fleet-build.js +0 -7
  21. package/dist/api/fleet-build.js.map +1 -1
  22. package/dist/api/gather-status.d.ts +8 -2
  23. package/dist/api/gather-status.js +29 -117
  24. package/dist/api/gather-status.js.map +1 -1
  25. package/dist/api/loop-completion-build.d.ts +79 -0
  26. package/dist/api/loop-completion-build.js +155 -0
  27. package/dist/api/loop-completion-build.js.map +1 -0
  28. package/dist/api/peers.js +2 -0
  29. package/dist/api/peers.js.map +1 -1
  30. package/dist/api/setup-endpoints.d.ts +32 -0
  31. package/dist/api/setup-endpoints.js +93 -23
  32. package/dist/api/setup-endpoints.js.map +1 -1
  33. package/dist/api/solvernets-endpoints.js +3 -0
  34. package/dist/api/solvernets-endpoints.js.map +1 -1
  35. package/dist/api/status-build.d.ts +43 -33
  36. package/dist/api/status-build.js +3 -26
  37. package/dist/api/status-build.js.map +1 -1
  38. package/dist/api/status-rollup-build.d.ts +0 -4
  39. package/dist/api/status-rollup-build.js +0 -4
  40. package/dist/api/status-rollup-build.js.map +1 -1
  41. package/dist/build-info.json +4 -4
  42. package/dist/build-meta.json +1 -1
  43. package/dist/cli/commands/codedigest-revert-check.js +6 -2
  44. package/dist/cli/commands/codedigest-revert-check.js.map +1 -1
  45. package/dist/cli/commands/doctor.d.ts +3 -0
  46. package/dist/cli/commands/doctor.js +37 -2
  47. package/dist/cli/commands/doctor.js.map +1 -1
  48. package/dist/cli/commands/eval.d.ts +76 -0
  49. package/dist/cli/commands/eval.js +401 -0
  50. package/dist/cli/commands/eval.js.map +1 -0
  51. package/dist/cli/commands/rewards.d.ts +2 -0
  52. package/dist/cli/commands/rewards.js +30 -3
  53. package/dist/cli/commands/rewards.js.map +1 -1
  54. package/dist/cli/commands/solver-nets.js +68 -0
  55. package/dist/cli/commands/solver-nets.js.map +1 -1
  56. package/dist/cli/commands/status.js +0 -1
  57. package/dist/cli/commands/status.js.map +1 -1
  58. package/dist/cli/index.js +2 -0
  59. package/dist/cli/index.js.map +1 -1
  60. package/dist/config.d.ts +58 -7
  61. package/dist/config.js +96 -7
  62. package/dist/config.js.map +1 -1
  63. package/dist/daemon/ai-units-gate.d.ts +6 -6
  64. package/dist/daemon/ai-units-gate.js +11 -10
  65. package/dist/daemon/ai-units-gate.js.map +1 -1
  66. package/dist/daemon/balance-topup-loop.js +3 -0
  67. package/dist/daemon/balance-topup-loop.js.map +1 -1
  68. package/dist/daemon/creator.js +2 -0
  69. package/dist/daemon/creator.js.map +1 -1
  70. package/dist/daemon/daemon.d.ts +15 -0
  71. package/dist/daemon/daemon.js +78 -22
  72. package/dist/daemon/daemon.js.map +1 -1
  73. package/dist/daemon/eviction-loop.d.ts +7 -0
  74. package/dist/daemon/eviction-loop.js +16 -0
  75. package/dist/daemon/eviction-loop.js.map +1 -1
  76. package/dist/daemon/jinn-claim-loop.js +3 -0
  77. package/dist/daemon/jinn-claim-loop.js.map +1 -1
  78. package/dist/daemon/join-applier.d.ts +35 -0
  79. package/dist/daemon/join-applier.js +49 -0
  80. package/dist/daemon/join-applier.js.map +1 -0
  81. package/dist/daemon/loop-heartbeat.d.ts +34 -0
  82. package/dist/daemon/loop-heartbeat.js +39 -0
  83. package/dist/daemon/loop-heartbeat.js.map +1 -0
  84. package/dist/daemon/reward-claim-loop.js +3 -0
  85. package/dist/daemon/reward-claim-loop.js.map +1 -1
  86. package/dist/daemon/watchdog-loop.d.ts +84 -0
  87. package/dist/daemon/watchdog-loop.js +91 -0
  88. package/dist/daemon/watchdog-loop.js.map +1 -0
  89. package/dist/dashboard/assets/index-8tAiMbUV.css +1 -0
  90. package/dist/dashboard/assets/index-CSFVwGFh.js +167 -0
  91. package/dist/dashboard/index.html +2 -2
  92. package/dist/discovery/http.d.ts +7 -0
  93. package/dist/discovery/http.js +241 -25
  94. package/dist/discovery/http.js.map +1 -1
  95. package/dist/discovery/onchain.js +155 -1
  96. package/dist/discovery/onchain.js.map +1 -1
  97. package/dist/discovery/types.d.ts +106 -0
  98. package/dist/discovery/types.js +40 -0
  99. package/dist/discovery/types.js.map +1 -1
  100. package/dist/discovery/with-fallback.js +14 -0
  101. package/dist/discovery/with-fallback.js.map +1 -1
  102. package/dist/earning/bootstrap.d.ts +23 -0
  103. package/dist/earning/bootstrap.js +76 -27
  104. package/dist/earning/bootstrap.js.map +1 -1
  105. package/dist/earning/faucet.d.ts +1 -1
  106. package/dist/earning/faucet.js +2 -2
  107. package/dist/earning/faucet.js.map +1 -1
  108. package/dist/earning/safe-adapter.js +11 -0
  109. package/dist/earning/safe-adapter.js.map +1 -1
  110. package/dist/eval/eval-harness-run.d.ts +63 -0
  111. package/dist/eval/eval-harness-run.js +123 -0
  112. package/dist/eval/eval-harness-run.js.map +1 -0
  113. package/dist/eval/orchestrator.d.ts +163 -0
  114. package/dist/eval/orchestrator.js +232 -0
  115. package/dist/eval/orchestrator.js.map +1 -0
  116. package/dist/eval/paired.d.ts +68 -0
  117. package/dist/eval/paired.js +93 -0
  118. package/dist/eval/paired.js.map +1 -0
  119. package/dist/eval/resolve-slate-tasks.d.ts +35 -0
  120. package/dist/eval/resolve-slate-tasks.js +56 -0
  121. package/dist/eval/resolve-slate-tasks.js.map +1 -0
  122. package/dist/eval/screen-discovery.d.ts +22 -0
  123. package/dist/eval/screen-discovery.js +71 -0
  124. package/dist/eval/screen-discovery.js.map +1 -0
  125. package/dist/eval/screen-progress.d.ts +41 -0
  126. package/dist/eval/screen-progress.js +60 -0
  127. package/dist/eval/screen-progress.js.map +1 -0
  128. package/dist/eval/screen-runner.d.ts +30 -0
  129. package/dist/eval/screen-runner.js +289 -0
  130. package/dist/eval/screen-runner.js.map +1 -0
  131. package/dist/eval/screen.d.ts +107 -0
  132. package/dist/eval/screen.js +159 -0
  133. package/dist/eval/screen.js.map +1 -0
  134. package/dist/eval/slope.d.ts +29 -0
  135. package/dist/eval/slope.js +46 -0
  136. package/dist/eval/slope.js.map +1 -0
  137. package/dist/eval/train-sequence.d.ts +35 -0
  138. package/dist/eval/train-sequence.js +59 -0
  139. package/dist/eval/train-sequence.js.map +1 -0
  140. package/dist/eval/wilson.d.ts +45 -0
  141. package/dist/eval/wilson.js +48 -0
  142. package/dist/eval/wilson.js.map +1 -0
  143. package/dist/harnesses/engine/canonical-json.js +5 -3
  144. package/dist/harnesses/engine/canonical-json.js.map +1 -1
  145. package/dist/harnesses/engine/engine.d.ts +24 -0
  146. package/dist/harnesses/engine/engine.js +72 -9
  147. package/dist/harnesses/engine/engine.js.map +1 -1
  148. package/dist/harnesses/engine/persistence.d.ts +17 -0
  149. package/dist/harnesses/engine/persistence.js +28 -0
  150. package/dist/harnesses/engine/persistence.js.map +1 -1
  151. package/dist/harnesses/impls/hermes-agent/adapter.d.ts +2 -0
  152. package/dist/harnesses/impls/hermes-agent/adapter.js +8 -5
  153. package/dist/harnesses/impls/hermes-agent/adapter.js.map +1 -1
  154. package/dist/harnesses/impls/hermes-agent/bootstrap.d.ts +1 -0
  155. package/dist/harnesses/impls/hermes-agent/bootstrap.js +6 -1
  156. package/dist/harnesses/impls/hermes-agent/bootstrap.js.map +1 -1
  157. package/dist/harnesses/impls/hermes-agent/harness.d.ts +17 -3
  158. package/dist/harnesses/impls/hermes-agent/harness.js +68 -5
  159. package/dist/harnesses/impls/hermes-agent/harness.js.map +1 -1
  160. package/dist/harnesses/impls/index.d.ts +2 -0
  161. package/dist/harnesses/impls/index.js +2 -0
  162. package/dist/harnesses/impls/index.js.map +1 -1
  163. package/dist/harnesses/impls/learner/adapters/claude-code.js +5 -0
  164. package/dist/harnesses/impls/learner/adapters/claude-code.js.map +1 -1
  165. package/dist/harnesses/impls/learner/harness.d.ts +17 -1
  166. package/dist/harnesses/impls/learner/harness.js +51 -1
  167. package/dist/harnesses/impls/learner/harness.js.map +1 -1
  168. package/dist/harnesses/impls/learner/harvest.d.ts +2 -0
  169. package/dist/harnesses/impls/learner/harvest.js +7 -1
  170. package/dist/harnesses/impls/learner/harvest.js.map +1 -1
  171. package/dist/harnesses/impls/learner/plugin-path.js +1 -0
  172. package/dist/harnesses/impls/learner/plugin-path.js.map +1 -1
  173. package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js +3 -1
  174. package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js.map +1 -1
  175. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.d.ts +2 -2
  176. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js +3 -1
  177. package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js.map +1 -1
  178. package/dist/harnesses/readiness-registry.d.ts +10 -0
  179. package/dist/harnesses/readiness-registry.js +13 -0
  180. package/dist/harnesses/readiness-registry.js.map +1 -1
  181. package/dist/harnesses/types.d.ts +14 -0
  182. package/dist/learner/revert-decision.d.ts +16 -1
  183. package/dist/learner/revert-decision.js +38 -18
  184. package/dist/learner/revert-decision.js.map +1 -1
  185. package/dist/learner/revert-stats.d.ts +14 -0
  186. package/dist/learner/revert-stats.js +42 -0
  187. package/dist/learner/revert-stats.js.map +1 -1
  188. package/dist/local-provider-url.d.ts +3 -0
  189. package/dist/local-provider-url.js +28 -0
  190. package/dist/local-provider-url.js.map +1 -0
  191. package/dist/main.js +94 -25
  192. package/dist/main.js.map +1 -1
  193. package/dist/plugins/learner/.claude-plugin/plugin.json +1 -1
  194. package/dist/plugins/learner/.codex-plugin/plugin.json +1 -1
  195. package/dist/plugins/learner/hooks/session-start +30 -1
  196. package/dist/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
  197. package/dist/preflight/deployment-readiness.d.ts +147 -0
  198. package/dist/preflight/deployment-readiness.js +366 -0
  199. package/dist/preflight/deployment-readiness.js.map +1 -0
  200. package/dist/preflight/pidfile-liveness.d.ts +7 -1
  201. package/dist/preflight/pidfile-liveness.js +14 -0
  202. package/dist/preflight/pidfile-liveness.js.map +1 -1
  203. package/dist/rpc/transport.d.ts +36 -0
  204. package/dist/rpc/transport.js +123 -24
  205. package/dist/rpc/transport.js.map +1 -1
  206. package/dist/scripts/swe-rebench-v2-seed-pool.json +2 -1
  207. package/dist/solver-nets/registry.d.ts +19 -0
  208. package/dist/solver-nets/registry.js +92 -66
  209. package/dist/solver-nets/registry.js.map +1 -1
  210. package/dist/solver-types/_swe-rebench-v2-held-out-slate.d.ts +76 -0
  211. package/dist/solver-types/_swe-rebench-v2-held-out-slate.js +156 -0
  212. package/dist/solver-types/_swe-rebench-v2-held-out-slate.js.map +1 -0
  213. package/dist/solver-types/_swe-rebench-v2-pool-recovery.d.ts +81 -0
  214. package/dist/solver-types/_swe-rebench-v2-pool-recovery.js +116 -0
  215. package/dist/solver-types/_swe-rebench-v2-pool-recovery.js.map +1 -0
  216. package/dist/solver-types/_swe-rebench-v2-state.d.ts +9 -0
  217. package/dist/solver-types/_swe-rebench-v2-state.js +14 -0
  218. package/dist/solver-types/_swe-rebench-v2-state.js.map +1 -1
  219. package/dist/solver-types/_swe-rebench-v2-validated-pool.d.ts +30 -0
  220. package/dist/solver-types/_swe-rebench-v2-validated-pool.js +40 -0
  221. package/dist/solver-types/_swe-rebench-v2-validated-pool.js.map +1 -1
  222. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json +20 -0
  223. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.json +19 -0
  224. package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.screening-report.json +628 -0
  225. package/dist/solver-types/solver-type.d.ts +8 -0
  226. package/dist/solver-types/swe-rebench-v2.d.ts +2 -0
  227. package/dist/solver-types/swe-rebench-v2.js +115 -10
  228. package/dist/solver-types/swe-rebench-v2.js.map +1 -1
  229. package/dist/solvernets/launched-record-dispatcher.d.ts +3 -0
  230. package/dist/solvernets/launched-record-dispatcher.js.map +1 -1
  231. package/dist/solvernets/registry-client-erc8004.js +29 -37
  232. package/dist/solvernets/registry-client-erc8004.js.map +1 -1
  233. package/dist/solvernets/registry-client.d.ts +6 -0
  234. package/dist/solvernets/store.js +7 -2
  235. package/dist/solvernets/store.js.map +1 -1
  236. package/dist/spend/ai-units-config.d.ts +10 -0
  237. package/dist/spend/ai-units-config.js +7 -1
  238. package/dist/spend/ai-units-config.js.map +1 -1
  239. package/dist/spend/ai-units.d.ts +51 -0
  240. package/dist/spend/ai-units.js +73 -0
  241. package/dist/spend/ai-units.js.map +1 -1
  242. package/dist/spend/record.js +12 -5
  243. package/dist/spend/record.js.map +1 -1
  244. package/dist/store/store.d.ts +91 -5
  245. package/dist/store/store.js +170 -7
  246. package/dist/store/store.js.map +1 -1
  247. package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.d.ts +108 -1
  248. package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.js +25 -1
  249. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.d.ts +65 -0
  250. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.js +123 -0
  251. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.d.ts +2 -2
  252. package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.js +1 -1
  253. package/dist/vendor/@jinn-network/sdk/package.json +4 -0
  254. package/docker-compose.yml +3 -2
  255. package/package.json +22 -18
  256. package/plugins/learner/.claude-plugin/plugin.json +1 -1
  257. package/plugins/learner/.codex-plugin/plugin.json +1 -1
  258. package/plugins/learner/hooks/session-start +30 -1
  259. package/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
  260. package/plugins/swe-rebench-v2-runtime/hooks/hooks.json +16 -0
  261. package/plugins/swe-rebench-v2-runtime/hooks/session-start +74 -0
  262. package/dist/dashboard/assets/index-CzKxvMcU.css +0 -32
  263. package/dist/dashboard/assets/index-yVemxHot.js +0 -351
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Ordinary least-squares slope of resolved-rate vs cycle index for the
3
+ * train-arm slope measurement (issue #822, AC#1).
4
+ *
5
+ * The train-arm e2e evaluates a checkpoint against the held-out slate (#817)
6
+ * at intervals via the eval orchestrator (#818), collecting one
7
+ * `{ cycleIndex, rate }` point per interval (`rate` = passed / scorable, the
8
+ * Wilson point estimate). The slope of the least-squares fit is the headline
9
+ * "is the learner improving across the training sequence" number.
10
+ *
11
+ * It is deliberately a thin helper over the closed-form OLS slope
12
+ * (`cov(x,y) / var(x)`); the per-point confidence intervals come from
13
+ * `wilson.ts` — this module does NOT reimplement them. The slope sign alone is
14
+ * never a verdict at small N: a flat or slightly negative slope is "within
15
+ * noise", which the e2e surfaces via the §4.1 honesty caveat.
16
+ */
17
/**
 * Ordinary least-squares slope of `rate` as a function of `cycleIndex`,
 * computed with the closed form `cov(x, y) / var(x)`.
 *
 * @param {Array<{ cycleIndex: number, rate: number }>} points
 *   One observation per evaluated interval.
 * @returns {number} The fitted slope. 0 when there are fewer than two points
 *   (nothing to fit) or when every `cycleIndex` is the same (zero variance in
 *   x, so the quotient would otherwise be NaN). A flat sequence yields 0.
 */
export function leastSquaresSlope(points) {
    const total = points.length;
    if (total < 2) {
        return 0;
    }
    // First pass: accumulate the sums needed for both means.
    let xTotal = 0;
    let yTotal = 0;
    for (let i = 0; i < total; i += 1) {
        const point = points[i];
        xTotal += point.cycleIndex;
        yTotal += point.rate;
    }
    const xMean = xTotal / total;
    const yMean = yTotal / total;
    // Second pass: centred cross-product (numerator) and centred square of x
    // (denominator). The shared 1/n factor cancels, so it is never applied.
    let crossSum = 0;
    let xSquareSum = 0;
    for (let i = 0; i < total; i += 1) {
        const point = points[i];
        const xOffset = point.cycleIndex - xMean;
        crossSum += xOffset * (point.rate - yMean);
        xSquareSum += xOffset * xOffset;
    }
    // All x identical: there is no line to fit, report a flat slope.
    return xSquareSum === 0 ? 0 : crossSum / xSquareSum;
}
46
+ //# sourceMappingURL=slope.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"slope.js","sourceRoot":"","sources":["../../src/eval/slope.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AASH;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAAmB;IACnD,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAC1C,IAAI,IAAI,UAAU,CAAC;QACnB,IAAI,IAAI,IAAI,CAAC;IACf,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC;IACvB,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC;IACvB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAC1C,MAAM,EAAE,GAAG,UAAU,GAAG,KAAK,CAAC;QAC9B,GAAG,IAAI,EAAE,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;QAC3B,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;IAClB,CAAC;IACD,IAAI,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzB,OAAO,GAAG,GAAG,IAAI,CAAC;AACpB,CAAC"}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Train-sequence builder with the no-train/test-overlap guard for the
3
+ * train-arm slope e2e (issue #822, AC#2).
4
+ *
5
+ * The learner-full-cycle e2e drives `runCycle` DIRECTLY (it does not post tasks
6
+ * through the generator), so the generator's `excludeHeldOutSlate` train-stream
7
+ * chokepoint (#817) is BYPASSED. This builder is therefore the load-bearing
8
+ * AC#2 mechanism for the e2e: it selects the N distinct training instances from
9
+ * the pool with the held-out slate excluded, and asserts the resulting
10
+ * sequence is disjoint from the slate (fail-loud, never a silent drop).
11
+ *
12
+ * It reuses `excludeHeldOutSlate` from the #817 primitive rather than
13
+ * reimplementing the exclusion. Selection is deterministic (instance-id sorted)
14
+ * so a given pool yields a stable sequence across runs.
15
+ */
16
+ import type { PoolTask } from '../solver-types/_swe-rebench-v2-pool.js';
17
+ /** Thrown when a chosen training sequence intersects the held-out slate (AC#2). */
18
+ export declare class TrainTestOverlapError extends Error {
19
+ readonly overlap: string[];
20
+ constructor(overlap: string[]);
21
+ }
22
+ /** Assert a set of training ids is disjoint from the slate, else throw loud. */
23
+ export declare function assertNoOverlap(trainIds: string[], slateIds: Set<string>): void;
24
+ export declare function buildTrainSequence(args: {
25
+ pool: PoolTask[];
26
+ slateIds: Set<string>;
27
+ /** Number of distinct training tasks (= N training cycles). */
28
+ count: number;
29
+ /**
30
+ * Optional explicit, hand-picked instance_ids (in order). When set, the
31
+ * builder still runs the no-overlap guard and resolves each id against the
32
+ * pool — used to fail-loud on a hand-edited sequence that overlaps the slate.
33
+ */
34
+ explicitIds?: string[];
35
+ }): PoolTask[];
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Train-sequence builder with the no-train/test-overlap guard for the
3
+ * train-arm slope e2e (issue #822, AC#2).
4
+ *
5
+ * The learner-full-cycle e2e drives `runCycle` DIRECTLY (it does not post tasks
6
+ * through the generator), so the generator's `excludeHeldOutSlate` train-stream
7
+ * chokepoint (#817) is BYPASSED. This builder is therefore the load-bearing
8
+ * AC#2 mechanism for the e2e: it selects the N distinct training instances from
9
+ * the pool with the held-out slate excluded, and asserts the resulting
10
+ * sequence is disjoint from the slate (fail-loud, never a silent drop).
11
+ *
12
+ * It reuses `excludeHeldOutSlate` from the #817 primitive rather than
13
+ * reimplementing the exclusion. Selection is deterministic (instance-id sorted)
14
+ * so a given pool yields a stable sequence across runs.
15
+ */
16
+ import { excludeHeldOutSlate } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
17
/**
 * Error raised when a chosen training sequence shares at least one instance
 * with the held-out slate (AC#2).
 *
 * The offending instance ids are listed in the message and exposed on
 * `overlap` (the same array the caller passed in, not a copy).
 */
export class TrainTestOverlapError extends Error {
    overlap;
    name = 'TrainTestOverlapError';
    constructor(overlap) {
        const listedIds = overlap.join(', ');
        const message = [
            `train/test overlap: training sequence includes held-out slate instance(s) `,
            `${listedIds} — refusing to train on the eval slate (AC#2). `,
            `The slate must stay out-of-sample for the slope to mean anything.`,
        ].join('');
        super(message);
        this.overlap = overlap;
    }
}
28
/**
 * Fail loudly if any training id is also a held-out slate id.
 *
 * @param {string[]} trainIds Candidate training instance ids, in order.
 * @param {Set<string>} slateIds Instance ids reserved for the held-out slate.
 * @throws {TrainTestOverlapError} Carrying every clashing id (in `trainIds`
 *   order) when the two collections intersect.
 */
export function assertNoOverlap(trainIds, slateIds) {
    const clashes = [];
    for (const candidate of trainIds) {
        if (slateIds.has(candidate)) {
            clashes.push(candidate);
        }
    }
    if (clashes.length === 0) {
        return;
    }
    throw new TrainTestOverlapError(clashes);
}
34
/**
 * Build the training sequence for the train-arm slope e2e, guaranteeing it is
 * disjoint from the held-out slate.
 *
 * Two modes:
 *  - `args.explicitIds` set: the hand-picked ids are checked against the slate
 *    first, then each id is resolved against the slate-excluded pool, keeping
 *    the caller's order. `args.count` is not consulted in this mode.
 *  - otherwise: the slate-excluded pool is sorted by `instance_id` and the
 *    first `args.count` tasks are returned.
 *
 * @param {object} args
 * @param {Array<{ instance_id: string }>} args.pool Candidate tasks.
 * @param {Set<string>} args.slateIds Held-out slate instance ids.
 * @param {number} args.count Number of tasks to select (non-explicit mode).
 * @param {string[]} [args.explicitIds] Optional hand-picked ids, in order.
 * @returns {Array<{ instance_id: string }>} The selected tasks.
 * @throws {TrainTestOverlapError} If the sequence intersects the slate.
 * @throws {RangeError} If `count` is not a non-negative integer
 *   (non-explicit mode only).
 * @throws {Error} If an explicit id is not in the eligible pool, or the
 *   eligible pool holds fewer than `count` tasks.
 */
export function buildTrainSequence(args) {
    const eligible = excludeHeldOutSlate(args.pool, args.slateIds);
    if (args.explicitIds) {
        // Guard the hand-picked sequence against the slate BEFORE resolving, so a
        // slate-overlapping id is a TrainTestOverlapError, not a "not eligible".
        assertNoOverlap(args.explicitIds, args.slateIds);
        const byId = new Map(eligible.map((t) => [t.instance_id, t]));
        return args.explicitIds.map((id) => {
            const task = byId.get(id);
            if (!task) {
                throw new Error(`explicit training instance ${id} not in the eligible pool`);
            }
            return task;
        });
    }
    // Reject a malformed count up front. Left unchecked, a negative count would
    // reach `slice(0, count)` and silently drop tasks from the END of the list,
    // while NaN or a fractional count would silently return too few tasks.
    if (!Number.isInteger(args.count) || args.count < 0) {
        throw new RangeError(`train sequence count must be a non-negative integer, got ${args.count}`);
    }
    // Deterministic selection: instance-id sorted, first `count`.
    // NOTE(review): localeCompare uses the runtime's default locale collation;
    // confirm that is stable enough across the environments this runs in.
    const sorted = [...eligible].sort((a, b) => a.instance_id.localeCompare(b.instance_id));
    if (sorted.length < args.count) {
        throw new Error(`train sequence needs ${args.count} distinct tasks but only ${sorted.length} eligible ` +
            `(pool size ${args.pool.length} minus ${args.slateIds.size} held-out slate instance(s))`);
    }
    const picked = sorted.slice(0, args.count);
    // Belt and braces: `eligible` already excludes the slate, but assert on the
    // final selection so an overlap can never be returned silently.
    assertNoOverlap(picked.map((t) => t.instance_id), args.slateIds);
    return picked;
}
59
+ //# sourceMappingURL=train-sequence.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"train-sequence.js","sourceRoot":"","sources":["../../src/eval/train-sequence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,EAAE,mBAAmB,EAAE,MAAM,mDAAmD,CAAC;AAExF,mFAAmF;AACnF,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IAClB;IAA5B,YAA4B,OAAiB;QAC3C,KAAK,CACH,4EAA4E;YAC1E,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,iDAAiD;YACtE,mEAAmE,CACtE,CAAC;QALwB,YAAO,GAAP,OAAO,CAAU;QAM3C,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAED,gFAAgF;AAChF,MAAM,UAAU,eAAe,CAAC,QAAkB,EAAE,QAAqB;IACvE,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,MAAM,IAAI,qBAAqB,CAAC,OAAO,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAWlC;IACC,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAE9D,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACrB,0EAA0E;QAC1E,yEAAyE;QACzE,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE;YACjC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC1B,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,MAAM,IAAI,KAAK,CAAC,8BAA8B,EAAE,2BAA2B,CAAC,CAAC;YAC/E,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8DAA8D;IAC9D,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;IACxF,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,wBAAwB,IAAI,CAAC,KAAK,4BAA4B,MAAM,CAAC,MAAM,YAAY;YACrF,cAAc,IAAI,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,CAAC,QAAQ,CAAC,IAAI,8BAA8B,CAC3F,CAAC;IACJ,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3C,eAAe,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IACjE,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Wilson score interval + resolved-rate comparison for the `jinn eval`
3
+ * held-out checkpoint orchestrator (issue #818, AC#2).
4
+ *
5
+ * The Wilson score interval is a binomial proportion confidence interval that
6
+ * behaves well at the extremes (p=0, p=1) and for small n — unlike the naive
7
+ * normal-approximation interval. We write it small (no stats dependency, per
8
+ * repo convention): the formula is ~10 lines.
9
+ *
10
+ * Per log/decisions/2026-05-28-rl-eval-measurement.md §4: this is v1-simple.
11
+ * Only *large* deltas are trustworthy — we encode that as "the child and
12
+ * parent intervals do not overlap." No seed control, no multi-run averaging.
13
+ */
14
+ export interface Interval {
15
+ /** Observed point estimate, passed / scorable (0 when scorable=0). */
16
+ p: number;
17
+ /** Lower bound, clamped to [0, 1]. */
18
+ lo: number;
19
+ /** Upper bound, clamped to [0, 1]. */
20
+ hi: number;
21
+ }
22
+ /**
23
+ * Wilson score interval for `passed` successes out of `scorable` trials.
24
+ * `scorable === 0` returns a degenerate `{ p: 0, lo: 0, hi: 0 }` (no NaN).
25
+ */
26
+ export declare function wilsonInterval(passed: number, scorable: number, z?: number): Interval;
27
+ export type RateVerdict = 'trustworthy' | 'within-noise';
28
+ export interface RateComparison {
29
+ child: Interval;
30
+ parent: Interval;
31
+ /** child.p − parent.p (point-estimate difference, can be negative). */
32
+ delta: number;
33
+ /**
34
+ * 'trustworthy' iff the two Wilson intervals do NOT overlap; otherwise
35
+ * 'within-noise'. v1-simple: only disjoint intervals justify a claim.
36
+ */
37
+ verdict: RateVerdict;
38
+ }
39
+ export declare function compareRates(child: {
40
+ passed: number;
41
+ scorable: number;
42
+ }, parent: {
43
+ passed: number;
44
+ scorable: number;
45
+ }): RateComparison;
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Wilson score interval + resolved-rate comparison for the `jinn eval`
3
+ * held-out checkpoint orchestrator (issue #818, AC#2).
4
+ *
5
+ * The Wilson score interval is a binomial proportion confidence interval that
6
+ * behaves well at the extremes (p=0, p=1) and for small n — unlike the naive
7
+ * normal-approximation interval. We write it small (no stats dependency, per
8
+ * repo convention): the formula is ~10 lines.
9
+ *
10
+ * Per log/decisions/2026-05-28-rl-eval-measurement.md §4: this is v1-simple.
11
+ * Only *large* deltas are trustworthy — we encode that as "the child and
12
+ * parent intervals do not overlap." No seed control, no multi-run averaging.
13
+ */
14
/** Two-sided z for a 95% interval (1.96 ≈ Φ⁻¹(0.975)). */
const DEFAULT_Z = 1.96;
/**
 * Wilson score interval for `passed` successes out of `scorable` trials.
 *
 * @param {number} passed Number of successes.
 * @param {number} scorable Number of trials.
 * @param {number} [z=DEFAULT_Z] Normal quantile for the desired confidence.
 * @returns {{ p: number, lo: number, hi: number }} Point estimate
 *   `passed / scorable` plus bounds clamped to [0, 1]. `scorable === 0`
 *   returns the placeholder `{ p: 0, lo: 0, hi: 0 }` (no NaN); that placeholder
 *   means "no data", not "the rate is known to be 0".
 */
export function wilsonInterval(passed, scorable, z = DEFAULT_Z) {
    if (scorable === 0)
        return { p: 0, lo: 0, hi: 0 };
    const n = scorable;
    const p = passed / n;
    const z2 = z * z;
    // Standard Wilson form: (centre ± margin) / denom.
    const denom = 1 + z2 / n;
    const centre = p + z2 / (2 * n);
    const margin = z * Math.sqrt((p * (1 - p)) / n + z2 / (4 * n * n));
    const lo = (centre - margin) / denom;
    const hi = (centre + margin) / denom;
    return {
        p,
        // Clamp: rounding can push the bounds a hair outside [0, 1].
        lo: Math.max(0, lo),
        hi: Math.min(1, hi),
    };
}
/**
 * Compare a child checkpoint's resolved rate against its parent's.
 *
 * @param {{ passed: number, scorable: number }} child
 * @param {{ passed: number, scorable: number }} parent
 * @returns {{ child: object, parent: object, delta: number, verdict: string }}
 *   Both Wilson intervals, `delta = child.p - parent.p` (can be negative), and
 *   a verdict: 'trustworthy' only when BOTH sides have at least one scorable
 *   trial and their intervals do not overlap; otherwise 'within-noise'.
 */
export function compareRates(child, parent) {
    const c = wilsonInterval(child.passed, child.scorable);
    const p = wilsonInterval(parent.passed, parent.scorable);
    // A side with zero scorable trials gets the placeholder interval {0, 0, 0}.
    // That is an absence of evidence, not a zero-width measurement, so it must
    // never count as "disjoint" from the other side's interval.
    const bothMeasured = child.scorable > 0 && parent.scorable > 0;
    const disjoint = bothMeasured && (c.lo > p.hi || p.lo > c.hi);
    return {
        child: c,
        parent: p,
        delta: c.p - p.p,
        verdict: disjoint ? 'trustworthy' : 'within-noise',
    };
}
48
+ //# sourceMappingURL=wilson.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wilson.js","sourceRoot":"","sources":["../../src/eval/wilson.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,0DAA0D;AAC1D,MAAM,SAAS,GAAG,IAAI,CAAC;AAWvB;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,MAAc,EAAE,QAAgB,EAAE,IAAY,SAAS;IACpF,IAAI,QAAQ,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC;IAClD,MAAM,CAAC,GAAG,QAAQ,CAAC;IACnB,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACjB,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IACzB,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChC,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACnE,MAAM,EAAE,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,MAAM,EAAE,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,OAAO;QACL,CAAC;QACD,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC;QACnB,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC;KACpB,CAAC;AACJ,CAAC;AAgBD,MAAM,UAAU,YAAY,CAC1B,KAA2C,EAC3C,MAA4C;IAE5C,MAAM,CAAC,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvD,MAAM,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IACzD,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC;IAC5C,OAAO;QACL,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,CAAC;QACT,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAChB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,cAAc;KACnD,CAAC;AACJ,CAAC"}
@@ -22,9 +22,11 @@
22
22
  * Used for manifest signing: produce a deterministic byte string that two
23
23
  * independent parties can reproduce from the same object graph.
24
24
  */
25
- import { createRequire } from 'node:module';
26
- const require = createRequire(import.meta.url);
27
- const canonicalize = require('canonicalize');
25
+ // canonicalize v3 is ESM-only (no CJS entry); the client is `"type": "module"`
26
+ // so a native ESM default import is the correct interop (replaces the v2-era
27
+ // createRequire shim). v3 is packaging-only vs v2 — its RFC 8785 output is
28
+ // byte-identical, so existing content hashes / manifest signatures are stable.
29
+ import canonicalize from 'canonicalize';
28
30
  /**
29
31
  * Recursively replace NaN / ±Infinity with null so that canonicalize does not
30
32
  * throw — matching the JSON.stringify behaviour that the rest of the codebase
@@ -1 +1 @@
1
- {"version":3,"file":"canonical-json.js","sourceRoot":"","sources":["../../../src/harnesses/engine/canonical-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,YAAY,GAAG,OAAO,CAAC,cAAc,CAA2C,CAAC;AAEvF;;;;;GAKG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC/D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAC5D,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,mEAAmE;QACnE,qEAAqE;QACrE,sEAAsE;QACtE,WAAW;QACX,MAAM,MAAM,GAAI,KAAoC,CAAC,MAAM,CAAC;QAC5D,IAAI,OAAO,MAAM,KAAK,UAAU,EAAE,CAAC;YACjC,OAAO,eAAe,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC7C,CAAC;QACD,MAAM,GAAG,GAA4B,EAAE,CAAC;QACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QACxE,OAAO,GAAG,CAAC;IACb,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEvC,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACrC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,wEAAwE;QACxE,uEAAuE;QACvE,kCAAkC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"canonical-json.js","sourceRoot":"","sources":["../../../src/harnesses/engine/canonical-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,+EAA+E;AAC/E,6EAA6E;AAC7E,2EAA2E;AAC3E,+EAA+E;AAC/E,OAAO,YAAY,MAAM,cAAc,CAAC;AAExC;;;;;GAKG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC/D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAC5D,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,mEAAmE;QACnE,qEAAqE;QACrE,sEAAsE;QACtE,WAAW;QACX,MAAM,MAAM,GAAI,KAAoC,CAAC,MAAM,CAAC;QAC5D,IAAI,OAAO,MAAM,KAAK,UAAU,EAAE,CAAC;YACjC,OAAO,eAAe,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC7C,CAAC;QACD,MAAM,GAAG,GAA4B,EAAE,CAAC;QACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QACxE,OAAO,GAAG,CAAC;IACb,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEvC,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACrC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,wEAAwE;QACxE,uEAAuE;QACvE,kCAAkC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -70,6 +70,10 @@ export interface JoinedSolverNetsView {
70
70
  } | undefined;
71
71
  /** Enumerate all joined manifest CIDs (used for digest-based filtering). */
72
72
  manifestCids(): string[];
73
+ /** Add/replace one joined entry live (used by the hot-apply join applier, #1037). */
74
+ set(manifestCid: string, entry: {
75
+ roles: Array<'solver' | 'evaluator'>;
76
+ }): void;
73
77
  }
74
78
  /**
75
79
  * Build a `JoinedSolverNetsView` from the raw operator-config block.
@@ -82,6 +86,17 @@ export declare function joinedSolverNetsViewFromConfig(joined: Record<string, {
82
86
  manifestCid: string;
83
87
  roles: Array<'solver' | 'evaluator'>;
84
88
  }> | undefined): JoinedSolverNetsView | undefined;
89
+ /**
90
+ * Mutable `JoinedSolverNetsView` for the running daemon. Unlike
91
+ * `joinedSolverNetsViewFromConfig` (boot snapshot), the applier
92
+ * (`daemon/join-applier.ts`, #1037) keeps a handle and calls `set()` when a
93
+ * join is hot-applied, so the engine's per-task eligibility check sees the new
94
+ * cid on its next call without a restart.
95
+ */
96
+ export declare function createMutableJoinedSolverNetsView(initial: Record<string, {
97
+ manifestCid: string;
98
+ roles: Array<'solver' | 'evaluator'>;
99
+ }> | undefined): JoinedSolverNetsView;
85
100
  /**
86
101
  * Resolves a launched SolverNet manifest by IPFS CID.
87
102
  *
@@ -576,6 +591,15 @@ export declare class TaskEngine {
576
591
  * TCAttemptAlreadyFinalized, …). We mark the row RACE_LOST and emit a
577
592
  * `kind=race_lost` activity event so operators can audit prunes
578
593
  * without inflating the FAILED counter (#896).
594
+ * - A transport-transient error (e.g. `AllRpcsFailedError` — every provider
595
+ * in the L2 fallback chain failed at once) on a task whose delivery window
596
+ * is still open: leave the row in its current in-flight state so the next
597
+ * tick re-drives it once the RPCs recover, and emit a `tick_error` (warn)
598
+ * event instead of inflating the FAILED counter. Without this the daemon
599
+ * stamped the row FAILED, dropping it from `getInFlight()` permanently, so
600
+ * L2 work went silent until a manual restart (#912). Past-window transient
601
+ * errors still terminalize to avoid churning on work that can no longer
602
+ * settle.
579
603
  * - Everything else: existing markFailed behaviour. When invoked from
580
604
  * recovery, `contextLabel === 'recovery'` so the failure_reason
581
605
  * carries the `recovery:` prefix the original code path used.
@@ -18,6 +18,7 @@ import { assembleAndSignEnvelope, } from './envelope-assembly.js';
18
18
  import { deliverAndClaim, } from './delivery.js';
19
19
  import { SafeInnerRevertError, isNonRecoverableInnerRevert, formatDecodedRevert, } from '../../adapters/mech/safe-revert.js';
20
20
  import { emitEvent } from '../../observability/emit-event.js';
21
+ import { isRecoverableTransactionError } from '../../tx-retry.js';
21
22
  import { SkippableError } from '../types.js';
22
23
  import { submitEvaluatorFeedback, codeDigestSha256ToBytes32, encodeExecutionPayload, encodeExecutionPayloadV2, modeStringToFlag, } from '../../erc8004/index.js';
23
24
  import { TrajectoryCollector, emitTrajectory } from '../../trajectory/index.js';
@@ -26,6 +27,7 @@ import { VerdictCode } from '../../adapters/mech/verdict-code.js';
26
27
  import { buildInfo } from '../../build-info.js';
27
28
  import { getSolverNetContract } from '../../vendor/@jinn-network/sdk/dist/solvernets/index.js';
28
29
  import { runHarnessWithFreezeFence, } from '../../daemon/freeze-fence.js';
30
+ import { recordLoopTick } from '../../daemon/loop-heartbeat.js';
29
31
  import { harnessStateDirName } from '../names.js';
30
32
  import { recordTaskCost } from '../../spend/record.js';
31
33
  // ── Sentinel error ────────────────────────────────────────────────────────────
@@ -61,6 +63,25 @@ export function joinedSolverNetsViewFromConfig(joined) {
61
63
  return {
62
64
  get: (cid) => map.get(cid),
63
65
  manifestCids: () => [...map.keys()],
66
+ set: (cid, entry) => { map.set(cid, entry); },
67
+ };
68
+ }
69
/**
 * Build a mutable `JoinedSolverNetsView` for the running daemon.
 *
 * The returned view is meant to be held by the hot-apply join applier
 * (`daemon/join-applier.ts`, #1037), which calls `set()` when a join is
 * applied live so that subsequent `get()` / `manifestCids()` calls observe the
 * new cid without a restart.
 *
 * Every entry — whether seeded from `initial` or added later through `set()` —
 * is stored as a private `{ roles: [...] }` record. That keeps the shape
 * returned by `get()` uniform across both paths and prevents a caller that
 * later mutates its own config/entry object from silently changing what the
 * view reports.
 *
 * @param {Record<string, { manifestCid: string, roles: Array<'solver' | 'evaluator'> }> | undefined} initial
 *   Raw joined block; `undefined` yields an empty view. Each entry is keyed by
 *   its `manifestCid`, falling back to the record key when that is nullish.
 * @returns {{ get: Function, manifestCids: Function, set: Function }}
 *   `get(cid)` → stored entry or `undefined`; `manifestCids()` → snapshot array
 *   of known cids in insertion order; `set(cid, entry)` → add or replace.
 */
export function createMutableJoinedSolverNetsView(initial) {
    const map = new Map();
    // Single write path shared by seeding and `set()`: copy the roles array so
    // the stored record never aliases caller-owned state. A non-array `roles`
    // is passed through untouched so malformed input behaves as it did before.
    const store = (cid, entry) => {
        const roles = Array.isArray(entry.roles) ? [...entry.roles] : entry.roles;
        map.set(cid, { roles });
    };
    for (const [key, entry] of Object.entries(initial ?? {})) {
        store(entry.manifestCid ?? key, entry);
    }
    return {
        get: (cid) => map.get(cid),
        manifestCids: () => [...map.keys()],
        set: (cid, entry) => { store(cid, entry); },
    };
}
66
87
  // ── TaskEngine ─────────────────────────────────────────────────────────
@@ -281,6 +302,7 @@ export class TaskEngine {
281
302
  catch (err) {
282
303
  console.error('[harness-engine] tick loop error (continuing):', err instanceof Error ? err.message : err);
283
304
  }
305
+ recordLoopTick(this.store, 'engine-tick'); // #1043 loop watchdog
284
306
  if (this.stopped)
285
307
  break;
286
308
  await Promise.race([
@@ -761,7 +783,17 @@ export class TaskEngine {
761
783
  throw new NotImplementedError('runImpl');
762
784
  }
763
785
  const runtimePlugins = solverNet?.runtimePlugins ?? [];
764
- this.runtimePluginsByRequest.set(task.requestId, runtimePlugins);
786
+ // #1035: merge harness self-attributed plugins (e.g. claude-code-learner)
787
+ // into the envelope carrier so they appear in executor.plugins. This is a
788
+ // SEPARATE array from `runtimePlugins`: the latter still feeds
789
+ // ctx.runtimePlugins / ctx.solverPluginRoots (which the harness uses to
790
+ // LOAD solver plugins), and the learner plugin is already loaded by the
791
+ // harness itself via its own plugin root — adding it there would double-load.
792
+ const attributedPlugins = [
793
+ ...runtimePlugins,
794
+ ...(impl.attributionPlugins?.() ?? []),
795
+ ];
796
+ this.runtimePluginsByRequest.set(task.requestId, attributedPlugins);
765
797
  const workingDir = task.workingDir ?? join(this.paths.workingDirRoot, task.requestId);
766
798
  const kindSeg = solverType.replace(/[.:]/g, '_');
767
799
  const implStateDir = task.implStateDir ?? (kindSeg
@@ -846,7 +878,7 @@ export class TaskEngine {
846
878
  informationalClaim: skippedOutput.informational ?? null,
847
879
  solutionOutputsJson: JSON.stringify(skippedOutput),
848
880
  implName: impl.name,
849
- runtimePluginsJson: JSON.stringify(runtimePlugins),
881
+ runtimePluginsJson: JSON.stringify(attributedPlugins),
850
882
  });
851
883
  console.log(`[harness-engine] ${task.requestId} RUNNING → POST_SNAPSHOT via impl=${impl.name} (skipped)`);
852
884
  return;
@@ -890,7 +922,7 @@ export class TaskEngine {
890
922
  informationalClaim: output.informational ?? null,
891
923
  solutionOutputsJson: JSON.stringify(output),
892
924
  implName: impl.name,
893
- runtimePluginsJson: JSON.stringify(runtimePlugins),
925
+ runtimePluginsJson: JSON.stringify(attributedPlugins),
894
926
  });
895
927
  }
896
928
  finally {
@@ -1488,10 +1520,7 @@ export class TaskEngine {
1488
1520
  case 'UNRESOLVED':
1489
1521
  return VerdictCode.Unresolved;
1490
1522
  default:
1491
- // gatingClaim is null, verdict is absent, or the string is unrecognized.
1492
- // Return Invalid(3) — not Pass(1). Pass must come from an explicit PASS/SCORED verdict.
1493
- console.warn(`[harness-engine] verdictCodeForTask: unrecognized gatingClaim.verdict (got=${String(raw)}); defaulting to Invalid(3) — should never happen, indicates the evaluator harness didn't set gatingClaim.verdict before submission`);
1494
- return VerdictCode.Invalid;
1523
+ throw new Error(`[harness-engine] verdictCodeForTask: missing or unrecognized gatingClaim.verdict (got=${String(raw)}); refusing to claim Invalid(3) on-chain without an explicit evaluator verdict`);
1495
1524
  }
1496
1525
  }
1497
1526
  async _maybePostEvaluatorFeedback(task) {
@@ -1630,6 +1659,15 @@ export class TaskEngine {
1630
1659
  * TCAttemptAlreadyFinalized, …). We mark the row RACE_LOST and emit a
1631
1660
  * `kind=race_lost` activity event so operators can audit prunes
1632
1661
  * without inflating the FAILED counter (#896).
1662
+ * - A transport-transient error (e.g. `AllRpcsFailedError` — every provider
1663
+ * in the L2 fallback chain failed at once) on a task whose delivery window
1664
+ * is still open: leave the row in its current in-flight state so the next
1665
+ * tick re-drives it once the RPCs recover, and emit a `tick_error` (warn)
1666
+ * event instead of inflating the FAILED counter. Without this the daemon
1667
+ * stamped the row FAILED, dropping it from `getInFlight()` permanently, so
1668
+ * L2 work went silent until a manual restart (#912). Past-window transient
1669
+ * errors still terminalize to avoid churning on work that can no longer
1670
+ * settle.
1633
1671
  * - Everything else: existing markFailed behaviour. When invoked from
1634
1672
  * recovery, `contextLabel === 'recovery'` so the failure_reason
1635
1673
  * carries the `recovery:` prefix the original code path used.
@@ -1652,6 +1690,24 @@ export class TaskEngine {
1652
1690
  return 'race_lost';
1653
1691
  }
1654
1692
  const reason = err instanceof Error ? err.message : String(err);
1693
+ // A transport-transient failure (all RPC providers in the fallback chain
1694
+ // blipped at once, 429s, timeouts, …) is not the task's fault and is not
1695
+ // permanent. Leave the row in its in-flight state — do NOT call
1696
+ // markFailed, which would drop it from getInFlight() forever (#912) — so
1697
+ // the engine-tick loop re-drives it once the RPCs recover. The tick loop
1698
+ // IS the retry; there is no per-task attempt counter. Skip this only once
1699
+ // the delivery window has closed, so we never churn on work that can no
1700
+ // longer settle on-chain.
1701
+ if (task.windowEndTs > Date.now() && isRecoverableTransactionError(err)) {
1702
+ emitEvent(this.store, {
1703
+ kind: 'tick_error',
1704
+ requestId: task.requestId,
1705
+ solverType: task.solverType ?? undefined,
1706
+ outcome: 'warn',
1707
+ detail: `transient RPC failure in ${contextLabel}; left ${task.state} for retry: ${reason}`,
1708
+ }, 'harness-engine');
1709
+ return 'transient';
1710
+ }
1655
1711
  const stamped = contextLabel === 'recovery' ? `recovery: ${reason}` : reason;
1656
1712
  this.persistence.markFailed(task.requestId, stamped);
1657
1713
  return 'failed';
@@ -1695,8 +1751,15 @@ export class TaskEngine {
1695
1751
  if (current && current.state === task.state) {
1696
1752
  const classification = this._classifyAndMarkTerminal(task, err, 'recovery');
1697
1753
  const reason = err instanceof Error ? err.message : String(err);
1698
- const log = classification === 'race_lost' ? console.log : console.error;
1699
- const verb = classification === 'race_lost' ? 'pruned' : 'failed';
1754
+ // 'transient' leaves the row in-flight (not terminal); the next tick
1755
+ // re-drives it once the RPCs recover (#912). Log it at warn so the
1756
+ // stall is visible without firing the error-level alerting that a
1757
+ // genuine failure does.
1758
+ const { log, verb } = {
1759
+ race_lost: { log: console.log, verb: 'pruned' },
1760
+ transient: { log: console.warn, verb: 'deferred (transient RPC)' },
1761
+ failed: { log: console.error, verb: 'failed' },
1762
+ }[classification];
1700
1763
  log(`[harness-engine] resume ${verb} for ${task.requestId}: ${reason}`);
1701
1764
  }
1702
1765
  throw err;