@jinn-network/client 0.1.8 → 0.1.9-canary.050a41b1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/mech/adapter.d.ts +21 -1
- package/dist/adapters/mech/adapter.js +77 -10
- package/dist/adapters/mech/adapter.js.map +1 -1
- package/dist/adapters/mech/contracts.js +62 -28
- package/dist/adapters/mech/contracts.js.map +1 -1
- package/dist/adapters/mech/safe-revert.d.ts +4 -0
- package/dist/adapters/mech/safe-revert.js +5 -1
- package/dist/adapters/mech/safe-revert.js.map +1 -1
- package/dist/adapters/mech/safe.js +5 -1
- package/dist/adapters/mech/safe.js.map +1 -1
- package/dist/adapters/mech/verdict-code.js +1 -1
- package/dist/adapters/mech/verdict-code.js.map +1 -1
- package/dist/api/bootstrap-endpoint.d.ts +1 -0
- package/dist/api/bootstrap-endpoint.js +1 -0
- package/dist/api/bootstrap-endpoint.js.map +1 -1
- package/dist/api/discovery-endpoint.d.ts +1 -0
- package/dist/api/discovery-endpoint.js +24 -0
- package/dist/api/discovery-endpoint.js.map +1 -1
- package/dist/api/fleet-build.d.ts +1 -7
- package/dist/api/fleet-build.js +0 -7
- package/dist/api/fleet-build.js.map +1 -1
- package/dist/api/gather-status.d.ts +8 -2
- package/dist/api/gather-status.js +29 -117
- package/dist/api/gather-status.js.map +1 -1
- package/dist/api/loop-completion-build.d.ts +79 -0
- package/dist/api/loop-completion-build.js +155 -0
- package/dist/api/loop-completion-build.js.map +1 -0
- package/dist/api/peers.js +2 -0
- package/dist/api/peers.js.map +1 -1
- package/dist/api/setup-endpoints.d.ts +32 -0
- package/dist/api/setup-endpoints.js +93 -23
- package/dist/api/setup-endpoints.js.map +1 -1
- package/dist/api/solvernets-endpoints.js +3 -0
- package/dist/api/solvernets-endpoints.js.map +1 -1
- package/dist/api/status-build.d.ts +43 -33
- package/dist/api/status-build.js +3 -26
- package/dist/api/status-build.js.map +1 -1
- package/dist/api/status-rollup-build.d.ts +0 -4
- package/dist/api/status-rollup-build.js +0 -4
- package/dist/api/status-rollup-build.js.map +1 -1
- package/dist/build-info.json +4 -4
- package/dist/build-meta.json +1 -1
- package/dist/cli/commands/codedigest-revert-check.js +6 -2
- package/dist/cli/commands/codedigest-revert-check.js.map +1 -1
- package/dist/cli/commands/doctor.d.ts +3 -0
- package/dist/cli/commands/doctor.js +37 -2
- package/dist/cli/commands/doctor.js.map +1 -1
- package/dist/cli/commands/eval.d.ts +76 -0
- package/dist/cli/commands/eval.js +401 -0
- package/dist/cli/commands/eval.js.map +1 -0
- package/dist/cli/commands/rewards.d.ts +2 -0
- package/dist/cli/commands/rewards.js +30 -3
- package/dist/cli/commands/rewards.js.map +1 -1
- package/dist/cli/commands/solver-nets.js +68 -0
- package/dist/cli/commands/solver-nets.js.map +1 -1
- package/dist/cli/commands/status.js +0 -1
- package/dist/cli/commands/status.js.map +1 -1
- package/dist/cli/index.js +2 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/config.d.ts +58 -7
- package/dist/config.js +96 -7
- package/dist/config.js.map +1 -1
- package/dist/daemon/ai-units-gate.d.ts +6 -6
- package/dist/daemon/ai-units-gate.js +11 -10
- package/dist/daemon/ai-units-gate.js.map +1 -1
- package/dist/daemon/balance-topup-loop.js +3 -0
- package/dist/daemon/balance-topup-loop.js.map +1 -1
- package/dist/daemon/creator.js +2 -0
- package/dist/daemon/creator.js.map +1 -1
- package/dist/daemon/daemon.d.ts +15 -0
- package/dist/daemon/daemon.js +78 -22
- package/dist/daemon/daemon.js.map +1 -1
- package/dist/daemon/eviction-loop.d.ts +7 -0
- package/dist/daemon/eviction-loop.js +16 -0
- package/dist/daemon/eviction-loop.js.map +1 -1
- package/dist/daemon/jinn-claim-loop.js +3 -0
- package/dist/daemon/jinn-claim-loop.js.map +1 -1
- package/dist/daemon/join-applier.d.ts +35 -0
- package/dist/daemon/join-applier.js +49 -0
- package/dist/daemon/join-applier.js.map +1 -0
- package/dist/daemon/loop-heartbeat.d.ts +34 -0
- package/dist/daemon/loop-heartbeat.js +39 -0
- package/dist/daemon/loop-heartbeat.js.map +1 -0
- package/dist/daemon/reward-claim-loop.js +3 -0
- package/dist/daemon/reward-claim-loop.js.map +1 -1
- package/dist/daemon/watchdog-loop.d.ts +84 -0
- package/dist/daemon/watchdog-loop.js +91 -0
- package/dist/daemon/watchdog-loop.js.map +1 -0
- package/dist/dashboard/assets/index-8tAiMbUV.css +1 -0
- package/dist/dashboard/assets/index-CSFVwGFh.js +167 -0
- package/dist/dashboard/index.html +2 -2
- package/dist/discovery/http.d.ts +7 -0
- package/dist/discovery/http.js +241 -25
- package/dist/discovery/http.js.map +1 -1
- package/dist/discovery/onchain.js +155 -1
- package/dist/discovery/onchain.js.map +1 -1
- package/dist/discovery/types.d.ts +106 -0
- package/dist/discovery/types.js +40 -0
- package/dist/discovery/types.js.map +1 -1
- package/dist/discovery/with-fallback.js +14 -0
- package/dist/discovery/with-fallback.js.map +1 -1
- package/dist/earning/bootstrap.d.ts +23 -0
- package/dist/earning/bootstrap.js +76 -27
- package/dist/earning/bootstrap.js.map +1 -1
- package/dist/earning/faucet.d.ts +1 -1
- package/dist/earning/faucet.js +2 -2
- package/dist/earning/faucet.js.map +1 -1
- package/dist/earning/safe-adapter.js +11 -0
- package/dist/earning/safe-adapter.js.map +1 -1
- package/dist/eval/eval-harness-run.d.ts +63 -0
- package/dist/eval/eval-harness-run.js +123 -0
- package/dist/eval/eval-harness-run.js.map +1 -0
- package/dist/eval/orchestrator.d.ts +163 -0
- package/dist/eval/orchestrator.js +232 -0
- package/dist/eval/orchestrator.js.map +1 -0
- package/dist/eval/paired.d.ts +68 -0
- package/dist/eval/paired.js +93 -0
- package/dist/eval/paired.js.map +1 -0
- package/dist/eval/resolve-slate-tasks.d.ts +35 -0
- package/dist/eval/resolve-slate-tasks.js +56 -0
- package/dist/eval/resolve-slate-tasks.js.map +1 -0
- package/dist/eval/screen-discovery.d.ts +22 -0
- package/dist/eval/screen-discovery.js +71 -0
- package/dist/eval/screen-discovery.js.map +1 -0
- package/dist/eval/screen-progress.d.ts +41 -0
- package/dist/eval/screen-progress.js +60 -0
- package/dist/eval/screen-progress.js.map +1 -0
- package/dist/eval/screen-runner.d.ts +30 -0
- package/dist/eval/screen-runner.js +289 -0
- package/dist/eval/screen-runner.js.map +1 -0
- package/dist/eval/screen.d.ts +107 -0
- package/dist/eval/screen.js +159 -0
- package/dist/eval/screen.js.map +1 -0
- package/dist/eval/slope.d.ts +29 -0
- package/dist/eval/slope.js +46 -0
- package/dist/eval/slope.js.map +1 -0
- package/dist/eval/train-sequence.d.ts +35 -0
- package/dist/eval/train-sequence.js +59 -0
- package/dist/eval/train-sequence.js.map +1 -0
- package/dist/eval/wilson.d.ts +45 -0
- package/dist/eval/wilson.js +48 -0
- package/dist/eval/wilson.js.map +1 -0
- package/dist/harnesses/engine/canonical-json.js +5 -3
- package/dist/harnesses/engine/canonical-json.js.map +1 -1
- package/dist/harnesses/engine/engine.d.ts +24 -0
- package/dist/harnesses/engine/engine.js +72 -9
- package/dist/harnesses/engine/engine.js.map +1 -1
- package/dist/harnesses/engine/persistence.d.ts +17 -0
- package/dist/harnesses/engine/persistence.js +28 -0
- package/dist/harnesses/engine/persistence.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/adapter.d.ts +2 -0
- package/dist/harnesses/impls/hermes-agent/adapter.js +8 -5
- package/dist/harnesses/impls/hermes-agent/adapter.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/bootstrap.d.ts +1 -0
- package/dist/harnesses/impls/hermes-agent/bootstrap.js +6 -1
- package/dist/harnesses/impls/hermes-agent/bootstrap.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/harness.d.ts +17 -3
- package/dist/harnesses/impls/hermes-agent/harness.js +68 -5
- package/dist/harnesses/impls/hermes-agent/harness.js.map +1 -1
- package/dist/harnesses/impls/index.d.ts +2 -0
- package/dist/harnesses/impls/index.js +2 -0
- package/dist/harnesses/impls/index.js.map +1 -1
- package/dist/harnesses/impls/learner/adapters/claude-code.js +5 -0
- package/dist/harnesses/impls/learner/adapters/claude-code.js.map +1 -1
- package/dist/harnesses/impls/learner/harness.d.ts +17 -1
- package/dist/harnesses/impls/learner/harness.js +51 -1
- package/dist/harnesses/impls/learner/harness.js.map +1 -1
- package/dist/harnesses/impls/learner/harvest.d.ts +2 -0
- package/dist/harnesses/impls/learner/harvest.js +7 -1
- package/dist/harnesses/impls/learner/harvest.js.map +1 -1
- package/dist/harnesses/impls/learner/plugin-path.js +1 -0
- package/dist/harnesses/impls/learner/plugin-path.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js +3 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.d.ts +2 -2
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js +3 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js.map +1 -1
- package/dist/harnesses/readiness-registry.d.ts +10 -0
- package/dist/harnesses/readiness-registry.js +13 -0
- package/dist/harnesses/readiness-registry.js.map +1 -1
- package/dist/harnesses/types.d.ts +14 -0
- package/dist/learner/revert-decision.d.ts +16 -1
- package/dist/learner/revert-decision.js +38 -18
- package/dist/learner/revert-decision.js.map +1 -1
- package/dist/learner/revert-stats.d.ts +14 -0
- package/dist/learner/revert-stats.js +42 -0
- package/dist/learner/revert-stats.js.map +1 -1
- package/dist/local-provider-url.d.ts +3 -0
- package/dist/local-provider-url.js +28 -0
- package/dist/local-provider-url.js.map +1 -0
- package/dist/main.js +94 -25
- package/dist/main.js.map +1 -1
- package/dist/plugins/learner/.claude-plugin/plugin.json +1 -1
- package/dist/plugins/learner/.codex-plugin/plugin.json +1 -1
- package/dist/plugins/learner/hooks/session-start +30 -1
- package/dist/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
- package/dist/preflight/deployment-readiness.d.ts +147 -0
- package/dist/preflight/deployment-readiness.js +366 -0
- package/dist/preflight/deployment-readiness.js.map +1 -0
- package/dist/preflight/pidfile-liveness.d.ts +7 -1
- package/dist/preflight/pidfile-liveness.js +14 -0
- package/dist/preflight/pidfile-liveness.js.map +1 -1
- package/dist/rpc/transport.d.ts +36 -0
- package/dist/rpc/transport.js +123 -24
- package/dist/rpc/transport.js.map +1 -1
- package/dist/scripts/swe-rebench-v2-seed-pool.json +2 -1
- package/dist/solver-nets/registry.d.ts +19 -0
- package/dist/solver-nets/registry.js +92 -66
- package/dist/solver-nets/registry.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.d.ts +76 -0
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.js +156 -0
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.js.map +1 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.d.ts +81 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.js +116 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.js.map +1 -0
- package/dist/solver-types/_swe-rebench-v2-state.d.ts +9 -0
- package/dist/solver-types/_swe-rebench-v2-state.js +14 -0
- package/dist/solver-types/_swe-rebench-v2-state.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-validated-pool.d.ts +30 -0
- package/dist/solver-types/_swe-rebench-v2-validated-pool.js +40 -0
- package/dist/solver-types/_swe-rebench-v2-validated-pool.js.map +1 -1
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json +20 -0
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.json +19 -0
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.screening-report.json +628 -0
- package/dist/solver-types/solver-type.d.ts +8 -0
- package/dist/solver-types/swe-rebench-v2.d.ts +2 -0
- package/dist/solver-types/swe-rebench-v2.js +115 -10
- package/dist/solver-types/swe-rebench-v2.js.map +1 -1
- package/dist/solvernets/launched-record-dispatcher.d.ts +3 -0
- package/dist/solvernets/launched-record-dispatcher.js.map +1 -1
- package/dist/solvernets/registry-client-erc8004.js +29 -37
- package/dist/solvernets/registry-client-erc8004.js.map +1 -1
- package/dist/solvernets/registry-client.d.ts +6 -0
- package/dist/solvernets/store.js +7 -2
- package/dist/solvernets/store.js.map +1 -1
- package/dist/spend/ai-units-config.d.ts +10 -0
- package/dist/spend/ai-units-config.js +7 -1
- package/dist/spend/ai-units-config.js.map +1 -1
- package/dist/spend/ai-units.d.ts +51 -0
- package/dist/spend/ai-units.js +73 -0
- package/dist/spend/ai-units.js.map +1 -1
- package/dist/spend/record.js +12 -5
- package/dist/spend/record.js.map +1 -1
- package/dist/store/store.d.ts +91 -5
- package/dist/store/store.js +170 -7
- package/dist/store/store.js.map +1 -1
- package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.d.ts +108 -1
- package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.js +25 -1
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.d.ts +65 -0
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.js +123 -0
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.d.ts +2 -2
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.js +1 -1
- package/dist/vendor/@jinn-network/sdk/package.json +4 -0
- package/docker-compose.yml +3 -2
- package/package.json +22 -18
- package/plugins/learner/.claude-plugin/plugin.json +1 -1
- package/plugins/learner/.codex-plugin/plugin.json +1 -1
- package/plugins/learner/hooks/session-start +30 -1
- package/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
- package/plugins/swe-rebench-v2-runtime/hooks/hooks.json +16 -0
- package/plugins/swe-rebench-v2-runtime/hooks/session-start +74 -0
- package/dist/dashboard/assets/index-CzKxvMcU.css +0 -32
- package/dist/dashboard/assets/index-yVemxHot.js +0 -351
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ordinary least-squares slope of resolved-rate vs cycle index for the
|
|
3
|
+
* train-arm slope measurement (issue #822, AC#1).
|
|
4
|
+
*
|
|
5
|
+
* The train-arm e2e evaluates a checkpoint against the held-out slate (#817)
|
|
6
|
+
* at intervals via the eval orchestrator (#818), collecting one
|
|
7
|
+
* `{ cycleIndex, rate }` point per interval (`rate` = passed / scorable, the
|
|
8
|
+
* Wilson point estimate). The slope of the least-squares fit is the headline
|
|
9
|
+
* "is the learner improving across the training sequence" number.
|
|
10
|
+
*
|
|
11
|
+
* It is deliberately a thin helper over the closed-form OLS slope
|
|
12
|
+
* (`cov(x,y) / var(x)`); the per-point confidence intervals come from
|
|
13
|
+
* `wilson.ts` — this module does NOT reimplement them. The slope sign alone is
|
|
14
|
+
* never a verdict at small N: a flat or slightly negative slope is "within
|
|
15
|
+
* noise", which the e2e surfaces via the §4.1 honesty caveat.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Ordinary least-squares slope of `rate` regressed on `cycleIndex`
 * (closed form: `cov(x, y) / var(x)`).
 *
 * Returns 0 when no line can be fitted:
 *  - fewer than two points, or
 *  - every point shares the same `cycleIndex` (zero variance in x).
 *
 * A flat sequence (all rates equal) yields 0 because the covariance is 0.
 *
 * @param {Array<{ cycleIndex: number, rate: number }>} points
 *   One entry per evaluation interval.
 * @returns {number} The fitted slope, or 0 for the degenerate cases above.
 */
export function leastSquaresSlope(points) {
    const n = points.length;
    if (n < 2)
        return 0;
    // Detect "every x identical" by direct comparison rather than relying on
    // the accumulated variance being exactly 0: for non-integer x the rounded
    // mean can differ from x by one ulp, leaving a tiny non-zero variance and
    // producing a meaningless slope instead of the documented 0.
    const firstX = points[0].cycleIndex;
    if (points.every(({ cycleIndex }) => cycleIndex === firstX))
        return 0;
    let sumX = 0;
    let sumY = 0;
    for (const { cycleIndex, rate } of points) {
        sumX += cycleIndex;
        sumY += rate;
    }
    const meanX = sumX / n;
    const meanY = sumY / n;
    let cov = 0;
    let varX = 0;
    for (const { cycleIndex, rate } of points) {
        const dx = cycleIndex - meanX;
        cov += dx * (rate - meanY);
        varX += dx * dx;
    }
    // Still guard the division: distinct x values that are extremely close can
    // underflow to a zero variance.
    if (varX === 0)
        return 0;
    return cov / varX;
}
|
|
46
|
+
//# sourceMappingURL=slope.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"slope.js","sourceRoot":"","sources":["../../src/eval/slope.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AASH;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAAmB;IACnD,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAC1C,IAAI,IAAI,UAAU,CAAC;QACnB,IAAI,IAAI,IAAI,CAAC;IACf,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC;IACvB,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC;IACvB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAC1C,MAAM,EAAE,GAAG,UAAU,GAAG,KAAK,CAAC;QAC9B,GAAG,IAAI,EAAE,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;QAC3B,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;IAClB,CAAC;IACD,IAAI,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzB,OAAO,GAAG,GAAG,IAAI,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Train-sequence builder with the no-train/test-overlap guard for the
|
|
3
|
+
* train-arm slope e2e (issue #822, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The learner-full-cycle e2e drives `runCycle` DIRECTLY (it does not post tasks
|
|
6
|
+
* through the generator), so the generator's `excludeHeldOutSlate` train-stream
|
|
7
|
+
* chokepoint (#817) is BYPASSED. This builder is therefore the load-bearing
|
|
8
|
+
* AC#2 mechanism for the e2e: it selects the N distinct training instances from
|
|
9
|
+
* the pool with the held-out slate excluded, and asserts the resulting
|
|
10
|
+
* sequence is disjoint from the slate (fail-loud, never a silent drop).
|
|
11
|
+
*
|
|
12
|
+
* It reuses `excludeHeldOutSlate` from the #817 primitive rather than
|
|
13
|
+
* reimplementing the exclusion. Selection is deterministic (instance-id sorted)
|
|
14
|
+
* so a given pool yields a stable sequence across runs.
|
|
15
|
+
*/
|
|
16
|
+
import type { PoolTask } from '../solver-types/_swe-rebench-v2-pool.js';
|
|
17
|
+
/** Thrown when a chosen training sequence intersects the held-out slate (AC#2). */
|
|
18
|
+
export declare class TrainTestOverlapError extends Error {
|
|
19
|
+
readonly overlap: string[];
|
|
20
|
+
constructor(overlap: string[]);
|
|
21
|
+
}
|
|
22
|
+
/** Assert a set of training ids is disjoint from the slate, else throw loud. */
|
|
23
|
+
export declare function assertNoOverlap(trainIds: string[], slateIds: Set<string>): void;
|
|
24
|
+
export declare function buildTrainSequence(args: {
|
|
25
|
+
pool: PoolTask[];
|
|
26
|
+
slateIds: Set<string>;
|
|
27
|
+
/** Number of distinct training tasks (= N training cycles). */
|
|
28
|
+
count: number;
|
|
29
|
+
/**
|
|
30
|
+
* Optional explicit, hand-picked instance_ids (in order). When set, the
|
|
31
|
+
* builder still runs the no-overlap guard and resolves each id against the
|
|
32
|
+
* pool — used to fail-loud on a hand-edited sequence that overlaps the slate.
|
|
33
|
+
*/
|
|
34
|
+
explicitIds?: string[];
|
|
35
|
+
}): PoolTask[];
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Train-sequence builder with the no-train/test-overlap guard for the
|
|
3
|
+
* train-arm slope e2e (issue #822, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The learner-full-cycle e2e drives `runCycle` DIRECTLY (it does not post tasks
|
|
6
|
+
* through the generator), so the generator's `excludeHeldOutSlate` train-stream
|
|
7
|
+
* chokepoint (#817) is BYPASSED. This builder is therefore the load-bearing
|
|
8
|
+
* AC#2 mechanism for the e2e: it selects the N distinct training instances from
|
|
9
|
+
* the pool with the held-out slate excluded, and asserts the resulting
|
|
10
|
+
* sequence is disjoint from the slate (fail-loud, never a silent drop).
|
|
11
|
+
*
|
|
12
|
+
* It reuses `excludeHeldOutSlate` from the #817 primitive rather than
|
|
13
|
+
* reimplementing the exclusion. Selection is deterministic (instance-id sorted)
|
|
14
|
+
* so a given pool yields a stable sequence across runs.
|
|
15
|
+
*/
|
|
16
|
+
import { excludeHeldOutSlate } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
|
|
17
|
+
/**
 * Error raised when a training sequence shares at least one instance id with
 * the held-out slate (AC#2). The offending ids are exposed on `overlap` and
 * listed in the message.
 */
export class TrainTestOverlapError extends Error {
    overlap;
    constructor(overlap) {
        const offendingIds = overlap.join(', ');
        const message = [
            `train/test overlap: training sequence includes held-out slate instance(s) `,
            `${offendingIds} — refusing to train on the eval slate (AC#2). `,
            `The slate must stay out-of-sample for the slope to mean anything.`,
        ].join('');
        super(message);
        this.name = 'TrainTestOverlapError';
        this.overlap = overlap;
    }
}
|
|
28
|
+
/**
 * Guard that a list of training ids shares nothing with the held-out slate.
 *
 * @param {string[]} trainIds Candidate training instance ids.
 * @param {Set<string>} slateIds Instance ids of the held-out slate.
 * @throws {TrainTestOverlapError} Carrying every training id found in the slate.
 */
export function assertNoOverlap(trainIds, slateIds) {
    const leaked = [];
    for (const id of trainIds) {
        if (slateIds.has(id)) {
            leaked.push(id);
        }
    }
    if (leaked.length === 0) {
        return;
    }
    throw new TrainTestOverlapError(leaked);
}
|
|
34
|
+
/**
 * Build the ordered training sequence for the train-arm slope e2e, guaranteeing
 * it is disjoint from the held-out slate.
 *
 * Two modes:
 *  - `args.explicitIds` set: the ids are first checked against the slate
 *    (`TrainTestOverlapError` on any hit), then each is resolved, in the given
 *    order, against the slate-excluded pool. `args.count` is not consulted.
 *  - otherwise: the slate-excluded pool is sorted by `instance_id` and the
 *    first `args.count` tasks are returned.
 *
 * @param {{ pool: Array<{ instance_id: string }>, slateIds: Set<string>,
 *           count: number, explicitIds?: string[] }} args
 * @returns {Array<{ instance_id: string }>} The selected pool tasks.
 * @throws {TrainTestOverlapError} If the chosen ids intersect the slate.
 * @throws {RangeError} If `count` is not a non-negative integer (derived mode).
 * @throws {Error} If an explicit id is not in the eligible pool, or the
 *   eligible pool holds fewer than `count` tasks.
 */
export function buildTrainSequence(args) {
    const eligible = excludeHeldOutSlate(args.pool, args.slateIds);
    if (args.explicitIds) {
        // Guard the hand-picked sequence against the slate BEFORE resolving, so a
        // slate-overlapping id is a TrainTestOverlapError, not a "not eligible".
        assertNoOverlap(args.explicitIds, args.slateIds);
        const byId = new Map(eligible.map((t) => [t.instance_id, t]));
        return args.explicitIds.map((id) => {
            const task = byId.get(id);
            if (!task) {
                throw new Error(`explicit training instance ${id} not in the eligible pool`);
            }
            return task;
        });
    }
    // Fail loud on a malformed count: a negative value would sail past the
    // size check below and make `slice(0, count)` count from the END of the
    // array (returning almost the whole pool); NaN would silently return [].
    if (!Number.isInteger(args.count) || args.count < 0) {
        throw new RangeError(`train sequence count must be a non-negative integer, got ${args.count}`);
    }
    // Deterministic selection: instance-id sorted, first `count`.
    // NOTE(review): localeCompare uses the runtime's default locale/ICU data, so
    // ordering may differ between hosts — confirm whether that matters here.
    const sorted = [...eligible].sort((a, b) => a.instance_id.localeCompare(b.instance_id));
    if (sorted.length < args.count) {
        throw new Error(`train sequence needs ${args.count} distinct tasks but only ${sorted.length} eligible ` +
            `(pool size ${args.pool.length} minus ${args.slateIds.size} held-out slate instance(s))`);
    }
    const picked = sorted.slice(0, args.count);
    // Belt-and-braces: re-assert disjointness on the final selection.
    assertNoOverlap(picked.map((t) => t.instance_id), args.slateIds);
    return picked;
}
|
|
59
|
+
//# sourceMappingURL=train-sequence.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"train-sequence.js","sourceRoot":"","sources":["../../src/eval/train-sequence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,EAAE,mBAAmB,EAAE,MAAM,mDAAmD,CAAC;AAExF,mFAAmF;AACnF,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IAClB;IAA5B,YAA4B,OAAiB;QAC3C,KAAK,CACH,4EAA4E;YAC1E,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,iDAAiD;YACtE,mEAAmE,CACtE,CAAC;QALwB,YAAO,GAAP,OAAO,CAAU;QAM3C,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAED,gFAAgF;AAChF,MAAM,UAAU,eAAe,CAAC,QAAkB,EAAE,QAAqB;IACvE,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,MAAM,IAAI,qBAAqB,CAAC,OAAO,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAWlC;IACC,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAE9D,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACrB,0EAA0E;QAC1E,yEAAyE;QACzE,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE;YACjC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC1B,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,MAAM,IAAI,KAAK,CAAC,8BAA8B,EAAE,2BAA2B,CAAC,CAAC;YAC/E,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8DAA8D;IAC9D,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;IACxF,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,wBAAwB,IAAI,CAAC,KAAK,4BAA4B,MAAM,CAAC,MAAM,YAAY;YACrF,cAAc,IAAI,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,CAAC,QAAQ,CAAC,IAAI,8BAA8B,CAC3F,CAAC;IACJ,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3C,eAAe,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IACjE,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wilson score interval + resolved-rate comparison for the `jinn eval`
|
|
3
|
+
* held-out checkpoint orchestrator (issue #818, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The Wilson score interval is a binomial proportion confidence interval that
|
|
6
|
+
* behaves well at the extremes (p=0, p=1) and for small n — unlike the naive
|
|
7
|
+
* normal-approximation interval. We write it small (no stats dependency, per
|
|
8
|
+
* repo convention): the formula is ~10 lines.
|
|
9
|
+
*
|
|
10
|
+
* Per log/decisions/2026-05-28-rl-eval-measurement.md §4: this is v1-simple.
|
|
11
|
+
* Only *large* deltas are trustworthy — we encode that as "the child and
|
|
12
|
+
* parent intervals do not overlap." No seed control, no multi-run averaging.
|
|
13
|
+
*/
|
|
14
|
+
export interface Interval {
|
|
15
|
+
/** Observed point estimate, passed / scorable (0 when scorable=0). */
|
|
16
|
+
p: number;
|
|
17
|
+
/** Lower bound, clamped to [0, 1]. */
|
|
18
|
+
lo: number;
|
|
19
|
+
/** Upper bound, clamped to [0, 1]. */
|
|
20
|
+
hi: number;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Wilson score interval for `passed` successes out of `scorable` trials.
|
|
24
|
+
* `scorable === 0` returns a degenerate `{ p: 0, lo: 0, hi: 0 }` (no NaN).
|
|
25
|
+
*/
|
|
26
|
+
export declare function wilsonInterval(passed: number, scorable: number, z?: number): Interval;
|
|
27
|
+
export type RateVerdict = 'trustworthy' | 'within-noise';
|
|
28
|
+
export interface RateComparison {
|
|
29
|
+
child: Interval;
|
|
30
|
+
parent: Interval;
|
|
31
|
+
/** child.p − parent.p (point-estimate difference, can be negative). */
|
|
32
|
+
delta: number;
|
|
33
|
+
/**
|
|
34
|
+
* 'trustworthy' iff the two Wilson intervals do NOT overlap; otherwise
|
|
35
|
+
* 'within-noise'. v1-simple: only disjoint intervals justify a claim.
|
|
36
|
+
*/
|
|
37
|
+
verdict: RateVerdict;
|
|
38
|
+
}
|
|
39
|
+
export declare function compareRates(child: {
|
|
40
|
+
passed: number;
|
|
41
|
+
scorable: number;
|
|
42
|
+
}, parent: {
|
|
43
|
+
passed: number;
|
|
44
|
+
scorable: number;
|
|
45
|
+
}): RateComparison;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wilson score interval + resolved-rate comparison for the `jinn eval`
|
|
3
|
+
* held-out checkpoint orchestrator (issue #818, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The Wilson score interval is a binomial proportion confidence interval that
|
|
6
|
+
* behaves well at the extremes (p=0, p=1) and for small n — unlike the naive
|
|
7
|
+
* normal-approximation interval. We write it small (no stats dependency, per
|
|
8
|
+
* repo convention): the formula is ~10 lines.
|
|
9
|
+
*
|
|
10
|
+
* Per log/decisions/2026-05-28-rl-eval-measurement.md §4: this is v1-simple.
|
|
11
|
+
* Only *large* deltas are trustworthy — we encode that as "the child and
|
|
12
|
+
* parent intervals do not overlap." No seed control, no multi-run averaging.
|
|
13
|
+
*/
|
|
14
|
+
/** z-score used when the caller does not supply one: 1.96, the two-sided 95% value. */
const DEFAULT_Z = 1.96;
/**
 * Compute the Wilson score confidence interval for a binomial proportion.
 *
 * @param {number} passed Number of successes.
 * @param {number} scorable Number of trials.
 * @param {number} [z=DEFAULT_Z] z-score controlling the interval width.
 * @returns {{ p: number, lo: number, hi: number }} The point estimate
 *   `passed / scorable` with the lower bound floored at 0 and the upper bound
 *   capped at 1. With zero trials the result is `{ p: 0, lo: 0, hi: 0 }`
 *   rather than NaN.
 */
export function wilsonInterval(passed, scorable, z = DEFAULT_Z) {
    if (scorable === 0) {
        return { p: 0, lo: 0, hi: 0 };
    }
    const trials = scorable;
    const observed = passed / trials;
    const zSquared = z * z;
    // Shared divisor of both bounds.
    const scale = 1 + zSquared / trials;
    // Interval midpoint before scaling.
    const midpoint = observed + zSquared / (2 * trials);
    const halfWidth = z * Math.sqrt((observed * (1 - observed)) / trials + zSquared / (4 * trials * trials));
    const lower = (midpoint - halfWidth) / scale;
    const upper = (midpoint + halfWidth) / scale;
    return { p: observed, lo: Math.max(0, lower), hi: Math.min(1, upper) };
}
|
|
37
|
+
/**
 * Compare a child's resolved rate against its parent's using Wilson intervals.
 *
 * @param {{ passed: number, scorable: number }} child Child arm counts.
 * @param {{ passed: number, scorable: number }} parent Parent arm counts.
 * @returns {{ child: object, parent: object, delta: number, verdict: string }}
 *   Both intervals, `delta = child.p - parent.p`, and a verdict that is
 *   `'trustworthy'` only when both arms have at least one scorable trial AND
 *   their intervals do not overlap; otherwise `'within-noise'`.
 */
export function compareRates(child, parent) {
    const c = wilsonInterval(child.passed, child.scorable);
    const p = wilsonInterval(parent.passed, parent.scorable);
    // wilsonInterval returns the placeholder { p: 0, lo: 0, hi: 0 } for zero
    // trials. That placeholder is "disjoint" from any interval whose lower
    // bound is above 0, so without this check a comparison with NO data on one
    // side would be reported as a trustworthy difference.
    const bothHaveData = child.scorable !== 0 && parent.scorable !== 0;
    const disjoint = c.lo > p.hi || p.lo > c.hi;
    return {
        child: c,
        parent: p,
        delta: c.p - p.p,
        verdict: bothHaveData && disjoint ? 'trustworthy' : 'within-noise',
    };
}
|
|
48
|
+
//# sourceMappingURL=wilson.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wilson.js","sourceRoot":"","sources":["../../src/eval/wilson.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,0DAA0D;AAC1D,MAAM,SAAS,GAAG,IAAI,CAAC;AAWvB;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,MAAc,EAAE,QAAgB,EAAE,IAAY,SAAS;IACpF,IAAI,QAAQ,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC;IAClD,MAAM,CAAC,GAAG,QAAQ,CAAC;IACnB,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACjB,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IACzB,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChC,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACnE,MAAM,EAAE,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,MAAM,EAAE,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,OAAO;QACL,CAAC;QACD,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC;QACnB,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC;KACpB,CAAC;AACJ,CAAC;AAgBD,MAAM,UAAU,YAAY,CAC1B,KAA2C,EAC3C,MAA4C;IAE5C,MAAM,CAAC,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvD,MAAM,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IACzD,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC;IAC5C,OAAO;QACL,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,CAAC;QACT,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAChB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,cAAc;KACnD,CAAC;AACJ,CAAC"}
|
|
@@ -22,9 +22,11 @@
|
|
|
22
22
|
* Used for manifest signing: produce a deterministic byte string that two
|
|
23
23
|
* independent parties can reproduce from the same object graph.
|
|
24
24
|
*/
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
// canonicalize v3 is ESM-only (no CJS entry); the client is `"type": "module"`
|
|
26
|
+
// so a native ESM default import is the correct interop (replaces the v2-era
|
|
27
|
+
// createRequire shim). v3 is packaging-only vs v2 — its RFC 8785 output is
|
|
28
|
+
// byte-identical, so existing content hashes / manifest signatures are stable.
|
|
29
|
+
import canonicalize from 'canonicalize';
|
|
28
30
|
/**
|
|
29
31
|
* Recursively replace NaN / ±Infinity with null so that canonicalize does not
|
|
30
32
|
* throw — matching the JSON.stringify behaviour that the rest of the codebase
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"canonical-json.js","sourceRoot":"","sources":["../../../src/harnesses/engine/canonical-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,
|
|
1
|
+
{"version":3,"file":"canonical-json.js","sourceRoot":"","sources":["../../../src/harnesses/engine/canonical-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,+EAA+E;AAC/E,6EAA6E;AAC7E,2EAA2E;AAC3E,+EAA+E;AAC/E,OAAO,YAAY,MAAM,cAAc,CAAC;AAExC;;;;;GAKG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC/D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAC5D,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,mEAAmE;QACnE,qEAAqE;QACrE,sEAAsE;QACtE,WAAW;QACX,MAAM,MAAM,GAAI,KAAoC,CAAC,MAAM,CAAC;QAC5D,IAAI,OAAO,MAAM,KAAK,UAAU,EAAE,CAAC;YACjC,OAAO,eAAe,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC7C,CAAC;QACD,MAAM,GAAG,GAA4B,EAAE,CAAC;QACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QACxE,OAAO,GAAG,CAAC;IACb,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEvC,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACrC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,wEAAwE;QACxE,uEAAuE;QACvE,kCAAkC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -70,6 +70,10 @@ export interface JoinedSolverNetsView {
|
|
|
70
70
|
} | undefined;
|
|
71
71
|
/** Enumerate all joined manifest CIDs (used for digest-based filtering). */
|
|
72
72
|
manifestCids(): string[];
|
|
73
|
+
/** Add/replace one joined entry live (used by the hot-apply join applier, #1037). */
|
|
74
|
+
set(manifestCid: string, entry: {
|
|
75
|
+
roles: Array<'solver' | 'evaluator'>;
|
|
76
|
+
}): void;
|
|
73
77
|
}
|
|
74
78
|
/**
|
|
75
79
|
* Build a `JoinedSolverNetsView` from the raw operator-config block.
|
|
@@ -82,6 +86,17 @@ export declare function joinedSolverNetsViewFromConfig(joined: Record<string, {
|
|
|
82
86
|
manifestCid: string;
|
|
83
87
|
roles: Array<'solver' | 'evaluator'>;
|
|
84
88
|
}> | undefined): JoinedSolverNetsView | undefined;
|
|
89
|
+
/**
|
|
90
|
+
* Mutable `JoinedSolverNetsView` for the running daemon. Unlike
|
|
91
|
+
* `joinedSolverNetsViewFromConfig` (boot snapshot), the applier
|
|
92
|
+
* (`daemon/join-applier.ts`, #1037) keeps a handle and calls `set()` when a
|
|
93
|
+
* join is hot-applied, so the engine's per-task eligibility check sees the new
|
|
94
|
+
* cid on its next call without a restart.
|
|
95
|
+
*/
|
|
96
|
+
export declare function createMutableJoinedSolverNetsView(initial: Record<string, {
|
|
97
|
+
manifestCid: string;
|
|
98
|
+
roles: Array<'solver' | 'evaluator'>;
|
|
99
|
+
}> | undefined): JoinedSolverNetsView;
|
|
85
100
|
/**
|
|
86
101
|
* Resolves a launched SolverNet manifest by IPFS CID.
|
|
87
102
|
*
|
|
@@ -576,6 +591,15 @@ export declare class TaskEngine {
|
|
|
576
591
|
* TCAttemptAlreadyFinalized, …). We mark the row RACE_LOST and emit a
|
|
577
592
|
* `kind=race_lost` activity event so operators can audit prunes
|
|
578
593
|
* without inflating the FAILED counter (#896).
|
|
594
|
+
* - A transport-transient error (e.g. `AllRpcsFailedError` — every provider
|
|
595
|
+
* in the L2 fallback chain failed at once) on a task whose delivery window
|
|
596
|
+
* is still open: leave the row in its current in-flight state so the next
|
|
597
|
+
* tick re-drives it once the RPCs recover, and emit a `tick_error` (warn)
|
|
598
|
+
* event instead of inflating the FAILED counter. Without this the daemon
|
|
599
|
+
* stamped the row FAILED, dropping it from `getInFlight()` permanently, so
|
|
600
|
+
* L2 work went silent until a manual restart (#912). Past-window transient
|
|
601
|
+
* errors still terminalize to avoid churning on work that can no longer
|
|
602
|
+
* settle.
|
|
579
603
|
* - Everything else: existing markFailed behaviour. When invoked from
|
|
580
604
|
* recovery, `contextLabel === 'recovery'` so the failure_reason
|
|
581
605
|
* carries the `recovery:` prefix the original code path used.
|
|
@@ -18,6 +18,7 @@ import { assembleAndSignEnvelope, } from './envelope-assembly.js';
|
|
|
18
18
|
import { deliverAndClaim, } from './delivery.js';
|
|
19
19
|
import { SafeInnerRevertError, isNonRecoverableInnerRevert, formatDecodedRevert, } from '../../adapters/mech/safe-revert.js';
|
|
20
20
|
import { emitEvent } from '../../observability/emit-event.js';
|
|
21
|
+
import { isRecoverableTransactionError } from '../../tx-retry.js';
|
|
21
22
|
import { SkippableError } from '../types.js';
|
|
22
23
|
import { submitEvaluatorFeedback, codeDigestSha256ToBytes32, encodeExecutionPayload, encodeExecutionPayloadV2, modeStringToFlag, } from '../../erc8004/index.js';
|
|
23
24
|
import { TrajectoryCollector, emitTrajectory } from '../../trajectory/index.js';
|
|
@@ -26,6 +27,7 @@ import { VerdictCode } from '../../adapters/mech/verdict-code.js';
|
|
|
26
27
|
import { buildInfo } from '../../build-info.js';
|
|
27
28
|
import { getSolverNetContract } from '../../vendor/@jinn-network/sdk/dist/solvernets/index.js';
|
|
28
29
|
import { runHarnessWithFreezeFence, } from '../../daemon/freeze-fence.js';
|
|
30
|
+
import { recordLoopTick } from '../../daemon/loop-heartbeat.js';
|
|
29
31
|
import { harnessStateDirName } from '../names.js';
|
|
30
32
|
import { recordTaskCost } from '../../spend/record.js';
|
|
31
33
|
// ── Sentinel error ────────────────────────────────────────────────────────────
|
|
@@ -61,6 +63,25 @@ export function joinedSolverNetsViewFromConfig(joined) {
|
|
|
61
63
|
return {
|
|
62
64
|
get: (cid) => map.get(cid),
|
|
63
65
|
manifestCids: () => [...map.keys()],
|
|
66
|
+
set: (cid, entry) => { map.set(cid, entry); },
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Mutable `JoinedSolverNetsView` for the running daemon. Unlike
|
|
71
|
+
* `joinedSolverNetsViewFromConfig` (boot snapshot), the applier
|
|
72
|
+
* (`daemon/join-applier.ts`, #1037) keeps a handle and calls `set()` when a
|
|
73
|
+
* join is hot-applied, so the engine's per-task eligibility check sees the new
|
|
74
|
+
* cid on its next call without a restart.
|
|
75
|
+
*/
|
|
76
|
+
export function createMutableJoinedSolverNetsView(initial) {
|
|
77
|
+
const map = new Map();
|
|
78
|
+
for (const [key, entry] of Object.entries(initial ?? {})) {
|
|
79
|
+
map.set(entry.manifestCid ?? key, { roles: entry.roles });
|
|
80
|
+
}
|
|
81
|
+
return {
|
|
82
|
+
get: (cid) => map.get(cid),
|
|
83
|
+
manifestCids: () => [...map.keys()],
|
|
84
|
+
set: (cid, entry) => { map.set(cid, entry); },
|
|
64
85
|
};
|
|
65
86
|
}
|
|
66
87
|
// ── TaskEngine ─────────────────────────────────────────────────────────
|
|
@@ -281,6 +302,7 @@ export class TaskEngine {
|
|
|
281
302
|
catch (err) {
|
|
282
303
|
console.error('[harness-engine] tick loop error (continuing):', err instanceof Error ? err.message : err);
|
|
283
304
|
}
|
|
305
|
+
recordLoopTick(this.store, 'engine-tick'); // #1043 loop watchdog
|
|
284
306
|
if (this.stopped)
|
|
285
307
|
break;
|
|
286
308
|
await Promise.race([
|
|
@@ -761,7 +783,17 @@ export class TaskEngine {
|
|
|
761
783
|
throw new NotImplementedError('runImpl');
|
|
762
784
|
}
|
|
763
785
|
const runtimePlugins = solverNet?.runtimePlugins ?? [];
|
|
764
|
-
|
|
786
|
+
// #1035: merge harness self-attributed plugins (e.g. claude-code-learner)
|
|
787
|
+
// into the envelope carrier so they appear in executor.plugins. This is a
|
|
788
|
+
// SEPARATE array from `runtimePlugins`: the latter still feeds
|
|
789
|
+
// ctx.runtimePlugins / ctx.solverPluginRoots (which the harness uses to
|
|
790
|
+
// LOAD solver plugins), and the learner plugin is already loaded by the
|
|
791
|
+
// harness itself via its own plugin root — adding it there would double-load.
|
|
792
|
+
const attributedPlugins = [
|
|
793
|
+
...runtimePlugins,
|
|
794
|
+
...(impl.attributionPlugins?.() ?? []),
|
|
795
|
+
];
|
|
796
|
+
this.runtimePluginsByRequest.set(task.requestId, attributedPlugins);
|
|
765
797
|
const workingDir = task.workingDir ?? join(this.paths.workingDirRoot, task.requestId);
|
|
766
798
|
const kindSeg = solverType.replace(/[.:]/g, '_');
|
|
767
799
|
const implStateDir = task.implStateDir ?? (kindSeg
|
|
@@ -846,7 +878,7 @@ export class TaskEngine {
|
|
|
846
878
|
informationalClaim: skippedOutput.informational ?? null,
|
|
847
879
|
solutionOutputsJson: JSON.stringify(skippedOutput),
|
|
848
880
|
implName: impl.name,
|
|
849
|
-
runtimePluginsJson: JSON.stringify(
|
|
881
|
+
runtimePluginsJson: JSON.stringify(attributedPlugins),
|
|
850
882
|
});
|
|
851
883
|
console.log(`[harness-engine] ${task.requestId} RUNNING → POST_SNAPSHOT via impl=${impl.name} (skipped)`);
|
|
852
884
|
return;
|
|
@@ -890,7 +922,7 @@ export class TaskEngine {
|
|
|
890
922
|
informationalClaim: output.informational ?? null,
|
|
891
923
|
solutionOutputsJson: JSON.stringify(output),
|
|
892
924
|
implName: impl.name,
|
|
893
|
-
runtimePluginsJson: JSON.stringify(
|
|
925
|
+
runtimePluginsJson: JSON.stringify(attributedPlugins),
|
|
894
926
|
});
|
|
895
927
|
}
|
|
896
928
|
finally {
|
|
@@ -1488,10 +1520,7 @@ export class TaskEngine {
|
|
|
1488
1520
|
case 'UNRESOLVED':
|
|
1489
1521
|
return VerdictCode.Unresolved;
|
|
1490
1522
|
default:
|
|
1491
|
-
|
|
1492
|
-
// Return Invalid(3) — not Pass(1). Pass must come from an explicit PASS/SCORED verdict.
|
|
1493
|
-
console.warn(`[harness-engine] verdictCodeForTask: unrecognized gatingClaim.verdict (got=${String(raw)}); defaulting to Invalid(3) — should never happen, indicates the evaluator harness didn't set gatingClaim.verdict before submission`);
|
|
1494
|
-
return VerdictCode.Invalid;
|
|
1523
|
+
throw new Error(`[harness-engine] verdictCodeForTask: missing or unrecognized gatingClaim.verdict (got=${String(raw)}); refusing to claim Invalid(3) on-chain without an explicit evaluator verdict`);
|
|
1495
1524
|
}
|
|
1496
1525
|
}
|
|
1497
1526
|
async _maybePostEvaluatorFeedback(task) {
|
|
@@ -1630,6 +1659,15 @@ export class TaskEngine {
|
|
|
1630
1659
|
* TCAttemptAlreadyFinalized, …). We mark the row RACE_LOST and emit a
|
|
1631
1660
|
* `kind=race_lost` activity event so operators can audit prunes
|
|
1632
1661
|
* without inflating the FAILED counter (#896).
|
|
1662
|
+
* - A transport-transient error (e.g. `AllRpcsFailedError` — every provider
|
|
1663
|
+
* in the L2 fallback chain failed at once) on a task whose delivery window
|
|
1664
|
+
* is still open: leave the row in its current in-flight state so the next
|
|
1665
|
+
* tick re-drives it once the RPCs recover, and emit a `tick_error` (warn)
|
|
1666
|
+
* event instead of inflating the FAILED counter. Without this the daemon
|
|
1667
|
+
* stamped the row FAILED, dropping it from `getInFlight()` permanently, so
|
|
1668
|
+
* L2 work went silent until a manual restart (#912). Past-window transient
|
|
1669
|
+
* errors still terminalize to avoid churning on work that can no longer
|
|
1670
|
+
* settle.
|
|
1633
1671
|
* - Everything else: existing markFailed behaviour. When invoked from
|
|
1634
1672
|
* recovery, `contextLabel === 'recovery'` so the failure_reason
|
|
1635
1673
|
* carries the `recovery:` prefix the original code path used.
|
|
@@ -1652,6 +1690,24 @@ export class TaskEngine {
|
|
|
1652
1690
|
return 'race_lost';
|
|
1653
1691
|
}
|
|
1654
1692
|
const reason = err instanceof Error ? err.message : String(err);
|
|
1693
|
+
// A transport-transient failure (all RPC providers in the fallback chain
|
|
1694
|
+
// blipped at once, 429s, timeouts, …) is not the task's fault and is not
|
|
1695
|
+
// permanent. Leave the row in its in-flight state — do NOT call
|
|
1696
|
+
// markFailed, which would drop it from getInFlight() forever (#912) — so
|
|
1697
|
+
// the engine-tick loop re-drives it once the RPCs recover. The tick loop
|
|
1698
|
+
// IS the retry; there is no per-task attempt counter. Skip this only once
|
|
1699
|
+
// the delivery window has closed, so we never churn on work that can no
|
|
1700
|
+
// longer settle on-chain.
|
|
1701
|
+
if (task.windowEndTs > Date.now() && isRecoverableTransactionError(err)) {
|
|
1702
|
+
emitEvent(this.store, {
|
|
1703
|
+
kind: 'tick_error',
|
|
1704
|
+
requestId: task.requestId,
|
|
1705
|
+
solverType: task.solverType ?? undefined,
|
|
1706
|
+
outcome: 'warn',
|
|
1707
|
+
detail: `transient RPC failure in ${contextLabel}; left ${task.state} for retry: ${reason}`,
|
|
1708
|
+
}, 'harness-engine');
|
|
1709
|
+
return 'transient';
|
|
1710
|
+
}
|
|
1655
1711
|
const stamped = contextLabel === 'recovery' ? `recovery: ${reason}` : reason;
|
|
1656
1712
|
this.persistence.markFailed(task.requestId, stamped);
|
|
1657
1713
|
return 'failed';
|
|
@@ -1695,8 +1751,15 @@ export class TaskEngine {
|
|
|
1695
1751
|
if (current && current.state === task.state) {
|
|
1696
1752
|
const classification = this._classifyAndMarkTerminal(task, err, 'recovery');
|
|
1697
1753
|
const reason = err instanceof Error ? err.message : String(err);
|
|
1698
|
-
|
|
1699
|
-
|
|
1754
|
+
// 'transient' leaves the row in-flight (not terminal); the next tick
|
|
1755
|
+
// re-drives it once the RPCs recover (#912). Log it at warn so the
|
|
1756
|
+
// stall is visible without firing the error-level alerting that a
|
|
1757
|
+
// genuine failure does.
|
|
1758
|
+
const { log, verb } = {
|
|
1759
|
+
race_lost: { log: console.log, verb: 'pruned' },
|
|
1760
|
+
transient: { log: console.warn, verb: 'deferred (transient RPC)' },
|
|
1761
|
+
failed: { log: console.error, verb: 'failed' },
|
|
1762
|
+
}[classification];
|
|
1700
1763
|
log(`[harness-engine] resume ${verb} for ${task.requestId}: ${reason}`);
|
|
1701
1764
|
}
|
|
1702
1765
|
throw err;
|