@jinn-network/client 0.1.7 → 0.1.8-canary.026c5503
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -1
- package/dist/adapters/mech/adapter.d.ts +39 -2
- package/dist/adapters/mech/adapter.js +178 -20
- package/dist/adapters/mech/adapter.js.map +1 -1
- package/dist/adapters/mech/contracts.d.ts +22 -1
- package/dist/adapters/mech/contracts.js +96 -52
- package/dist/adapters/mech/contracts.js.map +1 -1
- package/dist/adapters/mech/safe-revert.d.ts +4 -0
- package/dist/adapters/mech/safe-revert.js +5 -1
- package/dist/adapters/mech/safe-revert.js.map +1 -1
- package/dist/adapters/mech/safe.d.ts +1 -1
- package/dist/adapters/mech/safe.js +10 -4
- package/dist/adapters/mech/safe.js.map +1 -1
- package/dist/adapters/mech/types.d.ts +6 -1
- package/dist/adapters/mech/types.js.map +1 -1
- package/dist/agent/operator-claude.js +8 -0
- package/dist/agent/operator-claude.js.map +1 -1
- package/dist/api/activity-events-endpoint.d.ts +14 -0
- package/dist/api/activity-events-endpoint.js +59 -0
- package/dist/api/activity-events-endpoint.js.map +1 -0
- package/dist/api/bootstrap-endpoint.d.ts +1 -2
- package/dist/api/bootstrap-endpoint.js +42 -24
- package/dist/api/bootstrap-endpoint.js.map +1 -1
- package/dist/api/codex-doctor-endpoint.d.ts +22 -5
- package/dist/api/codex-doctor-endpoint.js +136 -17
- package/dist/api/codex-doctor-endpoint.js.map +1 -1
- package/dist/api/debug-report-endpoint.d.ts +27 -0
- package/dist/api/debug-report-endpoint.js +157 -0
- package/dist/api/debug-report-endpoint.js.map +1 -0
- package/dist/api/discovery-endpoint.d.ts +1 -0
- package/dist/api/discovery-endpoint.js +24 -0
- package/dist/api/discovery-endpoint.js.map +1 -1
- package/dist/api/fleet-build.d.ts +1 -7
- package/dist/api/fleet-build.js +0 -7
- package/dist/api/fleet-build.js.map +1 -1
- package/dist/api/gather-status.d.ts +39 -0
- package/dist/api/gather-status.js +181 -84
- package/dist/api/gather-status.js.map +1 -1
- package/dist/api/hermes-doctor-endpoint.d.ts +15 -7
- package/dist/api/hermes-doctor-endpoint.js +56 -19
- package/dist/api/hermes-doctor-endpoint.js.map +1 -1
- package/dist/api/launcher-status.d.ts +4 -2
- package/dist/api/launcher-status.js +11 -10
- package/dist/api/launcher-status.js.map +1 -1
- package/dist/api/launcher-tasks.d.ts +1 -1
- package/dist/api/launcher-tasks.js +12 -8
- package/dist/api/launcher-tasks.js.map +1 -1
- package/dist/api/loop-completion-build.d.ts +79 -0
- package/dist/api/loop-completion-build.js +155 -0
- package/dist/api/loop-completion-build.js.map +1 -0
- package/dist/api/operator-artifacts-endpoint.js +73 -6
- package/dist/api/operator-artifacts-endpoint.js.map +1 -1
- package/dist/api/portfolio-v0-build.d.ts +7 -1
- package/dist/api/portfolio-v0-build.js +6 -2
- package/dist/api/portfolio-v0-build.js.map +1 -1
- package/dist/api/prediction-v1-build.d.ts +6 -0
- package/dist/api/prediction-v1-build.js +3 -1
- package/dist/api/prediction-v1-build.js.map +1 -1
- package/dist/api/server.d.ts +17 -0
- package/dist/api/server.js +40 -1
- package/dist/api/server.js.map +1 -1
- package/dist/api/setup-endpoints.d.ts +13 -9
- package/dist/api/setup-endpoints.js +50 -173
- package/dist/api/setup-endpoints.js.map +1 -1
- package/dist/api/solvernets-endpoints.js +33 -63
- package/dist/api/solvernets-endpoints.js.map +1 -1
- package/dist/api/status-build.d.ts +140 -17
- package/dist/api/status-build.js +47 -34
- package/dist/api/status-build.js.map +1 -1
- package/dist/api/status-harness-rollup.d.ts +35 -0
- package/dist/api/status-harness-rollup.js +45 -0
- package/dist/api/status-harness-rollup.js.map +1 -0
- package/dist/api/status-rollup-build.d.ts +0 -4
- package/dist/api/status-rollup-build.js +0 -4
- package/dist/api/status-rollup-build.js.map +1 -1
- package/dist/api/task-runs-build.d.ts +8 -0
- package/dist/api/task-runs-build.js +5 -1
- package/dist/api/task-runs-build.js.map +1 -1
- package/dist/build-info.json +4 -4
- package/dist/build-meta.json +1 -1
- package/dist/captures/live-publisher.js +24 -4
- package/dist/captures/live-publisher.js.map +1 -1
- package/dist/captures/publish.d.ts +1 -1
- package/dist/chain-read-errors.d.ts +12 -0
- package/dist/chain-read-errors.js +26 -1
- package/dist/chain-read-errors.js.map +1 -1
- package/dist/cli/commands/codedigest-revert-check.d.ts +33 -0
- package/dist/cli/commands/codedigest-revert-check.js +253 -0
- package/dist/cli/commands/codedigest-revert-check.js.map +1 -0
- package/dist/cli/commands/doctor.d.ts +3 -0
- package/dist/cli/commands/doctor.js +35 -0
- package/dist/cli/commands/doctor.js.map +1 -1
- package/dist/cli/commands/eval.d.ts +76 -0
- package/dist/cli/commands/eval.js +401 -0
- package/dist/cli/commands/eval.js.map +1 -0
- package/dist/cli/commands/rewards.d.ts +2 -0
- package/dist/cli/commands/rewards.js +27 -0
- package/dist/cli/commands/rewards.js.map +1 -1
- package/dist/cli/commands/solver-nets.d.ts +1 -0
- package/dist/cli/commands/solver-nets.js +245 -22
- package/dist/cli/commands/solver-nets.js.map +1 -1
- package/dist/cli/commands/solver-plugins-block.d.ts +33 -0
- package/dist/cli/commands/solver-plugins-block.js +118 -0
- package/dist/cli/commands/solver-plugins-block.js.map +1 -0
- package/dist/cli/commands/solver-plugins-feedback.d.ts +72 -0
- package/dist/cli/commands/solver-plugins-feedback.js +262 -0
- package/dist/cli/commands/solver-plugins-feedback.js.map +1 -0
- package/dist/cli/commands/solver-plugins-read.d.ts +54 -0
- package/dist/cli/commands/solver-plugins-read.js +259 -0
- package/dist/cli/commands/solver-plugins-read.js.map +1 -0
- package/dist/cli/commands/solver-plugins.d.ts +35 -0
- package/dist/cli/commands/solver-plugins.js +399 -2
- package/dist/cli/commands/solver-plugins.js.map +1 -1
- package/dist/cli/commands/status.js +0 -1
- package/dist/cli/commands/status.js.map +1 -1
- package/dist/cli/commands/tasks.js +15 -2
- package/dist/cli/commands/tasks.js.map +1 -1
- package/dist/cli/index.js +4 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/task-native-readiness.d.ts +7 -0
- package/dist/cli/task-native-readiness.js +7 -5
- package/dist/cli/task-native-readiness.js.map +1 -1
- package/dist/config.d.ts +206 -232
- package/dist/config.js +289 -107
- package/dist/config.js.map +1 -1
- package/dist/daemon/ai-units-gate.d.ts +54 -0
- package/dist/daemon/ai-units-gate.js +83 -0
- package/dist/daemon/ai-units-gate.js.map +1 -0
- package/dist/daemon/creator.js +13 -0
- package/dist/daemon/creator.js.map +1 -1
- package/dist/daemon/daemon.d.ts +10 -0
- package/dist/daemon/daemon.js +205 -30
- package/dist/daemon/daemon.js.map +1 -1
- package/dist/daemon/eviction-loop.d.ts +7 -0
- package/dist/daemon/eviction-loop.js +16 -0
- package/dist/daemon/eviction-loop.js.map +1 -1
- package/dist/daemon/gate-logger.d.ts +9 -0
- package/dist/daemon/gate-logger.js +2 -0
- package/dist/daemon/gate-logger.js.map +1 -0
- package/dist/daemon/jinn-claim-loop.js +22 -4
- package/dist/daemon/jinn-claim-loop.js.map +1 -1
- package/dist/daemon/readiness-gate.d.ts +1 -4
- package/dist/daemon/readiness-gate.js.map +1 -1
- package/dist/daemon/spend-cap-gate.d.ts +40 -0
- package/dist/daemon/spend-cap-gate.js +46 -0
- package/dist/daemon/spend-cap-gate.js.map +1 -0
- package/dist/dashboard/assets/index-3quVQqik.js +167 -0
- package/dist/dashboard/assets/index-BVAWkLwY.css +1 -0
- package/dist/dashboard/index.html +2 -2
- package/dist/discovery/http.d.ts +7 -0
- package/dist/discovery/http.js +567 -24
- package/dist/discovery/http.js.map +1 -1
- package/dist/discovery/onchain.js +197 -5
- package/dist/discovery/onchain.js.map +1 -1
- package/dist/discovery/types.d.ts +235 -0
- package/dist/discovery/types.js +40 -0
- package/dist/discovery/types.js.map +1 -1
- package/dist/discovery/with-fallback.js +41 -0
- package/dist/discovery/with-fallback.js.map +1 -1
- package/dist/earning/bootstrap.d.ts +31 -3
- package/dist/earning/bootstrap.js +94 -22
- package/dist/earning/bootstrap.js.map +1 -1
- package/dist/earning/faucet.d.ts +1 -1
- package/dist/earning/faucet.js +2 -2
- package/dist/earning/faucet.js.map +1 -1
- package/dist/earning/safe-adapter.js +34 -11
- package/dist/earning/safe-adapter.js.map +1 -1
- package/dist/earning/types.d.ts +6 -6
- package/dist/earning/viem-clients.d.ts +11 -4
- package/dist/earning/viem-clients.js +14 -5
- package/dist/earning/viem-clients.js.map +1 -1
- package/dist/erc8004/identity.d.ts +19 -3
- package/dist/erc8004/identity.js +38 -11
- package/dist/erc8004/identity.js.map +1 -1
- package/dist/erc8004/index.d.ts +1 -1
- package/dist/erc8004/index.js.map +1 -1
- package/dist/eval/eval-harness-run.d.ts +63 -0
- package/dist/eval/eval-harness-run.js +123 -0
- package/dist/eval/eval-harness-run.js.map +1 -0
- package/dist/eval/orchestrator.d.ts +163 -0
- package/dist/eval/orchestrator.js +232 -0
- package/dist/eval/orchestrator.js.map +1 -0
- package/dist/eval/paired.d.ts +68 -0
- package/dist/eval/paired.js +93 -0
- package/dist/eval/paired.js.map +1 -0
- package/dist/eval/resolve-slate-tasks.d.ts +35 -0
- package/dist/eval/resolve-slate-tasks.js +56 -0
- package/dist/eval/resolve-slate-tasks.js.map +1 -0
- package/dist/eval/screen-discovery.d.ts +22 -0
- package/dist/eval/screen-discovery.js +71 -0
- package/dist/eval/screen-discovery.js.map +1 -0
- package/dist/eval/screen-progress.d.ts +41 -0
- package/dist/eval/screen-progress.js +60 -0
- package/dist/eval/screen-progress.js.map +1 -0
- package/dist/eval/screen-runner.d.ts +30 -0
- package/dist/eval/screen-runner.js +289 -0
- package/dist/eval/screen-runner.js.map +1 -0
- package/dist/eval/screen.d.ts +107 -0
- package/dist/eval/screen.js +159 -0
- package/dist/eval/screen.js.map +1 -0
- package/dist/eval/slope.d.ts +29 -0
- package/dist/eval/slope.js +46 -0
- package/dist/eval/slope.js.map +1 -0
- package/dist/eval/train-sequence.d.ts +35 -0
- package/dist/eval/train-sequence.js +59 -0
- package/dist/eval/train-sequence.js.map +1 -0
- package/dist/eval/wilson.d.ts +45 -0
- package/dist/eval/wilson.js +48 -0
- package/dist/eval/wilson.js.map +1 -0
- package/dist/events/types.d.ts +2 -2
- package/dist/harnesses/cost-estimates.d.ts +10 -31
- package/dist/harnesses/cost-estimates.js +11 -43
- package/dist/harnesses/cost-estimates.js.map +1 -1
- package/dist/harnesses/engine/canonical-json.js +5 -3
- package/dist/harnesses/engine/canonical-json.js.map +1 -1
- package/dist/harnesses/engine/engine.d.ts +37 -4
- package/dist/harnesses/engine/engine.js +151 -20
- package/dist/harnesses/engine/engine.js.map +1 -1
- package/dist/harnesses/engine/persistence.d.ts +38 -4
- package/dist/harnesses/engine/persistence.js +71 -6
- package/dist/harnesses/engine/persistence.js.map +1 -1
- package/dist/harnesses/engine/state.d.ts +9 -0
- package/dist/harnesses/engine/state.js +23 -10
- package/dist/harnesses/engine/state.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/adapter.d.ts +2 -0
- package/dist/harnesses/impls/hermes-agent/adapter.js +8 -5
- package/dist/harnesses/impls/hermes-agent/adapter.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/bootstrap.d.ts +1 -0
- package/dist/harnesses/impls/hermes-agent/bootstrap.js +10 -3
- package/dist/harnesses/impls/hermes-agent/bootstrap.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/config-builder.d.ts +1 -1
- package/dist/harnesses/impls/hermes-agent/config-builder.js +4 -2
- package/dist/harnesses/impls/hermes-agent/config-builder.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/harness.d.ts +31 -3
- package/dist/harnesses/impls/hermes-agent/harness.js +84 -7
- package/dist/harnesses/impls/hermes-agent/harness.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/prompt.d.ts +6 -6
- package/dist/harnesses/impls/hermes-agent/prompt.js +6 -6
- package/dist/harnesses/impls/index.d.ts +2 -0
- package/dist/harnesses/impls/index.js +2 -0
- package/dist/harnesses/impls/index.js.map +1 -1
- package/dist/harnesses/impls/learner/adapters/claude-code.d.ts +17 -0
- package/dist/harnesses/impls/learner/adapters/claude-code.js +118 -14
- package/dist/harnesses/impls/learner/adapters/claude-code.js.map +1 -1
- package/dist/harnesses/impls/learner/adapters/codex-code.d.ts +9 -0
- package/dist/harnesses/impls/learner/adapters/codex-code.js +30 -8
- package/dist/harnesses/impls/learner/adapters/codex-code.js.map +1 -1
- package/dist/harnesses/impls/learner/harness.d.ts +41 -1
- package/dist/harnesses/impls/learner/harness.js +78 -4
- package/dist/harnesses/impls/learner/harness.js.map +1 -1
- package/dist/harnesses/impls/learner/harvest.d.ts +3 -1
- package/dist/harnesses/impls/learner/harvest.js +30 -6
- package/dist/harnesses/impls/learner/harvest.js.map +1 -1
- package/dist/harnesses/impls/learner/plugin-path.js +1 -0
- package/dist/harnesses/impls/learner/plugin-path.js.map +1 -1
- package/dist/harnesses/impls/learner/restoration-patch.d.ts +2 -2
- package/dist/harnesses/impls/learner/restoration-patch.js +25 -6
- package/dist/harnesses/impls/learner/restoration-patch.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js +21 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js +3 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.d.ts +74 -5
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js +103 -32
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.d.ts +2 -2
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js +3 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js.map +1 -1
- package/dist/harnesses/readiness-registry.d.ts +7 -0
- package/dist/harnesses/readiness-registry.js +9 -0
- package/dist/harnesses/readiness-registry.js.map +1 -1
- package/dist/harnesses/types.d.ts +14 -0
- package/dist/learner/revert-decision.d.ts +74 -0
- package/dist/learner/revert-decision.js +73 -0
- package/dist/learner/revert-decision.js.map +1 -0
- package/dist/learner/revert-stats.d.ts +38 -0
- package/dist/learner/revert-stats.js +86 -0
- package/dist/learner/revert-stats.js.map +1 -0
- package/dist/local-provider-url.d.ts +3 -0
- package/dist/local-provider-url.js +28 -0
- package/dist/local-provider-url.js.map +1 -0
- package/dist/main.js +199 -104
- package/dist/main.js.map +1 -1
- package/dist/mcp/get-codedigest-reward.d.ts +13 -0
- package/dist/mcp/get-codedigest-reward.js +23 -0
- package/dist/mcp/get-codedigest-reward.js.map +1 -0
- package/dist/mcp/server.js +23 -0
- package/dist/mcp/server.js.map +1 -1
- package/dist/observability/debug-report-assemble.d.ts +43 -0
- package/dist/observability/debug-report-assemble.js +80 -0
- package/dist/observability/debug-report-assemble.js.map +1 -0
- package/dist/observability/emit-event.d.ts +9 -2
- package/dist/observability/emit-event.js +36 -2
- package/dist/observability/emit-event.js.map +1 -1
- package/dist/observability/file-logger.d.ts +69 -0
- package/dist/observability/file-logger.js +177 -0
- package/dist/observability/file-logger.js.map +1 -0
- package/dist/observability/redact-secrets.d.ts +65 -0
- package/dist/observability/redact-secrets.js +300 -0
- package/dist/observability/redact-secrets.js.map +1 -0
- package/dist/observability/tar.d.ts +30 -0
- package/dist/observability/tar.js +102 -0
- package/dist/observability/tar.js.map +1 -0
- package/dist/plugins/learner/.claude-plugin/plugin.json +1 -1
- package/dist/plugins/learner/.codex-plugin/plugin.json +1 -1
- package/dist/plugins/learner/hooks/session-start +30 -1
- package/dist/plugins/learner/skills/learn/consolidator-prompt.md +22 -1
- package/dist/plugins/learner/skills/learn/promoter-prompt.md +72 -1
- package/dist/preflight/deployment-readiness.d.ts +147 -0
- package/dist/preflight/deployment-readiness.js +366 -0
- package/dist/preflight/deployment-readiness.js.map +1 -0
- package/dist/preflight/pidfile-liveness.d.ts +50 -0
- package/dist/preflight/pidfile-liveness.js +117 -0
- package/dist/preflight/pidfile-liveness.js.map +1 -0
- package/dist/preflight/rpc-network.d.ts +40 -0
- package/dist/preflight/rpc-network.js +67 -1
- package/dist/preflight/rpc-network.js.map +1 -1
- package/dist/rpc/transport.d.ts +145 -0
- package/dist/rpc/transport.js +319 -0
- package/dist/rpc/transport.js.map +1 -0
- package/dist/scripts/donation-consumption-acceptance.js +7 -28
- package/dist/scripts/donation-consumption-acceptance.js.map +1 -1
- package/dist/scripts/swe-rebench-v2-pytest-missing.json +16 -0
- package/dist/solver-nets/prediction-operator-ux.d.ts +1 -2
- package/dist/solver-nets/prediction-operator-ux.js +56 -53
- package/dist/solver-nets/prediction-operator-ux.js.map +1 -1
- package/dist/solver-nets/registry.d.ts +19 -1
- package/dist/solver-nets/registry.js +37 -24
- package/dist/solver-nets/registry.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.d.ts +76 -0
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.js +156 -0
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.js.map +1 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.d.ts +81 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.js +116 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.js.map +1 -0
- package/dist/solver-types/_swe-rebench-v2-pool.d.ts +9 -2
- package/dist/solver-types/_swe-rebench-v2-pool.js +15 -20
- package/dist/solver-types/_swe-rebench-v2-pool.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-state.d.ts +24 -0
- package/dist/solver-types/_swe-rebench-v2-state.js +33 -0
- package/dist/solver-types/_swe-rebench-v2-state.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-validated-pool.d.ts +116 -2
- package/dist/solver-types/_swe-rebench-v2-validated-pool.js +296 -21
- package/dist/solver-types/_swe-rebench-v2-validated-pool.js.map +1 -1
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json +20 -0
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.json +19 -0
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.screening-report.json +628 -0
- package/dist/solver-types/solver-type.d.ts +8 -0
- package/dist/solver-types/swe-rebench-v2-auto.d.ts +20 -11
- package/dist/solver-types/swe-rebench-v2-auto.js +64 -19
- package/dist/solver-types/swe-rebench-v2-auto.js.map +1 -1
- package/dist/solver-types/swe-rebench-v2.d.ts +10 -2
- package/dist/solver-types/swe-rebench-v2.js +233 -13
- package/dist/solver-types/swe-rebench-v2.js.map +1 -1
- package/dist/solvernets/daemon-init.d.ts +1 -1
- package/dist/solvernets/daemon-init.js +19 -4
- package/dist/solvernets/daemon-init.js.map +1 -1
- package/dist/solvernets/launched-record-dispatcher.d.ts +7 -0
- package/dist/solvernets/launched-record-dispatcher.js +10 -4
- package/dist/solvernets/launched-record-dispatcher.js.map +1 -1
- package/dist/solvernets/registry-client-erc8004.js +40 -37
- package/dist/solvernets/registry-client-erc8004.js.map +1 -1
- package/dist/solvernets/registry-client.d.ts +6 -0
- package/dist/solvernets/store.d.ts +2 -2
- package/dist/solvernets/store.js +7 -2
- package/dist/solvernets/store.js.map +1 -1
- package/dist/spend/ai-units-config.d.ts +49 -0
- package/dist/spend/ai-units-config.js +34 -0
- package/dist/spend/ai-units-config.js.map +1 -0
- package/dist/spend/ai-units.d.ts +140 -0
- package/dist/spend/ai-units.js +229 -0
- package/dist/spend/ai-units.js.map +1 -0
- package/dist/spend/cost-surface-status.d.ts +12 -0
- package/dist/spend/cost-surface-status.js +24 -0
- package/dist/spend/cost-surface-status.js.map +1 -0
- package/dist/spend/credential.d.ts +39 -0
- package/dist/spend/credential.js +71 -0
- package/dist/spend/credential.js.map +1 -0
- package/dist/spend/daemon-config.d.ts +13 -0
- package/dist/spend/daemon-config.js +24 -0
- package/dist/spend/daemon-config.js.map +1 -0
- package/dist/spend/pricing.d.ts +16 -0
- package/dist/spend/pricing.js +26 -0
- package/dist/spend/pricing.js.map +1 -0
- package/dist/spend/record.d.ts +13 -0
- package/dist/spend/record.js +43 -0
- package/dist/spend/record.js.map +1 -0
- package/dist/spend/usage.d.ts +27 -0
- package/dist/spend/usage.js +113 -0
- package/dist/spend/usage.js.map +1 -0
- package/dist/store/store.d.ts +187 -0
- package/dist/store/store.js +467 -4
- package/dist/store/store.js.map +1 -1
- package/dist/trajectory/transcript-parsers/codex-session.d.ts +12 -6
- package/dist/trajectory/transcript-parsers/codex-session.js +114 -13
- package/dist/trajectory/transcript-parsers/codex-session.js.map +1 -1
- package/dist/trajectory/transcript-parsers/types.d.ts +8 -8
- package/dist/trajectory/transcript-session-dirs.d.ts +18 -0
- package/dist/trajectory/transcript-session-dirs.js +85 -0
- package/dist/trajectory/transcript-session-dirs.js.map +1 -0
- package/dist/trajectory/transcript-watcher.d.ts +20 -1
- package/dist/trajectory/transcript-watcher.js +108 -32
- package/dist/trajectory/transcript-watcher.js.map +1 -1
- package/dist/tx-retry.d.ts +25 -0
- package/dist/tx-retry.js +95 -7
- package/dist/tx-retry.js.map +1 -1
- package/dist/types/payloads/portfolio-v0.d.ts +3 -3
- package/dist/types/payloads/prediction-apy-v0.d.ts +3 -3
- package/dist/types/payloads/prediction-v0.d.ts +12 -12
- package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.d.ts +108 -1
- package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.js +25 -1
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.d.ts +65 -0
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.js +123 -0
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.d.ts +2 -2
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.js +1 -1
- package/dist/vendor/@jinn-network/sdk/package.json +4 -0
- package/docker-compose.yml +3 -2
- package/package.json +30 -18
- package/plugins/learner/.claude-plugin/plugin.json +1 -1
- package/plugins/learner/.codex-plugin/plugin.json +1 -1
- package/plugins/learner/hooks/session-start +30 -1
- package/plugins/learner/skills/learn/consolidator-prompt.md +22 -1
- package/plugins/learner/skills/learn/promoter-prompt.md +72 -1
- package/plugins/swe-rebench-v2-diffmin/README.md +10 -9
- package/plugins/swe-rebench-v2-diffmin/jinn.plugin.json +1 -1
- package/plugins/swe-rebench-v2-diffmin/skills/diffmin/SKILL.md +15 -10
- package/plugins/swe-rebench-v2-diffmin/skills/test-map/SKILL.md +10 -12
- package/plugins/swe-rebench-v2-runtime/.claude-plugin/plugin.json +1 -1
- package/plugins/swe-rebench-v2-runtime/.codex-plugin/plugin.json +3 -3
- package/plugins/swe-rebench-v2-runtime/README.md +6 -6
- package/plugins/swe-rebench-v2-runtime/hooks/hooks.json +16 -0
- package/plugins/swe-rebench-v2-runtime/hooks/session-start +74 -0
- package/plugins/swe-rebench-v2-runtime/jinn.plugin.json +2 -3
- package/plugins/swe-rebench-v2-runtime/skills/task/SKILL.md +81 -0
- package/dist/dashboard/assets/index-BUlE8F3Y.js +0 -330
- package/dist/dashboard/assets/index-blqc7eqq.css +0 -32
- package/plugins/swe-rebench-v2-runtime/skills/orient/SKILL.md +0 -29
- package/plugins/swe-rebench-v2-runtime/skills/plan/SKILL.md +0 -53
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ordinary least-squares slope of resolved-rate vs cycle index for the
|
|
3
|
+
* train-arm slope measurement (issue #822, AC#1).
|
|
4
|
+
*
|
|
5
|
+
* The train-arm e2e evaluates a checkpoint against the held-out slate (#817)
|
|
6
|
+
* at intervals via the eval orchestrator (#818), collecting one
|
|
7
|
+
* `{ cycleIndex, rate }` point per interval (`rate` = passed / scorable, the
|
|
8
|
+
* Wilson point estimate). The slope of the least-squares fit is the headline
|
|
9
|
+
* "is the learner improving across the training sequence" number.
|
|
10
|
+
*
|
|
11
|
+
* It is deliberately a thin helper over the closed-form OLS slope
|
|
12
|
+
* (`cov(x,y) / var(x)`); the per-point confidence intervals come from
|
|
13
|
+
* `wilson.ts` — this module does NOT reimplement them. The slope sign alone is
|
|
14
|
+
* never a verdict at small N: a flat or slightly negative slope is "within
|
|
15
|
+
* noise", which the e2e surfaces via the §4.1 honesty caveat.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Least-squares slope of `rate` regressed on `cycleIndex`, computed with the
 * closed form `cov(x, y) / var(x)`.
 *
 * Returns 0 for fewer than two points (no line to fit) and for a degenerate
 * fit where every x is identical. A flat sequence yields exactly 0.
 *
 * The identical-x case is detected by comparing the x values directly rather
 * than by testing the accumulated variance: for non-integer x the rounded mean
 * can differ from the shared value by one ulp, which leaves a tiny non-zero
 * variance and would turn the quotient into pure rounding noise.
 *
 * @param {Array<{cycleIndex: number, rate: number}>} points Measured points.
 * @returns {number} The fitted slope, or 0 when no slope can be fitted.
 */
export function leastSquaresSlope(points) {
    const n = points.length;
    if (n < 2)
        return 0;
    let sumX = 0;
    let sumY = 0;
    let firstX;
    let seenFirst = false;
    let xVaries = false;
    for (const { cycleIndex, rate } of points) {
        if (!seenFirst) {
            firstX = cycleIndex;
            seenFirst = true;
        }
        else if (cycleIndex !== firstX) {
            xVaries = true;
        }
        sumX += cycleIndex;
        sumY += rate;
    }
    // Every x identical: zero variance by definition, whatever the rounded
    // mean says.
    if (!xVaries)
        return 0;
    const meanX = sumX / n;
    const meanY = sumY / n;
    let cov = 0;
    let varX = 0;
    for (const { cycleIndex, rate } of points) {
        const dx = cycleIndex - meanX;
        cov += dx * (rate - meanY);
        varX += dx * dx;
    }
    // Still guard the division: distinct but extremely close x values can
    // underflow the squared deviations to 0.
    if (varX === 0)
        return 0;
    return cov / varX;
}
|
|
46
|
+
//# sourceMappingURL=slope.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"slope.js","sourceRoot":"","sources":["../../src/eval/slope.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AASH;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAAmB;IACnD,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAC1C,IAAI,IAAI,UAAU,CAAC;QACnB,IAAI,IAAI,IAAI,CAAC;IACf,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC;IACvB,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC;IACvB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAC1C,MAAM,EAAE,GAAG,UAAU,GAAG,KAAK,CAAC;QAC9B,GAAG,IAAI,EAAE,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;QAC3B,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;IAClB,CAAC;IACD,IAAI,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzB,OAAO,GAAG,GAAG,IAAI,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Train-sequence builder with the no-train/test-overlap guard for the
|
|
3
|
+
* train-arm slope e2e (issue #822, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The learner-full-cycle e2e drives `runCycle` DIRECTLY (it does not post tasks
|
|
6
|
+
* through the generator), so the generator's `excludeHeldOutSlate` train-stream
|
|
7
|
+
* chokepoint (#817) is BYPASSED. This builder is therefore the load-bearing
|
|
8
|
+
* AC#2 mechanism for the e2e: it selects the N distinct training instances from
|
|
9
|
+
* the pool with the held-out slate excluded, and asserts the resulting
|
|
10
|
+
* sequence is disjoint from the slate (fail-loud, never a silent drop).
|
|
11
|
+
*
|
|
12
|
+
* It reuses `excludeHeldOutSlate` from the #817 primitive rather than
|
|
13
|
+
* reimplementing the exclusion. Selection is deterministic (instance-id sorted)
|
|
14
|
+
* so a given pool yields a stable sequence across runs.
|
|
15
|
+
*/
|
|
16
|
+
import type { PoolTask } from '../solver-types/_swe-rebench-v2-pool.js';
|
|
17
|
+
/** Thrown when a chosen training sequence intersects the held-out slate (AC#2). */
|
|
18
|
+
export declare class TrainTestOverlapError extends Error {
|
|
19
|
+
/** The training ids that were found in the held-out slate; also listed in the error message. */
readonly overlap: string[];
|
|
20
|
+
/** @param overlap Training ids that intersect the held-out slate. */
constructor(overlap: string[]);
|
|
21
|
+
}
|
|
22
|
+
/**
 * Assert a set of training ids is disjoint from the slate, else throw loud.
 *
 * @param trainIds Candidate training instance ids.
 * @param slateIds Ids of the held-out slate.
 * @throws TrainTestOverlapError carrying every id of `trainIds` that is in `slateIds`.
 */
|
|
23
|
+
export declare function assertNoOverlap(trainIds: string[], slateIds: Set<string>): void;
|
|
24
|
+
/**
 * Build the ordered list of training tasks for the train-arm e2e.
 *
 * The pool is first filtered through `excludeHeldOutSlate`. With
 * `explicitIds`, the ids are checked against the slate and then resolved
 * against the filtered pool, in the order given. Otherwise the filtered pool
 * is sorted by `instance_id` and the first `count` tasks are returned.
 *
 * Throws `TrainTestOverlapError` when a chosen id is in the slate, and a plain
 * `Error` when an explicit id is not in the eligible pool or when fewer than
 * `count` tasks are eligible.
 */
export declare function buildTrainSequence(args: {
|
|
25
|
+
pool: PoolTask[];
|
|
26
|
+
slateIds: Set<string>;
|
|
27
|
+
/** Number of distinct training tasks (= N training cycles). Not consulted when `explicitIds` is set. */
|
|
28
|
+
count: number;
|
|
29
|
+
/**
|
|
30
|
+
* Optional explicit, hand-picked instance_ids (in order). When set, the
|
|
31
|
+
* builder still runs the no-overlap guard and resolves each id against the
|
|
32
|
+
* pool — used to fail-loud on a hand-edited sequence that overlaps the slate.
|
|
33
|
+
*/
|
|
34
|
+
explicitIds?: string[];
|
|
35
|
+
}): PoolTask[];
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Train-sequence builder with the no-train/test-overlap guard for the
|
|
3
|
+
* train-arm slope e2e (issue #822, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The learner-full-cycle e2e drives `runCycle` DIRECTLY (it does not post tasks
|
|
6
|
+
* through the generator), so the generator's `excludeHeldOutSlate` train-stream
|
|
7
|
+
* chokepoint (#817) is BYPASSED. This builder is therefore the load-bearing
|
|
8
|
+
* AC#2 mechanism for the e2e: it selects the N distinct training instances from
|
|
9
|
+
* the pool with the held-out slate excluded, and asserts the resulting
|
|
10
|
+
* sequence is disjoint from the slate (fail-loud, never a silent drop).
|
|
11
|
+
*
|
|
12
|
+
* It reuses `excludeHeldOutSlate` from the #817 primitive rather than
|
|
13
|
+
* reimplementing the exclusion. Selection is deterministic (instance-id sorted)
|
|
14
|
+
* so a given pool yields a stable sequence across runs.
|
|
15
|
+
*/
|
|
16
|
+
import { excludeHeldOutSlate } from '../solver-types/_swe-rebench-v2-held-out-slate.js';
|
|
17
|
+
/**
 * Thrown when a chosen training sequence intersects the held-out slate (AC#2).
 *
 * The offending ids are listed in the message and exposed on `overlap`.
 */
export class TrainTestOverlapError extends Error {
    /**
     * Snapshot of the overlapping instance ids, taken at construction time so
     * it always matches the ids listed in `message`.
     */
    overlap;
    /**
     * @param {string[]} overlap Training ids that are also in the held-out slate.
     */
    constructor(overlap) {
        // Copy before formatting: the caller may keep mutating its own array,
        // and the error must keep describing what was actually detected.
        const ids = [...overlap];
        super(`train/test overlap: training sequence includes held-out slate instance(s) ` +
            `${ids.join(', ')} — refusing to train on the eval slate (AC#2). ` +
            `The slate must stay out-of-sample for the slope to mean anything.`);
        this.overlap = ids;
        this.name = 'TrainTestOverlapError';
    }
}
|
|
28
|
+
/**
 * Fail loudly when any training id also appears in the held-out slate.
 *
 * @param {string[]} trainIds Candidate training instance ids.
 * @param {Set<string>} slateIds Ids of the held-out slate.
 * @throws {TrainTestOverlapError} Carrying every id of `trainIds` that is in
 *   `slateIds`, in input order.
 */
export function assertNoOverlap(trainIds, slateIds) {
    const clashing = [];
    trainIds.forEach((candidate) => {
        if (slateIds.has(candidate)) {
            clashing.push(candidate);
        }
    });
    if (clashing.length === 0) {
        return;
    }
    throw new TrainTestOverlapError(clashing);
}
|
|
34
|
+
/**
 * Build the ordered list of training tasks for the train-arm e2e.
 *
 * The pool is first filtered through `excludeHeldOutSlate`. With
 * `args.explicitIds`, the ids are guarded against the slate and then resolved
 * against the filtered pool in the order given (`args.count` is not consulted
 * on that path). Otherwise the filtered pool is sorted by `instance_id` and the
 * first `args.count` tasks are returned.
 *
 * @param {{pool: Array, slateIds: Set<string>, count: number, explicitIds?: string[]}} args
 * @returns {Array} The selected pool tasks, in training order.
 * @throws {TrainTestOverlapError} When a chosen id is in the held-out slate.
 * @throws {Error} When an explicit id is not in the eligible pool, or when
 *   fewer than `args.count` tasks are eligible.
 */
export function buildTrainSequence(args) {
    const eligible = excludeHeldOutSlate(args.pool, args.slateIds);
    if (args.explicitIds) {
        // Guard the hand-picked sequence against the slate BEFORE resolving, so a
        // slate-overlapping id is a TrainTestOverlapError, not a "not eligible".
        assertNoOverlap(args.explicitIds, args.slateIds);
        const byId = new Map(eligible.map((t) => [t.instance_id, t]));
        return args.explicitIds.map((id) => {
            const task = byId.get(id);
            if (!task) {
                throw new Error(`explicit training instance ${id} not in the eligible pool`);
            }
            return task;
        });
    }
    // Deterministic selection: instance-id sorted, first `count`.
    // Plain code-unit comparison is used instead of `localeCompare`, whose
    // result depends on the runtime's default locale and ICU data and so could
    // yield a different sequence on a different machine.
    const compareByInstanceId = (a, b) => {
        if (a.instance_id < b.instance_id)
            return -1;
        if (a.instance_id > b.instance_id)
            return 1;
        return 0;
    };
    const sorted = [...eligible].sort(compareByInstanceId);
    if (sorted.length < args.count) {
        throw new Error(`train sequence needs ${args.count} distinct tasks but only ${sorted.length} eligible ` +
            `(pool size ${args.pool.length} minus ${args.slateIds.size} held-out slate instance(s))`);
    }
    const picked = sorted.slice(0, args.count);
    // Belt and braces: the filtered pool should already be slate-free.
    assertNoOverlap(picked.map((t) => t.instance_id), args.slateIds);
    return picked;
}
|
|
59
|
+
//# sourceMappingURL=train-sequence.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"train-sequence.js","sourceRoot":"","sources":["../../src/eval/train-sequence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,EAAE,mBAAmB,EAAE,MAAM,mDAAmD,CAAC;AAExF,mFAAmF;AACnF,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IAClB;IAA5B,YAA4B,OAAiB;QAC3C,KAAK,CACH,4EAA4E;YAC1E,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,iDAAiD;YACtE,mEAAmE,CACtE,CAAC;QALwB,YAAO,GAAP,OAAO,CAAU;QAM3C,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;IACtC,CAAC;CACF;AAED,gFAAgF;AAChF,MAAM,UAAU,eAAe,CAAC,QAAkB,EAAE,QAAqB;IACvE,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,MAAM,IAAI,qBAAqB,CAAC,OAAO,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAWlC;IACC,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAE9D,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACrB,0EAA0E;QAC1E,yEAAyE;QACzE,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE;YACjC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC1B,IAAI,CAAC,IAAI,EAAE,CAAC;gBACV,MAAM,IAAI,KAAK,CAAC,8BAA8B,EAAE,2BAA2B,CAAC,CAAC;YAC/E,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8DAA8D;IAC9D,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;IACxF,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,wBAAwB,IAAI,CAAC,KAAK,4BAA4B,MAAM,CAAC,MAAM,YAAY;YACrF,cAAc,IAAI,CAAC,IAAI,CAAC,MAAM,UAAU,IAAI,CAAC,QAAQ,CAAC,IAAI,8BAA8B,CAC3F,CAAC;IACJ,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3C,eAAe,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IACjE,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wilson score interval + resolved-rate comparison for the `jinn eval`
|
|
3
|
+
* held-out checkpoint orchestrator (issue #818, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The Wilson score interval is a binomial proportion confidence interval that
|
|
6
|
+
* behaves well at the extremes (p=0, p=1) and for small n — unlike the naive
|
|
7
|
+
* normal-approximation interval. We write it small (no stats dependency, per
|
|
8
|
+
* repo convention): the formula is ~10 lines.
|
|
9
|
+
*
|
|
10
|
+
* Per log/decisions/2026-05-28-rl-eval-measurement.md §4: this is v1-simple.
|
|
11
|
+
* Only *large* deltas are trustworthy — we encode that as "the child and
|
|
12
|
+
* parent intervals do not overlap." No seed control, no multi-run averaging.
|
|
13
|
+
*/
|
|
14
|
+
/**
 * A proportion estimate together with its confidence bounds — the shape
 * returned by `wilsonInterval`.
 */
export interface Interval {
|
|
15
|
+
/** Observed point estimate, passed / scorable (0 when scorable=0). */
|
|
16
|
+
p: number;
|
|
17
|
+
/** Lower bound, clamped to [0, 1]. */
|
|
18
|
+
lo: number;
|
|
19
|
+
/** Upper bound, clamped to [0, 1]. */
|
|
20
|
+
hi: number;
|
|
21
|
+
}
|
|
22
|
+
/**
 * Wilson score interval for `passed` successes out of `scorable` trials.
 * `scorable === 0` returns a degenerate `{ p: 0, lo: 0, hi: 0 }` (no NaN).
 *
 * @param passed - number of successes.
 * @param scorable - number of trials.
 * @param z - optional z-score; the implementation defaults it to 1.96
 *   (two-sided 95% interval).
 * @returns the point estimate `p` with `lo` / `hi` clamped to [0, 1].
 */
|
|
26
|
+
export declare function wilsonInterval(passed: number, scorable: number, z?: number): Interval;
|
|
27
|
+
/** Label attached to a rate comparison: `'trustworthy'` or `'within-noise'`. */
export type RateVerdict = 'trustworthy' | 'within-noise';
|
|
28
|
+
/** Result of `compareRates`: both intervals, their point-estimate gap, and a verdict. */
export interface RateComparison {
|
|
29
|
+
/** Wilson interval computed from the child's passed / scorable counts. */
child: Interval;
|
|
30
|
+
/** Wilson interval computed from the parent's passed / scorable counts. */
parent: Interval;
|
|
31
|
+
/** child.p − parent.p (point-estimate difference, can be negative). */
|
|
32
|
+
delta: number;
|
|
33
|
+
/**
 * 'trustworthy' iff the two Wilson intervals do NOT overlap; otherwise
 * 'within-noise'. v1-simple: only disjoint intervals justify a claim.
 */
|
|
37
|
+
verdict: RateVerdict;
|
|
38
|
+
}
|
|
39
|
+
/**
 * Compare a child's resolved rate against its parent's.
 *
 * Each side is turned into a Wilson interval from its `passed` / `scorable`
 * counts (default z); the verdict is derived from whether the two intervals
 * overlap.
 *
 * @param child - pass / scorable counts for the child.
 * @param parent - pass / scorable counts for the parent.
 * @returns both intervals, `delta = child.p − parent.p`, and the verdict.
 */
export declare function compareRates(child: {
|
|
40
|
+
passed: number;
|
|
41
|
+
scorable: number;
|
|
42
|
+
}, parent: {
|
|
43
|
+
passed: number;
|
|
44
|
+
scorable: number;
|
|
45
|
+
}): RateComparison;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wilson score interval + resolved-rate comparison for the `jinn eval`
|
|
3
|
+
* held-out checkpoint orchestrator (issue #818, AC#2).
|
|
4
|
+
*
|
|
5
|
+
* The Wilson score interval is a binomial proportion confidence interval that
|
|
6
|
+
* behaves well at the extremes (p=0, p=1) and for small n — unlike the naive
|
|
7
|
+
* normal-approximation interval. We write it small (no stats dependency, per
|
|
8
|
+
* repo convention): the formula is ~10 lines.
|
|
9
|
+
*
|
|
10
|
+
* Per log/decisions/2026-05-28-rl-eval-measurement.md §4: this is v1-simple.
|
|
11
|
+
* Only *large* deltas are trustworthy — we encode that as "the child and
|
|
12
|
+
* parent intervals do not overlap." No seed control, no multi-run averaging.
|
|
13
|
+
*/
|
|
14
|
+
/** Two-sided z for a 95% interval (1.96 ≈ Φ⁻¹(0.975)). */
const DEFAULT_Z = 1.96;
/**
 * Wilson score interval for `passed` successes out of `scorable` trials.
 *
 * A `scorable` that is not a positive number (0, negative, NaN) carries no
 * usable trials and returns the degenerate `{ p: 0, lo: 0, hi: 0 }` instead
 * of propagating NaN.
 *
 * `passed` is not validated; callers are expected to pass
 * `0 <= passed <= scorable`.
 *
 * @param passed - number of successes.
 * @param scorable - number of trials.
 * @param z - z-score of the interval; defaults to `DEFAULT_Z` (95%).
 * @returns `{ p, lo, hi }` with `lo` / `hi` clamped to [0, 1].
 */
export function wilsonInterval(passed, scorable, z = DEFAULT_Z) {
    // `!(x > 0)` also catches NaN, which `x <= 0` would let through.
    if (!(scorable > 0)) {
        return { p: 0, lo: 0, hi: 0 };
    }
    const n = scorable;
    const p = passed / n;
    const z2 = z * z;
    const denom = 1 + z2 / n;
    const centre = p + z2 / (2 * n);
    const margin = z * Math.sqrt((p * (1 - p)) / n + z2 / (4 * n * n));
    const lo = (centre - margin) / denom;
    const hi = (centre + margin) / denom;
    return {
        p,
        // Clamp: rounding can push the bounds a hair outside [0, 1] at p=0 / p=1.
        lo: Math.max(0, lo),
        hi: Math.min(1, hi),
    };
}
/**
 * Compare a child's resolved rate against its parent's.
 *
 * The verdict is 'trustworthy' only when both sides have at least one
 * scorable trial AND their Wilson intervals do not overlap. A side with no
 * scorable trials yields the degenerate `[0, 0]` interval, which would
 * otherwise look disjoint from any interval with `lo > 0` even though it
 * represents no evidence at all; that case is reported as 'within-noise'.
 *
 * @param child - `{ passed, scorable }` counts for the child.
 * @param parent - `{ passed, scorable }` counts for the parent.
 * @returns `{ child, parent, delta, verdict }` where `delta = child.p − parent.p`
 *   (can be negative).
 */
export function compareRates(child, parent) {
    const c = wilsonInterval(child.passed, child.scorable);
    const p = wilsonInterval(parent.passed, parent.scorable);
    const bothMeasured = child.scorable > 0 && parent.scorable > 0;
    const disjoint = c.lo > p.hi || p.lo > c.hi;
    return {
        child: c,
        parent: p,
        delta: c.p - p.p,
        verdict: bothMeasured && disjoint ? 'trustworthy' : 'within-noise',
    };
}
|
|
48
|
+
//# sourceMappingURL=wilson.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wilson.js","sourceRoot":"","sources":["../../src/eval/wilson.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,0DAA0D;AAC1D,MAAM,SAAS,GAAG,IAAI,CAAC;AAWvB;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,MAAc,EAAE,QAAgB,EAAE,IAAY,SAAS;IACpF,IAAI,QAAQ,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC;IAClD,MAAM,CAAC,GAAG,QAAQ,CAAC;IACnB,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACjB,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IACzB,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChC,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACnE,MAAM,EAAE,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,MAAM,EAAE,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,OAAO;QACL,CAAC;QACD,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC;QACnB,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC;KACpB,CAAC;AACJ,CAAC;AAgBD,MAAM,UAAU,YAAY,CAC1B,KAA2C,EAC3C,MAA4C;IAE5C,MAAM,CAAC,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvD,MAAM,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IACzD,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC;IAC5C,OAAO;QACL,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,CAAC;QACT,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAChB,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,cAAc;KACnD,CAAC;AACJ,CAAC"}
|
package/dist/events/types.d.ts
CHANGED
|
@@ -21,7 +21,7 @@ export declare const StructuredEventSchema: z.ZodObject<{
|
|
|
21
21
|
}, "strip", z.ZodTypeAny, {
|
|
22
22
|
message: string;
|
|
23
23
|
id: string;
|
|
24
|
-
kind: "reward" | "error" | "
|
|
24
|
+
kind: "reward" | "error" | "intent" | "fleet" | "system" | "log";
|
|
25
25
|
schemaVersion: 1;
|
|
26
26
|
ts: string;
|
|
27
27
|
details?: Record<string, unknown> | undefined;
|
|
@@ -31,7 +31,7 @@ export declare const StructuredEventSchema: z.ZodObject<{
|
|
|
31
31
|
}, {
|
|
32
32
|
message: string;
|
|
33
33
|
id: string;
|
|
34
|
-
kind: "reward" | "error" | "
|
|
34
|
+
kind: "reward" | "error" | "intent" | "fleet" | "system" | "log";
|
|
35
35
|
schemaVersion: 1;
|
|
36
36
|
ts: string;
|
|
37
37
|
details?: Record<string, unknown> | undefined;
|
|
@@ -17,14 +17,15 @@
|
|
|
17
17
|
* Heuristic shape per the spec:
|
|
18
18
|
* per-1k-token rate × typical task length, by model id.
|
|
19
19
|
*
|
|
20
|
-
* Subscription
|
|
21
|
-
* `
|
|
22
|
-
*
|
|
23
|
-
* the confirmation gate, regardless of the model selected.
|
|
20
|
+
* Subscription billing paths (`usesPaidApiKey: false` from daemon
|
|
21
|
+
* `/v1/status` `costSurface`) show "Included in subscription, no per-task
|
|
22
|
+
* API cost" and **do not** trigger the confirmation gate.
|
|
24
23
|
*
|
|
25
24
|
* To add a new model: append an entry to `MODEL_COST_TABLE`. The id is the
|
|
26
|
-
* exact `model` string persisted to `joinedSolverNets[<cid>].model`.
|
|
27
|
-
*
|
|
25
|
+
* exact `model` string persisted to `joinedSolverNets[<cid>].model`. Paid
|
|
26
|
+
* vs subscription is resolved on the daemon via `resolveCredentialId` and
|
|
27
|
+
* exposed on `GET /v1/status` as `costSurface.harnesses[<canonical>]`.
|
|
28
|
+
* Keep the units
|
|
28
29
|
* consistent: token counts in tokens, rates in USD-per-1k-tokens.
|
|
29
30
|
*
|
|
30
31
|
* Units note: Anthropic + OpenAI publish prices per million tokens; we
|
|
@@ -69,21 +70,6 @@ export interface ModelCostEntry {
|
|
|
69
70
|
* `claudeModels.ts` is the source of truth for which ids appear here.
|
|
70
71
|
*/
|
|
71
72
|
export declare const MODEL_COST_TABLE: Readonly<Record<string, ModelCostEntry>>;
|
|
72
|
-
/**
|
|
73
|
-
* Harness-level billing classification.
|
|
74
|
-
*
|
|
75
|
-
* `subscriptionPath: true` means the harness shells out to a
|
|
76
|
-
* subscription-billed CLI (Claude Code, Codex) and the operator does NOT
|
|
77
|
-
* incur per-task API charges — the cost surface is suppressed entirely
|
|
78
|
-
* and the confirmation gate is never triggered.
|
|
79
|
-
*
|
|
80
|
-
* `subscriptionPath: false` means the harness routes to a paid API key
|
|
81
|
-
* (Hermes via OpenRouter / Anthropic API / Nous Portal). The cost surface
|
|
82
|
-
* is shown and the gate fires above the configured threshold.
|
|
83
|
-
*/
|
|
84
|
-
export declare const HARNESS_BILLING: Readonly<Record<string, {
|
|
85
|
-
subscriptionPath: boolean;
|
|
86
|
-
}>>;
|
|
87
73
|
/** Default per-task USD threshold above which the confirmation gate fires. */
|
|
88
74
|
export declare const DEFAULT_HIGH_COST_THRESHOLD_USD = 1;
|
|
89
75
|
export interface CostEstimate {
|
|
@@ -106,13 +92,6 @@ export interface CostEstimate {
|
|
|
106
92
|
* that as "unknown, don't surface a number" rather than rendering $0.
|
|
107
93
|
*/
|
|
108
94
|
export declare function estimateModelCost(modelId: string): CostEstimate | null;
|
|
109
|
-
/**
|
|
110
|
-
* `true` when the harness routes through a paid API key. Subscription
|
|
111
|
-
* harnesses (Claude Code, Codex) return `false`; unknown harnesses
|
|
112
|
-
* conservatively return `true` so a misnamed harness doesn't silently
|
|
113
|
-
* skip the cost surface.
|
|
114
|
-
*/
|
|
115
|
-
export declare function harnessUsesPaidApiKey(harness: string | undefined): boolean;
|
|
116
95
|
export interface CostSurfaceDecision {
|
|
117
96
|
/** Whether to show a numeric cost-per-task estimate at all. */
|
|
118
97
|
showEstimate: boolean;
|
|
@@ -133,11 +112,11 @@ export interface CostSurfaceDecision {
|
|
|
133
112
|
*
|
|
134
113
|
* Defaults:
|
|
135
114
|
* - threshold: $1 / task (see DEFAULT_HIGH_COST_THRESHOLD_USD).
|
|
136
|
-
* -
|
|
137
|
-
* - unknown model on a paid
|
|
115
|
+
* - `usesPaidApiKey: false` → suppress surface + skip gate.
|
|
116
|
+
* - unknown model on a paid path → show estimate slot (caller may
|
|
138
117
|
* render "estimate unavailable") but DO NOT trigger the gate.
|
|
139
118
|
*/
|
|
140
|
-
export declare function decideCostSurface(
|
|
119
|
+
export declare function decideCostSurface(usesPaidApiKey: boolean, modelId: string | undefined, thresholdUsd?: number): CostSurfaceDecision;
|
|
141
120
|
/**
|
|
142
121
|
* Format a USD amount for compact display in the dashboard. Picks the
|
|
143
122
|
* smallest fraction count that still distinguishes the value from $0.
|
|
@@ -17,14 +17,15 @@
|
|
|
17
17
|
* Heuristic shape per the spec:
|
|
18
18
|
* per-1k-token rate × typical task length, by model id.
|
|
19
19
|
*
|
|
20
|
-
* Subscription
|
|
21
|
-
* `
|
|
22
|
-
*
|
|
23
|
-
* the confirmation gate, regardless of the model selected.
|
|
20
|
+
* Subscription billing paths (`usesPaidApiKey: false` from daemon
|
|
21
|
+
* `/v1/status` `costSurface`) show "Included in subscription, no per-task
|
|
22
|
+
* API cost" and **do not** trigger the confirmation gate.
|
|
24
23
|
*
|
|
25
24
|
* To add a new model: append an entry to `MODEL_COST_TABLE`. The id is the
|
|
26
|
-
* exact `model` string persisted to `joinedSolverNets[<cid>].model`.
|
|
27
|
-
*
|
|
25
|
+
* exact `model` string persisted to `joinedSolverNets[<cid>].model`. Paid
|
|
26
|
+
* vs subscription is resolved on the daemon via `resolveCredentialId` and
|
|
27
|
+
* exposed on `GET /v1/status` as `costSurface.harnesses[<canonical>]`.
|
|
28
|
+
* Keep the units
|
|
28
29
|
* consistent: token counts in tokens, rates in USD-per-1k-tokens.
|
|
29
30
|
*
|
|
30
31
|
* Units note: Anthropic + OpenAI publish prices per million tokens; we
|
|
@@ -43,7 +44,6 @@
|
|
|
43
44
|
* These are heuristics. Provider-API reconciliation (actual usage) is
|
|
44
45
|
* deferred to the P1 follow-up tracked in #331.
|
|
45
46
|
*/
|
|
46
|
-
import { CLAUDE_CODE_HARNESS, CODEX_HARNESS, HERMES_AGENT_HARNESS, canonicalHarnessName, } from './names.js';
|
|
47
47
|
/**
|
|
48
48
|
* Per-model cost entries. Keyed by the exact `model` id persisted to
|
|
49
49
|
* `joinedSolverNets[<cid>].model`. The dashboard model dropdown in
|
|
@@ -187,23 +187,6 @@ export const MODEL_COST_TABLE = {
|
|
|
187
187
|
typicalOutputTokens: 20_000,
|
|
188
188
|
},
|
|
189
189
|
};
|
|
190
|
-
/**
|
|
191
|
-
* Harness-level billing classification.
|
|
192
|
-
*
|
|
193
|
-
* `subscriptionPath: true` means the harness shells out to a
|
|
194
|
-
* subscription-billed CLI (Claude Code, Codex) and the operator does NOT
|
|
195
|
-
* incur per-task API charges — the cost surface is suppressed entirely
|
|
196
|
-
* and the confirmation gate is never triggered.
|
|
197
|
-
*
|
|
198
|
-
* `subscriptionPath: false` means the harness routes to a paid API key
|
|
199
|
-
* (Hermes via OpenRouter / Anthropic API / Nous Portal). The cost surface
|
|
200
|
-
* is shown and the gate fires above the configured threshold.
|
|
201
|
-
*/
|
|
202
|
-
export const HARNESS_BILLING = {
|
|
203
|
-
[CLAUDE_CODE_HARNESS]: { subscriptionPath: true },
|
|
204
|
-
[CODEX_HARNESS]: { subscriptionPath: true },
|
|
205
|
-
[HERMES_AGENT_HARNESS]: { subscriptionPath: false },
|
|
206
|
-
};
|
|
207
190
|
/** Default per-task USD threshold above which the confirmation gate fires. */
|
|
208
191
|
export const DEFAULT_HIGH_COST_THRESHOLD_USD = 1;
|
|
209
192
|
/**
|
|
@@ -226,33 +209,18 @@ export function estimateModelCost(modelId) {
|
|
|
226
209
|
entry,
|
|
227
210
|
};
|
|
228
211
|
}
|
|
229
|
-
/**
|
|
230
|
-
* `true` when the harness routes through a paid API key. Subscription
|
|
231
|
-
* harnesses (Claude Code, Codex) return `false`; unknown harnesses
|
|
232
|
-
* conservatively return `true` so a misnamed harness doesn't silently
|
|
233
|
-
* skip the cost surface.
|
|
234
|
-
*/
|
|
235
|
-
export function harnessUsesPaidApiKey(harness) {
|
|
236
|
-
if (!harness)
|
|
237
|
-
return false;
|
|
238
|
-
const canonical = canonicalHarnessName(harness);
|
|
239
|
-
const billing = HARNESS_BILLING[canonical];
|
|
240
|
-
if (!billing)
|
|
241
|
-
return true;
|
|
242
|
-
return !billing.subscriptionPath;
|
|
243
|
-
}
|
|
244
212
|
/**
|
|
245
213
|
* Decide what the cost surface should render for a given harness + model
|
|
246
214
|
* combination, plus whether the confirmation gate fires.
|
|
247
215
|
*
|
|
248
216
|
* Defaults:
|
|
249
217
|
* - threshold: $1 / task (see DEFAULT_HIGH_COST_THRESHOLD_USD).
|
|
250
|
-
* -
|
|
251
|
-
* - unknown model on a paid
|
|
218
|
+
* - `usesPaidApiKey: false` → suppress surface + skip gate.
|
|
219
|
+
* - unknown model on a paid path → show estimate slot (caller may
|
|
252
220
|
* render "estimate unavailable") but DO NOT trigger the gate.
|
|
253
221
|
*/
|
|
254
|
-
export function decideCostSurface(
|
|
255
|
-
if (!
|
|
222
|
+
export function decideCostSurface(usesPaidApiKey, modelId, thresholdUsd = DEFAULT_HIGH_COST_THRESHOLD_USD) {
|
|
223
|
+
if (!usesPaidApiKey) {
|
|
256
224
|
return {
|
|
257
225
|
showEstimate: false,
|
|
258
226
|
estimate: null,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cost-estimates.js","sourceRoot":"","sources":["../../src/harnesses/cost-estimates.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"cost-estimates.js","sourceRoot":"","sources":["../../src/harnesses/cost-estimates.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AAwBH;;;;GAIG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAA6C;IACxE,mEAAmE;IACnE,iBAAiB,EAAE;QACjB,QAAQ,EAAE,WAAW;QACrB,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,mBAAmB,EAAE;QACnB,QAAQ,EAAE,WAAW;QACrB,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,2BAA2B,EAAE;QAC3B,QAAQ,EAAE,WAAW;QACrB,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,uEAAuE;IACvE,2BAA2B,EAAE;QAC3B,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,6BAA6B,EAAE;QAC7B,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IAED,sCAAsC;IACtC,SAAS,EAAE;QACT,QAAQ,EAAE,QAAQ;QAClB,gBAAgB,EAAE,OAAO;QACzB,iBAAiB,EAAE,IAAI;QACvB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,cAAc,EAAE;QACd,QAAQ,EAAE,QAAQ;QAClB,gBAAgB,EAAE,OAAO;QACzB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,SAAS,EAAE;QACT,QAAQ,EAAE,QAAQ;QAClB,oEAAoE;QACpE,kEAAkE;QAClE,kCAAkC;QAClC,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,IAAI;QACvB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,eAAe,EAAE;QACf,QAAQ,EAAE,QAAQ;QAClB,gBAAgB,EAAE,OAAO;QACzB,iBAAiB,EAAE,IAAI;QACvB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,qBAAqB,EAAE;QACrB,QAAQ,EAAE,QAAQ;QAClB,gBAAgB,EAAE,OAAO;QACzB,iBAAiB,EAAE,IAAI;QACvB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IAED,sDAAsD;IACtD,uEAAuE;IACvE,oEAAoE;IACpE,iEAAiE;IACjE,qBAAqB,EAAE;QACrB,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,0BAA0B,EAAE;QAC1B,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,MAAM;QACxB,iBAAiB,EAAE,MAAM;QACzB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,4BAA4B,EAAE
;QAC5B,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,MAAM;QACxB,iBAAiB,EAAE,MAAM;QACzB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,8BAA8B,EAAE;QAC9B,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,MAAM;QACxB,iBAAiB,EAAE,MAAM;QACzB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,sBAAsB,EAAE;QACtB,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,MAAM;QACxB,iBAAiB,EAAE,MAAM;QACzB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,sBAAsB,EAAE;QACtB,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,KAAK;QACvB,iBAAiB,EAAE,KAAK;QACxB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,sBAAsB,EAAE;QACtB,QAAQ,EAAE,YAAY;QACtB,gBAAgB,EAAE,MAAM;QACxB,iBAAiB,EAAE,MAAM;QACzB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;IACD,4BAA4B,EAAE;QAC5B,QAAQ,EAAE,MAAM;QAChB,gBAAgB,EAAE,MAAM;QACxB,iBAAiB,EAAE,MAAM;QACzB,kBAAkB,EAAE,MAAM;QAC1B,mBAAmB,EAAE,MAAM;KAC5B;CACF,CAAC;AAEF,8EAA8E;AAC9E,MAAM,CAAC,MAAM,+BAA+B,GAAG,CAAC,CAAC;AAiBjD;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,QAAQ,GAAG,CAAC,KAAK,CAAC,kBAAkB,GAAG,IAAI,CAAC,GAAG,KAAK,CAAC,gBAAgB,CAAC;IAC5E,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,mBAAmB,GAAG,IAAI,CAAC,GAAG,KAAK,CAAC,iBAAiB,CAAC;IAC/E,OAAO;QACL,GAAG,EAAE,QAAQ,GAAG,SAAS;QACzB,QAAQ;QACR,SAAS;QACT,kBAAkB,EAAE,KAAK,CAAC,kBAAkB;QAC5C,mBAAmB,EAAE,KAAK,CAAC,mBAAmB;QAC9C,KAAK;KACN,CAAC;AACJ,CAAC;AAiBD;;;;;;;;;GASG;AACH,MAAM,UAAU,iBAAiB,CAC/B,cAAuB,EACvB,OAA2B,EAC3B,eAAuB,+BAA+B;IAEtD,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO;YACL,YAAY,EAAE,KAAK;YACnB,QAAQ,EAAE,IAAI;YACd,oBAAoB,EAAE,KAAK;YAC3B,gBAAgB,EAAE,iDAAiD;SACpE,CAAC;IACJ,CAAC;IACD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC7D,MAAM,0BAA0B,GAC9B,QAAQ,EAAE,KAAK,CAAC,gBAAgB,KAAK,IAAI,CAAC;IAC5C,IAAI,0BAA0B,EAAE,CAAC;QAC/B,OAAO;YACL,YAAY,EAAE,KAAK;YACnB,QAAQ,EAAE,IAAI;YACd,oBAAoB,EAAE,KAAK;YAC3B,gBAAgB,EAAE,iDAAiD;SACpE,CAAC;IACJ,CAAC;IACD,OAAO;QACL,YAAY,EAAE,IAAI;QAClB,QAAQ;QACR,oBAAoB,EAAE,QAAQ,KAAK,IAAI,IAAI,QAAQ,CAAC,GAAG,GAAG,YAAY;QACtE,gBAAgB,EAAE,IAAI;KACvB
,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,MAAc;IACtC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;QAAE,OAAO,GAAG,CAAC;IACzC,IAAI,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC9B,IAAI,MAAM,GAAG,IAAI;QAAE,OAAO,QAAQ,CAAC;IACnC,IAAI,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,IAAI,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;IAChD,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;AACjC,CAAC"}
|
|
@@ -22,9 +22,11 @@
|
|
|
22
22
|
* Used for manifest signing: produce a deterministic byte string that two
|
|
23
23
|
* independent parties can reproduce from the same object graph.
|
|
24
24
|
*/
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
// canonicalize v3 is ESM-only (no CJS entry); the client is `"type": "module"`
|
|
26
|
+
// so a native ESM default import is the correct interop (replaces the v2-era
|
|
27
|
+
// createRequire shim). v3 is packaging-only vs v2 — its RFC 8785 output is
|
|
28
|
+
// byte-identical, so existing content hashes / manifest signatures are stable.
|
|
29
|
+
import canonicalize from 'canonicalize';
|
|
28
30
|
/**
|
|
29
31
|
* Recursively replace NaN / ±Infinity with null so that canonicalize does not
|
|
30
32
|
* throw — matching the JSON.stringify behaviour that the rest of the codebase
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"canonical-json.js","sourceRoot":"","sources":["../../../src/harnesses/engine/canonical-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,
|
|
1
|
+
{"version":3,"file":"canonical-json.js","sourceRoot":"","sources":["../../../src/harnesses/engine/canonical-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,+EAA+E;AAC/E,6EAA6E;AAC7E,2EAA2E;AAC3E,+EAA+E;AAC/E,OAAO,YAAY,MAAM,cAAc,CAAC;AAExC;;;;;GAKG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC/D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAC5D,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,mEAAmE;QACnE,qEAAqE;QACrE,sEAAsE;QACtE,WAAW;QACX,MAAM,MAAM,GAAI,KAAoC,CAAC,MAAM,CAAC;QAC5D,IAAI,OAAO,MAAM,KAAK,UAAU,EAAE,CAAC;YACjC,OAAO,eAAe,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC7C,CAAC;QACD,MAAM,GAAG,GAA4B,EAAE,CAAC;QACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QACxE,OAAO,GAAG,CAAC;IACb,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEvC,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACrC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,wEAAwE;QACxE,uEAAuE;QACvE,kCAAkC;QAClC,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -329,9 +329,15 @@ export declare class TaskEngine {
|
|
|
329
329
|
* Performance contract (issue #398): this runs once per task announcement,
|
|
330
330
|
* on the engine-watcher hot path, for every observed task. It MUST NOT
|
|
331
331
|
* perform per-task blocking I/O. In particular it does not probe
|
|
332
|
-
* `impl.isReady()` —
|
|
333
|
-
*
|
|
334
|
-
* spawns and starve the daemon event loop.
|
|
332
|
+
* `impl.isReady()` — historically the Hermes harness ran child processes
|
|
333
|
+
* inside its readiness probe, so a backlog would pay per-task blocking
|
|
334
|
+
* spawns and starve the daemon event loop. (#778 converted the matching
|
|
335
|
+
* post-execution `git diff` wedge in harvest.ts to async + 60s timeout;
|
|
336
|
+
* the #778 follow-up converted the readiness-probe spawns themselves —
|
|
337
|
+
* Hermes `doctor` / `auth list` and the codex-doctor `--version` probe —
|
|
338
|
+
* to async, so the registry's background refresh loop no longer blocks the
|
|
339
|
+
* main thread either. The readiness path remains O(1) here via the cache
|
|
340
|
+
* below.)
|
|
335
341
|
*
|
|
336
342
|
* Harness readiness for the claim gate is instead served O(1) from the
|
|
337
343
|
* daemon's cached `HarnessReadinessRegistry` snapshot: the engine-watcher
|
|
@@ -561,9 +567,36 @@ export declare class TaskEngine {
|
|
|
561
567
|
* when a previous run already produced the data.
|
|
562
568
|
*/
|
|
563
569
|
private dataDrivenAdvance;
|
|
570
|
+
/**
|
|
571
|
+
* Classify a transition error and mark the task terminal accordingly.
|
|
572
|
+
*
|
|
573
|
+
* - `SafeInnerRevertError` with a non-recoverable inner name (the same
|
|
574
|
+
* set the daemon's pre-claim catch in `emitTickErrorOrRaceLost` uses):
|
|
575
|
+
* the on-chain slot is already pruned (TCMaxVerdictsReached,
|
|
576
|
+
* TCAttemptAlreadyFinalized, …). We mark the row RACE_LOST and emit a
|
|
577
|
+
* `kind=race_lost` activity event so operators can audit prunes
|
|
578
|
+
* without inflating the FAILED counter (#896).
|
|
579
|
+
* - A transport-transient error (e.g. `AllRpcsFailedError` — every provider
|
|
580
|
+
* in the L2 fallback chain failed at once) on a task whose delivery window
|
|
581
|
+
* is still open: leave the row in its current in-flight state so the next
|
|
582
|
+
* tick re-drives it once the RPCs recover, and emit a `tick_error` (warn)
|
|
583
|
+
* event instead of inflating the FAILED counter. Without this the daemon
|
|
584
|
+
* stamped the row FAILED, dropping it from `getInFlight()` permanently, so
|
|
585
|
+
* L2 work went silent until a manual restart (#912). Past-window transient
|
|
586
|
+
* errors still terminalize to avoid churning on work that can no longer
|
|
587
|
+
* settle.
|
|
588
|
+
* - Everything else: existing markFailed behaviour. When invoked from
|
|
589
|
+
* recovery, `contextLabel === 'recovery'` so the failure_reason
|
|
590
|
+
* carries the `recovery:` prefix the original code path used.
|
|
591
|
+
*
|
|
592
|
+
* Returns the classification so callers can log appropriately.
|
|
593
|
+
*/
|
|
594
|
+
private _classifyAndMarkTerminal;
|
|
564
595
|
/**
|
|
565
596
|
* Wraps a transition method call with error handling: if the transition
|
|
566
|
-
* throws, the task is marked FAILED
|
|
597
|
+
* throws, the task is marked terminal (FAILED or RACE_LOST per
|
|
598
|
+
* `_classifyAndMarkTerminal`) and the error is rethrown so the caller can
|
|
599
|
+
* surface it.
|
|
567
600
|
*/
|
|
568
601
|
private _runTransition;
|
|
569
602
|
/**
|