@jinn-network/client 0.1.7 → 0.1.8-canary.09a3b2f6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -1
- package/dist/adapters/mech/adapter.d.ts +39 -2
- package/dist/adapters/mech/adapter.js +178 -20
- package/dist/adapters/mech/adapter.js.map +1 -1
- package/dist/adapters/mech/contracts.d.ts +22 -1
- package/dist/adapters/mech/contracts.js +96 -52
- package/dist/adapters/mech/contracts.js.map +1 -1
- package/dist/adapters/mech/safe-revert.d.ts +4 -0
- package/dist/adapters/mech/safe-revert.js +5 -1
- package/dist/adapters/mech/safe-revert.js.map +1 -1
- package/dist/adapters/mech/safe.d.ts +1 -1
- package/dist/adapters/mech/safe.js +10 -4
- package/dist/adapters/mech/safe.js.map +1 -1
- package/dist/adapters/mech/types.d.ts +6 -1
- package/dist/adapters/mech/types.js.map +1 -1
- package/dist/agent/operator-claude.js +8 -0
- package/dist/agent/operator-claude.js.map +1 -1
- package/dist/api/activity-events-endpoint.d.ts +14 -0
- package/dist/api/activity-events-endpoint.js +59 -0
- package/dist/api/activity-events-endpoint.js.map +1 -0
- package/dist/api/bootstrap-endpoint.d.ts +1 -2
- package/dist/api/bootstrap-endpoint.js +42 -24
- package/dist/api/bootstrap-endpoint.js.map +1 -1
- package/dist/api/codex-doctor-endpoint.d.ts +22 -5
- package/dist/api/codex-doctor-endpoint.js +136 -17
- package/dist/api/codex-doctor-endpoint.js.map +1 -1
- package/dist/api/debug-report-endpoint.d.ts +27 -0
- package/dist/api/debug-report-endpoint.js +157 -0
- package/dist/api/debug-report-endpoint.js.map +1 -0
- package/dist/api/discovery-endpoint.d.ts +1 -0
- package/dist/api/discovery-endpoint.js +24 -0
- package/dist/api/discovery-endpoint.js.map +1 -1
- package/dist/api/fleet-build.d.ts +1 -7
- package/dist/api/fleet-build.js +0 -7
- package/dist/api/fleet-build.js.map +1 -1
- package/dist/api/gather-status.d.ts +39 -0
- package/dist/api/gather-status.js +181 -84
- package/dist/api/gather-status.js.map +1 -1
- package/dist/api/hermes-doctor-endpoint.d.ts +15 -7
- package/dist/api/hermes-doctor-endpoint.js +56 -19
- package/dist/api/hermes-doctor-endpoint.js.map +1 -1
- package/dist/api/launcher-status.d.ts +4 -2
- package/dist/api/launcher-status.js +11 -10
- package/dist/api/launcher-status.js.map +1 -1
- package/dist/api/launcher-tasks.d.ts +1 -1
- package/dist/api/launcher-tasks.js +12 -8
- package/dist/api/launcher-tasks.js.map +1 -1
- package/dist/api/loop-completion-build.d.ts +79 -0
- package/dist/api/loop-completion-build.js +155 -0
- package/dist/api/loop-completion-build.js.map +1 -0
- package/dist/api/operator-artifacts-endpoint.js +73 -6
- package/dist/api/operator-artifacts-endpoint.js.map +1 -1
- package/dist/api/portfolio-v0-build.d.ts +7 -1
- package/dist/api/portfolio-v0-build.js +6 -2
- package/dist/api/portfolio-v0-build.js.map +1 -1
- package/dist/api/prediction-v1-build.d.ts +6 -0
- package/dist/api/prediction-v1-build.js +3 -1
- package/dist/api/prediction-v1-build.js.map +1 -1
- package/dist/api/server.d.ts +17 -0
- package/dist/api/server.js +40 -1
- package/dist/api/server.js.map +1 -1
- package/dist/api/setup-endpoints.d.ts +13 -9
- package/dist/api/setup-endpoints.js +50 -173
- package/dist/api/setup-endpoints.js.map +1 -1
- package/dist/api/solvernets-endpoints.js +33 -63
- package/dist/api/solvernets-endpoints.js.map +1 -1
- package/dist/api/status-build.d.ts +140 -17
- package/dist/api/status-build.js +47 -34
- package/dist/api/status-build.js.map +1 -1
- package/dist/api/status-harness-rollup.d.ts +35 -0
- package/dist/api/status-harness-rollup.js +45 -0
- package/dist/api/status-harness-rollup.js.map +1 -0
- package/dist/api/status-rollup-build.d.ts +0 -4
- package/dist/api/status-rollup-build.js +0 -4
- package/dist/api/status-rollup-build.js.map +1 -1
- package/dist/api/task-runs-build.d.ts +8 -0
- package/dist/api/task-runs-build.js +5 -1
- package/dist/api/task-runs-build.js.map +1 -1
- package/dist/build-info.json +4 -4
- package/dist/build-meta.json +1 -1
- package/dist/captures/live-publisher.js +24 -4
- package/dist/captures/live-publisher.js.map +1 -1
- package/dist/captures/publish.d.ts +1 -1
- package/dist/chain-read-errors.d.ts +12 -0
- package/dist/chain-read-errors.js +26 -1
- package/dist/chain-read-errors.js.map +1 -1
- package/dist/cli/commands/codedigest-revert-check.d.ts +33 -0
- package/dist/cli/commands/codedigest-revert-check.js +253 -0
- package/dist/cli/commands/codedigest-revert-check.js.map +1 -0
- package/dist/cli/commands/doctor.d.ts +3 -0
- package/dist/cli/commands/doctor.js +35 -0
- package/dist/cli/commands/doctor.js.map +1 -1
- package/dist/cli/commands/eval.d.ts +76 -0
- package/dist/cli/commands/eval.js +401 -0
- package/dist/cli/commands/eval.js.map +1 -0
- package/dist/cli/commands/rewards.d.ts +2 -0
- package/dist/cli/commands/rewards.js +27 -0
- package/dist/cli/commands/rewards.js.map +1 -1
- package/dist/cli/commands/solver-nets.d.ts +1 -0
- package/dist/cli/commands/solver-nets.js +245 -22
- package/dist/cli/commands/solver-nets.js.map +1 -1
- package/dist/cli/commands/solver-plugins-block.d.ts +33 -0
- package/dist/cli/commands/solver-plugins-block.js +118 -0
- package/dist/cli/commands/solver-plugins-block.js.map +1 -0
- package/dist/cli/commands/solver-plugins-feedback.d.ts +72 -0
- package/dist/cli/commands/solver-plugins-feedback.js +262 -0
- package/dist/cli/commands/solver-plugins-feedback.js.map +1 -0
- package/dist/cli/commands/solver-plugins-read.d.ts +54 -0
- package/dist/cli/commands/solver-plugins-read.js +259 -0
- package/dist/cli/commands/solver-plugins-read.js.map +1 -0
- package/dist/cli/commands/solver-plugins.d.ts +35 -0
- package/dist/cli/commands/solver-plugins.js +399 -2
- package/dist/cli/commands/solver-plugins.js.map +1 -1
- package/dist/cli/commands/status.js +0 -1
- package/dist/cli/commands/status.js.map +1 -1
- package/dist/cli/commands/tasks.js +15 -2
- package/dist/cli/commands/tasks.js.map +1 -1
- package/dist/cli/index.js +4 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/task-native-readiness.d.ts +7 -0
- package/dist/cli/task-native-readiness.js +7 -5
- package/dist/cli/task-native-readiness.js.map +1 -1
- package/dist/config.d.ts +206 -232
- package/dist/config.js +289 -107
- package/dist/config.js.map +1 -1
- package/dist/daemon/ai-units-gate.d.ts +54 -0
- package/dist/daemon/ai-units-gate.js +83 -0
- package/dist/daemon/ai-units-gate.js.map +1 -0
- package/dist/daemon/creator.js +13 -0
- package/dist/daemon/creator.js.map +1 -1
- package/dist/daemon/daemon.d.ts +10 -0
- package/dist/daemon/daemon.js +205 -30
- package/dist/daemon/daemon.js.map +1 -1
- package/dist/daemon/eviction-loop.d.ts +7 -0
- package/dist/daemon/eviction-loop.js +16 -0
- package/dist/daemon/eviction-loop.js.map +1 -1
- package/dist/daemon/gate-logger.d.ts +9 -0
- package/dist/daemon/gate-logger.js +2 -0
- package/dist/daemon/gate-logger.js.map +1 -0
- package/dist/daemon/jinn-claim-loop.js +22 -4
- package/dist/daemon/jinn-claim-loop.js.map +1 -1
- package/dist/daemon/readiness-gate.d.ts +1 -4
- package/dist/daemon/readiness-gate.js.map +1 -1
- package/dist/daemon/spend-cap-gate.d.ts +40 -0
- package/dist/daemon/spend-cap-gate.js +46 -0
- package/dist/daemon/spend-cap-gate.js.map +1 -0
- package/dist/dashboard/assets/index-3quVQqik.js +167 -0
- package/dist/dashboard/assets/index-BVAWkLwY.css +1 -0
- package/dist/dashboard/index.html +2 -2
- package/dist/discovery/http.d.ts +7 -0
- package/dist/discovery/http.js +567 -24
- package/dist/discovery/http.js.map +1 -1
- package/dist/discovery/onchain.js +197 -5
- package/dist/discovery/onchain.js.map +1 -1
- package/dist/discovery/types.d.ts +235 -0
- package/dist/discovery/types.js +40 -0
- package/dist/discovery/types.js.map +1 -1
- package/dist/discovery/with-fallback.js +41 -0
- package/dist/discovery/with-fallback.js.map +1 -1
- package/dist/earning/bootstrap.d.ts +31 -3
- package/dist/earning/bootstrap.js +94 -22
- package/dist/earning/bootstrap.js.map +1 -1
- package/dist/earning/faucet.d.ts +1 -1
- package/dist/earning/faucet.js +2 -2
- package/dist/earning/faucet.js.map +1 -1
- package/dist/earning/safe-adapter.js +34 -11
- package/dist/earning/safe-adapter.js.map +1 -1
- package/dist/earning/types.d.ts +6 -6
- package/dist/earning/viem-clients.d.ts +11 -4
- package/dist/earning/viem-clients.js +14 -5
- package/dist/earning/viem-clients.js.map +1 -1
- package/dist/erc8004/identity.d.ts +19 -3
- package/dist/erc8004/identity.js +38 -11
- package/dist/erc8004/identity.js.map +1 -1
- package/dist/erc8004/index.d.ts +1 -1
- package/dist/erc8004/index.js.map +1 -1
- package/dist/eval/eval-harness-run.d.ts +63 -0
- package/dist/eval/eval-harness-run.js +123 -0
- package/dist/eval/eval-harness-run.js.map +1 -0
- package/dist/eval/orchestrator.d.ts +163 -0
- package/dist/eval/orchestrator.js +232 -0
- package/dist/eval/orchestrator.js.map +1 -0
- package/dist/eval/paired.d.ts +68 -0
- package/dist/eval/paired.js +93 -0
- package/dist/eval/paired.js.map +1 -0
- package/dist/eval/resolve-slate-tasks.d.ts +35 -0
- package/dist/eval/resolve-slate-tasks.js +56 -0
- package/dist/eval/resolve-slate-tasks.js.map +1 -0
- package/dist/eval/screen-discovery.d.ts +22 -0
- package/dist/eval/screen-discovery.js +71 -0
- package/dist/eval/screen-discovery.js.map +1 -0
- package/dist/eval/screen-progress.d.ts +41 -0
- package/dist/eval/screen-progress.js +60 -0
- package/dist/eval/screen-progress.js.map +1 -0
- package/dist/eval/screen-runner.d.ts +30 -0
- package/dist/eval/screen-runner.js +289 -0
- package/dist/eval/screen-runner.js.map +1 -0
- package/dist/eval/screen.d.ts +107 -0
- package/dist/eval/screen.js +159 -0
- package/dist/eval/screen.js.map +1 -0
- package/dist/eval/slope.d.ts +29 -0
- package/dist/eval/slope.js +46 -0
- package/dist/eval/slope.js.map +1 -0
- package/dist/eval/train-sequence.d.ts +35 -0
- package/dist/eval/train-sequence.js +59 -0
- package/dist/eval/train-sequence.js.map +1 -0
- package/dist/eval/wilson.d.ts +45 -0
- package/dist/eval/wilson.js +48 -0
- package/dist/eval/wilson.js.map +1 -0
- package/dist/events/types.d.ts +2 -2
- package/dist/harnesses/cost-estimates.d.ts +10 -31
- package/dist/harnesses/cost-estimates.js +11 -43
- package/dist/harnesses/cost-estimates.js.map +1 -1
- package/dist/harnesses/engine/canonical-json.js +5 -3
- package/dist/harnesses/engine/canonical-json.js.map +1 -1
- package/dist/harnesses/engine/engine.d.ts +37 -4
- package/dist/harnesses/engine/engine.js +151 -20
- package/dist/harnesses/engine/engine.js.map +1 -1
- package/dist/harnesses/engine/persistence.d.ts +38 -4
- package/dist/harnesses/engine/persistence.js +71 -6
- package/dist/harnesses/engine/persistence.js.map +1 -1
- package/dist/harnesses/engine/state.d.ts +9 -0
- package/dist/harnesses/engine/state.js +23 -10
- package/dist/harnesses/engine/state.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/adapter.d.ts +2 -0
- package/dist/harnesses/impls/hermes-agent/adapter.js +8 -5
- package/dist/harnesses/impls/hermes-agent/adapter.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/bootstrap.d.ts +1 -0
- package/dist/harnesses/impls/hermes-agent/bootstrap.js +10 -3
- package/dist/harnesses/impls/hermes-agent/bootstrap.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/config-builder.d.ts +1 -1
- package/dist/harnesses/impls/hermes-agent/config-builder.js +4 -2
- package/dist/harnesses/impls/hermes-agent/config-builder.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/harness.d.ts +31 -3
- package/dist/harnesses/impls/hermes-agent/harness.js +84 -7
- package/dist/harnesses/impls/hermes-agent/harness.js.map +1 -1
- package/dist/harnesses/impls/hermes-agent/prompt.d.ts +6 -6
- package/dist/harnesses/impls/hermes-agent/prompt.js +6 -6
- package/dist/harnesses/impls/index.d.ts +2 -0
- package/dist/harnesses/impls/index.js +2 -0
- package/dist/harnesses/impls/index.js.map +1 -1
- package/dist/harnesses/impls/learner/adapters/claude-code.d.ts +17 -0
- package/dist/harnesses/impls/learner/adapters/claude-code.js +118 -14
- package/dist/harnesses/impls/learner/adapters/claude-code.js.map +1 -1
- package/dist/harnesses/impls/learner/adapters/codex-code.d.ts +9 -0
- package/dist/harnesses/impls/learner/adapters/codex-code.js +30 -8
- package/dist/harnesses/impls/learner/adapters/codex-code.js.map +1 -1
- package/dist/harnesses/impls/learner/harness.d.ts +41 -1
- package/dist/harnesses/impls/learner/harness.js +78 -4
- package/dist/harnesses/impls/learner/harness.js.map +1 -1
- package/dist/harnesses/impls/learner/harvest.d.ts +3 -1
- package/dist/harnesses/impls/learner/harvest.js +30 -6
- package/dist/harnesses/impls/learner/harvest.js.map +1 -1
- package/dist/harnesses/impls/learner/plugin-path.js +1 -0
- package/dist/harnesses/impls/learner/plugin-path.js.map +1 -1
- package/dist/harnesses/impls/learner/restoration-patch.d.ts +2 -2
- package/dist/harnesses/impls/learner/restoration-patch.js +25 -6
- package/dist/harnesses/impls/learner/restoration-patch.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js +21 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/eval-runner.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js +3 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.d.ts +74 -5
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js +103 -32
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/hf-fetcher.js.map +1 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.d.ts +2 -2
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js +3 -1
- package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js.map +1 -1
- package/dist/harnesses/readiness-registry.d.ts +7 -0
- package/dist/harnesses/readiness-registry.js +9 -0
- package/dist/harnesses/readiness-registry.js.map +1 -1
- package/dist/harnesses/types.d.ts +14 -0
- package/dist/learner/revert-decision.d.ts +74 -0
- package/dist/learner/revert-decision.js +73 -0
- package/dist/learner/revert-decision.js.map +1 -0
- package/dist/learner/revert-stats.d.ts +38 -0
- package/dist/learner/revert-stats.js +86 -0
- package/dist/learner/revert-stats.js.map +1 -0
- package/dist/local-provider-url.d.ts +3 -0
- package/dist/local-provider-url.js +28 -0
- package/dist/local-provider-url.js.map +1 -0
- package/dist/main.js +199 -104
- package/dist/main.js.map +1 -1
- package/dist/mcp/get-codedigest-reward.d.ts +13 -0
- package/dist/mcp/get-codedigest-reward.js +23 -0
- package/dist/mcp/get-codedigest-reward.js.map +1 -0
- package/dist/mcp/server.js +23 -0
- package/dist/mcp/server.js.map +1 -1
- package/dist/observability/debug-report-assemble.d.ts +43 -0
- package/dist/observability/debug-report-assemble.js +80 -0
- package/dist/observability/debug-report-assemble.js.map +1 -0
- package/dist/observability/emit-event.d.ts +9 -2
- package/dist/observability/emit-event.js +36 -2
- package/dist/observability/emit-event.js.map +1 -1
- package/dist/observability/file-logger.d.ts +69 -0
- package/dist/observability/file-logger.js +177 -0
- package/dist/observability/file-logger.js.map +1 -0
- package/dist/observability/redact-secrets.d.ts +65 -0
- package/dist/observability/redact-secrets.js +300 -0
- package/dist/observability/redact-secrets.js.map +1 -0
- package/dist/observability/tar.d.ts +30 -0
- package/dist/observability/tar.js +102 -0
- package/dist/observability/tar.js.map +1 -0
- package/dist/plugins/learner/.claude-plugin/plugin.json +1 -1
- package/dist/plugins/learner/.codex-plugin/plugin.json +1 -1
- package/dist/plugins/learner/hooks/session-start +30 -1
- package/dist/plugins/learner/skills/learn/consolidator-prompt.md +22 -1
- package/dist/plugins/learner/skills/learn/promoter-prompt.md +72 -1
- package/dist/preflight/deployment-readiness.d.ts +147 -0
- package/dist/preflight/deployment-readiness.js +366 -0
- package/dist/preflight/deployment-readiness.js.map +1 -0
- package/dist/preflight/pidfile-liveness.d.ts +50 -0
- package/dist/preflight/pidfile-liveness.js +117 -0
- package/dist/preflight/pidfile-liveness.js.map +1 -0
- package/dist/preflight/rpc-network.d.ts +40 -0
- package/dist/preflight/rpc-network.js +67 -1
- package/dist/preflight/rpc-network.js.map +1 -1
- package/dist/rpc/transport.d.ts +145 -0
- package/dist/rpc/transport.js +319 -0
- package/dist/rpc/transport.js.map +1 -0
- package/dist/scripts/donation-consumption-acceptance.js +7 -28
- package/dist/scripts/donation-consumption-acceptance.js.map +1 -1
- package/dist/scripts/swe-rebench-v2-pytest-missing.json +16 -0
- package/dist/solver-nets/prediction-operator-ux.d.ts +1 -2
- package/dist/solver-nets/prediction-operator-ux.js +56 -53
- package/dist/solver-nets/prediction-operator-ux.js.map +1 -1
- package/dist/solver-nets/registry.d.ts +19 -1
- package/dist/solver-nets/registry.js +37 -24
- package/dist/solver-nets/registry.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.d.ts +76 -0
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.js +156 -0
- package/dist/solver-types/_swe-rebench-v2-held-out-slate.js.map +1 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.d.ts +81 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.js +116 -0
- package/dist/solver-types/_swe-rebench-v2-pool-recovery.js.map +1 -0
- package/dist/solver-types/_swe-rebench-v2-pool.d.ts +9 -2
- package/dist/solver-types/_swe-rebench-v2-pool.js +15 -20
- package/dist/solver-types/_swe-rebench-v2-pool.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-state.d.ts +24 -0
- package/dist/solver-types/_swe-rebench-v2-state.js +33 -0
- package/dist/solver-types/_swe-rebench-v2-state.js.map +1 -1
- package/dist/solver-types/_swe-rebench-v2-validated-pool.d.ts +116 -2
- package/dist/solver-types/_swe-rebench-v2-validated-pool.js +296 -21
- package/dist/solver-types/_swe-rebench-v2-validated-pool.js.map +1 -1
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json +20 -0
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.json +19 -0
- package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.screening-report.json +628 -0
- package/dist/solver-types/solver-type.d.ts +8 -0
- package/dist/solver-types/swe-rebench-v2-auto.d.ts +20 -11
- package/dist/solver-types/swe-rebench-v2-auto.js +64 -19
- package/dist/solver-types/swe-rebench-v2-auto.js.map +1 -1
- package/dist/solver-types/swe-rebench-v2.d.ts +10 -2
- package/dist/solver-types/swe-rebench-v2.js +233 -13
- package/dist/solver-types/swe-rebench-v2.js.map +1 -1
- package/dist/solvernets/daemon-init.d.ts +1 -1
- package/dist/solvernets/daemon-init.js +19 -4
- package/dist/solvernets/daemon-init.js.map +1 -1
- package/dist/solvernets/launched-record-dispatcher.d.ts +7 -0
- package/dist/solvernets/launched-record-dispatcher.js +10 -4
- package/dist/solvernets/launched-record-dispatcher.js.map +1 -1
- package/dist/solvernets/registry-client-erc8004.js +40 -37
- package/dist/solvernets/registry-client-erc8004.js.map +1 -1
- package/dist/solvernets/registry-client.d.ts +6 -0
- package/dist/solvernets/store.d.ts +2 -2
- package/dist/solvernets/store.js +7 -2
- package/dist/solvernets/store.js.map +1 -1
- package/dist/spend/ai-units-config.d.ts +49 -0
- package/dist/spend/ai-units-config.js +34 -0
- package/dist/spend/ai-units-config.js.map +1 -0
- package/dist/spend/ai-units.d.ts +140 -0
- package/dist/spend/ai-units.js +229 -0
- package/dist/spend/ai-units.js.map +1 -0
- package/dist/spend/cost-surface-status.d.ts +12 -0
- package/dist/spend/cost-surface-status.js +24 -0
- package/dist/spend/cost-surface-status.js.map +1 -0
- package/dist/spend/credential.d.ts +39 -0
- package/dist/spend/credential.js +71 -0
- package/dist/spend/credential.js.map +1 -0
- package/dist/spend/daemon-config.d.ts +13 -0
- package/dist/spend/daemon-config.js +24 -0
- package/dist/spend/daemon-config.js.map +1 -0
- package/dist/spend/pricing.d.ts +16 -0
- package/dist/spend/pricing.js +26 -0
- package/dist/spend/pricing.js.map +1 -0
- package/dist/spend/record.d.ts +13 -0
- package/dist/spend/record.js +43 -0
- package/dist/spend/record.js.map +1 -0
- package/dist/spend/usage.d.ts +27 -0
- package/dist/spend/usage.js +113 -0
- package/dist/spend/usage.js.map +1 -0
- package/dist/store/store.d.ts +187 -0
- package/dist/store/store.js +467 -4
- package/dist/store/store.js.map +1 -1
- package/dist/trajectory/transcript-parsers/codex-session.d.ts +12 -6
- package/dist/trajectory/transcript-parsers/codex-session.js +114 -13
- package/dist/trajectory/transcript-parsers/codex-session.js.map +1 -1
- package/dist/trajectory/transcript-parsers/types.d.ts +8 -8
- package/dist/trajectory/transcript-session-dirs.d.ts +18 -0
- package/dist/trajectory/transcript-session-dirs.js +85 -0
- package/dist/trajectory/transcript-session-dirs.js.map +1 -0
- package/dist/trajectory/transcript-watcher.d.ts +20 -1
- package/dist/trajectory/transcript-watcher.js +108 -32
- package/dist/trajectory/transcript-watcher.js.map +1 -1
- package/dist/tx-retry.d.ts +25 -0
- package/dist/tx-retry.js +95 -7
- package/dist/tx-retry.js.map +1 -1
- package/dist/types/payloads/portfolio-v0.d.ts +3 -3
- package/dist/types/payloads/prediction-apy-v0.d.ts +3 -3
- package/dist/types/payloads/prediction-v0.d.ts +12 -12
- package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.d.ts +108 -1
- package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.js +25 -1
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.d.ts +65 -0
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.js +123 -0
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.d.ts +2 -2
- package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.js +1 -1
- package/dist/vendor/@jinn-network/sdk/package.json +4 -0
- package/docker-compose.yml +3 -2
- package/package.json +30 -18
- package/plugins/learner/.claude-plugin/plugin.json +1 -1
- package/plugins/learner/.codex-plugin/plugin.json +1 -1
- package/plugins/learner/hooks/session-start +30 -1
- package/plugins/learner/skills/learn/consolidator-prompt.md +22 -1
- package/plugins/learner/skills/learn/promoter-prompt.md +72 -1
- package/plugins/swe-rebench-v2-diffmin/README.md +10 -9
- package/plugins/swe-rebench-v2-diffmin/jinn.plugin.json +1 -1
- package/plugins/swe-rebench-v2-diffmin/skills/diffmin/SKILL.md +15 -10
- package/plugins/swe-rebench-v2-diffmin/skills/test-map/SKILL.md +10 -12
- package/plugins/swe-rebench-v2-runtime/.claude-plugin/plugin.json +1 -1
- package/plugins/swe-rebench-v2-runtime/.codex-plugin/plugin.json +3 -3
- package/plugins/swe-rebench-v2-runtime/README.md +6 -6
- package/plugins/swe-rebench-v2-runtime/hooks/hooks.json +16 -0
- package/plugins/swe-rebench-v2-runtime/hooks/session-start +74 -0
- package/plugins/swe-rebench-v2-runtime/jinn.plugin.json +2 -3
- package/plugins/swe-rebench-v2-runtime/skills/task/SKILL.md +81 -0
- package/dist/dashboard/assets/index-BUlE8F3Y.js +0 -330
- package/dist/dashboard/assets/index-blqc7eqq.css +0 -32
- package/plugins/swe-rebench-v2-runtime/skills/orient/SKILL.md +0 -29
- package/plugins/swe-rebench-v2-runtime/skills/plan/SKILL.md +0 -53
|
@@ -151,6 +151,7 @@ export declare const PredictionApyV0VerdictPayloadSchema: z.ZodEffects<z.ZodObje
|
|
|
151
151
|
predictedBps: string;
|
|
152
152
|
submissionManifestCid?: string | undefined;
|
|
153
153
|
};
|
|
154
|
+
score: string;
|
|
154
155
|
solutionEnvelope: {
|
|
155
156
|
cid: string;
|
|
156
157
|
sha256: string;
|
|
@@ -171,7 +172,6 @@ export declare const PredictionApyV0VerdictPayloadSchema: z.ZodEffects<z.ZodObje
|
|
|
171
172
|
}[];
|
|
172
173
|
overall: "valid" | "invalid";
|
|
173
174
|
};
|
|
174
|
-
score: string;
|
|
175
175
|
scoreBasis: "absolute-error-linear.v1";
|
|
176
176
|
scoreVersion: string;
|
|
177
177
|
oracleReading: {
|
|
@@ -193,6 +193,7 @@ export declare const PredictionApyV0VerdictPayloadSchema: z.ZodEffects<z.ZodObje
|
|
|
193
193
|
predictedBps: string;
|
|
194
194
|
submissionManifestCid?: string | undefined;
|
|
195
195
|
};
|
|
196
|
+
score: string;
|
|
196
197
|
solutionEnvelope: {
|
|
197
198
|
cid: string;
|
|
198
199
|
sha256: string;
|
|
@@ -213,7 +214,6 @@ export declare const PredictionApyV0VerdictPayloadSchema: z.ZodEffects<z.ZodObje
|
|
|
213
214
|
}[];
|
|
214
215
|
overall: "valid" | "invalid";
|
|
215
216
|
};
|
|
216
|
-
score: string;
|
|
217
217
|
scoreBasis: "absolute-error-linear.v1";
|
|
218
218
|
scoreVersion: string;
|
|
219
219
|
oracleReading: {
|
|
@@ -235,6 +235,7 @@ export declare const PredictionApyV0VerdictPayloadSchema: z.ZodEffects<z.ZodObje
|
|
|
235
235
|
predictedBps: string;
|
|
236
236
|
submissionManifestCid?: string | undefined;
|
|
237
237
|
};
|
|
238
|
+
score: string;
|
|
238
239
|
solutionEnvelope: {
|
|
239
240
|
cid: string;
|
|
240
241
|
sha256: string;
|
|
@@ -255,7 +256,6 @@ export declare const PredictionApyV0VerdictPayloadSchema: z.ZodEffects<z.ZodObje
|
|
|
255
256
|
}[];
|
|
256
257
|
overall: "valid" | "invalid";
|
|
257
258
|
};
|
|
258
|
-
score: string;
|
|
259
259
|
scoreBasis: "absolute-error-linear.v1";
|
|
260
260
|
scoreVersion: string;
|
|
261
261
|
oracleReading: {
|
|
@@ -24,14 +24,14 @@ export declare const PredictionV0RestorationPayloadSchema: z.ZodObject<{
|
|
|
24
24
|
updatedAt: z.ZodNumber;
|
|
25
25
|
}, "strip", z.ZodTypeAny, {
|
|
26
26
|
feed: string;
|
|
27
|
+
updatedAt: number;
|
|
27
28
|
roundId: string;
|
|
28
29
|
answer: string;
|
|
29
|
-
updatedAt: number;
|
|
30
30
|
}, {
|
|
31
31
|
feed: string;
|
|
32
|
+
updatedAt: number;
|
|
32
33
|
roundId: string;
|
|
33
34
|
answer: string;
|
|
34
|
-
updatedAt: number;
|
|
35
35
|
}>>;
|
|
36
36
|
rationale: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
37
37
|
ts: z.ZodNumber;
|
|
@@ -55,9 +55,9 @@ export declare const PredictionV0RestorationPayloadSchema: z.ZodObject<{
|
|
|
55
55
|
}[] | undefined;
|
|
56
56
|
oracleSnapshot?: {
|
|
57
57
|
feed: string;
|
|
58
|
+
updatedAt: number;
|
|
58
59
|
roundId: string;
|
|
59
60
|
answer: string;
|
|
60
|
-
updatedAt: number;
|
|
61
61
|
} | undefined;
|
|
62
62
|
}, {
|
|
63
63
|
prediction: {
|
|
@@ -71,9 +71,9 @@ export declare const PredictionV0RestorationPayloadSchema: z.ZodObject<{
|
|
|
71
71
|
}[] | undefined;
|
|
72
72
|
oracleSnapshot?: {
|
|
73
73
|
feed: string;
|
|
74
|
+
updatedAt: number;
|
|
74
75
|
roundId: string;
|
|
75
76
|
answer: string;
|
|
76
|
-
updatedAt: number;
|
|
77
77
|
} | undefined;
|
|
78
78
|
}>;
|
|
79
79
|
export type PredictionV0RestorationPayload = z.infer<typeof PredictionV0RestorationPayloadSchema>;
|
|
@@ -139,15 +139,15 @@ export declare const PredictionV0VerdictPayloadSchema: z.ZodEffects<z.ZodObject<
|
|
|
139
139
|
nextRoundUpdatedAt: z.ZodOptional<z.ZodNumber>;
|
|
140
140
|
}, "strip", z.ZodTypeAny, {
|
|
141
141
|
feed: string;
|
|
142
|
+
updatedAt: number;
|
|
142
143
|
roundId: string;
|
|
143
144
|
answer: string;
|
|
144
|
-
updatedAt: number;
|
|
145
145
|
nextRoundUpdatedAt?: number | undefined;
|
|
146
146
|
}, {
|
|
147
147
|
feed: string;
|
|
148
|
+
updatedAt: number;
|
|
148
149
|
roundId: string;
|
|
149
150
|
answer: string;
|
|
150
|
-
updatedAt: number;
|
|
151
151
|
nextRoundUpdatedAt?: number | undefined;
|
|
152
152
|
}>;
|
|
153
153
|
claimed: z.ZodObject<{
|
|
@@ -188,6 +188,7 @@ export declare const PredictionV0VerdictPayloadSchema: z.ZodEffects<z.ZodObject<
|
|
|
188
188
|
modelId: string;
|
|
189
189
|
submissionManifestCid?: string | undefined;
|
|
190
190
|
};
|
|
191
|
+
score: string;
|
|
191
192
|
solutionEnvelope: {
|
|
192
193
|
cid: string;
|
|
193
194
|
sha256: string;
|
|
@@ -208,14 +209,13 @@ export declare const PredictionV0VerdictPayloadSchema: z.ZodEffects<z.ZodObject<
|
|
|
208
209
|
}[];
|
|
209
210
|
overall: "valid" | "invalid";
|
|
210
211
|
};
|
|
211
|
-
score: string;
|
|
212
212
|
scoreBasis: "brier.v1";
|
|
213
213
|
scoreVersion: string;
|
|
214
214
|
oracleReading: {
|
|
215
215
|
feed: string;
|
|
216
|
+
updatedAt: number;
|
|
216
217
|
roundId: string;
|
|
217
218
|
answer: string;
|
|
218
|
-
updatedAt: number;
|
|
219
219
|
nextRoundUpdatedAt?: number | undefined;
|
|
220
220
|
};
|
|
221
221
|
groundTruth: "YES" | "NO";
|
|
@@ -227,6 +227,7 @@ export declare const PredictionV0VerdictPayloadSchema: z.ZodEffects<z.ZodObject<
|
|
|
227
227
|
modelId: string;
|
|
228
228
|
submissionManifestCid?: string | undefined;
|
|
229
229
|
};
|
|
230
|
+
score: string;
|
|
230
231
|
solutionEnvelope: {
|
|
231
232
|
cid: string;
|
|
232
233
|
sha256: string;
|
|
@@ -247,14 +248,13 @@ export declare const PredictionV0VerdictPayloadSchema: z.ZodEffects<z.ZodObject<
|
|
|
247
248
|
}[];
|
|
248
249
|
overall: "valid" | "invalid";
|
|
249
250
|
};
|
|
250
|
-
score: string;
|
|
251
251
|
scoreBasis: "brier.v1";
|
|
252
252
|
scoreVersion: string;
|
|
253
253
|
oracleReading: {
|
|
254
254
|
feed: string;
|
|
255
|
+
updatedAt: number;
|
|
255
256
|
roundId: string;
|
|
256
257
|
answer: string;
|
|
257
|
-
updatedAt: number;
|
|
258
258
|
nextRoundUpdatedAt?: number | undefined;
|
|
259
259
|
};
|
|
260
260
|
groundTruth: "YES" | "NO";
|
|
@@ -266,6 +266,7 @@ export declare const PredictionV0VerdictPayloadSchema: z.ZodEffects<z.ZodObject<
|
|
|
266
266
|
modelId: string;
|
|
267
267
|
submissionManifestCid?: string | undefined;
|
|
268
268
|
};
|
|
269
|
+
score: string;
|
|
269
270
|
solutionEnvelope: {
|
|
270
271
|
cid: string;
|
|
271
272
|
sha256: string;
|
|
@@ -286,14 +287,13 @@ export declare const PredictionV0VerdictPayloadSchema: z.ZodEffects<z.ZodObject<
|
|
|
286
287
|
}[];
|
|
287
288
|
overall: "valid" | "invalid";
|
|
288
289
|
};
|
|
289
|
-
score: string;
|
|
290
290
|
scoreBasis: "brier.v1";
|
|
291
291
|
scoreVersion: string;
|
|
292
292
|
oracleReading: {
|
|
293
293
|
feed: string;
|
|
294
|
+
updatedAt: number;
|
|
294
295
|
roundId: string;
|
|
295
296
|
answer: string;
|
|
296
|
-
updatedAt: number;
|
|
297
297
|
nextRoundUpdatedAt?: number | undefined;
|
|
298
298
|
};
|
|
299
299
|
groundTruth: "YES" | "NO";
|
|
@@ -84,7 +84,7 @@ export declare const SweRebenchV2SolutionPayloadSchema: z.ZodObject<{
|
|
|
84
84
|
} | undefined;
|
|
85
85
|
}>;
|
|
86
86
|
export type SweRebenchV2SolutionPayload = z.infer<typeof SweRebenchV2SolutionPayloadSchema>;
|
|
87
|
-
export declare const SweRebenchV2VerdictPayloadSchema: z.ZodObject<{
|
|
87
|
+
export declare const SweRebenchV2VerdictV1PayloadSchema: z.ZodObject<{
|
|
88
88
|
schemaVersion: z.ZodLiteral<"swe-rebench-v2-verdict.v1">;
|
|
89
89
|
/** Pass@1 score: 1 if the test suite passed, 0 otherwise. */
|
|
90
90
|
score: z.ZodUnion<[z.ZodLiteral<0>, z.ZodLiteral<1>]>;
|
|
@@ -107,4 +107,111 @@ export declare const SweRebenchV2VerdictPayloadSchema: z.ZodObject<{
|
|
|
107
107
|
passed_match: boolean;
|
|
108
108
|
evaluator_cost_usd: number;
|
|
109
109
|
}>;
|
|
110
|
+
/**
|
|
111
|
+
* v2 — additive graded signal (Lever A, #1019). Superset of v1: keeps the
|
|
112
|
+
* binary `score`/`passed_match` (the objective) and adds the per-test counts
|
|
113
|
+
* the grader already computes. `gradedScore = passedCount / totalCount` is
|
|
114
|
+
* derived downstream, never stored, to keep one source of truth.
|
|
115
|
+
*/
|
|
116
|
+
export declare const SweRebenchV2VerdictV2PayloadSchema: z.ZodEffects<z.ZodObject<{
|
|
117
|
+
schemaVersion: z.ZodLiteral<"swe-rebench-v2-verdict.v2">;
|
|
118
|
+
score: z.ZodUnion<[z.ZodLiteral<0>, z.ZodLiteral<1>]>;
|
|
119
|
+
passed_match: z.ZodBoolean;
|
|
120
|
+
evaluator_cost_usd: z.ZodNumber;
|
|
121
|
+
/** Count of individual tests that passed in this run. */
|
|
122
|
+
passedCount: z.ZodNumber;
|
|
123
|
+
/** Total gradeable tests in this run (FAIL_TO_PASS ∪ PASS_TO_PASS as the runner reported). */
|
|
124
|
+
totalCount: z.ZodNumber;
|
|
125
|
+
}, "strip", z.ZodTypeAny, {
|
|
126
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
127
|
+
score: 0 | 1;
|
|
128
|
+
passed_match: boolean;
|
|
129
|
+
evaluator_cost_usd: number;
|
|
130
|
+
passedCount: number;
|
|
131
|
+
totalCount: number;
|
|
132
|
+
}, {
|
|
133
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
134
|
+
score: 0 | 1;
|
|
135
|
+
passed_match: boolean;
|
|
136
|
+
evaluator_cost_usd: number;
|
|
137
|
+
passedCount: number;
|
|
138
|
+
totalCount: number;
|
|
139
|
+
}>, {
|
|
140
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
141
|
+
score: 0 | 1;
|
|
142
|
+
passed_match: boolean;
|
|
143
|
+
evaluator_cost_usd: number;
|
|
144
|
+
passedCount: number;
|
|
145
|
+
totalCount: number;
|
|
146
|
+
}, {
|
|
147
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
148
|
+
score: 0 | 1;
|
|
149
|
+
passed_match: boolean;
|
|
150
|
+
evaluator_cost_usd: number;
|
|
151
|
+
passedCount: number;
|
|
152
|
+
totalCount: number;
|
|
153
|
+
}>;
|
|
154
|
+
/** Accept either schema version on the read path. */
|
|
155
|
+
export declare const SweRebenchV2VerdictPayloadSchema: z.ZodUnion<[z.ZodObject<{
|
|
156
|
+
schemaVersion: z.ZodLiteral<"swe-rebench-v2-verdict.v1">;
|
|
157
|
+
/** Pass@1 score: 1 if the test suite passed, 0 otherwise. */
|
|
158
|
+
score: z.ZodUnion<[z.ZodLiteral<0>, z.ZodLiteral<1>]>;
|
|
159
|
+
/**
|
|
160
|
+
* Whether the actual passed/failed test set matched the expected
|
|
161
|
+
* `FAIL_TO_PASS ∪ PASS_TO_PASS` exactly. False if extra tests passed
|
|
162
|
+
* or expected tests failed unexpectedly.
|
|
163
|
+
*/
|
|
164
|
+
passed_match: z.ZodBoolean;
|
|
165
|
+
/** Cost of running the evaluator on this Solution (USDC-equivalent). */
|
|
166
|
+
evaluator_cost_usd: z.ZodNumber;
|
|
167
|
+
}, "strip", z.ZodTypeAny, {
|
|
168
|
+
schemaVersion: "swe-rebench-v2-verdict.v1";
|
|
169
|
+
score: 0 | 1;
|
|
170
|
+
passed_match: boolean;
|
|
171
|
+
evaluator_cost_usd: number;
|
|
172
|
+
}, {
|
|
173
|
+
schemaVersion: "swe-rebench-v2-verdict.v1";
|
|
174
|
+
score: 0 | 1;
|
|
175
|
+
passed_match: boolean;
|
|
176
|
+
evaluator_cost_usd: number;
|
|
177
|
+
}>, z.ZodEffects<z.ZodObject<{
|
|
178
|
+
schemaVersion: z.ZodLiteral<"swe-rebench-v2-verdict.v2">;
|
|
179
|
+
score: z.ZodUnion<[z.ZodLiteral<0>, z.ZodLiteral<1>]>;
|
|
180
|
+
passed_match: z.ZodBoolean;
|
|
181
|
+
evaluator_cost_usd: z.ZodNumber;
|
|
182
|
+
/** Count of individual tests that passed in this run. */
|
|
183
|
+
passedCount: z.ZodNumber;
|
|
184
|
+
/** Total gradeable tests in this run (FAIL_TO_PASS ∪ PASS_TO_PASS as the runner reported). */
|
|
185
|
+
totalCount: z.ZodNumber;
|
|
186
|
+
}, "strip", z.ZodTypeAny, {
|
|
187
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
188
|
+
score: 0 | 1;
|
|
189
|
+
passed_match: boolean;
|
|
190
|
+
evaluator_cost_usd: number;
|
|
191
|
+
passedCount: number;
|
|
192
|
+
totalCount: number;
|
|
193
|
+
}, {
|
|
194
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
195
|
+
score: 0 | 1;
|
|
196
|
+
passed_match: boolean;
|
|
197
|
+
evaluator_cost_usd: number;
|
|
198
|
+
passedCount: number;
|
|
199
|
+
totalCount: number;
|
|
200
|
+
}>, {
|
|
201
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
202
|
+
score: 0 | 1;
|
|
203
|
+
passed_match: boolean;
|
|
204
|
+
evaluator_cost_usd: number;
|
|
205
|
+
passedCount: number;
|
|
206
|
+
totalCount: number;
|
|
207
|
+
}, {
|
|
208
|
+
schemaVersion: "swe-rebench-v2-verdict.v2";
|
|
209
|
+
score: 0 | 1;
|
|
210
|
+
passed_match: boolean;
|
|
211
|
+
evaluator_cost_usd: number;
|
|
212
|
+
passedCount: number;
|
|
213
|
+
totalCount: number;
|
|
214
|
+
}>]>;
|
|
215
|
+
export type SweRebenchV2VerdictV1Payload = z.infer<typeof SweRebenchV2VerdictV1PayloadSchema>;
|
|
216
|
+
export type SweRebenchV2VerdictV2Payload = z.infer<typeof SweRebenchV2VerdictV2PayloadSchema>;
|
|
110
217
|
export type SweRebenchV2VerdictPayload = z.infer<typeof SweRebenchV2VerdictPayloadSchema>;
|
|
@@ -43,7 +43,7 @@ export const SweRebenchV2SolutionPayloadSchema = z.object({
|
|
|
43
43
|
})
|
|
44
44
|
.optional(),
|
|
45
45
|
});
|
|
46
|
-
export const
|
|
46
|
+
export const SweRebenchV2VerdictV1PayloadSchema = z.object({
|
|
47
47
|
schemaVersion: z.literal('swe-rebench-v2-verdict.v1'),
|
|
48
48
|
/** Pass@1 score: 1 if the test suite passed, 0 otherwise. */
|
|
49
49
|
score: z.union([z.literal(0), z.literal(1)]),
|
|
@@ -56,3 +56,27 @@ export const SweRebenchV2VerdictPayloadSchema = z.object({
|
|
|
56
56
|
/** Cost of running the evaluator on this Solution (USDC-equivalent). */
|
|
57
57
|
evaluator_cost_usd: z.number().nonnegative(),
|
|
58
58
|
});
|
|
59
|
+
/**
 * Verdict payload, schema version `swe-rebench-v2-verdict.v2` (Lever A, #1019).
 *
 * Additive over v1: the binary `score` / `passed_match` fields (the objective)
 * are kept, and the per-test counts the grader already computes are added.
 * `gradedScore = passedCount / totalCount` is intentionally NOT a field here;
 * it is derived downstream so there is a single source of truth.
 *
 * The trailing refinement rejects payloads where `passedCount` is greater
 * than `totalCount`.
 */
export const SweRebenchV2VerdictV2PayloadSchema = z
    .object({
        schemaVersion: z.literal('swe-rebench-v2-verdict.v2'),
        score: z.union([z.literal(0), z.literal(1)]),
        passed_match: z.boolean(),
        evaluator_cost_usd: z.number().nonnegative(),
        // camelCase matches schemaVersion/totalUsd elsewhere in this file and the #1019 design.
        /** Count of individual tests that passed in this run. */
        passedCount: z.number().int().nonnegative(),
        /** Total gradeable tests in this run (FAIL_TO_PASS ∪ PASS_TO_PASS as the runner reported). */
        totalCount: z.number().int().nonnegative(),
    })
    .refine(({ passedCount, totalCount }) => passedCount <= totalCount, {
        message: 'passedCount must not exceed totalCount',
    });
|
|
78
|
+
/** Accept either schema version on the read path. */
|
|
79
|
+
export const SweRebenchV2VerdictPayloadSchema = z.union([
|
|
80
|
+
SweRebenchV2VerdictV1PayloadSchema,
|
|
81
|
+
SweRebenchV2VerdictV2PayloadSchema,
|
|
82
|
+
]);
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Held-out task slate for the swe-rebench-v2 RL eval harness (issue #817/#820).
|
|
3
|
+
*
|
|
4
|
+
* A slate is a versioned, content-addressed set of swe-rebench-v2
|
|
5
|
+
* `instance_id`s RESERVED from the training pool. #817 ships the canonical
|
|
6
|
+
* train-stream exclusion in `client/`; the indexer (this package's consumer)
|
|
7
|
+
* needs the same membership to slate-scope `frozenResolvedRate` (#820 AC#1),
|
|
8
|
+
* but the indexer cannot import from `client/` (see
|
|
9
|
+
* `packages/indexer/src/handlers.ts` header). So the membership is embedded
|
|
10
|
+
* here, in the SDK both the indexer and clients may depend on.
|
|
11
|
+
*
|
|
12
|
+
* The artifact is embedded (not read from disk) so the SDK stays a pure,
|
|
13
|
+
* bundler-friendly module with no `node:fs` runtime dependency. It carries a
|
|
14
|
+
* self-declared content hash: the loader recomputes a sha256 over the
|
|
15
|
+
* canonicalised, instance-id-sorted artifact and fails loud on mismatch. This
|
|
16
|
+
* preserves #817's fail-loud drift guard — a hand-edit that adds or removes an
|
|
17
|
+
* instance without re-deriving the hash throws. It is NOT a tamper-proof
|
|
18
|
+
* anchor (the declared hash lives beside the data it verifies); a deliberate
|
|
19
|
+
* edit that also re-derives the hash passes.
|
|
20
|
+
*
|
|
21
|
+
* The embedded `instanceIds` + `hash` are kept byte-identical to the client
|
|
22
|
+
* artifact (`client/src/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json`)
|
|
23
|
+
* by a cross-source drift test. Scores are only comparable WITHIN a version; a
|
|
24
|
+
* slate change is a distinct version (v2, ...), never an in-place edit.
|
|
25
|
+
*
|
|
26
|
+
* Hashing mirrors `_swe-rebench-v2-held-out-slate.ts` in `client/`:
|
|
27
|
+
* `sha256:` + sha256(RFC 8785 canonical JSON of the normalized artifact). The
|
|
28
|
+
* artifact shape is flat (string scalars + a string array), so a minimal
|
|
29
|
+
* inlined JCS canonicalizer reproduces the client's `canonicalize`-based hash
|
|
30
|
+
* exactly — verified by the drift test.
|
|
31
|
+
*/
|
|
32
|
+
export declare const HELD_OUT_SLATE_SCHEMA_VERSION: "held-out-slate.v1";
|
|
33
|
+
export interface HeldOutSlateArtifact {
|
|
34
|
+
schemaVersion: typeof HELD_OUT_SLATE_SCHEMA_VERSION;
|
|
35
|
+
solverType: string;
|
|
36
|
+
version: string;
|
|
37
|
+
generatedAt: string;
|
|
38
|
+
instanceIds: string[];
|
|
39
|
+
/** Declared content hash (sha256 over the canonical, sorted artifact). */
|
|
40
|
+
hash: `sha256:${string}`;
|
|
41
|
+
}
|
|
42
|
+
export interface LoadedHeldOutSlate {
|
|
43
|
+
version: string;
|
|
44
|
+
hash: `sha256:${string}`;
|
|
45
|
+
instanceIds: Set<string>;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Embedded v1 slate. Byte-identical (sans the `comment` doc field, which is not
|
|
49
|
+
* part of the hashed artifact) to
|
|
50
|
+
* `client/src/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json`.
|
|
51
|
+
* Guarded against drift by `test/solvernets/swe-rebench-v2-held-out-slate-cross-source.test.ts`.
|
|
52
|
+
*/
|
|
53
|
+
export declare const HELD_OUT_SLATE_V1: HeldOutSlateArtifact;
|
|
54
|
+
export declare function hashHeldOutSlateArtifact(artifact: HeldOutSlateArtifact): `sha256:${string}`;
|
|
55
|
+
/**
|
|
56
|
+
* Load the held-out slate membership for the swe-rebench-v2 solverType at
|
|
57
|
+
* `version`. Recomputes the content hash and fails loud on mismatch against the
|
|
58
|
+
* artifact's declared `hash` (catching accidental edit-without-rehash drift —
|
|
59
|
+
* not deliberate tampering; see the module header). Throws for an unknown
|
|
60
|
+
* version (scores are only comparable within a known version).
|
|
61
|
+
*
|
|
62
|
+
* `override` is for tests only — it lets the fail-loud guard be exercised
|
|
63
|
+
* against a tampered copy without mutating the canonical embedded artifact.
|
|
64
|
+
*/
|
|
65
|
+
export declare function loadHeldOutSlate(version: string, override?: HeldOutSlateArtifact): LoadedHeldOutSlate;
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Held-out task slate for the swe-rebench-v2 RL eval harness (issue #817/#820).
|
|
3
|
+
*
|
|
4
|
+
* A slate is a versioned, content-addressed set of swe-rebench-v2
|
|
5
|
+
* `instance_id`s RESERVED from the training pool. #817 ships the canonical
|
|
6
|
+
* train-stream exclusion in `client/`; the indexer (this package's consumer)
|
|
7
|
+
* needs the same membership to slate-scope `frozenResolvedRate` (#820 AC#1),
|
|
8
|
+
* but the indexer cannot import from `client/` (see
|
|
9
|
+
* `packages/indexer/src/handlers.ts` header). So the membership is embedded
|
|
10
|
+
* here, in the SDK both the indexer and clients may depend on.
|
|
11
|
+
*
|
|
12
|
+
* The artifact is embedded (not read from disk) so the SDK stays a pure,
|
|
13
|
+
* bundler-friendly module with no `node:fs` runtime dependency. It carries a
|
|
14
|
+
* self-declared content hash: the loader recomputes a sha256 over the
|
|
15
|
+
* canonicalised, instance-id-sorted artifact and fails loud on mismatch. This
|
|
16
|
+
* preserves #817's fail-loud drift guard — a hand-edit that adds or removes an
|
|
17
|
+
* instance without re-deriving the hash throws. It is NOT a tamper-proof
|
|
18
|
+
* anchor (the declared hash lives beside the data it verifies); a deliberate
|
|
19
|
+
* edit that also re-derives the hash passes.
|
|
20
|
+
*
|
|
21
|
+
* The embedded `instanceIds` + `hash` are kept byte-identical to the client
|
|
22
|
+
* artifact (`client/src/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json`)
|
|
23
|
+
* by a cross-source drift test. Scores are only comparable WITHIN a version; a
|
|
24
|
+
* slate change is a distinct version (v2, ...), never an in-place edit.
|
|
25
|
+
*
|
|
26
|
+
* Hashing mirrors `_swe-rebench-v2-held-out-slate.ts` in `client/`:
|
|
27
|
+
* `sha256:` + sha256(RFC 8785 canonical JSON of the normalized artifact). The
|
|
28
|
+
* artifact shape is flat (string scalars + a string array), so a minimal
|
|
29
|
+
* inlined JCS canonicalizer reproduces the client's `canonicalize`-based hash
|
|
30
|
+
* exactly — verified by the drift test.
|
|
31
|
+
*/
|
|
32
|
+
/// <reference types="node" />
|
|
33
|
+
import { createHash } from 'node:crypto';
|
|
34
|
+
export const HELD_OUT_SLATE_SCHEMA_VERSION = 'held-out-slate.v1';
|
|
35
|
+
/**
|
|
36
|
+
* Embedded v1 slate. Byte-identical (sans the `comment` doc field, which is not
|
|
37
|
+
* part of the hashed artifact) to
|
|
38
|
+
* `client/src/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json`.
|
|
39
|
+
* Guarded against drift by `test/solvernets/swe-rebench-v2-held-out-slate-cross-source.test.ts`.
|
|
40
|
+
*/
|
|
41
|
+
export const HELD_OUT_SLATE_V1 = {
|
|
42
|
+
schemaVersion: 'held-out-slate.v1',
|
|
43
|
+
solverType: 'swe-rebench-v2.v1',
|
|
44
|
+
version: 'v1',
|
|
45
|
+
generatedAt: '2026-05-29T00:00:00.000Z',
|
|
46
|
+
hash: 'sha256:2b029de15e271d5d2de35fe6477af98aef9fdc46f357e59139179edab1a42b15',
|
|
47
|
+
instanceIds: [
|
|
48
|
+
'ASPP__pelita-863',
|
|
49
|
+
'ASPP__pelita-875',
|
|
50
|
+
'AbsaOSS__generate-release-notes-207',
|
|
51
|
+
'All-Hands-AI__OpenHands-11914',
|
|
52
|
+
'BQSKit__bqskit-337',
|
|
53
|
+
'BerriAI__litellm-14715',
|
|
54
|
+
'BerriAI__litellm-15753',
|
|
55
|
+
'BrianPugh__cyclopts-609',
|
|
56
|
+
'carsdotcom__skelebot-280',
|
|
57
|
+
'pandas-dev__pandas-60736',
|
|
58
|
+
],
|
|
59
|
+
};
|
|
60
|
+
const SLATES_BY_VERSION = {
|
|
61
|
+
v1: HELD_OUT_SLATE_V1,
|
|
62
|
+
};
|
|
63
|
+
/**
|
|
64
|
+
* Minimal RFC 8785 (JCS) serialization for the flat slate artifact: object keys
|
|
65
|
+
* sorted by UTF-16 code-unit order, string values JSON-escaped, arrays in
|
|
66
|
+
* order. Reproduces the client loader's `canonicalize`-based output for this
|
|
67
|
+
* shape exactly (the cross-source drift test locks this against the client
|
|
68
|
+
* artifact's declared hash).
|
|
69
|
+
*/
|
|
70
|
+
function canonicalJson(value) {
|
|
71
|
+
if (typeof value === 'string')
|
|
72
|
+
return JSON.stringify(value);
|
|
73
|
+
if (Array.isArray(value))
|
|
74
|
+
return `[${value.map(canonicalJson).join(',')}]`;
|
|
75
|
+
if (value !== null && typeof value === 'object') {
|
|
76
|
+
const keys = Object.keys(value).sort();
|
|
77
|
+
return `{${keys
|
|
78
|
+
.map((k) => `${JSON.stringify(k)}:${canonicalJson(value[k])}`)
|
|
79
|
+
.join(',')}}`;
|
|
80
|
+
}
|
|
81
|
+
return JSON.stringify(value);
|
|
82
|
+
}
|
|
83
|
+
/**
 * Build the projection of a slate artifact that is fed to the hash.
 *
 * The projection carries the module's schema version constant, the artifact's
 * `solverType`, `version` and `generatedAt`, and a sorted copy of its
 * `instanceIds`. The artifact's own `hash` field is deliberately left out (a
 * hash never hashes itself).
 *
 * NOTE(review): ids are ordered with `String.prototype.localeCompare`, which
 * is locale-sensitive. Presumably every runtime that derives or verifies the
 * hash orders these ids identically — confirm. Swapping the comparator could
 * change the computed hash, so it is kept as-is here.
 *
 * @param {object} artifact - Slate artifact; not mutated.
 * @returns {object} The normalized, hashable projection.
 */
function normalizeHeldOutSlateArtifact(artifact) {
    const { solverType, version, generatedAt, instanceIds } = artifact;
    // Sort a copy so the caller's array keeps its original order.
    const orderedIds = [...instanceIds];
    orderedIds.sort((left, right) => left.localeCompare(right));
    return {
        schemaVersion: HELD_OUT_SLATE_SCHEMA_VERSION,
        solverType,
        version,
        generatedAt,
        instanceIds: orderedIds,
    };
}
|
|
96
|
+
/**
 * Compute the content hash of a slate artifact.
 *
 * The artifact is first reduced to its normalized projection, serialized with
 * the module's canonical JSON routine, and digested with SHA-256.
 *
 * @param {object} artifact - Slate artifact to hash.
 * @returns {string} `sha256:` followed by the lowercase hex digest.
 */
export function hashHeldOutSlateArtifact(artifact) {
    const projection = normalizeHeldOutSlateArtifact(artifact);
    const hasher = createHash('sha256');
    hasher.update(canonicalJson(projection));
    return `sha256:${hasher.digest('hex')}`;
}
|
|
100
|
+
/**
|
|
101
|
+
* Load the held-out slate membership for the swe-rebench-v2 solverType at
|
|
102
|
+
* `version`. Recomputes the content hash and fails loud on mismatch against the
|
|
103
|
+
* artifact's declared `hash` (catching accidental edit-without-rehash drift —
|
|
104
|
+
* not deliberate tampering; see the module header). Throws for an unknown
|
|
105
|
+
* version (scores are only comparable within a known version).
|
|
106
|
+
*
|
|
107
|
+
* `override` is for tests only — it lets the fail-loud guard be exercised
|
|
108
|
+
* against a tampered copy without mutating the canonical embedded artifact.
|
|
109
|
+
*/
|
|
110
|
+
export function loadHeldOutSlate(version, override) {
|
|
111
|
+
const artifact = override ?? SLATES_BY_VERSION[version];
|
|
112
|
+
if (!artifact) {
|
|
113
|
+
throw new Error(`held-out slate not found for swe-rebench-v2 version=${version}`);
|
|
114
|
+
}
|
|
115
|
+
if (artifact.version !== version) {
|
|
116
|
+
throw new Error(`held-out slate version mismatch: artifact declares ${artifact.version}, requested ${version}`);
|
|
117
|
+
}
|
|
118
|
+
const computed = hashHeldOutSlateArtifact(artifact);
|
|
119
|
+
if (artifact.hash !== computed) {
|
|
120
|
+
throw new Error(`held-out slate hash mismatch for swe-rebench-v2 ${version}: declared ${artifact.hash}, computed ${computed} (artifact was edited without re-deriving the hash)`);
|
|
121
|
+
}
|
|
122
|
+
return { version: artifact.version, hash: computed, instanceIds: new Set(artifact.instanceIds) };
|
|
123
|
+
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export { SweRebenchV2LanguageSchema, SweRebenchV2TaskSchema, } from '../swe-rebench-v2.js';
|
|
2
2
|
export type { SweRebenchV2Task } from '../swe-rebench-v2.js';
|
|
3
|
-
export { SweRebenchV2SolutionPayloadSchema, SweRebenchV2VerdictPayloadSchema, } from '../payloads/swe-rebench-v2.js';
|
|
4
|
-
export type { SweRebenchV2SolutionPayload, SweRebenchV2VerdictPayload, } from '../payloads/swe-rebench-v2.js';
|
|
3
|
+
export { SweRebenchV2SolutionPayloadSchema, SweRebenchV2VerdictPayloadSchema, SweRebenchV2VerdictV2PayloadSchema, } from '../payloads/swe-rebench-v2.js';
|
|
4
|
+
export type { SweRebenchV2SolutionPayload, SweRebenchV2VerdictPayload, SweRebenchV2VerdictV2Payload, } from '../payloads/swe-rebench-v2.js';
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
export { SweRebenchV2LanguageSchema, SweRebenchV2TaskSchema, } from '../swe-rebench-v2.js';
|
|
2
|
-
export { SweRebenchV2SolutionPayloadSchema, SweRebenchV2VerdictPayloadSchema, } from '../payloads/swe-rebench-v2.js';
|
|
2
|
+
export { SweRebenchV2SolutionPayloadSchema, SweRebenchV2VerdictPayloadSchema, SweRebenchV2VerdictV2PayloadSchema, } from '../payloads/swe-rebench-v2.js';
|
|
@@ -31,6 +31,10 @@
|
|
|
31
31
|
"import": "./dist/solvernets/swe-rebench-v2.js",
|
|
32
32
|
"types": "./dist/solvernets/swe-rebench-v2.d.ts"
|
|
33
33
|
},
|
|
34
|
+
"./solvernets/swe-rebench-v2-held-out-slate": {
|
|
35
|
+
"import": "./dist/solvernets/swe-rebench-v2-held-out-slate.js",
|
|
36
|
+
"types": "./dist/solvernets/swe-rebench-v2-held-out-slate.d.ts"
|
|
37
|
+
},
|
|
34
38
|
"./checkpoint": {
|
|
35
39
|
"import": "./dist/checkpoint.js",
|
|
36
40
|
"types": "./dist/checkpoint.d.ts"
|
package/docker-compose.yml
CHANGED
|
@@ -33,8 +33,9 @@ services:
|
|
|
33
33
|
volumes:
|
|
34
34
|
# Persistent daemon state: database, keystore, and earning progress
|
|
35
35
|
- jinn-data:/data
|
|
36
|
-
# Persistent authentication state: stores Claude's OAuth token
|
|
37
|
-
-
|
|
36
|
+
# Persistent authentication state: stores Claude's OAuth token. The image
|
|
37
|
+
# now runs as the non-root `node` user, so file-auth lives at /home/node/.claude.
|
|
38
|
+
- jinn-claude-state:/home/node/.claude
|
|
38
39
|
command: ["run"]
|
|
39
40
|
|
|
40
41
|
volumes:
|