npm - @jinn-network/client - Versions diffs - 0.1.8 → 0.1.9-canary.050a41b1 - Mend

@jinn-network/client 0.1.8 → 0.1.9-canary.050a41b1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (263) hide show

package/dist/adapters/mech/adapter.d.ts +21 -1
package/dist/adapters/mech/adapter.js +77 -10
package/dist/adapters/mech/adapter.js.map +1 -1
package/dist/adapters/mech/contracts.js +62 -28
package/dist/adapters/mech/contracts.js.map +1 -1
package/dist/adapters/mech/safe-revert.d.ts +4 -0
package/dist/adapters/mech/safe-revert.js +5 -1
package/dist/adapters/mech/safe-revert.js.map +1 -1
package/dist/adapters/mech/safe.js +5 -1
package/dist/adapters/mech/safe.js.map +1 -1
package/dist/adapters/mech/verdict-code.js +1 -1
package/dist/adapters/mech/verdict-code.js.map +1 -1
package/dist/api/bootstrap-endpoint.d.ts +1 -0
package/dist/api/bootstrap-endpoint.js +1 -0
package/dist/api/bootstrap-endpoint.js.map +1 -1
package/dist/api/discovery-endpoint.d.ts +1 -0
package/dist/api/discovery-endpoint.js +24 -0
package/dist/api/discovery-endpoint.js.map +1 -1
package/dist/api/fleet-build.d.ts +1 -7
package/dist/api/fleet-build.js +0 -7
package/dist/api/fleet-build.js.map +1 -1
package/dist/api/gather-status.d.ts +8 -2
package/dist/api/gather-status.js +29 -117
package/dist/api/gather-status.js.map +1 -1
package/dist/api/loop-completion-build.d.ts +79 -0
package/dist/api/loop-completion-build.js +155 -0
package/dist/api/loop-completion-build.js.map +1 -0
package/dist/api/peers.js +2 -0
package/dist/api/peers.js.map +1 -1
package/dist/api/setup-endpoints.d.ts +32 -0
package/dist/api/setup-endpoints.js +93 -23
package/dist/api/setup-endpoints.js.map +1 -1
package/dist/api/solvernets-endpoints.js +3 -0
package/dist/api/solvernets-endpoints.js.map +1 -1
package/dist/api/status-build.d.ts +43 -33
package/dist/api/status-build.js +3 -26
package/dist/api/status-build.js.map +1 -1
package/dist/api/status-rollup-build.d.ts +0 -4
package/dist/api/status-rollup-build.js +0 -4
package/dist/api/status-rollup-build.js.map +1 -1
package/dist/build-info.json +4 -4
package/dist/build-meta.json +1 -1
package/dist/cli/commands/codedigest-revert-check.js +6 -2
package/dist/cli/commands/codedigest-revert-check.js.map +1 -1
package/dist/cli/commands/doctor.d.ts +3 -0
package/dist/cli/commands/doctor.js +37 -2
package/dist/cli/commands/doctor.js.map +1 -1
package/dist/cli/commands/eval.d.ts +76 -0
package/dist/cli/commands/eval.js +401 -0
package/dist/cli/commands/eval.js.map +1 -0
package/dist/cli/commands/rewards.d.ts +2 -0
package/dist/cli/commands/rewards.js +30 -3
package/dist/cli/commands/rewards.js.map +1 -1
package/dist/cli/commands/solver-nets.js +68 -0
package/dist/cli/commands/solver-nets.js.map +1 -1
package/dist/cli/commands/status.js +0 -1
package/dist/cli/commands/status.js.map +1 -1
package/dist/cli/index.js +2 -0
package/dist/cli/index.js.map +1 -1
package/dist/config.d.ts +58 -7
package/dist/config.js +96 -7
package/dist/config.js.map +1 -1
package/dist/daemon/ai-units-gate.d.ts +6 -6
package/dist/daemon/ai-units-gate.js +11 -10
package/dist/daemon/ai-units-gate.js.map +1 -1
package/dist/daemon/balance-topup-loop.js +3 -0
package/dist/daemon/balance-topup-loop.js.map +1 -1
package/dist/daemon/creator.js +2 -0
package/dist/daemon/creator.js.map +1 -1
package/dist/daemon/daemon.d.ts +15 -0
package/dist/daemon/daemon.js +78 -22
package/dist/daemon/daemon.js.map +1 -1
package/dist/daemon/eviction-loop.d.ts +7 -0
package/dist/daemon/eviction-loop.js +16 -0
package/dist/daemon/eviction-loop.js.map +1 -1
package/dist/daemon/jinn-claim-loop.js +3 -0
package/dist/daemon/jinn-claim-loop.js.map +1 -1
package/dist/daemon/join-applier.d.ts +35 -0
package/dist/daemon/join-applier.js +49 -0
package/dist/daemon/join-applier.js.map +1 -0
package/dist/daemon/loop-heartbeat.d.ts +34 -0
package/dist/daemon/loop-heartbeat.js +39 -0
package/dist/daemon/loop-heartbeat.js.map +1 -0
package/dist/daemon/reward-claim-loop.js +3 -0
package/dist/daemon/reward-claim-loop.js.map +1 -1
package/dist/daemon/watchdog-loop.d.ts +84 -0
package/dist/daemon/watchdog-loop.js +91 -0
package/dist/daemon/watchdog-loop.js.map +1 -0
package/dist/dashboard/assets/index-8tAiMbUV.css +1 -0
package/dist/dashboard/assets/index-CSFVwGFh.js +167 -0
package/dist/dashboard/index.html +2 -2
package/dist/discovery/http.d.ts +7 -0
package/dist/discovery/http.js +241 -25
package/dist/discovery/http.js.map +1 -1
package/dist/discovery/onchain.js +155 -1
package/dist/discovery/onchain.js.map +1 -1
package/dist/discovery/types.d.ts +106 -0
package/dist/discovery/types.js +40 -0
package/dist/discovery/types.js.map +1 -1
package/dist/discovery/with-fallback.js +14 -0
package/dist/discovery/with-fallback.js.map +1 -1
package/dist/earning/bootstrap.d.ts +23 -0
package/dist/earning/bootstrap.js +76 -27
package/dist/earning/bootstrap.js.map +1 -1
package/dist/earning/faucet.d.ts +1 -1
package/dist/earning/faucet.js +2 -2
package/dist/earning/faucet.js.map +1 -1
package/dist/earning/safe-adapter.js +11 -0
package/dist/earning/safe-adapter.js.map +1 -1
package/dist/eval/eval-harness-run.d.ts +63 -0
package/dist/eval/eval-harness-run.js +123 -0
package/dist/eval/eval-harness-run.js.map +1 -0
package/dist/eval/orchestrator.d.ts +163 -0
package/dist/eval/orchestrator.js +232 -0
package/dist/eval/orchestrator.js.map +1 -0
package/dist/eval/paired.d.ts +68 -0
package/dist/eval/paired.js +93 -0
package/dist/eval/paired.js.map +1 -0
package/dist/eval/resolve-slate-tasks.d.ts +35 -0
package/dist/eval/resolve-slate-tasks.js +56 -0
package/dist/eval/resolve-slate-tasks.js.map +1 -0
package/dist/eval/screen-discovery.d.ts +22 -0
package/dist/eval/screen-discovery.js +71 -0
package/dist/eval/screen-discovery.js.map +1 -0
package/dist/eval/screen-progress.d.ts +41 -0
package/dist/eval/screen-progress.js +60 -0
package/dist/eval/screen-progress.js.map +1 -0
package/dist/eval/screen-runner.d.ts +30 -0
package/dist/eval/screen-runner.js +289 -0
package/dist/eval/screen-runner.js.map +1 -0
package/dist/eval/screen.d.ts +107 -0
package/dist/eval/screen.js +159 -0
package/dist/eval/screen.js.map +1 -0
package/dist/eval/slope.d.ts +29 -0
package/dist/eval/slope.js +46 -0
package/dist/eval/slope.js.map +1 -0
package/dist/eval/train-sequence.d.ts +35 -0
package/dist/eval/train-sequence.js +59 -0
package/dist/eval/train-sequence.js.map +1 -0
package/dist/eval/wilson.d.ts +45 -0
package/dist/eval/wilson.js +48 -0
package/dist/eval/wilson.js.map +1 -0
package/dist/harnesses/engine/canonical-json.js +5 -3
package/dist/harnesses/engine/canonical-json.js.map +1 -1
package/dist/harnesses/engine/engine.d.ts +24 -0
package/dist/harnesses/engine/engine.js +72 -9
package/dist/harnesses/engine/engine.js.map +1 -1
package/dist/harnesses/engine/persistence.d.ts +17 -0
package/dist/harnesses/engine/persistence.js +28 -0
package/dist/harnesses/engine/persistence.js.map +1 -1
package/dist/harnesses/impls/hermes-agent/adapter.d.ts +2 -0
package/dist/harnesses/impls/hermes-agent/adapter.js +8 -5
package/dist/harnesses/impls/hermes-agent/adapter.js.map +1 -1
package/dist/harnesses/impls/hermes-agent/bootstrap.d.ts +1 -0
package/dist/harnesses/impls/hermes-agent/bootstrap.js +6 -1
package/dist/harnesses/impls/hermes-agent/bootstrap.js.map +1 -1
package/dist/harnesses/impls/hermes-agent/harness.d.ts +17 -3
package/dist/harnesses/impls/hermes-agent/harness.js +68 -5
package/dist/harnesses/impls/hermes-agent/harness.js.map +1 -1
package/dist/harnesses/impls/index.d.ts +2 -0
package/dist/harnesses/impls/index.js +2 -0
package/dist/harnesses/impls/index.js.map +1 -1
package/dist/harnesses/impls/learner/adapters/claude-code.js +5 -0
package/dist/harnesses/impls/learner/adapters/claude-code.js.map +1 -1
package/dist/harnesses/impls/learner/harness.d.ts +17 -1
package/dist/harnesses/impls/learner/harness.js +51 -1
package/dist/harnesses/impls/learner/harness.js.map +1 -1
package/dist/harnesses/impls/learner/harvest.d.ts +2 -0
package/dist/harnesses/impls/learner/harvest.js +7 -1
package/dist/harnesses/impls/learner/harvest.js.map +1 -1
package/dist/harnesses/impls/learner/plugin-path.js +1 -0
package/dist/harnesses/impls/learner/plugin-path.js.map +1 -1
package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js +3 -1
package/dist/harnesses/impls/swe-rebench-v2-evaluator/harness.js.map +1 -1
package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.d.ts +2 -2
package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js +3 -1
package/dist/harnesses/impls/swe-rebench-v2-evaluator/index.js.map +1 -1
package/dist/harnesses/readiness-registry.d.ts +10 -0
package/dist/harnesses/readiness-registry.js +13 -0
package/dist/harnesses/readiness-registry.js.map +1 -1
package/dist/harnesses/types.d.ts +14 -0
package/dist/learner/revert-decision.d.ts +16 -1
package/dist/learner/revert-decision.js +38 -18
package/dist/learner/revert-decision.js.map +1 -1
package/dist/learner/revert-stats.d.ts +14 -0
package/dist/learner/revert-stats.js +42 -0
package/dist/learner/revert-stats.js.map +1 -1
package/dist/local-provider-url.d.ts +3 -0
package/dist/local-provider-url.js +28 -0
package/dist/local-provider-url.js.map +1 -0
package/dist/main.js +94 -25
package/dist/main.js.map +1 -1
package/dist/plugins/learner/.claude-plugin/plugin.json +1 -1
package/dist/plugins/learner/.codex-plugin/plugin.json +1 -1
package/dist/plugins/learner/hooks/session-start +30 -1
package/dist/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
package/dist/preflight/deployment-readiness.d.ts +147 -0
package/dist/preflight/deployment-readiness.js +366 -0
package/dist/preflight/deployment-readiness.js.map +1 -0
package/dist/preflight/pidfile-liveness.d.ts +7 -1
package/dist/preflight/pidfile-liveness.js +14 -0
package/dist/preflight/pidfile-liveness.js.map +1 -1
package/dist/rpc/transport.d.ts +36 -0
package/dist/rpc/transport.js +123 -24
package/dist/rpc/transport.js.map +1 -1
package/dist/scripts/swe-rebench-v2-seed-pool.json +2 -1
package/dist/solver-nets/registry.d.ts +19 -0
package/dist/solver-nets/registry.js +92 -66
package/dist/solver-nets/registry.js.map +1 -1
package/dist/solver-types/_swe-rebench-v2-held-out-slate.d.ts +76 -0
package/dist/solver-types/_swe-rebench-v2-held-out-slate.js +156 -0
package/dist/solver-types/_swe-rebench-v2-held-out-slate.js.map +1 -0
package/dist/solver-types/_swe-rebench-v2-pool-recovery.d.ts +81 -0
package/dist/solver-types/_swe-rebench-v2-pool-recovery.js +116 -0
package/dist/solver-types/_swe-rebench-v2-pool-recovery.js.map +1 -0
package/dist/solver-types/_swe-rebench-v2-state.d.ts +9 -0
package/dist/solver-types/_swe-rebench-v2-state.js +14 -0
package/dist/solver-types/_swe-rebench-v2-state.js.map +1 -1
package/dist/solver-types/_swe-rebench-v2-validated-pool.d.ts +30 -0
package/dist/solver-types/_swe-rebench-v2-validated-pool.js +40 -0
package/dist/solver-types/_swe-rebench-v2-validated-pool.js.map +1 -1
package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v1.json +20 -0
package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.json +19 -0
package/dist/solver-types/slates/held-out-slate.swe-rebench-v2.v2.screening-report.json +628 -0
package/dist/solver-types/solver-type.d.ts +8 -0
package/dist/solver-types/swe-rebench-v2.d.ts +2 -0
package/dist/solver-types/swe-rebench-v2.js +115 -10
package/dist/solver-types/swe-rebench-v2.js.map +1 -1
package/dist/solvernets/launched-record-dispatcher.d.ts +3 -0
package/dist/solvernets/launched-record-dispatcher.js.map +1 -1
package/dist/solvernets/registry-client-erc8004.js +29 -37
package/dist/solvernets/registry-client-erc8004.js.map +1 -1
package/dist/solvernets/registry-client.d.ts +6 -0
package/dist/solvernets/store.js +7 -2
package/dist/solvernets/store.js.map +1 -1
package/dist/spend/ai-units-config.d.ts +10 -0
package/dist/spend/ai-units-config.js +7 -1
package/dist/spend/ai-units-config.js.map +1 -1
package/dist/spend/ai-units.d.ts +51 -0
package/dist/spend/ai-units.js +73 -0
package/dist/spend/ai-units.js.map +1 -1
package/dist/spend/record.js +12 -5
package/dist/spend/record.js.map +1 -1
package/dist/store/store.d.ts +91 -5
package/dist/store/store.js +170 -7
package/dist/store/store.js.map +1 -1
package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.d.ts +108 -1
package/dist/vendor/@jinn-network/sdk/dist/payloads/swe-rebench-v2.js +25 -1
package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.d.ts +65 -0
package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2-held-out-slate.js +123 -0
package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.d.ts +2 -2
package/dist/vendor/@jinn-network/sdk/dist/solvernets/swe-rebench-v2.js +1 -1
package/dist/vendor/@jinn-network/sdk/package.json +4 -0
package/docker-compose.yml +3 -2
package/package.json +22 -18
package/plugins/learner/.claude-plugin/plugin.json +1 -1
package/plugins/learner/.codex-plugin/plugin.json +1 -1
package/plugins/learner/hooks/session-start +30 -1
package/plugins/learner/skills/learn/consolidator-prompt.md +4 -0
package/plugins/swe-rebench-v2-runtime/hooks/hooks.json +16 -0
package/plugins/swe-rebench-v2-runtime/hooks/session-start +74 -0
package/dist/dashboard/assets/index-CzKxvMcU.css +0 -32
package/dist/dashboard/assets/index-yVemxHot.js +0 -351

package/dist/spend/ai-units-config.js CHANGED Viewed

@@ -1,11 +1,13 @@
 import { resolveCredentialId } from './credential.js';
-import { projectAiUnits, resolveReferenceCeiling } from './ai-units.js';
+import { projectAiUnits, projectTaskUsdMicros, resolveReferenceCeiling, resolveReferenceCeilingUsdMicros, } from './ai-units.js';
 export function buildAiUnitsConfig(config, env,
 /** Optional home dir override for resolveCredentialId's disk probe — tests only. */
 homeDirOverride) {
     const { units_per_block, units_per_week } = resolveReferenceCeiling(env);
+    const { usd_micros_per_block, usd_micros_per_week } = resolveReferenceCeilingUsdMicros(env);
     const manifestCredentials = {};
     const manifestProjectedAiUnits = {};
+    const manifestProjectedUsdMicros = {};
     const manifestModels = {};
     for (const [manifestCid, entry] of Object.entries(config.joinedSolverNets ?? {})) {
         const credentialId = resolveCredentialId(entry.harness, env, homeDirOverride);
@@ -13,6 +15,7 @@ homeDirOverride) {
             continue;
         manifestCredentials[manifestCid] = credentialId;
         manifestProjectedAiUnits[manifestCid] = projectAiUnits(entry.harness, entry.model, credentialId);
+        manifestProjectedUsdMicros[manifestCid] = projectTaskUsdMicros(entry.harness, entry.model, credentialId);
         manifestModels[manifestCid] = entry.model;
     }
     if (Object.keys(manifestCredentials).length === 0)
@@ -20,8 +23,11 @@ homeDirOverride) {
     return {
         capPerBlock: units_per_block,
         capPerWeek: units_per_week,
+        capPerBlockUsdMicros: usd_micros_per_block,
+        capPerWeekUsdMicros: usd_micros_per_week,
         manifestCredentials,
         manifestProjectedAiUnits,
+        manifestProjectedUsdMicros,
         manifestModels,
     };
 }

package/dist/spend/ai-units-config.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-units-config.js","sourceRoot":"","sources":["../../src/spend/ai-units-config.ts"],"names":[],"mappings":"AAgBA,OAAO,EAAE,mBAAmB,EAAqB,MAAM,iBAAiB,CAAC;AACzE,OAAO,~~EAAE~~,cAAc,~~EAAE~~,uBAAuB,~~EAAE~~,MAAM,eAAe,CAAC;~~AAsBxE~~,MAAM,UAAU,kBAAkB,CAChC,MAA4C,EAC5C,GAAsB;AACtB,oFAAoF;AACpF,eAAwB;IAExB,MAAM,EAAE,eAAe,EAAE,cAAc,EAAE,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;~~IAEzE~~,MAAM,mBAAmB,GAAiC,EAAE,CAAC;IAC7D,MAAM,wBAAwB,GAAkC,EAAE,CAAC;IACnE,MAAM,cAAc,GAAuC,EAAE,CAAC;IAE9D,KAAK,MAAM,CAAC,WAAW,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;QACjF,MAAM,YAAY,GAAG,mBAAmB,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;QAC9E,IAAI,CAAC,YAAY;YAAE,SAAS;QAC5B,mBAAmB,CAAC,WAAW,CAAC,GAAG,YAAY,CAAC;QAChD,wBAAwB,CAAC,WAAW,CAAC,GAAG,cAAc,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QACjG,cAAc,CAAC,WAAW,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;IAC5C,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAEpE,OAAO;QACL,WAAW,EAAE,eAAe;QAC5B,UAAU,EAAE,cAAc;QAC1B,mBAAmB;QACnB,wBAAwB;QACxB,cAAc;KACf,CAAC;AACJ,CAAC"}
1	+ {"version":3,"file":"ai-units-config.js","sourceRoot":"","sources":["../../src/spend/ai-units-config.ts"],"names":[],"mappings":"AAgBA,OAAO,EAAE,mBAAmB,EAAqB,MAAM,iBAAiB,CAAC;AACzE,OAAO,EACL,cAAc,EACd,oBAAoB,EACpB,uBAAuB,EACvB,gCAAgC,GACjC,MAAM,eAAe,CAAC;AAgCvB,MAAM,UAAU,kBAAkB,CAChC,MAA4C,EAC5C,GAAsB;AACtB,oFAAoF;AACpF,eAAwB;IAExB,MAAM,EAAE,eAAe,EAAE,cAAc,EAAE,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IACzE,MAAM,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,GAAG,gCAAgC,CAAC,GAAG,CAAC,CAAC;IAE5F,MAAM,mBAAmB,GAAiC,EAAE,CAAC;IAC7D,MAAM,wBAAwB,GAAkC,EAAE,CAAC;IACnE,MAAM,0BAA0B,GAAkC,EAAE,CAAC;IACrE,MAAM,cAAc,GAAuC,EAAE,CAAC;IAE9D,KAAK,MAAM,CAAC,WAAW,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;QACjF,MAAM,YAAY,GAAG,mBAAmB,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,EAAE,eAAe,CAAC,CAAC;QAC9E,IAAI,CAAC,YAAY;YAAE,SAAS;QAC5B,mBAAmB,CAAC,WAAW,CAAC,GAAG,YAAY,CAAC;QAChD,wBAAwB,CAAC,WAAW,CAAC,GAAG,cAAc,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QACjG,0BAA0B,CAAC,WAAW,CAAC,GAAG,oBAAoB,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QACzG,cAAc,CAAC,WAAW,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC;IAC5C,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAEpE,OAAO;QACL,WAAW,EAAE,eAAe;QAC5B,UAAU,EAAE,cAAc;QAC1B,oBAAoB,EAAE,oBAAoB;QAC1C,mBAAmB,EAAE,mBAAmB;QACxC,mBAAmB;QACnB,wBAAwB;QACxB,0BAA0B;QAC1B,cAAc;KACf,CAAC;AACJ,CAAC"}

package/dist/spend/ai-units.d.ts CHANGED Viewed

@@ -18,6 +18,13 @@
  * The cap is layered with the existing per-credential USD `spendCaps`
  * spend-cap gate from PR #345/#346 — both gates run; the more
  * conservative one wins. This module owns the AI-units side only.
+ *
+ * Issue #1004: the gate's comparison now runs in **USD** (actual harvested
+ * spend vs a USD ceiling), not projected AI units. The peg below survives
+ * as the calibration constant deriving that USD ceiling and as the
+ * presentation-layer conversion for the legacy unit fields the SPA still
+ * reads. For subscription credentials the USD ceiling is a *proxy* budget —
+ * it bounds Jinn-attributable model cost, not the provider's plan quota.
  */
 /**
  * Calibration peg: the USD cost of one GPT-5.4-mini Codex-Plus 6h block.
@@ -26,6 +33,11 @@
  *
  * Conservative — biases toward over-warning rather than under-warning so
  * the ceiling pauses early rather than late.
+ *
+ * Issue #1004: demoted to a calibration constant — it derives
+ * {@link REFERENCE_CEILING_USD_MICROS} and converts the legacy unit fields
+ * for presentation, but no longer appears in the gate's comparison. Kept
+ * exported because the SPA's `HarnessFootprintPanel.tsx` imports it (#1006).
  */
 export declare const GPT_5_4_MINI_USD_PER_BLOCK = 0.5;
 /**
@@ -40,6 +52,21 @@ export declare const REFERENCE_CEILING: {
     readonly units_per_block: number;
     readonly units_per_week: number;
 };
+/**
+ * USD-micros ceiling (issue #1004). The gate now compares **actual USD
+ * spend** against this ceiling rather than projected AI units. Derived
+ * directly from {@link REFERENCE_CEILING} through the GPT-5.4-mini peg:
+ *   usd_micros = units / 100 * GPT_5_4_MINI_USD_PER_BLOCK * 1_000_000.
+ * Default 100 units/block => $0.50/block (500_000 micros); 2800 units/week
+ * => $14/week (14_000_000 micros). Calibration is preserved; only the
+ * comparison unit changed. For subscription credentials this USD budget is
+ * a *proxy* — it bounds Jinn-attributable model cost, not the provider's
+ * plan quota directly.
+ */
+export declare const REFERENCE_CEILING_USD_MICROS: {
+    readonly usd_micros_per_block: number;
+    readonly usd_micros_per_week: number;
+};
 /**
  * Project the AI-unit cost of one task for a harness/model combination.
  *
@@ -64,6 +91,20 @@ export declare const REFERENCE_CEILING: {
  * call-site clarity and future-proofing.
  */
 export declare function projectAiUnits(harness: string | undefined, model: string | undefined, _credentialId?: string | null): number | null;
+/**
+ * Project the per-task cost of one harness/model combination in USD micros
+ * (issue #1004). This is the in-flight debit the gate books for a claim
+ * before its actual cost is harvested. Same harness classification as
+ * {@link projectAiUnits}:
+ *   - `0` for harnesses that make no marginal LLM call,
+ *   - `null` when a paid-LLM harness's model is unknown to the cost table
+ *     (gate fails open with a warn),
+ *   - otherwise `round(estimateModelCost(model).usd * 1_000_000)`.
+ *
+ * `_credentialId` does not change the projection (the model costs the same
+ * regardless of auth path); it labels the accounting bucket only.
+ */
+export declare function projectTaskUsdMicros(harness: string | undefined, model: string | undefined, _credentialId?: string | null): number | null;
 /**
  * Resolve the active ceiling from env. CI / tests override the baked-in
  * 100 units via `JINN_AI_UNITS_CEILING_OVERRIDE`. Accepts:
@@ -78,6 +119,16 @@ export declare function resolveReferenceCeiling(env: NodeJS.ProcessEnv): {
     units_per_block: number;
     units_per_week: number;
 };
+/**
+ * Resolve the active USD-micros ceiling from env (issue #1004). Reuses
+ * {@link resolveReferenceCeiling} so the `JINN_AI_UNITS_CEILING_OVERRIDE`
+ * parsing (integer or `<block>:<week>`, malformed-warn, default fallback)
+ * lives in one place, then converts both bounds through the peg.
+ */
+export declare function resolveReferenceCeilingUsdMicros(env: NodeJS.ProcessEnv): {
+    usd_micros_per_block: number;
+    usd_micros_per_week: number;
+};
 /** Start of the next 6h UTC-aligned block after `now`. */
 export declare function blockResetsAtUtc(now: Date): Date;
 /**

package/dist/spend/ai-units.js CHANGED Viewed

@@ -18,6 +18,13 @@
  * The cap is layered with the existing per-credential USD `spendCaps`
  * spend-cap gate from PR #345/#346 — both gates run; the more
  * conservative one wins. This module owns the AI-units side only.
+ *
+ * Issue #1004: the gate's comparison now runs in **USD** (actual harvested
+ * spend vs a USD ceiling), not projected AI units. The peg below survives
+ * as the calibration constant deriving that USD ceiling and as the
+ * presentation-layer conversion for the legacy unit fields the SPA still
+ * reads. For subscription credentials the USD ceiling is a *proxy* budget —
+ * it bounds Jinn-attributable model cost, not the provider's plan quota.
  */
 import { estimateModelCost } from '../harnesses/cost-estimates.js';
 import { CLAUDE_CODE_HARNESS, CODEX_HARNESS, HERMES_AGENT_HARNESS, canonicalHarnessName, } from '../harnesses/names.js';
@@ -28,6 +35,11 @@ import { CLAUDE_CODE_HARNESS, CODEX_HARNESS, HERMES_AGENT_HARNESS, canonicalHarn
  *
  * Conservative — biases toward over-warning rather than under-warning so
  * the ceiling pauses early rather than late.
+ *
+ * Issue #1004: demoted to a calibration constant — it derives
+ * {@link REFERENCE_CEILING_USD_MICROS} and converts the legacy unit fields
+ * for presentation, but no longer appears in the gate's comparison. Kept
+ * exported because the SPA's `HarnessFootprintPanel.tsx` imports it (#1006).
  */
 export const GPT_5_4_MINI_USD_PER_BLOCK = 0.5;
 /**
@@ -42,6 +54,25 @@ export const REFERENCE_CEILING = {
     units_per_block: 100,
     units_per_week: 100 * 28,
 };
+/**
+ * USD-micros ceiling (issue #1004). The gate now compares **actual USD
+ * spend** against this ceiling rather than projected AI units. Derived
+ * directly from {@link REFERENCE_CEILING} through the GPT-5.4-mini peg:
+ *   usd_micros = units / 100 * GPT_5_4_MINI_USD_PER_BLOCK * 1_000_000.
+ * Default 100 units/block => $0.50/block (500_000 micros); 2800 units/week
+ * => $14/week (14_000_000 micros). Calibration is preserved; only the
+ * comparison unit changed. For subscription credentials this USD budget is
+ * a *proxy* — it bounds Jinn-attributable model cost, not the provider's
+ * plan quota directly.
+ */
+export const REFERENCE_CEILING_USD_MICROS = {
+    usd_micros_per_block: unitsToUsdMicros(REFERENCE_CEILING.units_per_block),
+    usd_micros_per_week: unitsToUsdMicros(REFERENCE_CEILING.units_per_week),
+};
+/** Convert an AI-unit count to USD micros through the GPT-5.4-mini peg. */
+function unitsToUsdMicros(units) {
+    return Math.round((units / 100) * GPT_5_4_MINI_USD_PER_BLOCK * 1_000_000);
+}
 /** 6h block in milliseconds — UTC blocks start at 00:00 / 06:00 / 12:00 / 18:00. */
 const SIX_HOUR_BLOCK_MS = 6 * 60 * 60 * 1_000;
 /** 7 days in milliseconds — UTC-aligned. */
@@ -85,6 +116,35 @@ export function projectAiUnits(harness, model, _credentialId) {
         return null;
     return (cost.usd / GPT_5_4_MINI_USD_PER_BLOCK) * 100;
 }
+/**
+ * Project the per-task cost of one harness/model combination in USD micros
+ * (issue #1004). This is the in-flight debit the gate books for a claim
+ * before its actual cost is harvested. Same harness classification as
+ * {@link projectAiUnits}:
+ *   - `0` for harnesses that make no marginal LLM call,
+ *   - `null` when a paid-LLM harness's model is unknown to the cost table
+ *     (gate fails open with a warn),
+ *   - otherwise `round(estimateModelCost(model).usd * 1_000_000)`.
+ *
+ * `_credentialId` does not change the projection (the model costs the same
+ * regardless of auth path); it labels the accounting bucket only.
+ */
+export function projectTaskUsdMicros(harness, model, _credentialId) {
+    if (!harness)
+        return null;
+    const canonical = canonicalHarnessName(harness);
+    const isPaidLlmHarness = canonical === CLAUDE_CODE_HARNESS ||
+        canonical === CODEX_HARNESS ||
+        canonical === HERMES_AGENT_HARNESS;
+    if (!isPaidLlmHarness)
+        return 0;
+    if (!model)
+        return null;
+    const cost = estimateModelCost(model);
+    if (!cost)
+        return null;
+    return Math.round(cost.usd * 1_000_000);
+}
 /**
  * Resolve the active ceiling from env. CI / tests override the baked-in
  * 100 units via `JINN_AI_UNITS_CEILING_OVERRIDE`. Accepts:
@@ -118,6 +178,19 @@ export function resolveReferenceCeiling(env) {
     warnMalformedOverride(raw);
     return { units_per_block: REFERENCE_CEILING.units_per_block, units_per_week: REFERENCE_CEILING.units_per_week };
 }
+/**
+ * Resolve the active USD-micros ceiling from env (issue #1004). Reuses
+ * {@link resolveReferenceCeiling} so the `JINN_AI_UNITS_CEILING_OVERRIDE`
+ * parsing (integer or `<block>:<week>`, malformed-warn, default fallback)
+ * lives in one place, then converts both bounds through the peg.
+ */
+export function resolveReferenceCeilingUsdMicros(env) {
+    const units = resolveReferenceCeiling(env);
+    return {
+        usd_micros_per_block: unitsToUsdMicros(units.units_per_block),
+        usd_micros_per_week: unitsToUsdMicros(units.units_per_week),
+    };
+}
 function warnMalformedOverride(raw) {
     // One-time warn per resolve call — the function is only invoked at
     // startup, so a module-level memo is unnecessary. Surfaces the operator's

package/dist/spend/ai-units.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-units.js","sourceRoot":"","sources":["../../src/spend/ai-units.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;;;;;;;;;GAoBG~~;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,oBAAoB,GACrB,MAAM,uBAAuB,CAAC;AAE/B~~;;;;;;;GAOG~~;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG,GAAG,CAAC;AAE9C;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAA0E;IACtG,eAAe,EAAE,GAAG;IACpB,cAAc,EAAE,GAAG,GAAG,EAAE;CACzB,CAAC;AAEF,oFAAoF;AACpF,MAAM,iBAAiB,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;AAC9C,4CAA4C;AAC5C,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,cAAc,CAC5B,OAA2B,EAC3B,KAAyB,EACzB,aAA6B;IAE7B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,MAAM,SAAS,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,gBAAgB,GACpB,SAAS,KAAK,mBAAmB;QACjC,SAAS,KAAK,aAAa;QAC3B,SAAS,KAAK,oBAAoB,CAAC;IACrC,IAAI,CAAC,gBAAgB;QAAE,OAAO,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,CAAC,IAAI,CAAC,GAAG,GAAG,0BAA0B,CAAC,GAAG,GAAG,CAAC;AACvD,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CACrC,GAAsB;IAEtB,MAAM,GAAG,GAAG,GAAG,CAAC,gCAAgC,CAAC,CAAC;IAClD,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrC,OAAO,EAAE,eAAe,EAAE,iBAAiB,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,CAAC,cAAc,EAAE,CAAC;IAClH,CAAC;IACD,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAC3B,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACvB,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;YAC7E,OAAO,EAAE,eAAe,EAAE,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;QAC1D,CAAC;QACD,qBAAqB,CAAC,GAAG,CAAC,CAAC;QAC3B,OAAO,EAAE,eAAe,EAAE,iBAAiB,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,CAAC,cAAc,EAAE,CAAC;IAClH,CAAC;IACD,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;IAC9B,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACxC,OAAO,EAAE,eAAe,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,GAAG,EAAE,EAAE,CAAC;IAChE,CAAC;IACD,qBAAqB,CAAC,GAAG,CAAC,CAAC;IAC3B,OAAO,EAAE,eAAe,EAAE,iBAAiB,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,CAAC,cAAc,EAAE,CAAC;AAClH,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAW;IACxC,mEAAmE;IACnE,0EAA0E;IAC1E,iEAAiE;IACjE,OAAO,CAAC,IAAI,CACV,oDAAoD,GAAG,mBAAmB;QACxE,iBAAiB,iBAAiB,CAAC,eAAe,IAAI,iBAAiB,CAAC,cAAc,oBAAoB,CAC7G,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,GAAS;IAC9B,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,cAAc,EAAE,EAAE,GAAG,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;IACvF,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,EAAE,GAAG,UAAU,CAAC;IACjD,gEAAgE;IAChE,mEAAmE;IACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5E,OAAO,IAAI,IAAI,CAAC,UAAU,GAAG,QAAQ,GAAG,iBAAiB,CAAC,CAAC;AAC7D,CAAC;AAED,0DAA0D;AAC1D,MAAM,UAAU,gBAAgB,CAAC,GAAS;IACxC,OAAO,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,GAAG,iBAAiB,CAAC,CAAC;AACpE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,GAAS;IAClC,OAAO,aAAa,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;AAC1C,CAAC;AAED,iGAAiG;AACjG,MAAM,UAAU,eAAe,CAAC,GAAS;IACvC,OAAO,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,YAAY,CAAC,CAAC;AAChD,CAAC"}
1	+ {"version":3,"file":"ai-units.js","sourceRoot":"","sources":["../../src/spend/ai-units.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,oBAAoB,GACrB,MAAM,uBAAuB,CAAC;AAE/B;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG,GAAG,CAAC;AAE9C;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAA0E;IACtG,eAAe,EAAE,GAAG;IACpB,cAAc,EAAE,GAAG,GAAG,EAAE;CACzB,CAAC;AAEF;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAGrC;IACF,oBAAoB,EAAE,gBAAgB,CAAC,iBAAiB,CAAC,eAAe,CAAC;IACzE,mBAAmB,EAAE,gBAAgB,CAAC,iBAAiB,CAAC,cAAc,CAAC;CACxE,CAAC;AAEF,2EAA2E;AAC3E,SAAS,gBAAgB,CAAC,KAAa;IACrC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,0BAA0B,GAAG,SAAS,CAAC,CAAC;AAC5E,CAAC;AAED,oFAAoF;AACpF,MAAM,iBAAiB,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;AAC9C,4CAA4C;AAC5C,MAAM,YAAY,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,cAAc,CAC5B,OAA2B,EAC3B,KAAyB,EACzB,aAA6B;IAE7B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,MAAM,SAAS,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,gBAAgB,GACpB,SAAS,KAAK,mBAAmB;QACjC,SAAS,KAAK,aAAa;QAC3B,SAAS,KAAK,oBAAoB,CAAC;IACrC,IAAI,CAAC,gBAAgB;QAAE,OAAO,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,CAAC,IAAI,CAAC,GAAG,GAAG,0BAA0B,CAAC,GAAG,GAAG,CAAC;AACvD,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,oBAAoB,CAClC,OAA2B,EAC3B,KAAyB,EACzB,aAA6B;IAE7B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,MAAM,SAAS,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,gBAAgB,GACpB,SAAS,KAAK,mBAAmB;QACjC,SAAS,KAAK,aAAa;QAC3B,SAAS,KAAK,oBAAoB,CAAC;IACrC,IAAI,CAAC,gBAAgB;QAAE,OAAO,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC;AAC1C,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CACrC,GAAsB;IAEtB,MAAM,GAAG,GAAG,GAAG,CAAC,gCAAgC,CAAC,CAAC;IAClD,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrC,OAAO,EAAE,eAAe,EAAE,iBAAiB,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,CAAC,cAAc,EAAE,CAAC;IAClH,CAAC;IACD,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAC3B,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACvB,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;YAC7E,OAAO,EAAE,eAAe,EAAE,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;QAC1D,CAAC;QACD,qBAAqB,CAAC,GAAG,CAAC,CAAC;QAC3B,OAAO,EAAE,eAAe,EAAE,iBAAiB,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,CAAC,cAAc,EAAE,CAAC;IAClH,CAAC;IACD,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;IAC9B,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACxC,OAAO,EAAE,eAAe,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,GAAG,EAAE,EAAE,CAAC;IAChE,CAAC;IACD,qBAAqB,CAAC,GAAG,CAAC,CAAC;IAC3B,OAAO,EAAE,eAAe,EAAE,iBAAiB,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,CAAC,cAAc,EAAE,CAAC;AAClH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,gCAAgC,CAC9C,GAAsB;IAEtB,MAAM,KAAK,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IAC3C,OAAO;QACL,oBAAoB,EAAE,gBAAgB,CAAC,KAAK,CAAC,eAAe,CAAC;QAC7D,mBAAmB,EAAE,gBAAgB,CAAC,KAAK,CAAC,cAAc,CAAC;KAC5D,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAW;IACxC,mEAAmE;IACnE,0EAA0E;IAC1E,iEAAiE;IACjE,OAAO,CAAC,IAAI,CACV,oDAAoD,GAAG,mBAAmB;QACxE,iBAAiB,iBAAiB,CAAC,eAAe,IAAI,iBAAiB,CAAC,cAAc,oBAAoB,CAC7G,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,GAAS;IAC9B,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,cAAc,EAAE,EAAE,GAAG,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;IACvF,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,EAAE,GAAG,UAAU,CAAC;IACjD,gEAAgE;IAChE,mEAAmE;IACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5E,OAAO,IAAI,IAAI,CAAC,UAAU,GAAG,QAAQ,GAAG,iBAAiB,CAAC,CAAC;AAC7D,CAAC;AAED,0DAA0D;AAC1D,MAAM,UAAU,gBAAgB,CAAC,GAAS;IACxC,OAAO,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,GAAG,iBAAiB,CAAC,CAAC;AACpE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,GAAS;IAClC,OAAO,aAAa,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;AAC1C,CAAC;AAED,iGAAiG;AACjG,MAAM,UAAU,eAAe,CAAC,GAAS;IACvC,OAAO,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,YAAY,CAAC,CAAC;AAChD,CAAC"}

package/dist/spend/record.js CHANGED Viewed

@@ -22,11 +22,18 @@ export function recordTaskCost(store, args, env = process.env) {
             model: usage.model,
             detail: usage.estimated ? 'estimated' : 'observed',
         });
-        // Issue #815: fill actual_cost_usd_micros on the per-request claimed
-        // row + set claim_status='delivered'. ai_units stays as captured at
-        // claim time (estimates are the gate input, never recomputed).
-        // Idempotent: no-op when no claimed row exists for this request id.
-        store.finalizeClaimDelivered(args.requestId, actualCostUsdMicros);
+        // Issue #1004: fill actual_cost_usd_micros on the per-request claimed
+        // row + set claim_status='delivered'. The gate's accumulator reads this
+        // column (COALESCE actual, estimated) so the delivered actual replaces
+        // the claim-time estimate in the running total. For subscription
+        // credentials the resulting USD figure is a *proxy* budget — it bounds
+        // Jinn-attributable model cost, not the provider's plan quota directly.
+        // `usage.estimated` is carried through (AC4) so a telemetry-less harness
+        // such as Hermes, whose actual cost is itself a heuristic, is not
+        // presented as metered by the gate. ai_units stays as captured at claim
+        // time (legacy unit surface, #1006). Idempotent: no-op when no claimed
+        // row exists for this request id.
+        store.finalizeClaimDelivered(args.requestId, actualCostUsdMicros, usage.estimated);
     }
     catch (err) {
         console.warn(`[spend] failed to record task cost for ${args.requestId}: ` +

package/dist/spend/record.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"record.js","sourceRoot":"","sources":["../../src/spend/record.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAEjD;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAC5B,KAAY,EACZ,IAMC,EACD,MAAyB,OAAO,CAAC,GAAG;IAEpC,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC5D,IAAI,CAAC,YAAY;YAAE,OAAO;QAC1B,MAAM,KAAK,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7E,MAAM,mBAAmB,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;QAClE,KAAK,CAAC,mBAAmB,CAAC;YACxB,EAAE,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC5B,IAAI,EAAE,WAAW;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY;YACZ,aAAa,EAAE,mBAAmB;YAClC,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,MAAM,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU;SACnD,CAAC,CAAC;QACH,~~qEAAqE~~;~~QACrE~~,~~oEAAoE~~;~~QACpE,+DAA+D~~;~~QAC/D~~,~~oEAAoE~~;~~QACpE~~,KAAK,CAAC,sBAAsB,CAAC,IAAI,CAAC,SAAS,EAAE,mBAAmB,CAAC,CAAC;~~IACpE~~,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,IAAI,CACV,0CAA0C,IAAI,CAAC,SAAS,IAAI;YAC1D,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CACxD,CAAC;IACJ,CAAC;AACH,CAAC"}
1	+ {"version":3,"file":"record.js","sourceRoot":"","sources":["../../src/spend/record.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAEjD;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAC5B,KAAY,EACZ,IAMC,EACD,MAAyB,OAAO,CAAC,GAAG;IAEpC,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC5D,IAAI,CAAC,YAAY;YAAE,OAAO;QAC1B,MAAM,KAAK,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7E,MAAM,mBAAmB,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;QAClE,KAAK,CAAC,mBAAmB,CAAC;YACxB,EAAE,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC5B,IAAI,EAAE,WAAW;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY;YACZ,aAAa,EAAE,mBAAmB;YAClC,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,MAAM,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU;SACnD,CAAC,CAAC;QACH,sEAAsE;QACtE,wEAAwE;QACxE,uEAAuE;QACvE,iEAAiE;QACjE,uEAAuE;QACvE,wEAAwE;QACxE,yEAAyE;QACzE,kEAAkE;QAClE,wEAAwE;QACxE,uEAAuE;QACvE,kCAAkC;QAClC,KAAK,CAAC,sBAAsB,CAAC,IAAI,CAAC,SAAS,EAAE,mBAAmB,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACrF,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,IAAI,CACV,0CAA0C,IAAI,CAAC,SAAS,IAAI;YAC1D,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CACxD,CAAC;IACJ,CAAC;AACH,CAAC"}

package/dist/store/store.d.ts CHANGED Viewed

@@ -344,6 +344,39 @@ export declare class Store {
      * Issue #815.
      */
     aiUnitsThisWeek(credentialId: string, now?: Date): number;
+    /**
+     * Actual-spend accumulator for the current 6h UTC block (issue #1004).
+     *
+     * Sums `COALESCE(actual_cost_usd_micros, estimated_cost_usd_micros, 0)`
+     * over rows whose `claim_status` is `'claimed'` or `'delivered'`:
+     *   - delivered rows contribute the real harvested cost (`actual_*`),
+     *   - in-flight claimed rows contribute their estimate so a burst of
+     *     concurrent claims cannot slip the cap before any of them deliver,
+     *   - failed claims (status `'claim_failed'`) are excluded.
+     *
+     * `estimated` is true iff the summed figure includes any estimate-backed
+     * cost: an in-flight `claimed` row with no `actual_cost_usd_micros` yet,
+     * OR a `delivered` row whose actual cost is itself a heuristic
+     * (`actual_cost_estimated = 1` — a telemetry-less harness such as Hermes).
+     * It is false only when every contributing row is harvested actual
+     * telemetry. The gate surfaces this so an estimate-backed figure is not
+     * presented as metered. Block boundaries mirror `aiUnitsThisBlock`.
+     */
+    usdMicrosThisBlock(credentialId: string, now?: Date): {
+        usdMicros: number;
+        estimated: boolean;
+    };
+    /**
+     * Actual-spend accumulator for the trailing 7-day rolling window from
+     * `now` (issue #1004). Same COALESCE + claim_status filter + `estimated`
+     * semantics as {@link usdMicrosThisBlock}.
+     */
+    usdMicrosThisWeek(credentialId: string, now?: Date): {
+        usdMicros: number;
+        estimated: boolean;
+    };
+    /** Shared COALESCE-sum + estimate-flag query for the USD accumulators. */
+    private sumUsdMicros;
     /**
      * True iff an `ai_units_cap_reached` row exists for the given
      * (credentialId, window, blockId). Used by the daemon to hydrate the
@@ -357,12 +390,22 @@ export declare class Store {
     hasAiUnitsCapReachedFor(credentialId: string, window: 'block' | 'week', blockId: string): boolean;
     /**
      * Mark the per-request `claimed` row as `delivered` and record
-     * `actual_cost_usd_micros`. The `ai_units` projection captured at
-     * claim time is intentionally NOT recomputed — the issue spec says
-     * "estimates are the gate input, never recomputed". Idempotent: a
-     * no-op when no `claimed` row exists.
+     * `actual_cost_usd_micros` (issue #1004 — the gate's accumulator now
+     * reads this column via COALESCE, so a delivered row's real harvested
+     * cost replaces its claim-time estimate in the running total). The
+     * `ai_units` projection captured at claim time is intentionally NOT
+     * recomputed — it remains the per-task estimate for the legacy unit
+     * surfaces. For subscription credentials the resulting USD figure is a
+     * *proxy* budget, not an exact bound on the provider's plan quota.
+     *
+     * `actualCostEstimated` (issue #1004, AC4) records whether the actual
+     * cost is itself a heuristic — true for a telemetry-less harness such as
+     * Hermes whose `harvestHarnessUsage` falls back to an a-priori estimate,
+     * false when the figure is harvested telemetry. The gate reads it so a
+     * delivered-but-heuristic row reports `estimated: true` rather than being
+     * presented as metered. Idempotent: a no-op when no `claimed` row exists.
      */
-    finalizeClaimDelivered(requestId: string, actualCostUsdMicros: number): void;
+    finalizeClaimDelivered(requestId: string, actualCostUsdMicros: number, actualCostEstimated: boolean): void;
     /** Newer events first, then ascending id for `jinn logs --follow` (oldest in batch printed first in caller). */
     getActivityEventsAfterId(afterId: number, limit: number): ActivityEventRow[];
     /**
@@ -499,6 +542,49 @@ export declare class Store {
     queryEnvelopeProjections(query?: EnvelopeProjectionQuery): EnvelopeProjection[];
     saveErc8004Anchor(input: Erc8004AnchorInput): void;
     listErc8004AnchorsByEnvelopeCids(envelopeCids: readonly string[]): Erc8004AnchorRow[];
+    /**
+     * Upsert one held-out eval result (issue #818). PK is
+     * `(checkpoint_cid, slate_version, instance_id)` so a re-run overwrites.
+     * `passed` is null for `unscorable` rows.
+     */
+    recordEvalResult(args: EvalResultRecord): void;
+    /**
+     * Aggregate the eval results for a (checkpoint, slate version):
+     * `scorable` = rows with `unscorable = 0`; `passed` = scorable rows with
+     * `passed = 1`. Unscorable rows are counted separately and never enter the
+     * denominator. A checkpoint with no rows yields all-zero (the orchestrator
+     * reads this to detect a not-yet-evaluated parent).
+     */
+    getEvalAggregate(checkpoint_cid: string, slate_version: string): EvalAggregate;
+    /**
+     * Distinct `slate_hash` values recorded for a (checkpoint, slate version).
+     * The eval orchestrator reads this to detect slate-content drift under a
+     * stable version label — the held-out exam is only an honest before/after
+     * when the parent and child were scored on the SAME slate content (defeating
+     * confounder #1, task-selection). Empty when the checkpoint has no rows.
+     */
+    getEvalSlateHashes(checkpoint_cid: string, slate_version: string): string[];
+    /** Per-task eval results for a (checkpoint, slate version), ordered by instance_id. */
+    getEvalResults(checkpoint_cid: string, slate_version: string): EvalResultRow[];
     close(): void;
 }
+export interface EvalResultRecord {
+    checkpoint_cid: string;
+    slate_hash: string;
+    slate_version: string;
+    instance_id: string;
+    /** Pass/fail; ignored (stored NULL) when `unscorable` is true. */
+    passed: boolean | null;
+    unscorable: boolean;
+    code_digest: string;
+    run_at_ms: number;
+    test_log_excerpt?: string | null;
+}
+/** A persisted eval result read back from the store (same shape as the record written). */
+export type EvalResultRow = EvalResultRecord;
+export interface EvalAggregate {
+    passed: number;
+    scorable: number;
+    unscorable: number;
+}
 export {};

package/dist/store/store.js CHANGED Viewed

@@ -288,6 +288,25 @@ CREATE TABLE IF NOT EXISTS capture_spans (
 );
 CREATE INDEX IF NOT EXISTS capture_spans_session ON capture_spans (session_id);
+-- Held-out checkpoint eval results (issue #818). One row per
+-- (checkpoint, slate version, instance). Scores are only comparable WITHIN a
+-- slate version; slate_hash is stored so a version-bump / hash-drift is
+-- detectable. passed is nullable: an unscorable row (Docker down, disk
+-- floor) carries passed = NULL and is EXCLUDED from the denominator -- never
+-- coerced to a fail.
+CREATE TABLE IF NOT EXISTS eval_results (
+  checkpoint_cid TEXT NOT NULL,
+  slate_hash TEXT NOT NULL,
+  slate_version TEXT NOT NULL,
+  instance_id TEXT NOT NULL,
+  passed INTEGER,
+  unscorable INTEGER NOT NULL DEFAULT 0,
+  code_digest TEXT NOT NULL,
+  run_at_ms INTEGER NOT NULL,
+  test_log_excerpt TEXT,
+  PRIMARY KEY (checkpoint_cid, slate_version, instance_id)
+);
 `;
 export class Store {
     /** Exposed for engine persistence layer — treat as package-internal. */
@@ -400,6 +419,12 @@ export class Store {
         addActivityColumn('claim_status', 'claim_status TEXT');
         addActivityColumn('estimated_cost_usd_micros', 'estimated_cost_usd_micros INTEGER');
         addActivityColumn('actual_cost_usd_micros', 'actual_cost_usd_micros INTEGER');
+        // Issue #1004 (AC4): whether actual_cost_usd_micros is estimate-backed
+        // (1) or harvested telemetry (0/null). A telemetry-less harness such as
+        // Hermes still writes a NON-null actual cost via finalizeClaimDelivered,
+        // so the column distinguishes a heuristic figure from a metered one. The
+        // gate's estimated flag reads this so a heuristic is not shown as metered.
+        addActivityColumn('actual_cost_estimated', 'actual_cost_estimated INTEGER');
         this.db.exec(`CREATE INDEX IF NOT EXISTS idx_activity_events_credential ON activity_events (credential_id, ts)`);
         // Per-request lookup for the completion-time update path that fills
         // actualCostUsdMicros / sets claim_status='delivered'.
@@ -913,6 +938,56 @@ export class Store {
          AND claim_status IN ('claimed', 'delivered')`).get({ cid: credentialId, weekStart });
         return row.total ?? 0;
     }
+    /**
+     * Actual-spend accumulator for the current 6h UTC block (issue #1004).
+     *
+     * Sums `COALESCE(actual_cost_usd_micros, estimated_cost_usd_micros, 0)`
+     * over rows whose `claim_status` is `'claimed'` or `'delivered'`:
+     *   - delivered rows contribute the real harvested cost (`actual_*`),
+     *   - in-flight claimed rows contribute their estimate so a burst of
+     *     concurrent claims cannot slip the cap before any of them deliver,
+     *   - failed claims (status `'claim_failed'`) are excluded.
+     *
+     * `estimated` is true iff the summed figure includes any estimate-backed
+     * cost: an in-flight `claimed` row with no `actual_cost_usd_micros` yet,
+     * OR a `delivered` row whose actual cost is itself a heuristic
+     * (`actual_cost_estimated = 1` — a telemetry-less harness such as Hermes).
+     * It is false only when every contributing row is harvested actual
+     * telemetry. The gate surfaces this so an estimate-backed figure is not
+     * presented as metered. Block boundaries mirror `aiUnitsThisBlock`.
+     */
+    usdMicrosThisBlock(credentialId, now = new Date()) {
+        const startOfDay = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
+        const sinceDayStart = now.getTime() - startOfDay;
+        const sixHoursMs = 6 * 60 * 60 * 1_000;
+        const blocksIn = Math.min(Math.floor(sinceDayStart / sixHoursMs), 3);
+        const blockStart = new Date(startOfDay + blocksIn * sixHoursMs).toISOString();
+        const blockEnd = new Date(startOfDay + (blocksIn + 1) * sixHoursMs).toISOString();
+        return this.sumUsdMicros(credentialId, blockStart, blockEnd);
+    }
+    /**
+     * Actual-spend accumulator for the trailing 7-day rolling window from
+     * `now` (issue #1004). Same COALESCE + claim_status filter + `estimated`
+     * semantics as {@link usdMicrosThisBlock}.
+     */
+    usdMicrosThisWeek(credentialId, now = new Date()) {
+        const weekStart = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1_000).toISOString();
+        return this.sumUsdMicros(credentialId, weekStart, undefined);
+    }
+    /** Shared COALESCE-sum + estimate-flag query for the USD accumulators. */
+    sumUsdMicros(credentialId, fromIso, toIso) {
+        const upper = toIso ? 'AND ts < @to' : '';
+        const row = this.db
+            .prepare(`SELECT
+           COALESCE(SUM(COALESCE(actual_cost_usd_micros, estimated_cost_usd_micros, 0)), 0) AS total,
+           COALESCE(SUM(CASE WHEN actual_cost_usd_micros IS NULL OR actual_cost_estimated = 1 THEN 1 ELSE 0 END), 0) AS estimatedRows
+         FROM activity_events
+         WHERE credential_id = @cid
+           AND ts IS NOT NULL AND ts >= @from ${upper}
+           AND claim_status IN ('claimed', 'delivered')`)
+            .get({ cid: credentialId, from: fromIso, to: toIso });
+        return { usdMicros: row.total ?? 0, estimated: (row.estimatedRows ?? 0) > 0 };
+    }
     /**
      * True iff an `ai_units_cap_reached` row exists for the given
      * (credentialId, window, blockId). Used by the daemon to hydrate the
@@ -935,16 +1010,31 @@ export class Store {
     }
     /**
      * Mark the per-request `claimed` row as `delivered` and record
-     * `actual_cost_usd_micros`. The `ai_units` projection captured at
-     * claim time is intentionally NOT recomputed — the issue spec says
-     * "estimates are the gate input, never recomputed". Idempotent: a
-     * no-op when no `claimed` row exists.
+     * `actual_cost_usd_micros` (issue #1004 — the gate's accumulator now
+     * reads this column via COALESCE, so a delivered row's real harvested
+     * cost replaces its claim-time estimate in the running total). The
+     * `ai_units` projection captured at claim time is intentionally NOT
+     * recomputed — it remains the per-task estimate for the legacy unit
+     * surfaces. For subscription credentials the resulting USD figure is a
+     * *proxy* budget, not an exact bound on the provider's plan quota.
+     *
+     * `actualCostEstimated` (issue #1004, AC4) records whether the actual
+     * cost is itself a heuristic — true for a telemetry-less harness such as
+     * Hermes whose `harvestHarnessUsage` falls back to an a-priori estimate,
+     * false when the figure is harvested telemetry. The gate reads it so a
+     * delivered-but-heuristic row reports `estimated: true` rather than being
+     * presented as metered. Idempotent: a no-op when no `claimed` row exists.
      */
-    finalizeClaimDelivered(requestId, actualCostUsdMicros) {
+    finalizeClaimDelivered(requestId, actualCostUsdMicros, actualCostEstimated) {
         this.db.prepare(`UPDATE activity_events
          SET claim_status = 'delivered',
-             actual_cost_usd_micros = @actual
-       WHERE request_id = @req AND claim_status = 'claimed'`).run({ req: requestId, actual: actualCostUsdMicros });
+             actual_cost_usd_micros = @actual,
+             actual_cost_estimated = @estimated
+       WHERE request_id = @req AND claim_status = 'claimed'`).run({
+            req: requestId,
+            actual: actualCostUsdMicros,
+            estimated: actualCostEstimated ? 1 : 0,
+        });
     }
     /** Newer events first, then ascending id for `jinn logs --follow` (oldest in batch printed first in caller). */
     getActivityEventsAfterId(afterId, limit) {
@@ -1819,6 +1909,79 @@ export class Store {
             anchoredAt: r.anchored_at,
         }));
     }
+    /**
+     * Upsert one held-out eval result (issue #818). PK is
+     * `(checkpoint_cid, slate_version, instance_id)` so a re-run overwrites.
+     * `passed` is null for `unscorable` rows.
+     */
+    recordEvalResult(args) {
+        this.db
+            .prepare(`INSERT INTO eval_results
+           (checkpoint_cid, slate_hash, slate_version, instance_id, passed, unscorable, code_digest, run_at_ms, test_log_excerpt)
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+         ON CONFLICT(checkpoint_cid, slate_version, instance_id) DO UPDATE SET
+           slate_hash = excluded.slate_hash,
+           passed = excluded.passed,
+           unscorable = excluded.unscorable,
+           code_digest = excluded.code_digest,
+           run_at_ms = excluded.run_at_ms,
+           test_log_excerpt = excluded.test_log_excerpt`)
+            .run(args.checkpoint_cid, args.slate_hash, args.slate_version, args.instance_id, args.unscorable ? null : args.passed ? 1 : 0, args.unscorable ? 1 : 0, args.code_digest, args.run_at_ms, args.test_log_excerpt ?? null);
+    }
+    /**
+     * Aggregate the eval results for a (checkpoint, slate version):
+     * `scorable` = rows with `unscorable = 0`; `passed` = scorable rows with
+     * `passed = 1`. Unscorable rows are counted separately and never enter the
+     * denominator. A checkpoint with no rows yields all-zero (the orchestrator
+     * reads this to detect a not-yet-evaluated parent).
+     */
+    getEvalAggregate(checkpoint_cid, slate_version) {
+        const row = this.db
+            .prepare(`SELECT
+           COALESCE(SUM(CASE WHEN unscorable = 0 AND passed = 1 THEN 1 ELSE 0 END), 0) AS passed,
+           COALESCE(SUM(CASE WHEN unscorable = 0 THEN 1 ELSE 0 END), 0) AS scorable,
+           COALESCE(SUM(CASE WHEN unscorable = 1 THEN 1 ELSE 0 END), 0) AS unscorable
+         FROM eval_results
+         WHERE checkpoint_cid = ? AND slate_version = ?`)
+            .get(checkpoint_cid, slate_version);
+        return { passed: row.passed, scorable: row.scorable, unscorable: row.unscorable };
+    }
+    /**
+     * Distinct `slate_hash` values recorded for a (checkpoint, slate version).
+     * The eval orchestrator reads this to detect slate-content drift under a
+     * stable version label — the held-out exam is only an honest before/after
+     * when the parent and child were scored on the SAME slate content (defeating
+     * confounder #1, task-selection). Empty when the checkpoint has no rows.
+     */
+    getEvalSlateHashes(checkpoint_cid, slate_version) {
+        const rows = this.db
+            .prepare(`SELECT DISTINCT slate_hash
+         FROM eval_results
+         WHERE checkpoint_cid = ? AND slate_version = ?
+         ORDER BY slate_hash`)
+            .all(checkpoint_cid, slate_version);
+        return rows.map((r) => r.slate_hash);
+    }
+    /** Per-task eval results for a (checkpoint, slate version), ordered by instance_id. */
+    getEvalResults(checkpoint_cid, slate_version) {
+        const rows = this.db
+            .prepare(`SELECT checkpoint_cid, slate_hash, slate_version, instance_id, passed, unscorable, code_digest, run_at_ms, test_log_excerpt
+         FROM eval_results
+         WHERE checkpoint_cid = ? AND slate_version = ?
+         ORDER BY instance_id`)
+            .all(checkpoint_cid, slate_version);
+        return rows.map((r) => ({
+            checkpoint_cid: r.checkpoint_cid,
+            slate_hash: r.slate_hash,
+            slate_version: r.slate_version,
+            instance_id: r.instance_id,
+            passed: r.passed === null ? null : r.passed === 1,
+            unscorable: r.unscorable === 1,
+            code_digest: r.code_digest,
+            run_at_ms: r.run_at_ms,
+            test_log_excerpt: r.test_log_excerpt,
+        }));
+    }
     close() {
         this.db.close();
     }