dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/reward.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
"""reward — the reward-set admission verdict: may a training run TRAIN on this run? (docs/230/234)
|
|
2
|
+
|
|
3
|
+
The on-ramp that puts DOS *inside a training loop*. `effect_witness` answers a
|
|
4
|
+
**runtime** question (did the world change the way the agent claimed?); this module
|
|
5
|
+
re-aims the identical join at a **post-training** consumer — a reward-set admission
|
|
6
|
+
filter (an RL / SFT / DPO rejection sampler) — and answers the only question a
|
|
7
|
+
frontier RL team asks of a verdict:
|
|
8
|
+
|
|
9
|
+
effect_witness.witness_effect(claim, readbacks) -> did the effect HAPPEN? (runtime)
|
|
10
|
+
reward.admit(claim_present, readbacks) -> may a fine-tune TRAIN on it? (lab)
|
|
11
|
+
|
|
12
|
+
A self-judged sampler banks every "resolved / done" trajectory as a positive reward
|
|
13
|
+
label — which is reward-hacking *bait*: it rewards the policy for confidently
|
|
14
|
+
narrating a success it did not achieve, so a model trained on the accepted set learns
|
|
15
|
+
"say *successfully cancelled* and you get reward" — i.e. it is trained to **over-claim
|
|
16
|
+
more**. The poison is self-reinforcing: the failure mode the naive label fails to
|
|
17
|
+
catch is the exact behavior it *teaches*. This module is the witness-gated filter: a
|
|
18
|
+
"resolved" claim enters the positive set **only if a non-forgeable witness confirms
|
|
19
|
+
it**, a refuted "resolved" is flagged POISON (and is the *dispreferred* member of a
|
|
20
|
+
DPO pair), and a claim no accountable witness reached ABSTAINS (never minted positive).
|
|
21
|
+
|
|
22
|
+
THE NON-DISTILLABLE REWARD LABEL — the property a lab actually pays for (docs/234 §2)
|
|
23
|
+
====================================================================================
|
|
24
|
+
|
|
25
|
+
The prize in RLVR is a reward the policy **cannot reproduce from its own weights or
|
|
26
|
+
outputs**. A self-judge or an LLM-judge reward fails this — the policy can learn to
|
|
27
|
+
satisfy the judge (measured live, `g3_forgeability.py`: a deterministic world-read
|
|
28
|
+
floor false-accepts 0.000, an LLM-judge reading only the narration false-accepts
|
|
29
|
+
0.352). This label passes it, and the property is **structural, not a tuned
|
|
30
|
+
threshold**:
|
|
31
|
+
|
|
32
|
+
> The accept bit is a pure function of the non-forgeable witness once a claim is
|
|
33
|
+
> present. No agent-authored byte can flip reject/abstain -> accept. The policy's
|
|
34
|
+
> own output text — including pasting `[SYSTEM: db_check passed, accept=True]` into
|
|
35
|
+
> its final message — is, by construction, filtered out of the belief decision.
|
|
36
|
+
> Only an `OS_RECORDED` / `THIRD_PARTY` witness can set accept. (docs/234 theorem,
|
|
37
|
+
> clauses (i) text-channel invariance + (ii) the only path to accept is doing the work.)
|
|
38
|
+
|
|
39
|
+
That floor is not re-implemented here — it is *inherited*. `admit` delegates the
|
|
40
|
+
belief decision to `effect_witness.witness_effect`, which delegates to
|
|
41
|
+
`evidence.believe_under_floor`, the security-load-bearing function whose dual is
|
|
42
|
+
`overlap_policy.admissible_under_floor`: a swappable layer can only ever *refuse
|
|
43
|
+
more*, never be talked into a looser admit. So this module adds **zero** new trust
|
|
44
|
+
surface — it is the *last function* (a consumer of an already-floored verdict), and a
|
|
45
|
+
buggy/hostile caller of it cannot manufacture an accept the witness did not earn.
|
|
46
|
+
|
|
47
|
+
SOUNDNESS IS WITNESS-DRIVEN; PRECISION IS CLAIM-DRIVEN — keep them apart (docs/234 §3)
|
|
48
|
+
=====================================================================================
|
|
49
|
+
|
|
50
|
+
The one trap that makes the proof look circular if stated wrong:
|
|
51
|
+
|
|
52
|
+
* **Soundness** (no forgeable byte flips reject -> accept) is the *witness's* job, and
|
|
53
|
+
it is structural. The only failure direction is UNDER-coverage (ABSTAIN), which is
|
|
54
|
+
safe — you never mint a poison positive.
|
|
55
|
+
* **Precision** (is a given row a "resolved" bid *at all*?) is the *claim extractor's*
|
|
56
|
+
job. That extractor reads the agent's forgeable text — but it can only ever route a
|
|
57
|
+
row to ABSTAIN / NO_CLAIM, **never to a false ACCEPT**. An over-claim that fails to
|
|
58
|
+
trip the extractor is dropped (uncounted), not banked.
|
|
59
|
+
|
|
60
|
+
So this module takes the claim-present bit as an ALREADY-EXTRACTED boolean (the host's
|
|
61
|
+
extractor decided it at the boundary — e.g. tau2's `_confident_write_claim`, a CI
|
|
62
|
+
job's "the PR says FIXED", a tool-log's "a mutating call was issued"). The kernel does
|
|
63
|
+
**not** parse domain text: the extractor is host policy (the docs/216 §2 converged
|
|
64
|
+
confident-write detector is tau2's; another host has its own), exactly as `verify`
|
|
65
|
+
takes a claim and a witness and never invents either. `claim_present=False` is the
|
|
66
|
+
abstain-never-invent law, restated for the reward set: nothing claimed -> nothing to
|
|
67
|
+
bank, nothing to purge.
|
|
68
|
+
|
|
69
|
+
THE WITNESS IS THE NARROW CORRECTNESS BIT, NOT A COMPOSITE SCORE (docs/230 §4a)
|
|
70
|
+
==============================================================================
|
|
71
|
+
|
|
72
|
+
A subtle, load-bearing choice the kernel ENFORCES by construction: belief keys on the
|
|
73
|
+
read-back the agent authors **zero bytes of** (the env DB-hash, an OS exit code, a
|
|
74
|
+
third-party ledger), NOT on a softer composite reward that folds in text the policy
|
|
75
|
+
*can* shape. Measured live, tau2 airline/7 has `db_match=True` while the composite
|
|
76
|
+
`reward=0.0` (the write was right; the NL explanation missed a communicate-check).
|
|
77
|
+
Keying the LABEL on the least-gameable sub-witness is the point — and the kernel makes
|
|
78
|
+
it unavoidable, because the only thing `admit` will believe is a non-forgeable
|
|
79
|
+
`EvidenceFacts` (the host hands the witness in; a forgeable one is structurally
|
|
80
|
+
ignored).
|
|
81
|
+
|
|
82
|
+
THE FOUR-VALUED VERDICT (the typed-verdict family)
|
|
83
|
+
==================================================
|
|
84
|
+
|
|
85
|
+
ACCEPT — a present claim a non-forgeable witness CONFIRMED. The preferred
|
|
86
|
+
member; the only value that enters the positive reward set.
|
|
87
|
+
REJECT_POISON — a present claim a non-forgeable witness REFUTED. The load-bearing
|
|
88
|
+
value: this is exactly the label a naive self-judged sampler banks
|
|
89
|
+
as a positive WHILE the world disconfirms it — the poison the witness
|
|
90
|
+
PURGES, and the *dispreferred* member of a (witnessed, over-claimed)
|
|
91
|
+
DPO preference pair. J of the lab arm counts these.
|
|
92
|
+
ABSTAIN — a present claim no accountable witness reached (or only a forgeable
|
|
93
|
+
read-back). We never mint a positive on the unforgeable rung without
|
|
94
|
+
a witness, and the witness did not refute it either — the
|
|
95
|
+
`believe_under_floor` honest abstain. NOT a reject; NOT an accept.
|
|
96
|
+
NO_CLAIM — the host's extractor found no checkable claim (free prose, "I'm
|
|
97
|
+
done"). Nothing to bank, nothing to purge — read as "not a candidate",
|
|
98
|
+
never as a pass.
|
|
99
|
+
|
|
100
|
+
PURE — no I/O. The claim-present bit was decided at the boundary (a host extractor);
|
|
101
|
+
the read-backs were gathered at the boundary (`evidence.gather_evidence` over a
|
|
102
|
+
`drivers/*` witness). This module only folds them into a training-loader-shaped label.
|
|
103
|
+
It sits in the kernel layer beside `effect_witness` / `evidence` / `liveness` and
|
|
104
|
+
names no host, no provider, no benchmark. The tau2-specific extractor + mutating-tool
|
|
105
|
+
map stay in the benchmark (`writeadmit/`), which becomes a thin host adapter over this.
|
|
106
|
+
"""
|
|
107
|
+
|
|
108
|
+
from __future__ import annotations
|
|
109
|
+
|
|
110
|
+
import enum
|
|
111
|
+
from dataclasses import dataclass
|
|
112
|
+
|
|
113
|
+
from dos.effect_witness import EffectClaim, EffectWitnessVerdict, witness_effect
|
|
114
|
+
from dos.evidence import EvidenceFacts
|
|
115
|
+
|
|
116
|
+
# Public names exported by this module, grouped by role (order unchanged).
__all__ = [
    # The verdict enum and its module-level member aliases.
    "RewardVerdict",
    "ACCEPT",
    "REJECT_POISON",
    "ABSTAIN",
    "NO_CLAIM",
    # The per-trajectory label and the function that produces it.
    "RewardLabel",
    "admit",
    # The corpus-level A/B summary and the fold that computes it.
    "AcceptanceAB",
    "acceptance_ab",
]
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class RewardVerdict(str, enum.Enum):
    """Reward-set admission verdict: one of four string-valued tokens.

    The `str` mix-in makes every member compare equal to its own token, so a
    member can be written to a CLI argument or a JSONL manifest row and
    recovered with `RewardVerdict(token)` without a lookup table.

    `_from_effect_verdict` in this module maps the effect-witness verdict
    strings onto these members:

        "CONFIRMED"     -> ACCEPT         (the only value that sets `accept`)
        "REFUTED"       -> REJECT_POISON  (the only value that sets `poison`)
        "NO_CLAIM"      -> NO_CLAIM       (nothing checkable was claimed)
        anything else   -> ABSTAIN        (e.g. an unwitnessed present claim)
    """

    ACCEPT = "ACCEPT"
    REJECT_POISON = "REJECT_POISON"
    ABSTAIN = "ABSTAIN"
    NO_CLAIM = "NO_CLAIM"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Render the bare token rather than the "RewardVerdict.ACCEPT" form
        # that `Enum.__str__` would produce.
        return str(self.value)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Module-level shorthands for the four enum members, so callers can refer to
# e.g. `ACCEPT` directly instead of `RewardVerdict.ACCEPT`.
ACCEPT, REJECT_POISON, ABSTAIN, NO_CLAIM = (
    RewardVerdict.ACCEPT,
    RewardVerdict.REJECT_POISON,
    RewardVerdict.ABSTAIN,
    RewardVerdict.NO_CLAIM,
)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass(frozen=True)
class RewardLabel:
    """Immutable reward-set admission label for a single trajectory.

    This is the flat record a training loader reads. `verdict` carries the
    four-valued token; `accept`, `poison` and `dispreferred` are boolean
    projections of it, stored alongside so a loader does not have to re-derive
    them. The class itself does not check that the booleans agree with
    `verdict` — the producer (`_from_effect_verdict` in this module) is what
    keeps them consistent:

        accept        True only for an ACCEPT verdict (row enters the
                      positive / preferred set).
        poison        True only for a REJECT_POISON verdict (a present claim
                      that the witness refuted).
        dispreferred  Set to the same value as `poison` by the producer; kept
                      as its own field so a DPO loader and a
                      rejection-sampling loader each find the name they
                      expect.

    `claim_present` echoes the caller-supplied "was a checkable claim made"
    bit. `witness` and `accountability` identify the read-back behind the
    verdict and default to the empty string. `reason` is a one-line
    human-readable explanation.
    """

    verdict: RewardVerdict
    accept: bool
    poison: bool
    dispreferred: bool
    claim_present: bool
    reason: str
    # Optional provenance of the verdict; empty when not supplied.
    witness: str = ""
    accountability: str = ""

    def to_dict(self) -> dict:
        """Return the label as a flat dict, with `verdict` reduced to its string value.

        Keys appear in field-declaration order; all other fields are copied
        through unchanged.
        """
        return dict(
            verdict=self.verdict.value,
            accept=self.accept,
            poison=self.poison,
            dispreferred=self.dispreferred,
            claim_present=self.claim_present,
            reason=self.reason,
            witness=self.witness,
            accountability=self.accountability,
        )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _from_effect_verdict(claim_present: bool, v: EffectWitnessVerdict) -> RewardLabel:
    """Translate an `EffectWitnessVerdict` into a `RewardLabel`. Total and pure.

    The mapping is keyed on the string `v.verdict.value`:

        "CONFIRMED" -> ACCEPT         (accept=True)
        "REFUTED"   -> REJECT_POISON  (poison=True, dispreferred=True)
        "NO_CLAIM"  -> NO_CLAIM       (reason copied from `v.reason`)
        otherwise   -> ABSTAIN        (never a positive, never a poison)

    Args:
        claim_present: The host extractor's bit, echoed onto the label as-is.
        v: The effect-witness verdict to project. Its `witness` is copied onto
            the label; its `accountability` is reduced to `.value`, or to the
            empty string when it is None.

    Returns:
        The reward-set label for this verdict.
    """
    rung = "" if v.accountability is None else v.accountability.value
    stance = v.verdict.value

    # Pick the reward token and its one-line reason. Only the NO_CLAIM branch
    # reuses the upstream reason; the others carry a reward-set-specific one.
    if stance == "CONFIRMED":
        token = ACCEPT
        why = "witnessed claim — accepted into the positive reward set"
    elif stance == "REFUTED":
        token = REJECT_POISON
        why = (
            "present claim a non-forgeable witness REFUTED — POISON positive purged "
            "(dispreferred in a DPO pair)"
        )
    elif stance == "NO_CLAIM":
        token = NO_CLAIM
        why = v.reason
    else:
        # Any other stance (e.g. an unwitnessed present claim) abstains: it is
        # neither banked as a positive nor flagged as poison.
        token = ABSTAIN
        why = "present claim but no accountable witness — abstain (never mint a positive unverified)"

    refuted = token is REJECT_POISON
    return RewardLabel(
        verdict=token,
        accept=token is ACCEPT,
        poison=refuted,
        dispreferred=refuted,
        claim_present=claim_present,
        reason=why,
        witness=v.witness,
        accountability=rung,
    )
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def admit(
    claim_present: bool,
    readbacks: "tuple[EvidenceFacts, ...] | list[EvidenceFacts]",
    *,
    claim_key: str = "claim",
    narrated: str = "",
) -> RewardLabel:
    """Label one trajectory for reward-set admission. Performs no I/O itself.

    The belief decision is not made here: this function builds the claim,
    hands it to `effect_witness.witness_effect` together with the read-backs,
    and renames the resulting verdict via `_from_effect_verdict`.

    Args:
        claim_present: The host extractor's bit — did the trajectory make a
            checkable "resolved / done" claim? The kernel does not parse any
            domain text to decide this.
        readbacks: `EvidenceFacts` gathered by the caller at the boundary.
            Only consulted when `claim_present` is truthy.
        claim_key: Key placed on the `EffectClaim`; an empty/falsy value falls
            back to "claim". Ignored when `claim_present` is falsy.
        narrated: The agent's own phrasing, stored on the `EffectClaim`. This
            function never inspects it. Ignored when `claim_present` is falsy.

    Returns:
        A `RewardLabel` whose `claim_present` is True when a claim was
        evaluated and False otherwise. With a claim, the label is ACCEPT on a
        "CONFIRMED" verdict, REJECT_POISON on "REFUTED", and ABSTAIN for any
        other non-"NO_CLAIM" verdict. Without a claim, `witness_effect` is
        called with `None` and no read-backs so that the NO_CLAIM verdict and
        its reason come from one place (presumably it returns NO_CLAIM for a
        None claim — that rule lives in `effect_witness`, not here).
    """
    if claim_present:
        bid = EffectClaim(key=claim_key or "claim", subject="effect", narrated=narrated)
        return _from_effect_verdict(True, witness_effect(bid, readbacks))

    # No checkable claim: deliberately drop the read-backs and ask
    # witness_effect for its verdict on "no claim" instead of hard-coding the
    # NO_CLAIM reason string in this module.
    return _from_effect_verdict(False, witness_effect(None, ()))
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# ---------------------------------------------------------------------------------------
|
|
296
|
+
# The acceptance-precision A/B — the $0, log-replay measurement (docs/230 §3).
|
|
297
|
+
#
|
|
298
|
+
# believe-select = the naive self-judged sampler: accept every PRESENT claim as a
|
|
299
|
+
# positive (witness-blind). Today's default RLVR/RFT loop.
|
|
300
|
+
# adjudicate-select = the witness-gated filter: accept iff a non-forgeable witness CONFIRMS.
|
|
301
|
+
#
|
|
302
|
+
# The two Payoff-1 numbers: acceptance PRECISION of each arm (fraction of accepted
|
|
303
|
+
# positives that are genuinely witnessed), and J = the poison positives the witness
|
|
304
|
+
# PURGED (the believe arm banks them; the adjudicate arm does not). This is a pure fold
|
|
305
|
+
# over already-labeled rows — domain-free; a host supplies the (claim_present, readbacks)
|
|
306
|
+
# pairs from its own extractor + witness, and gets the lab arm's headline back.
|
|
307
|
+
# ---------------------------------------------------------------------------------------
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
@dataclass(frozen=True)
class AcceptanceAB:
    """Immutable summary of the believe-select vs adjudicate-select acceptance A/B.

    "Believe" is the naive arm that accepts every present claim; "adjudicate"
    is the gated arm that accepts only witness-confirmed claims. Instances are
    produced by `acceptance_ab`; the per-field notes below describe how that
    function fills them in.
    """

    n_rows: int                  # total labels folded
    n_claim_bids: int            # labels with a present claim (the candidates)
    believe_accepted: int        # naive arm: every present-claim bid
    believe_poison: int          # bids flagged poison (witness-refuted)
    believe_precision: float     # confirmed bids / all bids; 0.0 with no bids
    adjudicate_accepted: int     # gated arm: confirmed bids only
    adjudicate_poison: int       # always set to 0 by acceptance_ab
    adjudicate_precision: float  # 1.0 if the gated arm accepted anything, else 0.0
    j_poison_purged: int         # J: set equal to believe_poison
    delta_precision: float       # adjudicate_precision - believe_precision

    def to_dict(self) -> dict:
        """Return every field as a flat dict, keyed by field name in declaration order."""
        return dict(
            n_rows=self.n_rows,
            n_claim_bids=self.n_claim_bids,
            believe_accepted=self.believe_accepted,
            believe_poison=self.believe_poison,
            believe_precision=self.believe_precision,
            adjudicate_accepted=self.adjudicate_accepted,
            adjudicate_poison=self.adjudicate_poison,
            adjudicate_precision=self.adjudicate_precision,
            j_poison_purged=self.j_poison_purged,
            delta_precision=self.delta_precision,
        )
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def acceptance_ab(labels: "tuple[RewardLabel, ...] | list[RewardLabel]") -> AcceptanceAB:
    """Fold reward labels into the believe-select vs adjudicate-select A/B summary.

    Pure over labels that were already computed elsewhere. A "bid" is a label
    with a truthy `claim_present`; labels without a claim count toward
    `n_rows` only. Among bids, those with a truthy `accept` are the confirmed
    ones and those with a truthy `poison` are the refuted ones.

    Args:
        labels: The labels to summarise. Materialised into a list once, so
            any iterable is consumed a single time.

    Returns:
        An `AcceptanceAB` where:
          * the naive arm accepts every bid, so `believe_precision` is
            confirmed / bids (0.0 when there are no bids);
          * the gated arm accepts only confirmed bids, so
            `adjudicate_precision` is 1.0 when it accepted anything and 0.0
            otherwise, and `adjudicate_poison` is fixed at 0;
          * `j_poison_purged` equals the refuted-bid count;
          * `delta_precision` is the gated precision minus the naive one.
    """
    rows = list(labels)

    n_bids = 0
    n_confirmed = 0
    n_refuted = 0
    for row in rows:
        if not row.claim_present:
            continue  # not a candidate for either arm
        n_bids += 1
        if row.accept:
            n_confirmed += 1
        if row.poison:
            n_refuted += 1

    # Guard the division: an empty candidate set reports 0.0, not an error.
    naive_precision = (n_confirmed / n_bids) if n_bids else 0.0
    gated_precision = 1.0 if n_confirmed else 0.0

    return AcceptanceAB(
        n_rows=len(rows),
        n_claim_bids=n_bids,
        believe_accepted=n_bids,  # the naive arm banks every bid
        believe_poison=n_refuted,
        believe_precision=naive_precision,
        adjudicate_accepted=n_confirmed,  # the gated arm banks confirmed bids only
        adjudicate_poison=0,  # a refuted bid is never accepted by the gated arm
        adjudicate_precision=gated_precision,
        j_poison_purged=n_refuted,
        delta_precision=gated_precision - naive_precision,
    )
|