dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/intervention.py
ADDED
|
@@ -0,0 +1,769 @@
|
|
|
1
|
+
"""INTERVENTION — the typed actuation ladder: *how hard should we act on a verdict?*
|
|
2
|
+
|
|
3
|
+
docs/143 §13 (the EnterpriseOps-Gym double-down). The whole history of DOS hardened the
|
|
4
|
+
*verdict* — the ORACLE→JUDGE→HUMAN trust ladder, the forgeability axiom, the evidence
|
|
5
|
+
floor. The live benchmark run (RESULTS.md "⚑ KEY DATA POINT") proved that is **necessary
|
|
6
|
+
but not sufficient**: a verdict that was *sound* (0 % false-nudge, 83 % recall) was still
|
|
7
|
+
**net-harmful** (−9 pp) because the *intervention* attached to it (skip-the-dispatch +
|
|
8
|
+
re-prompt) derailed the model mid-plan — even on a true-positive catch. **Detector
|
|
9
|
+
soundness and intervention safety are orthogonal properties.** This module is the next
|
|
10
|
+
frontier: hardening the *actuation*, the docs/99 / docs/126 PDP-vs-PEP seam.
|
|
11
|
+
|
|
12
|
+
The actuation dual of the closed refusal vocabulary
|
|
13
|
+
===================================================
|
|
14
|
+
|
|
15
|
+
`dos.reasons` gives the kernel a **closed reason set** — a no-pick verdict may carry only
|
|
16
|
+
a declared `reason_class`. This module gives the kernel the symmetric thing it lacked: a
|
|
17
|
+
**closed intervention set** — a verdict may be acted on only at a declared *strength*, on
|
|
18
|
+
a documented disruption-cost order:
|
|
19
|
+
|
|
20
|
+
OBSERVE < WARN < BLOCK < DEFER
|
|
21
|
+
rank 0 rank 10 rank 20 rank 30
|
|
22
|
+
|
|
23
|
+
with the **default the least-disruptive that still informs** (WARN). The shape is lifted
|
|
24
|
+
verbatim from `dos.reasons` — a `str`-enum vocabulary (`Intervention`), a frozen
|
|
25
|
+
`InterventionSpec` carrying the rung's data, a frozen `InterventionLadder` registry that
|
|
26
|
+
is closed + ordered + `extend`-not-mutate, a `BASE_INTERVENTIONS` built-in, and a
|
|
27
|
+
`dos.toml [intervention]` on-ramp. The *mechanism* (the typed vocabulary + the pure
|
|
28
|
+
verdict→intervention map) is kernel; the *actuation* (returning a synthetic result,
|
|
29
|
+
skipping a dispatch) lives in a consumer (`benchmark.enterpriseops.dos_react`), never here
|
|
30
|
+
— the kernel **reports a recommendation, it never acts** (the advisory-only doctrine that
|
|
31
|
+
makes DOS a PDP with no PEP).
|
|
32
|
+
|
|
33
|
+
The measured cost order supersedes §13.1's prose order
|
|
34
|
+
======================================================
|
|
35
|
+
|
|
36
|
+
§13.1 sketched the order `… DEFER < BLOCK`. **The live run inverts it.** A DEFER (skip the
|
|
37
|
+
dispatch, re-prompt) *spends the agent's turn* — that is the −9 pp posture (RESULTS.md
|
|
38
|
+
lines 105-120: even a true-positive catch broke a *different* downstream step). A BLOCK
|
|
39
|
+
done right (return a synthetic "that id is unresolved — here is the read tool" result in
|
|
40
|
+
place of the mutation) *preserves the turn*: the agent gets the corrective observation
|
|
41
|
+
WITHOUT a wasted iteration. So **BLOCK is strictly less disruptive than DEFER** and ranks
|
|
42
|
+
below it (`rank 20 < rank 30`). This is deliberate and load-bearing — a later editor must
|
|
43
|
+
NOT "fix" the order back to §13.1's prose. `tests/test_intervention.py::test_block_cheaper_
|
|
44
|
+
than_defer` pins it.
|
|
45
|
+
|
|
46
|
+
Two-level closure (the hackability decision)
|
|
47
|
+
============================================
|
|
48
|
+
|
|
49
|
+
The *vocabulary* `Intervention` is a fixed 4-member kernel enum: it is the ABI a consumer
|
|
50
|
+
actuates against — a consumer cannot perform a rung the kernel does not define, so the set
|
|
51
|
+
of *actions* is closed at the kernel. The *ladder data* (ranks, summaries, a host-added
|
|
52
|
+
rung that still maps onto the closed `dispatches`/`returns_synthetic` actuation contract)
|
|
53
|
+
is extensible via `extend()` / `dos.toml`, exactly as `ReasonRegistry`. A host adds a
|
|
54
|
+
rung-with-rank as data; a consumer reads its actuation behavior off the `dispatches` /
|
|
55
|
+
`returns_synthetic` fields, never off the token name.
|
|
56
|
+
|
|
57
|
+
Confidence-gated escalation (docs/143 §13.3)
|
|
58
|
+
============================================
|
|
59
|
+
|
|
60
|
+
The −9 pp came from disruption spent on catches that did not matter to the verifier. The
|
|
61
|
+
fix is to couple intervention *strength* to verdict *confidence*: a whole-value-absent id
|
|
62
|
+
(a high-confidence mint) earns the strong-but-cheap BLOCK; a one-component-missing
|
|
63
|
+
composite (lower confidence) earns only a WARN. `assess_confidence` reads that signal off
|
|
64
|
+
a `ProvenanceVerdict`'s real fields, and `choose_intervention` maps it through a
|
|
65
|
+
`InterventionPolicy` that enforces the **refuse-LESS-only** direction: a lower-confidence
|
|
66
|
+
verdict can only ever map to a *no-more*-disruptive rung (the admission-floor / fail-to-
|
|
67
|
+
abstain discipline re-aimed at actuation). That guarantee is checked TWICE — once at policy
|
|
68
|
+
construction (vs `BASE_INTERVENTIONS`, the loud early catch) and again inside
|
|
69
|
+
`choose_intervention` against the ladder ACTUALLY clamped with (`policy.validate_against`).
|
|
70
|
+
The second check is load-bearing and not redundant: the construction check only constrains
|
|
71
|
+
the BASE rung order, so a rank-reordered `ladder` passed as `choose_intervention`'s third
|
|
72
|
+
argument would otherwise void it (the docs/144 adversarial-review finding). On such a
|
|
73
|
+
mismatch the verdict path fails SAFE to the ladder default rather than escalate — so the
|
|
74
|
+
guarantee is a property of the ladder-in-use, not merely of BASE.
|
|
75
|
+
|
|
76
|
+
⚓ Pure kernel, no I/O, advisory only (the dos idiom — mirrors `dos.reasons`,
|
|
77
|
+
`liveness.classify`, `arg_provenance.classify_call`): every function here is data-in /
|
|
78
|
+
verdict-out. The module returns *decisions* and *payloads*; it dispatches nothing, mutates
|
|
79
|
+
nothing, reads no clock and no disk (except the `dos.toml` on-ramp, the boundary loader
|
|
80
|
+
that mirrors `reasons.load_from_toml`).
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
from __future__ import annotations
|
|
84
|
+
|
|
85
|
+
import enum
|
|
86
|
+
from dataclasses import dataclass
|
|
87
|
+
from pathlib import Path
|
|
88
|
+
from typing import Iterable
|
|
89
|
+
|
|
90
|
+
from dos.arg_provenance import ProvenanceStance, ProvenanceVerdict
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
# The closed, ordered intervention vocabulary — the actuation ABI.
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
class Intervention(str, enum.Enum):
    """Closed vocabulary of the strengths at which a consumer may act on a verdict.

    The class mixes in `str` and every member's value equals its own name, so a member
    round-trips through a CLI token, JSON, or an environment variable with no lookup
    table. The set is closed at the kernel: a consumer cannot actuate a rung that is not
    declared here.

    The disruption-cost ORDER is NOT the declaration order below. It is carried by
    `InterventionSpec.rank` on the ladder and reads `OBSERVE < WARN < BLOCK < DEFER`
    (BLOCK sits below DEFER because it preserves the agent's turn).

    Members:
        OBSERVE -- record the verdict only; the agent never sees it and the real call
                   still dispatches. The zero-disruption, pure-sensor rung.
        WARN    -- annotate the call with the verdict (visible to the model next turn)
                   and still dispatch. The default: the least-disruptive rung that
                   still informs.
        DEFER   -- RECOMMEND that the consumer skip this dispatch and re-prompt so the
                   agent retries. Costs the turn; the most disruptive rung, opt-in.
        BLOCK   -- RECOMMEND that the consumer refuse the call and return a synthetic
                   corrective result in its place, so the agent is corrected without
                   losing the turn. Strictly less disruptive than DEFER.

    Every member is a recommendation to a consumer; the kernel itself performs none of
    these actions.
    """

    OBSERVE = "OBSERVE"
    WARN = "WARN"
    DEFER = "DEFER"
    BLOCK = "BLOCK"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Render as the bare token rather than the default "Intervention.NAME" form.
        return self.value
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class Confidence(str, enum.Enum):
    """Mint-confidence rung read off a `ProvenanceVerdict`; it gates intervention strength.

    A `str`-valued enum whose member values equal their names, so members serialize as
    plain tokens.

    Members:
        HIGH -- a whole-value-absent mint: exactly one data-bearing component was
                checked and that component is unmatched
                (`len(components_checked) == 1 and len(components_unmatched) == 1`).
                A scalar mint and a one-element container (`['INC9999999']`) collapse
                to this same component shape and both read HIGH; HIGH maps to the
                turn-preserving BLOCK rather than a turn-spending escalation.
        LOW  -- a partial, composite, or multi-component mint: at least one component
                traced and at least one did not, or the value decomposed into several
                components (`len(components_checked) > 1`) so whole-value absence
                cannot be proven. Biases toward the LESS disruptive rung.
        NONE -- `believe=True` or no UNSUPPORTED argument: nothing was minted, so no
                intervention beyond OBSERVE applies.
    """

    HIGH = "HIGH"
    LOW = "LOW"
    NONE = "NONE"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Render as the bare token rather than the default "Confidence.NAME" form.
        return self.value
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
# One rung as data — the InterventionSpec (mirror dos.reasons.ReasonSpec).
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
@dataclass(frozen=True)
class InterventionSpec:
    """A single intervention rung expressed as immutable data.

    This is the unit a workspace declares to add a rung to a ladder. The disruption
    cost is stored on the spec itself (`rank`) rather than in a separate map, so the
    cost cannot drift away from the rung it describes.

    Fields:
        token             -- the rung's name; `.key` gives its canonical upper-cased form.
        rank              -- disruption cost; lower means less disruptive. Must be >= 0.
        summary           -- one-line description of what the rung MEANS.
        actuation         -- one-line description of what a CONSUMER does on this rung.
        dispatches        -- whether the REAL tool call still fires on this rung.
                             Consumers read this flag instead of inferring behavior
                             from the token name.
        returns_synthetic -- whether the consumer returns a SYNTHETIC corrective result
                             in place of the withheld call. Only valid when
                             `dispatches` is False (validated below).
        see_also          -- related-topic pointers.

    Raises:
        ValueError: if `token` is empty or whitespace-only, if `rank` is negative, or
            if `returns_synthetic` is set while `dispatches` is also set.
    """

    token: str
    rank: int
    summary: str
    actuation: str
    dispatches: bool
    returns_synthetic: bool = False
    see_also: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        # Checks run in a fixed order: token first, then rank, then the flag pair.
        token_present = bool(self.token) and bool(self.token.strip())
        if not token_present:
            raise ValueError("InterventionSpec.token must be a non-empty string")

        if self.rank < 0:
            raise ValueError(f"InterventionSpec {self.token!r} rank must be >= 0")

        # A synthetic result stands in for a call that was withheld, so the two flags
        # can never both be true.
        contradictory_flags = self.dispatches and self.returns_synthetic
        if contradictory_flags:
            raise ValueError(
                f"InterventionSpec {self.token!r}: returns_synthetic implies the real "
                f"call is withheld — dispatches must be False"
            )

    @property
    def key(self) -> str:
        """Normalized lookup key: the token stripped of surrounding whitespace, upper-cased."""
        trimmed = self.token.strip()
        return trimmed.upper()
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
# The closed, ordered registry — the InterventionLadder (mirror ReasonRegistry).
|
|
213
|
+
# ---------------------------------------------------------------------------
|
|
214
|
+
def _coerce_token(token: "str | Intervention | None") -> str | None:
|
|
215
|
+
if token is None:
|
|
216
|
+
return None
|
|
217
|
+
if isinstance(token, Intervention):
|
|
218
|
+
return token.value
|
|
219
|
+
return str(token)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@dataclass(frozen=True)
class InterventionLadder:
    """A closed, ordered set of `InterventionSpec`s — the active actuation vocabulary.

    The ladder is immutable: `extend()` returns a NEW ladder instead of mutating this
    one. Construction rejects duplicate tokens (compared by normalized `key`) and
    duplicate ranks, so the ranks form a STRICT total order and `escalate`,
    `de_escalate` and `clamp` are well-defined.

    Fields:
        specs         -- the declared rungs, in declaration order.
        default_token -- the rung returned by `default()`; must name a declared rung
                         whenever `specs` is non-empty.

    Raises:
        ValueError: on a duplicate token, a duplicate rank, or a `default_token` that
            is not among the declared rungs of a non-empty ladder.
    """

    specs: tuple[InterventionSpec, ...] = ()
    default_token: str = "WARN"

    def __post_init__(self) -> None:
        seen_tok: set[str] = set()
        seen_rank: set[int] = set()
        for s in self.specs:
            if s.key in seen_tok:
                raise ValueError(
                    f"duplicate intervention token {s.token!r} in ladder — a rung is "
                    f"declared exactly once (later declarations would shadow silently)"
                )
            if s.rank in seen_rank:
                raise ValueError(
                    f"duplicate rank {s.rank} ({s.token!r}) — the disruption-cost order "
                    f"must be a STRICT total order so escalate/clamp are well-defined"
                )
            seen_tok.add(s.key)
            seen_rank.add(s.rank)
        # An empty ladder is allowed and skips the default check entirely.
        if self.specs and self.get(self.default_token) is None:
            raise ValueError(
                f"default_token {self.default_token!r} is not a declared rung "
                f"(known: {sorted(seen_tok)})"
            )

    # -- lookup ------------------------------------------------------------
    def get(self, token: "str | Intervention | None") -> InterventionSpec | None:
        """Return the spec for `token`, or `None` if it is unknown, empty or `None`.

        Matching is case-insensitive and ignores surrounding whitespace; an
        `Intervention` member is accepted as well as a plain string.
        """
        t = _coerce_token(token)
        if not t:
            return None
        k = t.strip().upper()
        for s in self.specs:
            if s.key == k:
                return s
        return None

    def is_known(self, token: "str | Intervention | None") -> bool:
        """True iff `token` resolves to a declared rung."""
        return self.get(token) is not None

    def tokens(self) -> tuple[str, ...]:
        """Every declared token (normalized key), in declaration order."""
        return tuple(s.key for s in self.specs)

    def by_rank(self) -> tuple[InterventionSpec, ...]:
        """Specs sorted by `rank` ascending — least to most disruptive."""
        return tuple(sorted(self.specs, key=lambda s: s.rank))

    # -- the ordering algebra (well-defined on the strict total order) ------
    def rank_of(self, token: "str | Intervention") -> int:
        """Return the disruption rank of `token`.

        Raises:
            KeyError: if the token is not a declared rung. This is deliberately not a
                forgiving default: treating an unknown rung as rank 0 would silently
                floor it to the least-disruptive rung.
        """
        spec = self.get(token)
        if spec is None:
            raise KeyError(f"unknown intervention rung {token!r}")
        return spec.rank

    def _at_offset(self, token: "str | Intervention", offset: int) -> InterventionSpec:
        """Return the rung `offset` positions from `token` in rank order, clamped to the ends.

        Raises:
            KeyError: if `token` is not a declared rung.
        """
        ordered = self.by_rank()
        # Resolve the rank ONCE. `rank_of` does a linear scan, so calling it inside the
        # search predicate would repeat that scan for every rung. It raises KeyError
        # for an unknown token (also on an empty ladder).
        wanted = self.rank_of(token)
        idx = next((i for i, s in enumerate(ordered) if s.rank == wanted), None)
        if idx is None:  # pragma: no cover - rank_of would have raised
            raise KeyError(f"unknown intervention rung {token!r}")
        j = max(0, min(len(ordered) - 1, idx + offset))
        return ordered[j]

    def escalate(self, token: "str | Intervention", *, by: int = 1) -> InterventionSpec:
        """The rung `abs(by)` steps MORE disruptive, clamped at the top of the ladder."""
        return self._at_offset(token, abs(by))

    def de_escalate(self, token: "str | Intervention", *, by: int = 1) -> InterventionSpec:
        """The rung `abs(by)` steps LESS disruptive, clamped at the bottom of the ladder."""
        return self._at_offset(token, -abs(by))

    def clamp(self, token: "str | Intervention", *, floor: "str | Intervention",
              ceiling: "str | Intervention") -> InterventionSpec:
        """Move `token` into the rank window `[floor, ceiling]` and return the spec.

        If `rank(floor) > rank(ceiling)` (an inverted window) the CEILING wins — a
        deterministic tie-break toward the LESS-disruptive rung, so a buggy window can
        never over-disrupt.

        Raises:
            KeyError: if `token`, `floor` or `ceiling` is not a declared rung.
        """
        r = self.rank_of(token)
        rf = self.rank_of(floor)
        rc = self.rank_of(ceiling)
        if rf > rc:
            return self.get(ceiling)  # inverted window → fail toward less disruptive
        # The clamped rank is always one of r / rf / rc, each of which belongs to a
        # declared rung, so the search below always finds a match.
        target = max(rf, min(rc, r))
        return next(s for s in self.specs if s.rank == target)

    def default(self) -> InterventionSpec:
        """Return the spec named by `default_token`.

        Raises:
            KeyError: if the default is not in the ladder (only reachable on an empty
                ladder, since construction validates non-empty ones).
        """
        spec = self.get(self.default_token)
        if spec is None:  # pragma: no cover - __post_init__ guarantees it
            raise KeyError(f"default_token {self.default_token!r} not in ladder")
        return spec

    # -- the data-driven actuation contract --------------------------------
    def dispatches(self, token: "str | Intervention") -> bool:
        """Does the REAL call fire on this rung? An unknown rung answers `False`
        (fail-closed: an unknown rung withholds the call)."""
        spec = self.get(token)
        return spec.dispatches if spec is not None else False

    def actuates(self, token: "str | Intervention") -> bool:
        """True iff this rung WITHHOLDS the real call (`not dispatches`).

        Read off the `dispatches` DATA rather than a hardcoded token set, so a
        host-added rung is bucketed by its own flag.
        """
        return not self.dispatches(token)

    def returns_synthetic(self, token: "str | Intervention") -> bool:
        """True iff the consumer returns a synthetic corrective result on this rung;
        `False` for an unknown rung."""
        spec = self.get(token)
        return bool(spec.returns_synthetic) if spec is not None else False

    def disruption_cost(self, token: "str | Intervention", *, normalized: bool = True) -> float:
        """Return the rung's disruption cost.

        With `normalized` (the default) the rank is scaled onto `[0, 1]` over the
        ladder's span (min rank → 0.0, max rank → 1.0; a single-rank span → 0.0).
        Otherwise the raw rank is returned as a float.

        Raises:
            KeyError: if `token` is not a declared rung.
        """
        r = float(self.rank_of(token))
        if not normalized:
            return r
        ordered = self.by_rank()
        if not ordered:  # pragma: no cover - empty ladder
            return 0.0
        lo, hi = ordered[0].rank, ordered[-1].rank
        if hi == lo:
            return 0.0
        return (r - lo) / (hi - lo)

    # -- composition (the hackability verb) --------------------------------
    def extend(self, more: Iterable[InterventionSpec]) -> "InterventionLadder":
        """Return a NEW ladder with `more` appended, keeping the same `default_token`.

        Raises:
            ValueError: on a token OR a rank collision (re-validated by construction).
        """
        return InterventionLadder(
            specs=tuple(self.specs) + tuple(more), default_token=self.default_token
        )
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
# ---------------------------------------------------------------------------
|
|
377
|
+
# The built-in ladder. Ranks gapped by 10 so a host can insert a custom rung
|
|
378
|
+
# between any two. Order: OBSERVE(0) < WARN(10) < BLOCK(20) < DEFER(30) — BLOCK
|
|
379
|
+
# below DEFER because BLOCK preserves the turn (the measured order, see docstring).
|
|
380
|
+
# ---------------------------------------------------------------------------
|
|
381
|
+
BASE_INTERVENTIONS = InterventionLadder(
    specs=(
        # rank 0 — pure sensor: nothing reaches the agent, the call goes through.
        InterventionSpec(
            token="OBSERVE",
            rank=0,
            summary="Record the verdict only; the agent never sees it, the call dispatches.",
            actuation="Append the verdict to the run ledger; dispatch the call unchanged.",
            dispatches=True,
            returns_synthetic=False,
            see_also=("intervention WARN", "intervention-eval"),
        ),
        # rank 10 — the ladder default: inform the model, still dispatch.
        InterventionSpec(
            token="WARN",
            rank=10,
            summary=(
                "Annotate the call with the verdict (model sees it next turn) and still dispatch."
            ),
            actuation=(
                "Attach the verdict as an advisory note to the result; dispatch the call. "
                "The model is informed and may self-correct next turn, without losing this one."
            ),
            dispatches=True,
            returns_synthetic=False,
            see_also=("intervention OBSERVE", "intervention BLOCK", "wedge"),
        ),
        # rank 20 — withhold the call but hand back a synthetic corrective result.
        InterventionSpec(
            token="BLOCK",
            rank=20,
            summary=(
                "Refuse the minted call but return a synthetic corrective result — the turn is NOT lost."
            ),
            actuation=(
                "Do NOT dispatch; the consumer returns a synthetic 'id unresolved; here is the "
                "read tool' result in place of the mutation — a corrective observation, no wasted turn."
            ),
            dispatches=False,
            returns_synthetic=True,
            see_also=("intervention DEFER", "dos apply", "arg_provenance"),
        ),
        # rank 30 — withhold the call and re-prompt; the highest-ranked built-in rung.
        InterventionSpec(
            token="DEFER",
            rank=30,
            summary="Skip this dispatch; let the agent retry (costs the turn — the -9pp posture).",
            actuation=(
                "Do NOT dispatch; re-prompt the agent so it can resolve the id and retry. "
                "The most disruptive rung — opt-in only (BLOCK is cheaper and usually preferred)."
            ),
            dispatches=False,
            returns_synthetic=False,
            see_also=("intervention WARN", "intervention BLOCK"),
        ),
    ),
    default_token="WARN",
)
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
# ---------------------------------------------------------------------------
|
|
425
|
+
# Confidence extraction — read the mint-confidence rung off a ProvenanceVerdict.
|
|
426
|
+
# ---------------------------------------------------------------------------
|
|
427
|
+
def assess_confidence(verdict: ProvenanceVerdict) -> Confidence:
    """Grade how strongly a `ProvenanceVerdict` indicates a minted argument. PURE.

    Only these fields are consulted: `verdict.believe`, `verdict.unsupported`,
    `verdict.args`, and per argument `.stance`, `.components_checked` and
    `.components_unmatched`. `matched_in` is intentionally never read.

    Returns:
        * `Confidence.NONE` — the verdict is believed, or it lists no unsupported
          argument: nothing fired.
        * `Confidence.HIGH` — at least one UNSUPPORTED argument has exactly one
          checked component and exactly one unmatched component (the single-
          component, whole-value-absent shape). One such argument is enough to
          grade the whole call HIGH.
        * `Confidence.LOW` — the verdict fired but no argument has that shape
          (composite, multi-component, or partially matched arguments).

    Fail-safe direction: every shape other than the single-component one grades
    LOW, the rung that maps to the less disruptive intervention.
    """
    # Guard: a believed verdict, or one without unsupported args, minted nothing.
    if verdict.believe or not verdict.unsupported:
        return Confidence.NONE

    def _is_single_component_mint(arg) -> bool:
        # Only UNSUPPORTED arguments can contribute a HIGH signal.
        if arg.stance is not ProvenanceStance.UNSUPPORTED:
            return False
        checked = arg.components_checked
        unmatched = arg.components_unmatched
        # Exactly one data-bearing component, and that component is unmatched.
        return len(checked) == 1 and len(unmatched) == 1

    # Evaluate every argument (no early exit) and escalate if any one is HIGH.
    high_flags = [_is_single_component_mint(arg) for arg in verdict.args]
    if any(high_flags):
        return Confidence.HIGH
    return Confidence.LOW
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
# ---------------------------------------------------------------------------
|
|
475
|
+
# The confidence-gating policy — couple intervention strength to confidence.
|
|
476
|
+
# ---------------------------------------------------------------------------
|
|
477
|
+
@dataclass(frozen=True)
class InterventionPolicy:
    """Knobs that map a confidence rung onto an intervention token.

    The policy is validated when it is constructed (against `BASE_INTERVENTIONS`)
    and can be re-validated against any other ladder via `validate_against`.
    Validation enforces the refuse-LESS-only direction: a lower-confidence
    verdict may never map to a more disruptive rung than a higher-confidence
    one, the floor may not push a low-confidence mint above its declared rung,
    and no confidence-mapped rung may sit above the ceiling.

    Fields (all are rung tokens):
        on_high_confidence: rung for a HIGH-confidence mint (default "BLOCK").
        on_low_confidence:  rung for a LOW-confidence mint (default "WARN").
        on_none:            rung when nothing was minted (default "OBSERVE").
        floor:              least disruptive rung applied to a fired verdict
                            (default "WARN").
        ceiling:            most disruptive rung an escalation may reach
                            (default "BLOCK"; raise it to "DEFER" to opt in to
                            the turn-spending rung).
    """

    on_high_confidence: str = "BLOCK"
    on_low_confidence: str = "WARN"
    on_none: str = "OBSERVE"
    floor: str = "WARN"
    ceiling: str = "BLOCK"

    def __post_init__(self) -> None:
        # Early, loud check against the built-in ladder. The rank-order
        # guarantees only hold for the ladder they were checked against, which
        # is why `validate_against` is a separate method that callers holding a
        # different ladder can invoke again.
        self.validate_against(BASE_INTERVENTIONS)

    def validate_against(self, ladder: "InterventionLadder") -> None:
        """Check that this policy is safe to use with `ladder`.

        Checks run in a fixed order and the first violation is reported:
        unknown rung tokens, floor above ceiling, floor above
        `on_low_confidence`, `on_low_confidence` above `on_high_confidence`,
        `on_none` above `on_low_confidence`, and finally any confidence-mapped
        rung above the ceiling.

        Args:
            ladder: object providing `is_known(token)`, `tokens()` and
                `rank_of(token)`.

        Raises:
            ValueError: on the first violated rule.
        """
        # 1. Every token the policy names must exist on this ladder.
        rung_fields = ("on_high_confidence", "on_low_confidence", "on_none", "floor", "ceiling")
        for field_name in rung_fields:
            token = getattr(self, field_name)
            if ladder.is_known(token):
                continue
            known = list(ladder.tokens())
            raise ValueError(
                f"InterventionPolicy.{field_name}={token!r} is not a known rung (known: {known})"
            )

        rank = ladder.rank_of

        def _outranks(left: str, right: str) -> bool:
            # True when `left` is strictly more disruptive than `right` on this ladder.
            return rank(left) > rank(right)

        floor, ceiling = self.floor, self.ceiling
        low_rung, high_rung, none_rung = (
            self.on_low_confidence,
            self.on_high_confidence,
            self.on_none,
        )

        # 2. The clamp window must not be inverted.
        if _outranks(floor, ceiling):
            raise ValueError(f"floor {floor!r} is more disruptive than ceiling {ceiling!r}")

        # 3. Floor axis: the floor is a lower bound for fired verdicts, so it
        #    must not exceed the rung declared for a low-confidence mint
        #    (otherwise clamping would escalate LOW past its declared rung).
        if _outranks(floor, low_rung):
            raise ValueError(
                f"floor {floor!r} is more disruptive than on_low_confidence {low_rung!r} — "
                "the floor must not escalate a low-confidence mint past its declared rung "
                "(refuse-LESS-only, the floor axis)"
            )

        # 4. Lower confidence must never intervene harder than higher confidence.
        if _outranks(low_rung, high_rung):
            raise ValueError(
                f"on_low_confidence {low_rung!r} is more disruptive than "
                f"on_high_confidence {high_rung!r} — a lower-confidence mint "
                "must never intervene harder (refuse-LESS-only)"
            )

        # 5. A clean (no-mint) call must not out-disrupt a low-confidence mint.
        if _outranks(none_rung, low_rung):
            raise ValueError(
                f"on_none {none_rung!r} is more disruptive than on_low_confidence "
                f"{low_rung!r} — a clean call must never intervene harder "
                "than a low-confidence mint"
            )

        # 6. No dead letters: each confidence-mapped rung must fit under the ceiling.
        for field_name in ("on_high_confidence", "on_low_confidence", "on_none"):
            token = getattr(self, field_name)
            if _outranks(token, ceiling):
                raise ValueError(
                    f"{field_name} {token!r} exceeds ceiling {ceiling!r} — a dead-letter rung "
                    "(raise the ceiling to reach it)"
                )
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
# Shared policy instance built from the field defaults (BLOCK / WARN / OBSERVE,
# floor WARN, ceiling BLOCK). Constructing it runs `__post_init__`, so the
# defaults are validated against `BASE_INTERVENTIONS` at import time. Used as the
# default `policy` argument of `choose_intervention`.
DEFAULT_POLICY = InterventionPolicy()
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
@dataclass(frozen=True)
class InterventionDecision:
    """Advisory outcome handed to a consumer: which rung to apply and why.

    This object is a recommendation only; the consumer is the one that acts on
    it. `disruption_cost` is the cost of the chosen rung as reported by the
    ladder that produced the decision.
    """

    # The chosen intervention (enum member; `.value` is serialized).
    intervention: Intervention
    # The confidence rung that drove the choice (enum member; `.value` is serialized).
    confidence: Confidence
    # Full spec of the chosen rung (`.key`, `.dispatches`, `.returns_synthetic` are serialized).
    rung: InterventionSpec
    # Cost of the chosen rung; rounded to 4 places when serialized.
    disruption_cost: float
    # Names of the unsupported arguments carried over from the verdict.
    unsupported: tuple[str, ...]
    # Human-readable explanation of the decision.
    reason: str

    def to_dict(self) -> dict:
        """Return a plain-dict view of the decision (enum values, rung flags, rounded cost)."""
        chosen = self.rung
        payload = dict(
            intervention=self.intervention.value,
            confidence=self.confidence.value,
            rung=chosen.key,
            dispatches=chosen.dispatches,
            returns_synthetic=chosen.returns_synthetic,
            disruption_cost=round(self.disruption_cost, 4),
            unsupported=list(self.unsupported),
            reason=self.reason,
        )
        return payload
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def choose_intervention(
    verdict: ProvenanceVerdict,
    policy: InterventionPolicy = DEFAULT_POLICY,
    ladder: InterventionLadder = BASE_INTERVENTIONS,
) -> InterventionDecision:
    """Map a `ProvenanceVerdict` to an `InterventionDecision`, gated by confidence.

    PURE + ADVISORY: returns a recommendation and performs nothing.

    Mapping:
      * NONE → `policy.on_none`, looked up directly on the ladder and NOT
        clamped, so a clean call is never floored up to the floor rung.
      * HIGH → `policy.on_high_confidence`, clamped into `[floor, ceiling]`.
      * LOW  → `policy.on_low_confidence`, clamped into `[floor, ceiling]`.

    With the default `ceiling="BLOCK"` the DEFER rung is unreachable; a host
    opts in by raising the ceiling.

    Fail-safe on a mismatched ladder: the policy is re-validated against the
    `ladder` actually passed in (`policy.validate_against(ladder)`). If that
    raises `ValueError`, the function does not propagate it; it returns a
    decision built from `ladder.default()` whose `reason` records the failure.

    Args:
        verdict: the provenance verdict to grade.
        policy: confidence → rung mapping plus floor/ceiling.
        ladder: the ladder used for lookup, clamping and cost.

    Returns:
        The advisory decision, including the confidence that drove it.
    """
    conf = assess_confidence(verdict)

    # Re-validate against the ladder in hand; degrade to its default rung rather
    # than raising out of an advisory path.
    try:
        policy.validate_against(ladder)
    except ValueError as e:
        spec = ladder.default()
        return InterventionDecision(
            intervention=Intervention(spec.token),
            confidence=conf,
            rung=spec,
            disruption_cost=ladder.disruption_cost(spec.token),
            unsupported=verdict.unsupported,
            reason=f"fail-safe: policy unsafe on this ladder ({e}) → ladder default {spec.key}",
        )

    if conf is Confidence.NONE:
        spec = ladder.get(policy.on_none)  # OBSERVE — NOT floored up to WARN
    else:
        base = policy.on_high_confidence if conf is Confidence.HIGH else policy.on_low_confidence
        spec = ladder.clamp(base, floor=policy.floor, ceiling=policy.ceiling)

    # Resolve the defensive fallback BEFORE anything dereferences `spec`. The
    # reason text below reads `spec.key`, so the guard has to run first to be
    # able to protect the clamp branch as well as the lookup branch.
    if spec is None:  # pragma: no cover - policy validated against the ladder
        spec = ladder.default()

    if conf is Confidence.NONE:
        reason = "no id/FK argument was minted — observe only"
    else:
        reason = (
            f"{conf.value}-confidence mint on {len(verdict.unsupported)} arg(s) "
            f"({', '.join(verdict.unsupported)}) → {spec.key}"
        )

    return InterventionDecision(
        intervention=Intervention(spec.token),
        confidence=conf,
        rung=spec,
        disruption_cost=ladder.disruption_cost(spec.token),
        unsupported=verdict.unsupported,
        reason=reason,
    )
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
# ---------------------------------------------------------------------------
|
|
667
|
+
# The synthetic corrective result — the BLOCK content builder (pure; #4a).
|
|
668
|
+
# ---------------------------------------------------------------------------
|
|
669
|
+
def synthetic_corrective_result(
    verdict: ProvenanceVerdict, tool_name: str, read_tool_hint: str = ""
) -> dict:
    """Build the tool-result payload a BLOCK hands back instead of running the call.

    PURE: reads the verdict, returns a dict, dispatches nothing. The payload is
    shaped like a tool result (status / error / remediation) so the model gets a
    corrective observation on the same turn; substituting it for the withheld
    call is the consumer's job.

    Anti-laundering shape: each unsupported argument is summarized by its NAME
    and its unmatched COMPONENT tokens only, and the payload carries
    `dos_blocked: True`. That marker exists so a consumer can keep blocked
    payloads out of the provenance corpus; otherwise a blocked id could be read
    back as supporting evidence on the retry.

    Args:
        verdict: verdict whose UNSUPPORTED args are summarized.
        tool_name: name of the withheld tool, quoted in the messages.
        read_tool_hint: optional example of a read tool to suggest; omitted
            from the remediation text when empty.

    Returns:
        Dict with keys `status`, `error`, `unresolved`, `remediation`,
        `dos_blocked`, in that order.
    """
    # One entry per UNSUPPORTED argument: its name plus the unmatched components.
    unresolved = []
    for arg in verdict.args:
        if arg.stance is not ProvenanceStance.UNSUPPORTED:
            continue
        unresolved.append(
            {"arg": arg.arg_name, "unresolved_components": list(arg.components_unmatched)}
        )

    hint_suffix = f" (e.g. {read_tool_hint})" if read_tool_hint else ""

    error_text = (
        f"`{tool_name}` was NOT executed: it references id(s) that never appeared in "
        "any prior tool result or the task — they look invented, not resolved."
    )
    remediation_text = (
        f"Call a READ/QUERY tool{hint_suffix} to look up the correct id, then retry "
        f"`{tool_name}`. The mutation did not run; the database is unchanged."
    )

    payload = {
        "status": "blocked_unresolved_id",
        "error": error_text,
        "unresolved": unresolved,
        "remediation": remediation_text,
        "dos_blocked": True,
    }
    return payload
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
# ---------------------------------------------------------------------------
|
|
711
|
+
# The declarative on-ramp — read a ladder out of dos.toml (mirror reasons.*).
|
|
712
|
+
# ---------------------------------------------------------------------------
|
|
713
|
+
def specs_from_table(table: dict) -> list[InterventionSpec]:
    """Convert a parsed `[intervention]` TOML table into `InterventionSpec` objects.

    `table` maps each rung token to a sub-table of the form
    `{rank, summary?, actuation?, dispatches?, returns_synthetic?, see_also?}`.
    Only `rank` is required. Missing optional keys fall back to `""` for the
    text fields, `True` for `dispatches`, `False` for `returns_synthetic`, and
    an empty tuple for `see_also`; a bare-string `see_also` is treated as a
    one-element tuple. No I/O is performed. A falsy `table` (e.g. `None` or
    `{}`) yields an empty list.

    Raises:
        ValueError: when an entry is not a table or lacks `rank`. Any error
            raised while constructing `InterventionSpec` propagates as well.
    """

    def _build(token, body) -> InterventionSpec:
        # Shape checks first, so a malformed declaration fails loudly.
        if not isinstance(body, dict):
            raise ValueError(
                f"[intervention.{token}] must be a table, got {type(body).__name__}"
            )
        if "rank" not in body:
            raise ValueError(f"[intervention.{token}] is missing required `rank`")

        # Normalize `see_also`: absent/empty → (), single string → 1-tuple.
        refs = body.get("see_also") or ()
        refs = (refs,) if isinstance(refs, str) else refs

        return InterventionSpec(
            token=str(token),
            rank=int(body["rank"]),
            summary=str(body.get("summary", "")),
            actuation=str(body.get("actuation", "")),
            dispatches=bool(body.get("dispatches", True)),
            returns_synthetic=bool(body.get("returns_synthetic", False)),
            see_also=tuple(str(ref) for ref in refs),
        )

    entries = (table or {}).items()
    return [_build(token, body) for token, body in entries]
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
def load_from_toml(
    path: "Path | str", *, base: InterventionLadder = BASE_INTERVENTIONS
) -> InterventionLadder:
    """Build an `InterventionLadder` from the `[intervention]` table of a TOML file.

    The declarative path is purely additive. `base` is returned unchanged when:
      * the file does not exist (including when it disappears between the
        existence check and the read),
      * neither `tomllib` nor `tomli` can be imported, or
      * the document has no non-empty `[intervention]` table.

    A table that is present but malformed is NOT swallowed: errors from TOML
    parsing, `specs_from_table`, or `base.extend` propagate to the caller.

    Args:
        path: location of the TOML file.
        base: ladder to extend; also the fallback return value.

    Returns:
        `base.extend(...)` with the declared specs, or `base` itself.
    """
    p = Path(path)
    if not p.exists():
        return base
    try:
        import tomllib  # py3.11+
    except ModuleNotFoundError:  # pragma: no cover - py<3.11 fallback
        try:
            import tomli as tomllib  # type: ignore
        except ModuleNotFoundError:
            return base
    try:
        # `utf-8-sig` strips a leading BOM (e.g. one written by PowerShell).
        text = p.read_text(encoding="utf-8-sig")
    except FileNotFoundError:
        # The file vanished after the exists() check: treat it as absent so the
        # documented "missing config degrades to the base ladder" contract holds.
        return base
    data = tomllib.loads(text)
    table = data.get("intervention")
    if not isinstance(table, dict) or not table:
        return base
    return base.extend(specs_from_table(table))
|