dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/result_state.py
ADDED
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
r"""result_state — the fold-site result-state witness (docs/197 §7(1), the keystone).
|
|
2
|
+
|
|
3
|
+
> **An ultracode `Workflow` folds `agent()`'s self-authored return value as ground
|
|
4
|
+
> truth at exactly one place — the `${result}` interpolation — and 32% of real
|
|
5
|
+
> subagents (736/2305, docs/197 §2) fold a HARNESS-authored terminal-error string
|
|
6
|
+
> there as a finished "finding." The death is non-null, so it survives the
|
|
7
|
+
> `.filter(Boolean)` used in 89/114 real scripts; a smaller numerator is the only
|
|
8
|
+
> signal, and code that computes `failed = N − survivors.length` cannot tell a dead
|
|
9
|
+
> worker from a real negative. This module is the byte-clean referee at that fold:
|
|
10
|
+
> it classifies a subagent transcript's TERMINAL assistant message and refuses to
|
|
11
|
+
> believe a harness-synthesized abnormal termination — because the catch reads a
|
|
12
|
+
> DIFFERENT byte-author than the judged worker (`message.model == "<synthetic>"`
|
|
13
|
+
> means the Claude Code HARNESS synthesized the bytes, not the subagent's model).**
|
|
14
|
+
|
|
15
|
+
The byte-author law, restated for the fold (docs/138 / docs/116 §2.5):
|
|
16
|
+
|
|
17
|
+
> A verdict is *grounding* only when the byte-author of the evidence differs from
|
|
18
|
+
> the judged agent. A subagent re-narrating its own output is consistency, not
|
|
19
|
+
> grounding.
|
|
20
|
+
|
|
21
|
+
The terminal `model:"<synthetic>"` record is the cleanest possible grounding: the
|
|
22
|
+
`role:"assistant"` slot is merely the conversation position, but `<synthetic>` is
|
|
23
|
+
the harness's authorship stamp — the subagent's model did NOT generate it. So
|
|
24
|
+
asking "is this terminal record harness-authored?" is a pure byte question about
|
|
25
|
+
bytes the judged agent could not forge in its favor. This is the same shape as
|
|
26
|
+
`tool_stream` keying on the env-authored `result_digest`, one rung over: there the
|
|
27
|
+
env authors the repeated result, here the harness authors the death.
|
|
28
|
+
|
|
29
|
+
Why a NEW grammar — not a reuse of `terminal_error` (docs/197 §4c, VERIFIED)
|
|
30
|
+
===========================================================================
|
|
31
|
+
|
|
32
|
+
`benchmark/toolathlon/trajectory.py:terminal_error_fired` is the structural-error
|
|
33
|
+
detector for tool RESULTS — but it (a) walks ONLY `role=="tool"` messages
|
|
34
|
+
(`trajectory.py:471`), and (b) its `_STRUCT_ERR` grammar anchors `^\s*Error:`,
|
|
35
|
+
which does NOT match the synthetic string that LEADS with `API Error:`
|
|
36
|
+
(`trajectory.py:343`). The synthetic terminal is a `role:"assistant"` record with
|
|
37
|
+
`model:"<synthetic>"`, so it never reaches that classifier. This module is the
|
|
38
|
+
genuinely-new grammar over the ASSISTANT role the keystone needs — and it lives in
|
|
39
|
+
the kernel (not `benchmark/`) because the fold-site catch is a reusable distrust
|
|
40
|
+
primitive, not a one-benchmark instrument.
|
|
41
|
+
|
|
42
|
+
The discriminators (grounded in 2,935 REAL synthetic records, not the doc's spec)
|
|
43
|
+
=================================================================================
|
|
44
|
+
|
|
45
|
+
An empirical sweep of every `model:"<synthetic>"` record across the operator's real
|
|
46
|
+
`~/.claude/projects` corpus (2,935 records) fixes the byte-exact shape. Critically,
|
|
47
|
+
it is BROADER than docs/197's "429" framing — 43% of synthetic deaths are NOT 429:
|
|
48
|
+
|
|
49
|
+
* `message.model == "<synthetic>"` — 100% (the unforgeable harness marker)
|
|
50
|
+
* `message.stop_reason == "stop_sequence"`— 100%
|
|
51
|
+
* top-level `isApiErrorMessage == true` — 100%
|
|
52
|
+
* top-level `apiErrorStatus` — present with the HTTP code (429/401/
|
|
53
|
+
403/500) on 2885/2935; ABSENT on 50
|
|
54
|
+
(the subscription/limit-text deaths)
|
|
55
|
+
* `message.content[0].text` classes observed: "API Error: … Rate limited" (1688,
|
|
56
|
+
57%), "organization has disabled …" (248), "hit your weekly limit" (205),
|
|
57
|
+
"API Error: 500 Internal server error" (66), "out of extra usage" / "session
|
|
58
|
+
limit" (the rest). A 429-only match (docs/197's literal spec) would MISS 43%.
|
|
59
|
+
|
|
60
|
+
So the PRIMARY signal is `model == "<synthetic>"` (harness-authored) — the
|
|
61
|
+
unforgeable rung. `isApiErrorMessage` and `stop_reason == "stop_sequence"`
|
|
62
|
+
corroborate it. `apiErrorStatus` + a coarse `class` (RATE_LIMIT / USAGE_LIMIT /
|
|
63
|
+
AUTH / SERVER / OTHER) are reported as DETAIL, never as the gate (keying the gate
|
|
64
|
+
on 429 would conflate the HTTP code with the harness-authorship fact and miss the
|
|
65
|
+
non-HTTP limit-text deaths). docs/197 §2.1 also placed `isApiErrorMessage` /
|
|
66
|
+
`apiErrorStatus` INSIDE the `message` object; in real records they are TOP-LEVEL
|
|
67
|
+
siblings of `message` — corrected here.
|
|
68
|
+
|
|
69
|
+
Why it is ADVISORY (the docs/197 §6.5 line, the −9 pp wound)
|
|
70
|
+
============================================================
|
|
71
|
+
|
|
72
|
+
A DEAD verdict's safe action is to route the dead child to a DEAD bucket and
|
|
73
|
+
re-dispatch ITS OWN unit — never to re-prompt the synthesizer mid-plan (the
|
|
74
|
+
docs/143 −9 pp DEFER-shaped derail). This module REPORTS (a verdict + an exit
|
|
75
|
+
code a workflow branches on); it never re-runs a worker, never edits the fold. It
|
|
76
|
+
is a PDP, not a PEP (`enforce.py` is the proposal seam; nothing here actuates).
|
|
77
|
+
|
|
78
|
+
⚓ Kernel discipline (the litmus): a PURE verdict + a boundary reader. It imports
|
|
79
|
+
only one sibling kernel module (`claim_extract`, for the transcript-read boundary;
|
|
80
|
+
the refusal envelope mirrors the `wedge_reason` shape without importing it), names no host beyond the
|
|
81
|
+
Claude-Code transcript JSON shape, resolves nothing against `__file__`, takes no
|
|
82
|
+
lease, carries no policy of its own. The transcript I/O is the caller's boundary
|
|
83
|
+
(reused via `claim_extract._read_lines`), exactly the `liveness`/`posttool_sensor`
|
|
84
|
+
"I/O at the boundary, data to the pure core" rule.
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
from __future__ import annotations
|
|
88
|
+
|
|
89
|
+
import enum
|
|
90
|
+
from dataclasses import dataclass
|
|
91
|
+
from typing import Optional
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Harness-authorship stamp. A terminal record whose `message.model` equals this
# exact string was written by the Claude Code HARNESS (an injected rate-limit /
# quota / server-error stop), not by the subagent's model — it is the byte-author
# the fold must distrust. Always compared by exact equality, never by pattern; a
# healthy record carries a real model id in this field instead.
SYNTHETIC_MODEL = "<synthetic>"

# The `stop_reason` carried by every synthetic record in the surveyed corpus. It
# is corroboration only: a healthy record could in principle also end on a stop
# sequence, so this value strengthens a `<synthetic>` verdict but never decides
# one on its own.
SYNTHETIC_STOP_REASON = "stop_sequence"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class TerminalState(str, enum.Enum):
    """Result-state of a transcript's terminal assistant record.

    Members are `str`-valued, so `.value` is already the `--json` token and no
    lookup table is needed to serialize or parse one.

    * HEALTHY    — a real model authored the terminal record and it carries
                   content; no synthetic/error marker is present. The fold may
                   believe its return value (a well-formed-but-empty result is
                   out of scope here and left to `effect_witness`).
    * SYNTHETIC  — the HARNESS authored the terminal record (`model ==
                   "<synthetic>"` and/or `isApiErrorMessage`): the worker was
                   terminated abnormally (rate-limit / quota / auth / server
                   error) and the would-be "result" is an error string. Route
                   to a DEAD bucket, keep it in the denominator, do not fold.
    * EMPTY      — no assistant record with content exists, i.e. the worker
                   produced nothing. Unlike HEALTHY there is nothing to fold,
                   so it is treated as DEAD.
    * UNREADABLE — the transcript could not be read/parsed. This is the
                   fail-safe floor and is deliberately NOT dead: a read fault
                   must never fabricate a death that discards a real result.
    """

    HEALTHY = "HEALTHY"
    SYNTHETIC = "SYNTHETIC"
    EMPTY = "EMPTY"
    UNREADABLE = "UNREADABLE"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class TerminalClass(str, enum.Enum):
    """Coarse cause of a SYNTHETIC terminal — reported as detail, never the gate.

    Derived from the top-level `apiErrorStatus` (when the record has one) and
    the record's leading text, so an operator or a log line can show WHY a
    worker died. The SYNTHETIC gate itself never keys on the HTTP code: doing
    so would miss the limit-text deaths that carry no `apiErrorStatus` at all.
    A terminal that is not synthetic is classed NONE.
    """

    # 429 / "Rate limited"
    RATE_LIMIT = "RATE_LIMIT"
    # 403 / weekly|session limit / "out of extra usage" / org-disabled
    USAGE_LIMIT = "USAGE_LIMIT"
    # 401 / authentication_error
    AUTH = "AUTH"
    # 500 / server-side
    SERVER = "SERVER"
    # synthetic, but none of the recognized classes
    OTHER = "OTHER"
    # not a synthetic terminal
    NONE = "NONE"
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass(frozen=True)
class TerminalEvidence:
    """Immutable input of `classify_terminal`: the terminal assistant record's fields.

    Gathered at the boundary (the transcript read) and handed to the pure
    verdict as plain data.

    * found        — True when an assistant record was located in the
                     transcript. When False the outcome is EMPTY or
                     UNREADABLE, and `readable` tells the two apart.
    * readable     — True when the transcript could be read/parsed. False
                     yields UNREADABLE, the fail-safe floor (never claim a
                     death that could not be witnessed).
    * model        — `message.model` of the terminal record; the literal
                     `"<synthetic>"` is the harness-authorship tell.
    * stop_reason  — `message.stop_reason` of the terminal record
                     (corroborating only).
    * is_api_error — the top-level `isApiErrorMessage` flag (corroborating,
                     and an alternative gate).
    * api_status   — the top-level `apiErrorStatus` HTTP code when present
                     (detail only).
    * text         — bounded leading text of the terminal record's content,
                     used for detail / class inference.
    * has_content  — True when the terminal record carried any text or tool
                     content; separates a content-less terminal from a
                     healthy one.
    """

    found: bool
    readable: bool
    model: Optional[str] = None
    stop_reason: Optional[str] = None
    is_api_error: bool = False
    api_status: Optional[int] = None
    text: str = ""
    has_content: bool = False
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@dataclass(frozen=True)
class ResultStateVerdict:
    """Typed outcome of the result-state check, with its corroborating detail.

    * state      — the `TerminalState` classification.
    * dead       — branch signal for the fold: True when the result must NOT
                   be believed. `classify_terminal` sets it for SYNTHETIC and
                   EMPTY and leaves it False for HEALTHY and UNREADABLE.
    * cls        — detail-only `TerminalClass`; defaults to NONE and is only
                   filled in for a SYNTHETIC verdict.
    * api_status — the HTTP code when one is known (detail only).
    * reason     — short, log-greppable explanation of the verdict.
    """

    state: TerminalState
    dead: bool
    cls: TerminalClass = TerminalClass.NONE
    api_status: Optional[int] = None
    reason: str = ""

    def to_dict(self) -> dict:
        """Return the verdict as a plain dict with enum members flattened to tokens.

        The detail class is emitted under the key `"class"` (not `"cls"`).
        """
        payload: dict = {"state": self.state.value}
        payload["dead"] = self.dead
        payload["class"] = self.cls.value
        payload["api_status"] = self.api_status
        payload["reason"] = self.reason
        return payload
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# ---------------------------------------------------------------------------
|
|
215
|
+
# The PURE verdict — evidence in, verdict out (no I/O).
|
|
216
|
+
# ---------------------------------------------------------------------------
|
|
217
|
+
def _infer_class(api_status: Optional[int], text: str) -> TerminalClass:
    """Return the detail-only class of a synthetic terminal. PURE. Never the gate.

    A recognized HTTP code is decisive: it is the precise signal, so it wins
    even when the leading text happens to contain a phrase belonging to another
    class. The leading text is consulted only as the fallback — when
    `api_status` is absent (the limit-text deaths) or is a code this table
    does not know.

    Args:
        api_status: the top-level `apiErrorStatus` as an int, or None.
        text: the leading text of the terminal record; None/empty is tolerated.

    Returns:
        RATE_LIMIT / AUTH / SERVER / USAGE_LIMIT when the code or the text is
        recognized, otherwise OTHER — an unrecognized synthetic terminal is
        still reported, never silently dropped.
    """
    # Rung 1: the HTTP code. Equality tests (rather than a dict lookup) keep
    # this tolerant of any value a caller may have put in `api_status`.
    status_table = (
        (429, TerminalClass.RATE_LIMIT),
        (401, TerminalClass.AUTH),
        (500, TerminalClass.SERVER),
        (403, TerminalClass.USAGE_LIMIT),
    )
    for code, klass in status_table:
        if api_status == code:
            return klass

    # Rung 2: the leading text, matched case-insensitively on substrings.
    lowered = (text or "").lower()
    if "rate limited" in lowered:
        return TerminalClass.RATE_LIMIT
    if "authentication" in lowered:
        return TerminalClass.AUTH
    if "internal server error" in lowered or "server-side" in lowered:
        return TerminalClass.SERVER
    usage_markers = (
        "weekly limit",
        "session limit",
        "out of extra usage",
        "disabled claude",
        "usage limit",
    )
    if any(marker in lowered for marker in usage_markers):
        return TerminalClass.USAGE_LIMIT
    return TerminalClass.OTHER
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def classify_terminal(evidence: TerminalEvidence) -> ResultStateVerdict:
    """Turn terminal-record evidence into a `ResultStateVerdict`. PURE.

    The checks run in the safe-direction order:

      1. UNREADABLE — an unreadable transcript cannot witness a death, so none
         is claimed (`dead=False`). This is the fail-safe floor.
      2. SYNTHETIC — `model == "<synthetic>"` is the primary gate and
         `isApiErrorMessage` is an alternative gate; either marker alone makes
         the record SYNTHETIC (`dead=True`). `stop_reason == "stop_sequence"`
         is only appended to the reason as corroboration and never gates.
      3. EMPTY — no assistant record was found, or the one found carried no
         content (`dead=True`; there is nothing to fold).
      4. HEALTHY — a real-model terminal with content (`dead=False`). Whether
         that content is substantively empty is `effect_witness`'s concern,
         not this gate's.
    """
    # 1. Fail-safe floor: decline to call a death on a transcript we could not read.
    if not evidence.readable:
        return ResultStateVerdict(
            state=TerminalState.UNREADABLE,
            dead=False,
            reason="transcript unreadable — declining to claim a death (fail-safe)",
        )

    # 2. Harness-death markers. The model stamp is the load-bearing one; the
    #    top-level flag also gates, as belt-and-braces against a record that
    #    carries the flag without the literal model string.
    stamped_by_harness = evidence.model == SYNTHETIC_MODEL
    if stamped_by_harness or evidence.is_api_error:
        detail = _infer_class(evidence.api_status, evidence.text)
        parts = ["model=<synthetic>" if stamped_by_harness else "isApiErrorMessage=true"]
        if evidence.stop_reason == SYNTHETIC_STOP_REASON:
            parts.append(" + stop_reason=stop_sequence")
        if evidence.api_status is not None:
            parts.append(f" apiErrorStatus={evidence.api_status}")
        markers = "".join(parts)
        return ResultStateVerdict(
            state=TerminalState.SYNTHETIC,
            dead=True,
            cls=detail,
            api_status=evidence.api_status,
            reason=(
                f"harness-authored terminal ({markers}) — the result "
                f"is a {detail.value} error string, not a finding; route to DEAD and do "
                "not fold"
            ),
        )

    # 3./4. Without a harness marker, the record is HEALTHY only if it exists
    #       and carries content; otherwise it is EMPTY for one of two reasons.
    if not evidence.found:
        empty_reason = "no assistant terminal record found — the worker produced no result"
    elif not evidence.has_content:
        empty_reason = "terminal assistant record carried no content — no result to fold"
    else:
        return ResultStateVerdict(
            state=TerminalState.HEALTHY,
            dead=False,
            reason="terminal assistant record is real-model authored with content",
        )
    return ResultStateVerdict(
        state=TerminalState.EMPTY,
        dead=True,
        reason=empty_reason,
    )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ---------------------------------------------------------------------------
|
|
311
|
+
# The PURE refusal-envelope renderer — a verdict in, a wedge_reason-style envelope out.
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
def refusal_envelope(verdict: ResultStateVerdict) -> dict:
    """Render a verdict as a `wedge_reason`-shaped envelope. PURE.

    A dead verdict becomes a refusal envelope carrying the `do_not_render` /
    `blocked` / `reason_class` rungs, so a dead result-state can travel through
    the same refusal plumbing as a dispatch no-pick. `reason_class` is a
    stable, log-greppable token: `RESULT_DEAD_<CLASS>` for a SYNTHETIC verdict
    and `RESULT_EMPTY` for any other dead verdict. A verdict that is not dead
    becomes a non-refusal (`all_clear`) envelope.
    """
    state_token = verdict.state.value

    if verdict.dead:
        token = (
            f"RESULT_DEAD_{verdict.cls.value}"
            if verdict.state is TerminalState.SYNTHETIC
            else "RESULT_EMPTY"
        )
        return {
            "do_not_render": True,
            "blocked": True,
            "all_clear": False,
            "verdict": "WEDGE",
            "reason_class": token,
            "state": state_token,
            "api_status": verdict.api_status,
            "reason": verdict.reason,
        }

    # Not dead: nothing to refuse.
    return {
        "all_clear": True,
        "verdict": "LIVE",
        "state": state_token,
        "reason": verdict.reason,
    }
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# ---------------------------------------------------------------------------
|
|
346
|
+
# Boundary I/O — read the terminal assistant record from a transcript JSONL.
|
|
347
|
+
# NOT pure (reads a file); reuses claim_extract's transcript reader so the two
|
|
348
|
+
# can't drift, the git_delta "I/O at the boundary" discipline.
|
|
349
|
+
# ---------------------------------------------------------------------------
|
|
350
|
+
def _leading_text(content: object) -> tuple[str, bool]:
    """Return `(leading_text, has_content)` for a message `content` value. PURE.

    `content` may be a plain string or a list of content blocks (dicts with a
    `type` key); any other value yields `("", False)`.

    * leading_text — the first non-blank text, stripped and bounded to a
      400-character prefix.
    * has_content  — True when there is non-blank text, or any `tool_use` /
      `tool_result` / `thinking` / `image` block, so a tool-only terminal is
      not mis-flagged EMPTY.

    Whitespace-only text counts as no content in BOTH encodings: a blank
    string and a list holding only a blank text block describe the same empty
    payload and must not produce different verdicts.
    """
    if isinstance(content, str):
        stripped = content.strip()
        return (stripped[:400], bool(stripped))
    if not isinstance(content, list):
        return ("", False)

    lead = ""
    has_content = False
    for block in content:
        if not isinstance(block, dict):
            continue  # tolerate stray non-dict entries
        kind = block.get("type")
        if kind == "text":
            raw = block.get("text", "")
            stripped = raw.strip() if isinstance(raw, str) else ""
            if stripped:
                has_content = True
                if not lead:
                    lead = stripped[:400]
        elif kind in ("tool_use", "tool_result", "thinking", "image"):
            has_content = True
    return (lead, has_content)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _api_status_int(value: object) -> Optional[int]:
    """Coerce a top-level `apiErrorStatus` to an int, or return None. PURE.

    Accepts a real int or a string of decimal digits (surrounding whitespace
    ignored). Everything else — including bools, floats, signed or otherwise
    non-numeric strings — yields None. The status is detail-only and never
    gates, so this function must not raise on odd input.
    """
    # bool is a subclass of int; a True/False flag is not an HTTP status.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        digits = value.strip()
        # `isdecimal`, not `isdigit`: `isdigit` also accepts characters such as
        # "²" that `int()` rejects, which would raise instead of returning None.
        if digits.isdecimal():
            try:
                return int(digits)
            except ValueError:
                # e.g. a digit string beyond the interpreter's int-conversion limit
                return None
    return None
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def terminal_evidence_from_record(record: dict) -> Optional[TerminalEvidence]:
    """Build `TerminalEvidence` from ONE transcript record, if it is an assistant turn. PURE.

    A record qualifies when it is a dict whose `message` is a dict with
    `role == "assistant"`; anything else returns None so the boundary reader
    can keep walking to the LAST assistant record. A synthetic death also sits
    in the assistant slot, so this walk captures it.

    `model` and `stop_reason` are kept only when they are strings.
    `isApiErrorMessage` and `apiErrorStatus` are read from the RECORD, not from
    `message`: in real records they are top-level siblings of `message`.
    """
    message = record.get("message") if isinstance(record, dict) else None
    if not isinstance(message, dict):
        return None
    if message.get("role") != "assistant":
        return None

    lead, carried_content = _leading_text(message.get("content"))
    model_field = message.get("model")
    stop_field = message.get("stop_reason")
    return TerminalEvidence(
        found=True,
        readable=True,
        model=model_field if isinstance(model_field, str) else None,
        stop_reason=stop_field if isinstance(stop_field, str) else None,
        is_api_error=bool(record.get("isApiErrorMessage")),
        api_status=_api_status_int(record.get("apiErrorStatus")),
        text=lead,
        has_content=carried_content,
    )
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def terminal_evidence_from_transcript(path: str) -> TerminalEvidence:
    """Read a transcript JSONL and return the evidence of its LAST assistant record.

    Lines come from `claim_extract._read_lines`, so this module and
    `claim_extract` share one transcript reader. Every non-blank line is parsed
    as JSON and the last record that is an assistant turn wins (a synthetic
    death is an assistant record, so it is captured). Lines that fail to parse
    are skipped, so a truncated tail does not hide an earlier terminal.

    Outcomes when no assistant record is returned:

      * the read fails, or the file has non-blank lines and NONE of them parses
        as JSON → `readable=False` (→ UNREADABLE, the fail-safe floor: a
        garbled transcript must not be reported as a dead worker).
      * the file is empty/blank, or parses but holds no assistant record →
        `readable=True, found=False` (→ EMPTY).

    Args:
        path: filesystem path of the transcript JSONL.

    Returns:
        The `TerminalEvidence` to hand to `classify_terminal`.
    """
    import json

    from dos import claim_extract

    try:
        lines = claim_extract._read_lines(path)
    except (OSError, UnicodeError):
        # NOTE(review): whether `_read_lines` can raise a decode error is not
        # visible from here; catching it keeps an undecodable file on the
        # UNREADABLE floor instead of crashing the fold-site check.
        return TerminalEvidence(found=False, readable=False)

    last: Optional[TerminalEvidence] = None
    saw_payload = False  # at least one non-blank line was present
    parsed_any = False   # at least one line was valid JSON
    for raw in lines:
        candidate = raw.strip()
        if not candidate:
            continue
        saw_payload = True
        try:
            obj = json.loads(candidate)
        except (ValueError, TypeError):
            continue
        parsed_any = True
        ev = terminal_evidence_from_record(obj)
        if ev is not None:
            last = ev

    if last is not None:
        return last
    if saw_payload and not parsed_any:
        # Content exists but nothing was parseable: a parse failure, not an
        # empty worker — decline to claim a death.
        return TerminalEvidence(found=False, readable=False)
    return TerminalEvidence(found=False, readable=True)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def verify_transcript(path: str) -> ResultStateVerdict:
    """Run the composed fold-site check on one transcript.

    This is the single call a workflow stage or the CLI makes. It reads the
    terminal assistant record at the boundary, then applies the pure verdict.
    The returned `ResultStateVerdict.dead` is the branch signal at the
    `.filter(Boolean)` fold.
    """
    evidence = terminal_evidence_from_transcript(path)
    return classify_terminal(evidence)
|