dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/result_state.py ADDED
@@ -0,0 +1,468 @@
1
+ r"""result_state — the fold-site result-state witness (docs/197 §7(1), the keystone).
2
+
3
+ > **An ultracode `Workflow` folds `agent()`'s self-authored return value as ground
4
+ > truth at exactly one place — the `${result}` interpolation — and 32% of real
5
+ > subagents (736/2305, docs/197 §2) fold a HARNESS-authored terminal-error string
6
+ > there as a finished "finding." The death is non-null, so it survives the
7
+ > `.filter(Boolean)` used in 89/114 real scripts; a smaller numerator is the only
8
+ > signal, and code that computes `failed = N − survivors.length` cannot tell a dead
9
+ > worker from a real negative. This module is the byte-clean referee at that fold:
10
+ > it classifies a subagent transcript's TERMINAL assistant message and refuses to
11
+ > believe a harness-synthesized abnormal termination — because the catch reads a
12
+ > DIFFERENT byte-author than the judged worker (`message.model == "<synthetic>"`
13
+ > means the Claude Code HARNESS synthesized the bytes, not the subagent's model).**
14
+
15
+ The byte-author law, restated for the fold (docs/138 / docs/116 §2.5):
16
+
17
+ > A verdict is *grounding* only when the byte-author of the evidence differs from
18
+ > the judged agent. A subagent re-narrating its own output is consistency, not
19
+ > grounding.
20
+
21
+ The terminal `model:"<synthetic>"` record is the cleanest possible grounding: the
22
+ `role:"assistant"` slot is merely the conversation position, but `<synthetic>` is
23
+ the harness's authorship stamp — the subagent's model did NOT generate it. So
24
+ asking "is this terminal record harness-authored?" is a pure byte question about
25
+ bytes the judged agent could not forge in its favor. This is the same shape as
26
+ `tool_stream` keying on the env-authored `result_digest`, one rung over: there the
27
+ env authors the repeated result, here the harness authors the death.
28
+
29
+ Why a NEW grammar — not a reuse of `terminal_error` (docs/197 §4c, VERIFIED)
30
+ ===========================================================================
31
+
32
+ `benchmark/toolathlon/trajectory.py:terminal_error_fired` is the structural-error
33
+ detector for tool RESULTS — but it (a) walks ONLY `role=="tool"` messages
34
+ (`trajectory.py:471`), and (b) its `_STRUCT_ERR` grammar anchors `^\s*Error:`,
35
+ which does NOT match the synthetic string that LEADS with `API Error:`
36
+ (`trajectory.py:343`). The synthetic terminal is a `role:"assistant"` record with
37
+ `model:"<synthetic>"`, so it never reaches that classifier. This module is the
38
+ genuinely-new grammar over the ASSISTANT role the keystone needs — and it lives in
39
+ the kernel (not `benchmark/`) because the fold-site catch is a reusable distrust
40
+ primitive, not a one-benchmark instrument.
41
+
42
+ The discriminators (grounded in 2,935 REAL synthetic records, not the doc's spec)
43
+ =================================================================================
44
+
45
+ An empirical sweep of every `model:"<synthetic>"` record across the operator's real
46
+ `~/.claude/projects` corpus (2,935 records) fixes the byte-exact shape. Critically,
47
+ it is BROADER than docs/197's "429" framing — 43% of synthetic deaths are NOT 429:
48
+
49
+ * `message.model == "<synthetic>"` — 100% (the unforgeable harness marker)
50
+ * `message.stop_reason == "stop_sequence"`— 100%
51
+ * top-level `isApiErrorMessage == true` — 100%
52
+ * top-level `apiErrorStatus` — present with the HTTP code (429/401/
53
+ 403/500) on 2885/2935; ABSENT on 50
54
+ (the subscription/limit-text deaths)
55
+ * `message.content[0].text` classes observed: "API Error: … Rate limited" (1688,
56
+ 57%), "organization has disabled …" (248), "hit your weekly limit" (205),
57
+ "API Error: 500 Internal server error" (66), "out of extra usage" / "session
58
+ limit" (the rest). A 429-only match (docs/197's literal spec) would MISS 43%.
59
+
60
+ So the PRIMARY signal is `model == "<synthetic>"` (harness-authored) — the
61
+ unforgeable rung. `isApiErrorMessage` and `stop_reason == "stop_sequence"`
62
+ corroborate it. `apiErrorStatus` + a coarse `class` (RATE_LIMIT / USAGE_LIMIT /
63
+ AUTH / SERVER / OTHER) are reported as DETAIL, never as the gate (keying the gate
64
+ on 429 would conflate the HTTP code with the harness-authorship fact and miss the
65
+ non-HTTP limit-text deaths). docs/197 §2.1 also placed `isApiErrorMessage` /
66
+ `apiErrorStatus` INSIDE the `message` object; in real records they are TOP-LEVEL
67
+ siblings of `message` — corrected here.
68
+
69
+ Why it is ADVISORY (the docs/197 §6.5 line, the −9 pp wound)
70
+ ============================================================
71
+
72
+ A DEAD verdict's safe action is to route the dead child to a DEAD bucket and
73
+ re-dispatch ITS OWN unit — never to re-prompt the synthesizer mid-plan (the
74
+ docs/143 −9 pp DEFER-shaped derail). This module REPORTS (a verdict + an exit
75
+ code a workflow branches on); it never re-runs a worker, never edits the fold. It
76
+ is a PDP, not a PEP (`enforce.py` is the proposal seam; nothing here actuates).
77
+
78
+ ⚓ Kernel discipline (the litmus): a PURE verdict + a boundary reader. It imports
79
+ only one sibling kernel module (`claim_extract`, for the transcript-read boundary;
80
+ the `wedge_reason` refusal-envelope shape is mirrored, not imported), names no host beyond the
81
+ Claude-Code transcript JSON shape, resolves nothing against `__file__`, takes no
82
+ lease, carries no policy of its own. The transcript I/O is the caller's boundary
83
+ (reused via `claim_extract._read_lines`), exactly the `liveness`/`posttool_sensor`
84
+ "I/O at the boundary, data to the pure core" rule.
85
+ """
86
+
87
+ from __future__ import annotations
88
+
89
+ import enum
90
+ from dataclasses import dataclass
91
+ from typing import Optional
92
+
93
+
94
# Harness-authorship marker. When a terminal record's `message.model` equals this
# exact string, the Claude Code HARNESS wrote the record (a rate-limit / quota /
# server-error stop it injected) rather than the subagent's model — that is the
# byte-author the fold must distrust. Compared by equality only, never as a
# pattern: a healthy record carries a real model id (e.g. `claude-opus-4-8`) here.
SYNTHETIC_MODEL = "<synthetic>"

# The `stop_reason` seen on every synthetic record in the surveyed corpus (100% of
# 2,935 real records). Corroboration only — a healthy record could in principle
# also stop on `stop_sequence`, so this value strengthens a `<synthetic>` verdict
# but is never a gate by itself.
SYNTHETIC_STOP_REASON = "stop_sequence"
106
+
107
+
108
class TerminalState(str, enum.Enum):
    """Verdict categories for the terminal assistant record of a transcript.

    Members are `str`-valued, so each one compares equal to its own name and
    round-trips a `--json` token without a lookup table.

    HEALTHY
        A real model authored the terminal record (`model` is a real id and no
        synthetic/error marker is present). The fold may believe its return
        value — modulo the well-formed-but-empty residue, which needs
        `effect_witness`.
    SYNTHETIC
        The terminal record is HARNESS-authored (`model == "<synthetic>"` and/or
        `isApiErrorMessage`): an abnormal termination such as a rate-limit,
        quota, auth or server error. What the fold would bank is the error
        string, not a finding, so the result is routed to a DEAD bucket, still
        counted in the denominator, and NOT folded.
    EMPTY
        No assistant record with content exists at all — the worker produced
        nothing. Unlike HEALTHY there is no result to fold; treated as DEAD.
    UNREADABLE
        The transcript could not be read or parsed (missing/garbled). This is
        the fail-safe floor: a death that cannot be witnessed is not claimed, so
        UNREADABLE is NOT DEAD — a read fault must never fabricate a death that
        drops a real result.
    """

    HEALTHY = "HEALTHY"
    SYNTHETIC = "SYNTHETIC"
    EMPTY = "EMPTY"
    UNREADABLE = "UNREADABLE"
135
+
136
+
137
class TerminalClass(str, enum.Enum):
    """Coarse cause of a SYNTHETIC terminal — reported as DETAIL, never the gate.

    Derived from the top-level `apiErrorStatus` (when the record has one) and
    from the leading text, so an operator or a log shows WHY the worker died.
    The classifier deliberately does not key its gate on the HTTP code: doing so
    would miss the 50/2935 limit-text deaths that carry no `apiErrorStatus`.
    A non-synthetic terminal is always NONE.
    """

    # 429 / "Rate limited"
    RATE_LIMIT = "RATE_LIMIT"
    # 403 / weekly|session limit / "out of extra usage" / org-disabled
    USAGE_LIMIT = "USAGE_LIMIT"
    # 401 / authentication_error
    AUTH = "AUTH"
    # 500 / server-side
    SERVER = "SERVER"
    # synthetic, but of a class this module does not recognize
    OTHER = "OTHER"
    # the terminal is not synthetic at all
    NONE = "NONE"
152
+
153
+
154
@dataclass(frozen=True)
class TerminalEvidence:
    """Immutable snapshot of the terminal assistant record, as `classify_terminal` sees it.

    The fields are gathered at the boundary (the transcript read) so the pure
    classifier never touches I/O.

    Fields:
        found: True when ANY assistant record was located in the transcript.
            False means EMPTY (nothing produced) or UNREADABLE (the read
            failed); `readable` tells the two apart.
        readable: True when the transcript could be read/parsed at all. False
            means UNREADABLE — the fail-safe floor: never claim a death that
            could not be witnessed.
        model: the terminal record's `message.model`, the harness-authorship
            marker (`"<synthetic>"` is the tell).
        stop_reason: the terminal record's `message.stop_reason` (corroborating).
        is_api_error: the top-level `isApiErrorMessage` flag (corroborating).
        api_status: the top-level `apiErrorStatus` HTTP code when present (detail).
        text: leading text of the terminal record's first content block, used
            for detail / class inference. Bounded — only a prefix is kept.
        has_content: True when the terminal record carried any text/tool
            content; separates a real-but-empty terminal from EMPTY.
    """

    # Located / readable flags — required, positional order matters to callers.
    found: bool
    readable: bool
    # Record fields; every one defaults to its "absent" value.
    model: Optional[str] = None
    stop_reason: Optional[str] = None
    is_api_error: bool = False
    api_status: Optional[int] = None
    text: str = ""
    has_content: bool = False
184
+
185
+
186
@dataclass(frozen=True)
class ResultStateVerdict:
    """Immutable, typed outcome of a result-state classification.

    Fields:
        state: the `TerminalState` assigned to the terminal record.
        dead: convenience flag — True iff the fold must NOT believe this result
            (SYNTHETIC or EMPTY). UNREADABLE is NOT dead (fail-safe).
        cls: the DETAIL-only `TerminalClass`; NONE unless the state is SYNTHETIC.
        api_status: the HTTP code when known (detail).
        reason: a short, log-greppable explanation.
    """

    state: TerminalState
    dead: bool
    cls: TerminalClass = TerminalClass.NONE
    api_status: Optional[int] = None
    reason: str = ""

    def to_dict(self) -> dict:
        """Return a plain dict of the verdict, with enum members flattened to their values.

        Note the `cls` field is emitted under the key `"class"`.
        """
        payload = {"state": self.state.value, "dead": self.dead}
        payload["class"] = self.cls.value
        payload["api_status"] = self.api_status
        payload["reason"] = self.reason
        return payload
212
+
213
+
214
+ # ---------------------------------------------------------------------------
215
+ # The PURE verdict — evidence in, verdict out (no I/O).
216
+ # ---------------------------------------------------------------------------
217
def _infer_class(api_status: Optional[int], text: str) -> TerminalClass:
    """Return the DETAIL-only class of a synthetic terminal. PURE. Never the gate.

    The HTTP code is the precise signal, so a recognized code decides on its own.
    The leading text is consulted only when the code is absent or unrecognized
    (the no-`apiErrorStatus` limit-text deaths, 50/2935 records). Conservative:
    an unrecognized synthetic terminal is OTHER, never silently dropped.

    Args:
        api_status: the top-level `apiErrorStatus` as an int, or None.
        text: the leading text of the terminal record (may be empty or None).

    Returns:
        One of RATE_LIMIT / AUTH / SERVER / USAGE_LIMIT / OTHER.
    """
    # Rung 1 — the status code. Checked in full BEFORE any text match so that,
    # e.g., a 403 whose message happens to mention "authentication" is still
    # reported as USAGE_LIMIT rather than being overridden by the text.
    status_classes = (
        (429, TerminalClass.RATE_LIMIT),
        (401, TerminalClass.AUTH),
        (500, TerminalClass.SERVER),
        (403, TerminalClass.USAGE_LIMIT),
    )
    for code, status_cls in status_classes:
        if api_status == code:
            return status_cls

    # Rung 2 — the leading text, case-insensitively, for records with no
    # (recognized) status code.
    lowered = (text or "").lower()
    if "rate limited" in lowered:
        return TerminalClass.RATE_LIMIT
    if "authentication" in lowered:
        return TerminalClass.AUTH
    if "internal server error" in lowered or "server-side" in lowered:
        return TerminalClass.SERVER
    usage_phrases = (
        "weekly limit",
        "session limit",
        "out of extra usage",
        "disabled claude",
        "usage limit",
    )
    if any(phrase in lowered for phrase in usage_phrases):
        return TerminalClass.USAGE_LIMIT
    return TerminalClass.OTHER
237
+
238
+
239
def classify_terminal(evidence: TerminalEvidence) -> ResultStateVerdict:
    """Map one `TerminalEvidence` to a `ResultStateVerdict`. PURE (no I/O).

    The checks run in safe-direction order and the first match wins:

    1. UNREADABLE — the transcript could not be read, so a death cannot be
       witnessed and none is claimed (NOT dead). A read fault must never
       fabricate a death that drops a real result (the fail-safe floor).
    2. SYNTHETIC — `model == "<synthetic>"` is the primary, unforgeable
       harness-authorship gate; `isApiErrorMessage` is an alternative gate (for
       a build that stamps the flag without the literal model string), so
       EITHER marker suffices. `stop_reason == "stop_sequence"` is only noted
       in the reason; it never gates alone. → DEAD.
    3. EMPTY — no assistant record, or one that carried no content at all.
       → DEAD (no deliverable to fold).
    4. HEALTHY — a real-model terminal with content. The fold may believe it
       (the well-formed-but-empty residue is `effect_witness`'s job, not this
       gate's — docs/197 §6.4).
    """
    if not evidence.readable:
        return ResultStateVerdict(
            state=TerminalState.UNREADABLE,
            dead=False,
            reason="transcript unreadable — declining to claim a death (fail-safe)",
        )

    # Either harness-death marker is enough; the model string is the
    # load-bearing one and is named first in the reason when both are present.
    harness_model = evidence.model == SYNTHETIC_MODEL
    if harness_model or evidence.is_api_error:
        detail_cls = _infer_class(evidence.api_status, evidence.text)
        markers = "model=<synthetic>" if harness_model else "isApiErrorMessage=true"
        if evidence.stop_reason == SYNTHETIC_STOP_REASON:
            markers += " + stop_reason=stop_sequence"
        if evidence.api_status is not None:
            markers += f" apiErrorStatus={evidence.api_status}"
        return ResultStateVerdict(
            state=TerminalState.SYNTHETIC,
            dead=True,
            cls=detail_cls,
            api_status=evidence.api_status,
            reason=(
                f"harness-authored terminal ({markers}) — the result is a "
                f"{detail_cls.value} error string, not a finding; route to DEAD "
                f"and do not fold"
            ),
        )

    # Both "nothing produced" shapes share the EMPTY state; only the reason differs.
    if not evidence.found:
        empty_reason = "no assistant terminal record found — the worker produced no result"
    elif not evidence.has_content:
        empty_reason = "terminal assistant record carried no content — no result to fold"
    else:
        return ResultStateVerdict(
            state=TerminalState.HEALTHY,
            dead=False,
            reason="terminal assistant record is real-model authored with content",
        )
    return ResultStateVerdict(
        state=TerminalState.EMPTY,
        dead=True,
        reason=empty_reason,
    )
308
+
309
+
310
+ # ---------------------------------------------------------------------------
311
+ # The PURE refusal-envelope renderer — a verdict in, a wedge_reason-style envelope out.
312
+ # ---------------------------------------------------------------------------
313
def refusal_envelope(verdict: ResultStateVerdict) -> dict:
    """Render a verdict as a `wedge_reason`-shaped envelope. PURE.

    A DEAD verdict produces a refusal envelope carrying the
    `do_not_render` / `blocked` / `reason_class` rungs, so a dead result-state
    can travel through the same refusal plumbing as a dispatch no-pick. A
    non-DEAD verdict produces a non-refusal (`all_clear`) envelope instead.

    `reason_class` is a stable, log-greppable token: `RESULT_DEAD_<CLASS>` for a
    SYNTHETIC state, `RESULT_EMPTY` for any other dead state.
    """
    state_token = verdict.state.value

    if not verdict.dead:
        live: dict = {"all_clear": True}
        live["verdict"] = "LIVE"
        live["state"] = state_token
        live["reason"] = verdict.reason
        return live

    reason_class = (
        f"RESULT_DEAD_{verdict.cls.value}"
        if verdict.state is TerminalState.SYNTHETIC
        else "RESULT_EMPTY"
    )
    refusal: dict = {"do_not_render": True, "blocked": True, "all_clear": False}
    refusal["verdict"] = "WEDGE"
    refusal["reason_class"] = reason_class
    refusal["state"] = state_token
    refusal["api_status"] = verdict.api_status
    refusal["reason"] = verdict.reason
    return refusal
343
+
344
+
345
+ # ---------------------------------------------------------------------------
346
+ # Boundary I/O — read the terminal assistant record from a transcript JSONL.
347
+ # NOT pure (reads a file); reuses claim_extract's transcript reader so the two
348
+ # can't drift, the git_delta "I/O at the boundary" discipline.
349
+ # ---------------------------------------------------------------------------
350
+ def _leading_text(content: object) -> tuple[str, bool]:
351
+ """The leading text of a message `content` + whether it carried ANY content. PURE.
352
+
353
+ A synthetic record's content is `[{"type":"text","text":"API Error: …"}]`; a
354
+ healthy record may be text and/or tool_use blocks. Returns `(leading_text,
355
+ has_content)` — `has_content` is True if there is any text OR tool_use/tool_result
356
+ block (so a tool-only terminal is not mis-flagged EMPTY). Bounded to a prefix.
357
+ """
358
+ if isinstance(content, str):
359
+ s = content.strip()
360
+ return (s[:400], bool(s))
361
+ if isinstance(content, list):
362
+ lead = ""
363
+ has = False
364
+ for b in content:
365
+ if not isinstance(b, dict):
366
+ continue
367
+ bt = b.get("type")
368
+ if bt == "text":
369
+ t = b.get("text", "")
370
+ if isinstance(t, str) and t:
371
+ has = True
372
+ if not lead:
373
+ lead = t.strip()[:400]
374
+ elif bt in ("tool_use", "tool_result", "thinking", "image"):
375
+ has = True
376
+ return (lead, has)
377
+ return ("", False)
378
+
379
+
380
+ def _api_status_int(value: object) -> Optional[int]:
381
+ """Coerce a top-level `apiErrorStatus` to int, or None. PURE. Tolerant of a
382
+ string-coded status; any non-coercible value → None (detail-only, never gates)."""
383
+ if isinstance(value, bool):
384
+ return None
385
+ if isinstance(value, int):
386
+ return value
387
+ if isinstance(value, str) and value.strip().isdigit():
388
+ return int(value.strip())
389
+ return None
390
+
391
+
392
def terminal_evidence_from_record(record: dict) -> Optional[TerminalEvidence]:
    """Build a `TerminalEvidence` from ONE transcript record, if it is an assistant turn. PURE.

    Returns None when `record` is not a dict, has no dict-valued `message`, or
    the message's `role` is not `"assistant"` (a user/tool_result line, a
    summary, …), so a caller walking the transcript can keep the LAST non-None
    result. A synthetic death is itself an assistant record, so it is captured.

    `model` and `stop_reason` are kept only when they are strings; anything else
    becomes None.
    """
    message = record.get("message") if isinstance(record, dict) else None
    if not isinstance(message, dict):
        return None
    if message.get("role") != "assistant":
        return None

    raw_model = message.get("model")
    raw_stop = message.get("stop_reason")
    lead, carried = _leading_text(message.get("content"))

    # `isApiErrorMessage` / `apiErrorStatus` are TOP-LEVEL siblings of `message`
    # in real records (the docs/197 §2.1 correction), hence read from `record`.
    return TerminalEvidence(
        found=True,
        readable=True,
        model=raw_model if isinstance(raw_model, str) else None,
        stop_reason=raw_stop if isinstance(raw_stop, str) else None,
        is_api_error=bool(record.get("isApiErrorMessage")),
        api_status=_api_status_int(record.get("apiErrorStatus")),
        text=lead,
        has_content=carried,
    )
420
+
421
+
422
def terminal_evidence_from_transcript(path: str) -> TerminalEvidence:
    """Read a subagent transcript JSONL → the `TerminalEvidence` of its LAST assistant record.

    Reuses `claim_extract._read_lines` as the transcript reader. Every record is
    walked and the LAST one that is an assistant turn is kept (the terminal
    record — a synthetic death is an assistant record, so it is captured).

    Outcomes:

    * the read fails, or the file has non-blank lines but NOT ONE of them
      parses as JSON → `readable=False` (→ UNREADABLE, the fail-safe floor:
      never claim a death that could not be witnessed).
    * the read succeeds, at least one line parses (or the file is blank), and
      no assistant record exists → `readable=True, found=False` (→ EMPTY).
    * an assistant record is found → its fields (→ SYNTHETIC / EMPTY / HEALTHY
      by the pure verdict).

    Individual unparseable lines are skipped; only a transcript that is garbled
    throughout is reported as unreadable.
    """
    import json

    from dos import claim_extract

    try:
        lines = claim_extract._read_lines(path)
    except (OSError, UnicodeDecodeError):
        # NOTE(review): assumes a decode fault, if `_read_lines` can raise one,
        # is as much a read failure as a missing file — harmless if it never does.
        return TerminalEvidence(found=False, readable=False)

    last: Optional[TerminalEvidence] = None
    saw_payload = False  # at least one non-blank line was present
    parsed_any = False  # at least one line decoded as JSON
    for raw in lines:
        stripped = raw.strip()
        if not stripped:
            continue
        saw_payload = True
        try:
            obj = json.loads(stripped)
        except (ValueError, TypeError):
            continue
        parsed_any = True
        evidence = terminal_evidence_from_record(obj)
        if evidence is not None:
            last = evidence

    if last is not None:
        return last
    if saw_payload and not parsed_any:
        # Content was there but none of it was parseable: a parse fault, not a
        # worker that produced nothing — so decline to claim a death.
        return TerminalEvidence(found=False, readable=False)
    return TerminalEvidence(found=False, readable=True)
458
+
459
+
460
def verify_transcript(path: str) -> ResultStateVerdict:
    """Run the composed fold-site check on one transcript: read, then classify.

    This is the single call a workflow stage or the CLI makes. The returned
    `ResultStateVerdict`'s `.dead` is the branch signal at the
    `.filter(Boolean)` fold. Boundary I/O (`terminal_evidence_from_transcript`)
    feeds the pure verdict (`classify_terminal`).
    """
    evidence = terminal_evidence_from_transcript(path)
    return classify_terminal(evidence)