dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/skills/EXAMPLES.md ADDED
@@ -0,0 +1,584 @@
1
+ # DOS in skills — a worked cookbook
2
+
3
+ A skill is a **screenplay**, not a program: it *shells `dos` verbs* and reads the
4
+ verdict, never deciding ground truth itself. The kernel decides; the skill
5
+ narrates. That split is the whole point — the skill is the judged agent, the
6
+ kernel is the part that doesn't believe it. Every host-specific value a recipe
7
+ needs (which lanes exist, where plans live, what a gate exit code means) is
8
+ **data** you read once from `dos doctor --workspace . --json` or from `dos.toml`
9
+ — never a literal you hardcode. The recipes below are worked end-to-end: each is
10
+ *problem → the `dos` verbs → a real transcript → the rule it teaches*. Captured
11
+ transcripts use this repo's own generic workspace values; anything not captured
12
+ live is marked ILLUSTRATIVE.
13
+
14
+ ---
15
+
16
+ ## Recipe 0 — Discover the workspace once, never re-read (the WCR on-ramp)
17
+
18
+ **Problem.** A skill that re-reads the same state files on every step burns the
19
+ budget and reads stale bytes — the classic read-loop pathology, where one
20
+ session re-opens a handful of files dozens of times. The fix is the
21
+ **workspace-config-read (WCR) on-ramp**: cache the workspace facts *once* at
22
+ Step 0, then read paths/lanes/exit-codes from the parsed object for the rest of
23
+ the run.
24
+
25
+ **Verbs.** `dos doctor --workspace . --json`
26
+
27
+ **Transcript** (trimmed to the fields a skill actually consumes):
28
+
29
+ ```json
30
+ {
31
+ "dos_version": "0.22.0",
32
+ "git": true,
33
+ "workspace_facts": { "is_kernel_repo": true, "kernel_runtime_files_present": 11 },
34
+ "lanes": {
35
+ "concurrent": ["benchmark", "docs", "examples", "scripts", "spikes", "src", "tests"],
36
+ "exclusive": ["global"],
37
+ "autopick": ["benchmark", "docs", "examples", "scripts", "spikes", "src", "tests"],
38
+ "trees": { "docs": ["docs/**"], "src": ["src/**"], "global": ["**/*"] }
39
+ },
40
+ "paths": {
41
+ "plans_glob": "docs/**/*-plan.md",
42
+ "next_packets": ".dos/verdicts",
43
+ "runs": ".dos/runs",
44
+ "style": "dos"
45
+ },
46
+ "stamp": { "style": "grep" },
47
+ "overlap_policy": { "active": "prefix", "available": ["prefix", "semantic-groups"], "ratio_max": 0.3333333333333333 },
48
+ "exit_codes": {
49
+ "verify": { "shipped": 0, "not_shipped": 1, "contract_error": 2 },
50
+ "gate": { "LIVE": 0, "DRAIN": 3, "STALE-STAMP": 4, "BLOCKED": 5, "RACE": 6, "contract_error": 2, "unknown": 7 },
51
+ "liveness": { "ADVANCING": 0, "SPINNING": 3, "STALLED": 4, "contract_error": 2, "unknown": 5 },
52
+ "status": { "ADVANCING": 0, "SPINNING": 3, "STALLED": 4, "contract_error": 2, "unknown": 5 },
53
+ "arbitrate": { "acquire": 0, "refuse": 1, "contract_error": 2 }
54
+ }
55
+ }
56
+ ```
57
+
58
+ A skill reads this once and stashes it. The lane list, the plans glob, the
59
+ `.dos/verdicts` packet dir, the exit-code tables — all of it is *config data*, so
60
+ the same screenplay runs unchanged against a host whose lane names are entirely
61
+ different and whose plans live elsewhere. Note `stamp.style == "grep"`
62
+ and the *generic* stamp grammar: this workspace ships the domain-free default, so
63
+ nothing host-specific leaks into the skill.
64
+
65
+ **The rule.** Read the workspace once; treat its facts as data for the rest of
66
+ the run. Re-reading state files N times is the pathology DOS exists to make
67
+ unnecessary.
68
+
69
+ ---
70
+
71
+ ## Recipe 1 — Ask the truth syscall instead of grepping commit subjects (verify)
72
+
73
+ **Problem.** "Is this phase done?" is the question a loop most wants to answer
74
+ from its own narration ("I committed it, so it's shipped"). Don't. Ask the truth
75
+ syscall, which answers from git ancestry + ship-stamp grammar — and then **read
76
+ the rung**, because the rungs are not equally trustworthy.
77
+
78
+ **Verbs.** `dos verify --workspace . PLAN PHASE --json`
79
+
80
+ **Transcript — SHIPPED, grep rung:**
81
+
82
+ ```json
83
+ $ dos verify --workspace . docs/82_liveness-oracle-plan liveness --json
84
+ {"phase":"liveness","plan":"docs/82_liveness-oracle-plan","rung":"direct","sha":"80d4f30",
85
+ "shipped":true,"source":"grep-subject",
86
+ "summary":"80d4f30 liveness: exclude the BIRTH acquire from the ADVANCING event count"}
87
+ ```
88
+
89
+ **Transcript — NOT_SHIPPED, none rung:**
90
+
91
+ ```json
92
+ $ dos verify --workspace . docs/99_runtime-validation-and-the-actuation-boundary halt --json
93
+ {"phase":"halt","plan":"docs/99_runtime-validation-and-the-actuation-boundary",
94
+ "shipped":false,"source":"none"}
95
+ ```
96
+
97
+ **The rule.** The `source` field (not the separate `rung` field in the JSON) names the rung that answered, and it is one of exactly three
98
+ values: `registry`, `grep-subject`, or `none`. `grep-subject` means a commit
99
+ *subject* contained the phase token — which flips the verdict to SHIPPED even if
100
+ little was actually built. So a skill must **read the rung, not the bare
101
+ boolean**: a `grep-subject` SHIPPED is a weaker fact than a `registry` SHIPPED,
102
+ and `none` means git ancestry never stamped it. Branch on `exit_codes.verify`
103
+ (`shipped:0`, `not_shipped:1`, `contract_error:2`), never on parsing the prose.
104
+
105
+ ---
106
+
107
+ ## Recipe 1b — Gate "keep working until the goal is met" on the verdict, not self-report (hook stop)
108
+
109
+ **Problem.** A harness completion condition ("don't stop until X is done") is
110
+ normally evaluated by the model re-reading the session — so a fluent narration of
111
+ an X the world does not corroborate ends the work early. That is consistency, not
112
+ grounding: the part deciding done-ness *is* the part being judged. Replace the
113
+ self-judgment with a Stop hook that refuses to let the agent stop until git backs
114
+ every phase it claimed shipped. (The screenplay is `/dos-goal-gate`; this recipe
115
+ is the captured transcript.)
116
+
117
+ **Verbs.** `dos init --with-hooks` (wire it once) · `dos hook stop --json` (the
118
+ gate verdict; the runtime consumes the default non-`--json` bytes on a real stop).
119
+
120
+ **Transcript — a confidently-claimed phase git does NOT back → the Stop is blocked:**
121
+
122
+ ```json
123
+ $ echo '{}' | dos hook stop --workspace . \
124
+ --plan docs/99_runtime-validation-and-the-actuation-boundary --phase halt --json
125
+ {"ok": false,
126
+ "reason": "DOS verify: you claimed docs/99_runtime-validation-and-the-actuation-boundary halt (via none) shipped, but git has no commit backing it. Land the commit (with the ship-stamp grammar) or correct the claim before stopping.",
127
+ "results": [{"confident": true, "phase": "halt", "shipped": false, "source": "none", "rung": "frontmatter",
128
+ "plan": "docs/99_runtime-validation-and-the-actuation-boundary"}]}
129
+ ```
130
+
131
+ **Transcript — nothing confidently claimed (or every claim corroborated) → stop allowed:**
132
+
133
+ ```json
134
+ $ printf '%s' '{"transcript_path":"/nonexistent.jsonl","cwd":"."}' | dos hook stop --workspace . --json
135
+ {"checked": 0, "ok": true, "results": []}
136
+ ```
137
+
138
+ On a real stop (no `--json`), the first case prints `{"decision":"block","reason":…}`
139
+ — the exact bytes Claude Code honors to decline the stop and feed the reason back
140
+ as the next instruction — and the second prints nothing (CC's "allow stop").
141
+
142
+ **The rule.** "The goal is met" is a claim with a byte-author. The gate lets the
143
+ agent stop only when `dos verify` finds git backs every phase it claimed — a
144
+ `source:"none"` keeps it working, and the agent cannot overrule that by asserting
145
+ completion again. Decompose the goal into checkable effects and witness each;
146
+ never let "I'm done" be its own proof. (Do not confuse this with `dos hook
147
+ marker`, docs/259 — that bounds keep-alive *polling*; this refuses a *false done*.
148
+ Opposite triggers, both Stop hooks.)
149
+
150
+ ---
151
+
152
+ ## Recipe 2 — Take a lane before you write (arbitrate); honor the redirect
153
+
154
+ **Problem.** Two agents editing overlapping file-regions race. A lane is a
155
+ leased range-lock over a glob-region; you ask the arbiter for one *before* you
156
+ write, and you honor whatever it hands back.
157
+
158
+ **Verbs.** `dos arbitrate --workspace . --lane L [--kind cluster] [--leases <json>]`
159
+
160
+ **Transcript** — asked for `src`, a live lease made it busy, the arbiter
161
+ auto-picked a *different* free lane:
162
+
163
+ ```json
164
+ $ dos arbitrate --workspace . --lane src
165
+ {"auto_picked":true,"free_clusters":[],"lane":"benchmark","lane_kind":"cluster",
166
+ "outcome":"acquire","pick_count":null,
167
+ "reason":"auto-picked free cluster lane benchmark (requested src was busy).",
168
+ "tree":["benchmark/**"]}
169
+ ```
170
+
171
+ You asked for `src`; you got `benchmark`. That redirect **is the admission
172
+ kernel working** — a live lease over `src` made the region contended, and the
173
+ arbiter refused to double-book it, handing back a disjoint free lane instead. The
174
+ outcome is still `acquire` (exit 0), so the skill proceeds — *on the lane it was
175
+ given*, writing under `benchmark/**`, not the `src/**` it asked for.
176
+
177
+ **The rule.** Branch on `exit_codes.arbitrate` (`acquire:0`, `refuse:1`,
178
+ `contract_error:2`) and write only under the `tree` you were granted. **Never
179
+ `--force` past a refuse in automation** — a refuse is the kernel preventing a
180
+ collision, and forcing past it re-introduces the race the lane exists to stop.
181
+
182
+ ---
183
+
184
+ ## Recipe 3 — Gate the empty case by EXIT CODE, not prose (gate)
185
+
186
+ **Problem.** A skill that decides "is there work to do / is it safe to proceed?"
187
+ by string-matching a human-readable status line is brittle. `dos gate` folds a
188
+ dispositions file into a single **typed exit code** designed for exactly this.
189
+
190
+ **Verbs.** `dos gate <dispositions.json>`
191
+
192
+ **Exit-code table** (from `exit_codes.gate`):
193
+
194
+ | Code | Disposition | Meaning |
195
+ |---|---|---|
196
+ | `0` | `LIVE` | proceed — there is live work |
197
+ | `3` | `DRAIN` | drain only; do not start new work |
198
+ | `4` | `STALE-STAMP` | a ship-stamp is stale; re-verify before trusting |
199
+ | `5` | `BLOCKED` | refused upstream; do not proceed |
200
+ | `6` | `RACE` | a concurrency race was detected |
201
+ | `2` | `contract_error` | malformed input — fail loud |
202
+ | `7` | `unknown` | unrecognized disposition — fail loud |
203
+
204
+ **Branch on `$?`, never on the printed line:**
205
+
206
+ ```bash
207
+ dos gate "$DISPOSITIONS_JSON"
208
+ case $? in
209
+ 0) echo "LIVE — proceed" ;;
210
+ 3) echo "DRAIN — finish in-flight, start nothing new"; exit 0 ;;
211
+ 4) echo "STALE-STAMP — re-run dos verify before trusting the stamp"; exit 0 ;;
212
+ 5) echo "BLOCKED — refused upstream, stop"; exit 0 ;;
213
+ 6) echo "RACE — back off and retry"; exit 0 ;;
214
+ 2|7) echo "contract/unknown — failing loud" >&2; exit 1 ;;
215
+ esac
216
+ ```
217
+
218
+ **The rule.** The exit code is the contract; the prose is for humans. A skill
219
+ that greps the message will drift the moment the wording changes — branch on
220
+ `$?` against the `exit_codes.gate` table you read in Recipe 0.
221
+
222
+ ---
223
+
224
+ ## Recipe 4 — Fold a run into ONE digest instead of re-reading state files (status)
225
+
226
+ **Problem.** To answer "how is run R doing?" by hand you would re-read its
227
+ liveness evidence, its intent ledger, its lease, and its resume state — four
228
+ files, every time you ask. `dos status` folds all four into **one digest**.
229
+
230
+ **Verbs.** `dos status RUN_ID --json`
231
+
232
+ **Transcript** (ILLUSTRATIVE shape — fields are real, the values are
233
+ placeholders; do not quote these as captured):
234
+
235
+ ```json
236
+ $ dos status RUN_ID --json
237
+ {
238
+ "run_id": "RUN_ID",
239
+ "liveness": "ADVANCING",
240
+ "ledger": { "declared": 5, "verified": 4 },
241
+ "lease": { "lane": "docs", "held": true },
242
+ "resume": "RESUMABLE"
243
+ }
244
+ ```
245
+
246
+ Crucially, **there is no `claimed` field** anywhere in this digest. Every cell is
247
+ adjudicated ground truth — the liveness verdict from the git/journal delta, the
248
+ *verified* (not self-reported) ledger count, the live lease from the WAL. The
249
+ folded fact is the direct antidote to the read-loop: one `dos status` call
250
+ replaces re-reading liveness + ledger + lease + resume by hand on every step.
251
+ Branch on `exit_codes.status` (`ADVANCING:0`, `SPINNING:3`, `STALLED:4`).
252
+
253
+ **The rule.** When you need the state of a run, fold it once with `dos status` —
254
+ and trust it precisely because it carries *no* self-reported field. (For the
255
+ full cross-surface walk joined by `run_id`, `dos trace RUN_ID` follows the spine
256
+ + ledger + WAL + git together.)
257
+
258
+ ---
259
+
260
+ ## Recipe 5 — Catch a doomed tool-loop in-flight (tool_stream, Python API)
261
+
262
+ **Problem.** An agent re-issues the *same* tool call against unchanged inputs and
263
+ gets back identical bytes each time — a Read re-opening a file that has not
264
+ changed, dozens of times in a row (the read-loop pathology). It is making no
265
+ progress, but nothing crashes, so a naive supervisor sees a busy agent. DOS can
266
+ see the stall.
267
+
268
+ **There is NO `dos tool-stream` CLI verb.** The stall verdict is consumed via the
269
+ **Python API** (or wired into a host's tool-result hook). The only CLI surface
270
+ here is `dos tool-stream-eval`, the per-axis evaluation harness — not the
271
+ classifier. Authors must call the Python API:
272
+
273
+ ```python
274
+ from dos.tool_stream import ToolStream, StreamStep, StreamPolicy, classify_stream
275
+
276
+ # A Read tool re-issued 22 times against a file whose bytes never changed:
277
+ steps = tuple(
278
+ StreamStep("Read", "digest(<unchanged-file>)", "sha-of-file-bytes")
279
+ for _ in range(22)
280
+ )
281
+ v = classify_stream(ToolStream(steps))
282
+ # v.state -> "STALLED"
283
+ # v.repeat_run -> 22
284
+ # v.repeated_step -> the recurring (tool, args, result_digest) triple
285
+ ```
286
+
287
+ **Real captured output** for that 22× repeat:
288
+
289
+ ```text
290
+ state: STALLED
291
+ repeat_run: 22
292
+ reason: "the same (tool, args, result) triple repeated 22 consecutive times
293
+ (>= stall 5) — the loop is near-certainly doomed; the env returned
294
+ identical bytes each time (no new information)"
295
+ ```
296
+
297
+ **`DEFAULT_POLICY`** is `repeat_n=3` (fire REPEATING at the 3rd identical call)
298
+ and `stall_n=5` (fire STALLED at the 5th). So DOS would have surfaced REPEATING
299
+ at the 3rd identical read and STALLED at the 5th — re-surfacing the file bytes to
300
+ the agent and saving roughly 17 of those 22 reads.
301
+
302
+ **Why this is byte-clean** (the load-bearing argument). The `result_digest` is
303
+ **env-authored**: the file (or gym, or tool) produced the result bytes, not the
304
+ agent. The agent did *not* author the *identity* of its own repeated results. So
305
+ REPEATING is **provenance-of-repeated-output**, never an "is the agent
306
+ succeeding?" satisfaction predicate — DOS is reporting a fact about who emitted
307
+ the bytes, not grading the work. And because eventual-consistency polling (re-GET
308
+ until a value flips) is a *legitimate* repeat, the consumer attaches a
309
+ turn-preserving WARN that **re-surfaces the value**, never a cut. The verdict
310
+ informs; it does not kill the loop.
311
+
312
+ **The rule.** Distrust *repetition*, not correctness. A recurring env-authored
313
+ result is a clean signal you may re-surface; it is never license to author a
314
+ "you're failing" judgment.
315
+
316
+ ---
317
+
318
+ ## Recipe 6 — Keep the WAL beat alive so a supervisor can see you (lease-lane heartbeat + liveness + journal)
319
+
320
+ **Problem.** A long-running agent that holds a lane but emits no beat is
321
+ indistinguishable, to a supervisor, from a dead one — and a liveness check that
322
+ sees no journal events can only report SPINNING. The fix is to keep the
323
+ write-ahead log's heartbeat alive while you hold the lease.
324
+
325
+ **Verbs.** `dos lease-lane {acquire,heartbeat,release,live}` ·
326
+ `dos liveness --run-id R --start-sha SHA` · `dos journal --workspace . tail N`
327
+
328
+ A generic acquire → heartbeat → release cycle, then read it back:
329
+
330
+ ```bash
331
+ dos lease-lane acquire --workspace . --lane docs --run-id "$RUN"
332
+ dos lease-lane heartbeat --workspace . --lane docs --run-id "$RUN" # repeat on an interval
333
+ dos liveness --workspace . --run-id "$RUN" --start-sha "$START_SHA" # -> ADVANCING (exit 0)
334
+ dos lease-lane release --workspace . --lane docs --run-id "$RUN"
335
+ ```
336
+
337
+ **`dos journal --workspace . tail` of a generic sequence** (ILLUSTRATIVE shape —
338
+ op names and ordering are real, run-ids/timestamps are placeholders):
339
+
340
+ ```text
341
+ ACQUIRE lane=docs run=<RUN> loop_ts=<TS> tree=["docs/**"]
342
+ HEARTBEAT lane=docs run=<RUN> loop_ts=<TS> # the beat that makes SPINNING reachable
343
+ REFUSE lane=docs run=<RUN> reason="lane docs is already held by a live loop — pick a different --scope or wait."
344
+ RELEASE lane=docs run=<RUN> loop_ts=<TS>
345
+ ```
346
+
347
+ The HEARTBEAT op is what makes a true SPINNING verdict *reachable from real
348
+ evidence*: a heartbeat is a *beat*, not an *event*, so a run that beats but never
349
+ advances is correctly seen as spinning rather than dead. (`dos journal replay`
350
+ gives the full fold; `dos liveness` reads the commits-since-start rung when no
351
+ journal is present.)
352
+
353
+ **The rule.** The heartbeat is the join key. It is what lets `dos status`, a
354
+ fleet watcher, or a trajectory audit **join an agent's narration to ground
355
+ truth** — without a beat, the kernel cannot tell "working" from "wedged."
356
+
357
+ ---
358
+
359
+ ## Recipe 7 — Do not trust a recalled memory; re-verify at read (dos memory / dos_recall)
360
+
361
+ A memory store is the DOS problem turned inward: a frozen self-report, written
362
+ once and recalled later as if it were fact. Treat a recalled memory exactly like
363
+ any other unverified agent — **re-adjudicate it at read time**, not at write
364
+ time. `dos memory` / the `dos_recall` driver re-verify a recalled record against
365
+ current ground truth and return one of `FRESH` / `STALE` / `UNVERIFIABLE`. A
366
+ skill that consumes memory should branch on that verdict and never act on a
367
+ `STALE` or `UNVERIFIABLE` recall as though it were current.
368
+
369
+ **The rule.** Recall is not a fact; it is a claim with a timestamp. Verify on
370
+ read.
371
+
372
+ ---
373
+
374
+ ## Recipe 8 — Wrap a headless agent so it CAN call the referee (guard)
375
+
376
+ **Problem.** A headless agent run can't consult the kernel if the kernel isn't
377
+ reachable from inside the run. `dos guard` wraps the agent process and **mounts
378
+ the `dos-mcp` server**, so the agent gains the syscalls as MCP tools — it *can*
379
+ call `dos_verify`, `dos_arbitrate`, and the rest over JSON/stdio.
380
+
381
+ **Verbs.** `dos guard [--verify-on-stop] -- claude -p ...`
382
+
383
+ ```bash
384
+ dos guard --verify-on-stop -- claude -p "land the docs/NN phase, then verify it shipped"
385
+ ```
386
+
387
+ The win this delivers is the **MCP mount**: inside the wrapped run, the agent can
388
+ call the referee instead of self-certifying. Frame it that way.
389
+
390
+ **An honest caveat.** `dos guard --verify-on-stop` (and `dos hook stop`) emit a
391
+ stop-hook *dialect* that the live agent runtime **may not honor** — so do not
392
+ present this as a guaranteed BLOCK that halts an over-claiming agent at stop
393
+ time. What you can rely on is the mount (the agent *can* call `dos_verify`); what
394
+ you cannot yet rely on is the runtime enforcing a verify-on-stop refusal. Keep
395
+ the recipe about the capability you actually get.
396
+
397
+ **The rule.** Make the referee *reachable* (the MCP mount) and let the agent
398
+ consult it; don't claim an enforcement gate the runtime doesn't honor.
399
+
400
+ ---
401
+
402
+ ## Recipe 9 — Partition a fan-out's results on a DEATH, not on null — and where the witness can actually run (verify-result)
403
+
404
+ **Problem.** A multi-agent fan-out folds each worker's *return value* as a finding.
405
+ But a worker that died abnormally — a rate-limit / quota / auth / server error the
406
+ **harness** synthesized — still returns a non-null error STRING, so it survives a
407
+ `results.filter(Boolean)` and is banked as a finding. Worse, code that computes
408
+ `failed = N - survivors.length` counts a dead worker indistinguishably from a real
409
+ negative. Over the real workflow corpus this is **~32% of subagents** (measured): a
410
+ third of the array a naive fold banks is harness-authored death.
411
+
412
+ **Verbs.** `dos verify-result --transcript PATH` (or a hook event with
413
+ `transcript_path` on stdin).
414
+
415
+ **Exit-code table** (from `exit_codes.verify-result`):
416
+
417
+ | Code | State | Meaning |
418
+ |---|---|---|
419
+ | `0` | `HEALTHY` / `UNREADABLE` | a real-model result — fold it. (UNREADABLE is the fail-safe floor: a read fault never fabricates a death that drops a real result.) |
420
+ | `3` | `DEAD` (SYNTHETIC / EMPTY) | a harness-authored death or an empty deliverable — route to a DEAD bucket, count in the denominator, do NOT fold |
421
+ | `2` | `contract_error` | no transcript given — fail loud |
422
+
423
+ The catch reads a **different byte-author** than the judged worker:
424
+ `message.model == "<synthetic>"` is the harness's own authorship stamp on the
425
+ terminal record — the worker's model did not write it, so the `role:"assistant"`
426
+ slot is the conversation position, not authorship. That is grounding, not
427
+ consistency: it is a pure byte question about bytes the worker could not forge.
428
+
429
+ ### Where the witness can run — and where it CANNOT (read this first)
430
+
431
+ The fold lives inside an orchestrator's plain-JavaScript stage, and that stage is
432
+ **sandboxed: no filesystem, no child-process API, clock/RNG built-ins blocked.** Two
433
+ consequences decide the binding, and pretending otherwise ships a recipe that does
434
+ not run:
435
+
436
+ 1. **An in-script shell-out to `dos verify-result` is IMPOSSIBLE.** The stage cannot
437
+ `require('child_process')` or read a transcript file. Any recipe that shells the
438
+ verb from inside the orchestrator JS is wishful — it would throw in the sandbox.
439
+ 2. **The stage cannot get a transcript path on its own.** `agent()` returns TEXT, not
440
+ a path; the runtime exposes no workflow id and no per-child agent id, and the
441
+ transcripts are written out of band. The stage simply does not hold the two
442
+ identifiers needed to name the file.
443
+
444
+ So the witness runs **one rung out of the sandbox**, in a stop-of-child hook the host
445
+ invokes with the event JSON `{transcript_path, …}` on stdin — exactly the input
446
+ `dos verify-result` already reads. The hook is a real OS process (shell-out works)
447
+ and is *handed* the transcript path (no glob, no agent id needed). The orchestrator
448
+ stage never classifies a transcript itself.
449
+
450
+ ### Stage 1 — the stop-of-child hook (the witness; runs OUTSIDE the JS sandbox)
451
+
452
+ Register `dos verify-result` as a stop-of-child hook. The host pipes the event to it;
453
+ exit `3` marks the child DEAD. Append the `--json` verdict — **keyed by the child's
454
+ own id** — to a per-run sidecar a non-sandbox consumer will read (one JSON object per
455
+ child):
456
+
457
+ ```bash
458
+ # stop-of-child hook command (the host pipes the event JSON on stdin; dos
459
+ # verify-result reads transcript_path from it). DOS_FANOUT_SIDECAR must be set by the
460
+ # host to ONE path per run (else concurrent runs cross-contaminate — see preconditions).
461
+ dos verify-result --json >> "$DOS_FANOUT_SIDECAR"
462
+ ```
463
+
464
+ Each appended line is the `--json` verdict (`{"state","dead","class","api_status",
465
+ "reason","envelope"}`); `dead:true` is the signal and `envelope.reason_class` is a
466
+ greppable `RESULT_DEAD_<CLASS>` / `RESULT_EMPTY`. Capture the child's id alongside it
467
+ (from the event) so the partition keys on identity, not on array position.
468
+
469
+ ### Stage 2 — the partition is the CONDUCTOR's, keyed by child id (NOT an in-script index join)
470
+
471
+ The sandboxed orchestrator stage **cannot** read the sidecar and **cannot** reliably
472
+ align a `deadVerdicts[i]` against its `results[i]`: stop-of-child hooks fire in
473
+ completion order, not dispatch order, so a positional join is unsound. The honest
474
+ consumer of the sidecar is the **conductor** (the non-sandbox process that launched
475
+ the workflow) or a dedicated synthesis subagent the conductor *tells* to read it.
476
+ That consumer partitions by the child id the hook recorded:
477
+
478
+ ```text
479
+ read DOS_FANOUT_SIDECAR → for each {child_id, dead}:
480
+ dead == true → DEAD bucket (count in the denominator; re-dispatch child_id's OWN unit)
481
+ dead == false → LIVE bucket (eligible to fold)
482
+ ```
483
+
484
+ The partition lives where the data lives (outside the sandbox), keyed by identity —
485
+ never an in-script `filter((_, i) => …)` against a file the stage cannot see.
486
+
487
+ ### Stage 3 — the safe action on a DEAD child (do NOT re-prompt the synthesizer)
488
+
489
+ A DEAD verdict's safe action is to **re-dispatch the dead child's OWN unit** — re-run
490
+ that one subagent's task — never to re-prompt the synthesizer mid-plan, which is the
491
+ DEFER-shaped derail that measured net −9 pp. `dos verify-result` is a PDP (policy decision point), not a PEP (policy enforcement point):
492
+ it reports the death; the conductor re-dispatches the unit.
493
+
494
+ ### The degraded fallback (no hook available) — be honest about what you lose
495
+
496
+ If you cannot register a stop-of-child hook, the orchestrator stage's ONLY
497
+ script-visible death signal is the `null` count after the barrier (a dropped thunk).
498
+ That **undercounts** the keystone, because a synthetic-terminal death returns a
499
+ non-null error string, not `null` — exactly the case `.filter(Boolean)` cannot see.
500
+ The fallback catches dropped-thunk deaths but MISSES the 32% the witness exists for;
501
+ say so in the prompt — coverage computed without the hook is a floor, not the truth.
502
+
503
+ **The rule.** A non-null return is not a result — a harness-synthesized death is also
504
+ non-null. Partition the fan-out on the *terminal-state verdict*, not on `Boolean`, and
505
+ because the orchestrator JS is sandboxed, do it **outside the sandbox** (a
506
+ stop-of-child hook shells out to `dos verify-result`; the conductor folds the sidecar keyed
507
+ by child id), so a silent death becomes a counted, refusable, re-dispatchable event
508
+ instead of a laundered finding.
509
+
510
+ ---
511
+
512
+ ## Recipe 10 — Hand REAL coverage to the synthesizer, not a laundered 7/7 (coverage)
513
+
514
+ **Problem.** Recipe 9 partitions a fan-out into LIVE and DEAD; this recipe makes the
515
+ *size of the gap* legible to whatever consumes the fold. A synthesizer told "7 workers
516
+ returned" when only 4 were witnessed will confidently write up a sub-quorum as an
517
+ exhaustive survey — the laundering Recipe 9's `failed = N − survivors.length` could
518
+ not even see. `dos coverage` folds the partition against the **declared** fan-out
519
+ width and emits a sentence the synthesizer reads.
520
+
521
+ **Verbs.** `dos coverage --declared N {--transcript PATH … | --transcripts-glob GLOB | --states S1,S2,…}`
522
+
523
+ **Exit-code table** (from `exit_codes.coverage`):
524
+
525
+ | Code | State | Meaning |
526
+ |---|---|---|
527
+ | `0` | `FULL` / `EMPTY` | every declared worker returned a real result (or nothing was fanned out) — fold all, no caveat |
528
+ | `3` | `UNDERFILLED` / `STARVED` / `OVERFILLED` | the fold is degraded — inject the caveat, count the gap; STARVED = nothing real to synthesize; OVERFILLED = more results than declared (a dispatch bug) |
529
+ | `2` | `contract_error` | no `--declared`, an un-coercible state token, or nothing to fold |
530
+
531
+ **Two input modes — and the provenance they carry:**
532
+
533
+ ```bash
534
+ # HARNESS-GROUNDED: coverage runs verify-result itself per transcript, so the
535
+ # healthy/dead counts cannot be forged by the caller (--json stamps grounded:true).
536
+ dos coverage --declared 7 --transcripts-glob "<run-dir>/agent-*.jsonl" --json
537
+
538
+ # CALLER-ASSERTED: the workflow already ran verify-result per child and passes the
539
+ # states. PROVENANCE-DEGRADED — the tokens have no <synthetic> provenance; coverage
540
+ # cannot re-ground them (--json stamps grounded:false).
541
+ dos coverage --declared 7 --states HEALTHY,HEALTHY,HEALTHY,HEALTHY,SYNTHETIC,SYNTHETIC --json
542
+ ```
543
+
544
+ **`--declared` is the laundering fix.** It is REQUIRED and is **never inferred from the
545
+ input length** — that independence is the whole point: a short survivor list (a
546
+ workflow that already dropped the dead ones) reports `UNDERFILLED 4/7`, never `FULL`.
547
+
548
+ The `--json` output carries a **`prompt_line`** — the deterministic sentence a workflow feeds
549
+ verbatim into its synthesis prompt:
550
+
551
+ ```text
552
+ COVERAGE CAVEAT: only 4 of 7 fan-out workers returned a real result (2 died on a
553
+ harness-authored terminal (rate-limit); 1 did not return a transcript). Treat the
554
+ findings below as a SUB-QUORUM SAMPLE (4/7), not an exhaustive survey; do not state
555
+ or imply full coverage, and flag the gap above.
556
+ ```
557
+
558
+ The reason text is built from the **real `(dead, unreadable, unaccounted)`
559
+ partition**, so it never asserts a death that was not witnessed: an unreadable slot
560
+ reads "could not be read", a missing slot "did not return a transcript", and only a
561
+ witnessed death (with its class) says "died".
562
+
563
+ **`dos coverage` is an HONEST AGGREGATOR.** It folds the per-worker `verify-result`
564
+ verdicts that were *already* adjudicated; it mints **zero** new ground-truth labels
565
+ (the same posture as `dos status`-folding tools — see the fold-mints-data law). Its
566
+ value is not a new label; it is two things: a denominator independent of the survivor
567
+ list (laundering becomes structurally impossible), and the `unaccounted` count the
568
+ naive fold discarded. It is **advisory**: it reports coverage; it never re-runs a dead
569
+ worker and never judges whether a HEALTHY return is *correct* — that judgment belongs to the
570
+ witness-routing rung (`/dos-witness-claim`).
571
+
572
+ **The rule.** Fold the partition against the *declared* width, not the survivor count,
573
+ and put the resulting `prompt_line` INTO the synthesis prompt — so a sub-quorum
574
+ fan-out cannot be laundered as a full one.
575
+
576
+ ---
577
+
578
+ ## The one rule under all recipes
579
+
580
+ Every recipe is the same move: **the skill never self-certifies — it shells out to a
581
+ `dos` verb and reads the verdict.** The kernel answers from git ancestry, the
582
+ WAL, the journal, the env-authored result bytes — evidence whose *byte-author is
583
+ not the judged agent*. That inequality is the invariant the whole cookbook
584
+ protects: the part that decides ground truth is never the part being judged.