dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/lane_journal.py ADDED
@@ -0,0 +1,1001 @@
1
+ """Lane-journal — a write-ahead log for the lane-lease arbiter (LJ-series).
2
+
3
+ The pure lane arbiter (`arbiter.arbitrate`) decides admission from a *live-lease
4
+ set* — current state only, no history. Without a durable record of what the
5
+ arbiter *decided*, "why was I refused at 14:03?", "when did this orphan die and
6
+ who reclaimed it?", and "reconstruct the lane state after a crash" are all
7
+ unanswerable, and the live set itself has nowhere durable to live across
8
+ processes.
9
+
10
+ This module is the **write-ahead log** that classic schedulers and lock managers
11
+ always keep: every lane decision (ACQUIRE / RELEASE / HEARTBEAT / SCAVENGE /
12
+ REFUSE / HALT / RECONCILE / ENFORCE / SPAWN) is appended — and `fsync`'d — to an append-only JSONL
13
+ file. `replay()` folds the log back into the authoritative live-lease set (so the
14
+ journal *is* the cross-process registry — there is no second store to keep in
15
+ sync), and `tail`/`read_all` answer history queries. The generic writer is the
16
+ Layer-3 `lane_lease` shell (`acquire`/`release`/`heartbeat`/`halt`) plus the
17
+ supervisor driver's `scavenge`; each appends inside its own `_Mutex`, so journal
18
+ append order equals decision order — the WAL invariant. `replay` folds by append
19
+ order and ignores `seq` (which is cosmetic), so an `O_APPEND` write under that
20
+ mutex is sufficient.
21
+
22
+ Design rules (the LJ scope boundary):
23
+
24
+ * **Pure where it can be.** `replay()` / `compact()` take entries and return
25
+ entries — entries in, list out, no disk — so the suite replays and compacts
26
+ them without touching a file. Only `append` / `read_all` / `tail` touch disk.
27
+ * **Log under the lock.** The writer appends inside the lease mutex that
28
+ serializes the decision, so a reader's `replay` sees a consistent order.
29
+ * **Torn-tail tolerant.** A process killed mid-`append` can leave a partial
30
+ final line. `read_all` skips an unparseable *trailing* line (and only the
31
+ trailing one) rather than raising — a half-written record is "didn't happen",
32
+ the safe WAL reading. A non-trailing corrupt line is kept as a `_CORRUPT`
33
+ sentinel so an audit still sees the integrity breach (and `compact` preserves
34
+ it — a rewrite must never silently erase it).
35
+ * **Host-local.** One journal per host. Every entry stamps `host_id` so a future
36
+ cross-host merge is *possible*, but cross-host coordination is out of scope.
37
+ * **Bounded by an explicit compaction, not auto-rotation.** The WAL is
38
+ append-only; `compact()` folds it to a single CHECKPOINT snapshot of the live
39
+ set when an operator runs `dos journal compact`. It is **live-set-preserving**
40
+ (`replay(compact(E)) == replay(E)` — the arbiter sees the identical leases), but
41
+ NOT liveness-fold-preserving: a CHECKPOINT carries no `ts`, so a mid-flight
42
+ compaction makes a still-live run read STALLED until its next beat (always the
43
+ safe direction — compaction can never fabricate a beat/event). Run it in a quiet
44
+ window. An automatic size/age trigger + a `[journal]` retention seam is deferred.
45
+
46
+ Read::
47
+
48
+ dos journal tail [N] # last N entries (default 20)
49
+ dos journal replay # reconstructed live-lease set
50
+ dos journal seq # current max seq
51
+ dos journal compact # fold to a CHECKPOINT snapshot (bound the file)
52
+
53
+ Write is library-only (the writers are `lane_lease` / the supervisor driver, each
54
+ under its own mutex) — there is deliberately no `append` CLI subcommand, so
55
+ nothing can journal a decision outside the lock that serializes it.
56
+ """
57
+ from __future__ import annotations
58
+
59
+ import datetime as dt
60
+ import json
61
+ import os
62
+ import sys
63
+ from pathlib import Path
64
+ from typing import Any, Iterable
65
+
66
# Best-effort: switch stdout and stderr to UTF-8 with errors="replace".
# Each stream is attempted on its own so that a failure on stdout (e.g. the
# stream was replaced by an object without `reconfigure`) no longer prevents
# stderr from being reconfigured as well.
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Deliberate swallow: importing this module must never fail because a
        # standard stream is missing or cannot be reconfigured.
        pass
del _stream
71
+
72
+ from dos import config as _config
73
+ from dos import durable_schema as _schema
74
+
75
# Durable-schema family name and version for the lane-journal records that carry
# a schema tag.
#
# Only the OP_ATTEMPT event tags itself today (docs/207 §3). The lease ops predate
# the tag contract, and replay reads them as UNTAGGED (the tolerant legacy floor),
# so no existing journal needs migrating.
#
# Bump the version ONLY for a non-additive change to a tagged record's shape;
# adding a field is additive and never bumps it.
SCHEMA_FAMILY = "lane-journal"
LANE_JOURNAL_SCHEMA = 1
82
+
83
+ # Host-local WAL. The default resolves against the ACTIVE WORKSPACE (the injected
84
+ # config), never the package's own tree (the workspace-root rule). The
85
+ # `DISPATCH_LANE_JOURNAL_PATH` env override is the workspace-neutral alias;
86
+ # `JOB_LANE_JOURNAL_PATH` is a back-compat alias an early consumer still sets.
87
+
88
+
89
def _default_journal_path() -> Path:
    """Return the `lane_journal` path recorded on the active config's `paths`.

    Resolved through `_config.active()` on every call, so the result follows
    whatever configuration is active at call time.
    """
    active_paths = _config.active().paths
    return active_paths.lane_journal
91
+
92
+
93
+ # Module-level convenience handle, resolved LAZILY (PEP 562 `__getattr__`) the
94
+ # first time `lane_journal.JOURNAL_PATH` is actually read — NOT at import. The
95
+ # original eager `JOURNAL_PATH = Path(... or _default_journal_path())` forced
96
+ # `config.active()` (→ `default_config` → the git-SHA subprocess + the WMI
97
+ # platform probe in `gather_env_print`) to run the instant `import dos`
98
+ # happened, taxing EVERY consumer's cold start ~tens of ms for a path almost no
99
+ # caller reads as a value (the live functions all call `_journal_path()` below,
100
+ # which re-resolves per call so a test that sets the env override after import
101
+ # still redirects). Deferring it keeps `import dos` cheap; the name stays exported
102
+ # for back-compat (`from dos.lane_journal import *` / the host re-export shims).
103
def __getattr__(name: str) -> Any:  # noqa: D401 — PEP 562 module hook
    """Lazily serve the module attribute `JOURNAL_PATH`.

    Any other missing attribute raises `AttributeError`, as a normal module
    lookup would. `JOURNAL_PATH` is resolved through `_journal_path()` at access
    time rather than at import.
    """
    if name != "JOURNAL_PATH":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _journal_path()
107
+
108
# The decision vocabulary. ACQUIRE/RELEASE ship in LJ1 (the throughline); the
# remaining ops are wired in LJ2/LJ5, but the replay folder already understands
# them, so a forward-compatible journal replays cleanly the day they appear.

OP_ACQUIRE = "ACQUIRE"
OP_RELEASE = "RELEASE"
OP_HEARTBEAT = "HEARTBEAT"
OP_SCAVENGE = "SCAVENGE"

# LJ2 — recorded, but does NOT mutate lease state.
OP_REFUSE = "REFUSE"

# LJ5 — crash-recovery reconcile, recorded. There is NO in-kernel writer:
# RECONCILE re-asserts a lease into a SEPARATE live registry that the WAL says is
# held. This kernel has ONE store (`replay` reconstructs the registry FROM the
# WAL), so there is no second store to reconcile into, and the writer is
# host-side (a host with its own execution-state.yaml). The op and its replay
# fold (identical to ACQUIRE) stay for that forward-compat; the kernel just never
# emits one. Contrast SCAVENGE, which IS in-repo — eviction is a real action
# against the one WAL.
OP_RECONCILE = "RECONCILE"

# docs/99 — a STOP DECISION for an in-flight run. Recorded as INTENT; it does NOT
# mutate lease state (the eventual RELEASE/SCAVENGE confirms the eviction).
OP_HALT = "HALT"

# docs/189 §C4 — an ENFORCEMENT OUTCOME: a handler (dos.enforce) proposed an
# effect on an intervention decision (observe/warn/block/defer). Recorded for
# forensics like REFUSE/HALT — it grants/removes NO lease, so replay ignores it
# for state. This is the missing PRODUCER that makes "which call was blocked, by
# which handler, and what was substituted?" answerable from the spine (the
# ARIES-recovery gap a blocking handler otherwise left no trace of). The kernel
# records the proposal; a host PEP performed (or did not perform) the act.
OP_ENFORCE = "ENFORCE"

# C5 (docs/95) — a lease OWNERSHIP TRANSFER: a new acquirer takes over a lease
# whose holder is gone but whose recorded children are still live. replay
# rewrites the live lease's holder/pid/host_id to the adopter while KEEPING its
# (loop_ts, lane) identity, tree, and children — adoption is an ownership
# rewrite, NEVER a kill (the grandchildren keep running). The host decides WHEN
# to adopt (it measures child liveness at the boundary, now keyed on the kernel's
# recorded child pids via the proc-liveness rung); the kernel provides only the
# non-forgeable child-identity ANCHOR and this transfer op.
OP_ADOPT = "ADOPT"

# docs/207 §3 — a PICK ATTEMPT was made on a unit, with its outcome when known.
# This is the anti-churn cross-run memory the bare loop lacked:
# `cooldown.cooldown_verdict` folds these to answer "have I already tried this
# unit and it didn't move?" Like REFUSE/HALT/ENFORCE it grants/removes NO lease,
# so replay ignores it for state — it is a forensic event the cooldown fold reads
# via `read_all`, never `replay`. It carries a `durable_schema` tag (the FIRST
# lane-journal record to do so — older readers see UNTAGGED and a tolerant fold
# accepts it; the tag future-proofs the fold).
OP_ATTEMPT = "ATTEMPT"

# docs/reports/2026-06-09 (the dos-top visibility gap) — an
# INTENT-TO-TAKE-A-LANE recorded the instant a launcher commits to a lane, BEFORE
# preflight and before the durable ACQUIRE lands. It closes the SPAWN→ACQUIRE
# blind window: `dos top` reads only the WAL, so a loop that has decided its lane
# but not yet acquired is invisible (a *successful* `arbitrate` PERSISTS nothing
# — purity boundary). Like REFUSE/HALT/ENFORCE/ATTEMPT it grants/removes NO
# lease, so it is NOT in `_STATE_MUTATING_OPS` and `replay` ignores it for state
# — a not-yet-real run can therefore NEVER double-book a region (the docs/281
# phantom-lease failure mode is structurally impossible here: an intention is not
# a hold). It is the durable, cross-process home for the supervisor's in-memory
# `pending` field (`supervise.py:106`): `dispatch_top` folds the RECENT SPAWNs
# for a lane with no live lease into a `SPAWNING` chip with a short TTL, so a
# launch that dies in preflight ages out on its own (the same self-heal
# `_expire_dead` gives a crashed holder). The eventual ACQUIRE supersedes the
# SPAWN (a held lease wins the chip); a RELEASE with no intervening ACQUIRE is a
# launch-aborted record.
OP_SPAWN = "SPAWN"

# LJ compaction (docs/82) — a SNAPSHOT of the live set written at the head of a
# compacted journal. NOT a state-mutating op in the incremental sense: `replay`
# handles it specially — it RESETS the reconstructed live set to the checkpoint's
# payload, then folds the tail of fresh entries that follow it. This is what lets
# `compact` discard the long history of dead leases without losing a still-live
# one: the surviving leases ride forward in the snapshot, not as their
# (now-deleted) original ACQUIRE lines.
OP_CHECKPOINT = "CHECKPOINT"
192
+
193
# The ops that change the reconstructed lease set during `replay`.
#
# Everything outside this set is a recorded DECISION or INTENT that grants and
# removes nothing:
#   * REFUSE — someone wanted a lane and could not have it; worth logging, but
#     it grants nothing.
#   * HALT (docs/99) — "stop this run that is not done". It is the kernel's
#     *intent*, decoupled from the *fact* of the lease ending (the kernel cannot
#     know the host's stop signal landed); a later RELEASE/SCAVENGE appended by
#     the driver is what actually evicts.
#   * SPAWN — the symmetrical INTENT on the acquire side: "a run is coming to
#     this lane". It is decoupled from the *fact* of the hold, which only the
#     eventual ACQUIRE records, so an intention that never acquires can never
#     strand a phantom hold.
#
# This split is what lets an auditor tell a *kill* (HALT→SCAVENGE) from a
# *natural death* (RELEASE), and a *coming* run (SPAWN→ACQUIRE) from a *held*
# one — the forensic point of the closed op vocabulary.
_STATE_MUTATING_OPS = frozenset((
    OP_ACQUIRE,
    OP_RELEASE,
    OP_HEARTBEAT,
    OP_SCAVENGE,
    OP_RECONCILE,
    OP_ADOPT,
))
210
+
211
+
212
def journal_now_iso() -> str:
    """Return the current UTC time as a `YYYY-MM-DDTHH:MM:SSZ` string.

    Second resolution is deliberate: it is finer than a minute-only loop stamp,
    so entries within one minute read unambiguously in the log. The monotonic
    `seq` remains the real tiebreak; this `ts` is the instant the
    heartbeat-freshness fold (`journal_delta`) trusts.
    """
    utc_now = dt.datetime.now(tz=dt.timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
221
+
222
+
223
def _journal_path() -> Path:
    """Resolve the journal path, honouring the environment overrides.

    The environment is consulted on every call (never cached), so an override
    set after import still redirects. `DISPATCH_LANE_JOURNAL_PATH` wins over
    `JOB_LANE_JOURNAL_PATH`; an unset or empty variable is skipped. With no
    override the active workspace config supplies the path.
    """
    for env_name in ("DISPATCH_LANE_JOURNAL_PATH", "JOB_LANE_JOURNAL_PATH"):
        override = os.environ.get(env_name)
        if override:
            return Path(override)
    return Path(_default_journal_path())
232
+
233
+
234
def read_all(path: Path | None = None) -> list[dict]:
    """Return every journal entry in append order.

    Args:
        path: Journal file to read; defaults to the resolved journal path.

    Returns:
        The parsed entries, one dict per record. A missing or unreadable file
        yields `[]`. Blank lines and JSON values that are not objects are
        skipped.

    Corruption handling:
        * An unparseable TRAILING line (a torn final record from a crash
          mid-append) is dropped — a half-written record "didn't happen".
        * An unparseable NON-trailing line is a real integrity problem and is
          kept as a `{"op": "_CORRUPT", "_raw": ..., "_line": ...}` sentinel so
          a caller/audit notices, never silently removed from the order.
    """
    p = path or _journal_path()
    if not p.exists():
        return []
    try:
        raw = p.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    # Records are delimited by "\n" ONLY. `str.splitlines()` would also break on
    # U+2028, U+2029 and U+0085, which `append` writes unescaped inside JSON
    # strings (`ensure_ascii=False`); splitting there shreds one valid record
    # into several unparseable fragments.
    lines = raw.split("\n")
    if lines and not lines[-1]:
        # A newline-terminated file yields one empty trailing element; drop it so
        # "the last line" below still means the last physical record.
        lines.pop()
    last_index = len(lines) - 1

    out: list[dict] = []
    for i, line in enumerate(lines):
        s = line.strip()
        if not s:
            continue
        try:
            obj = json.loads(s)
        except json.JSONDecodeError:
            # Tolerate ONLY a torn final line (crash mid-append). Any earlier
            # corrupt line is a genuine integrity breach — record a sentinel so
            # audit/replay can flag it rather than pretend order is intact.
            if i == last_index:
                break
            out.append({"op": "_CORRUPT", "_raw": s, "_line": i})
            continue
        if isinstance(obj, dict):
            out.append(obj)
    return out
268
+
269
+
270
def tail(n: int = 20, path: Path | None = None) -> list[dict]:
    """Return the last `n` journal entries (all of them when `n <= 0`).

    The whole file is read and then sliced, so this is O(file), like
    `read_all`/`replay`/`next_seq`. The journal is NOT auto-rotated and grows
    without bound on a long-lived fleet. `dos journal compact` (`compact()` plus
    the `lane_lease.compact_journal` I/O shell) bounds it by folding the WAL to a
    single CHECKPOINT snapshot of the live set. That fold is live-set-preserving
    (`replay(compact(E)) == replay(E)` — the arbiter sees the identical leases;
    see `compact` for the liveness-fold caveat). An automatic size/age-triggered
    rotation and a `[journal]` retention seam are deferred.
    """
    everything = read_all(path)
    if n <= 0:
        return everything
    return everything[-n:]
282
+
283
+
284
def next_seq(path: Path | None = None) -> int:
    """Return the seq to stamp on the next entry: max existing seq + 1 (1-based).

    Both `seq` and `seq_watermark` bound the result. An OP_CHECKPOINT carries the
    high-water `seq` of the history it replaced (`seq_watermark`); after a
    compaction discards the lines that held the prior max seq, the watermark is
    the ONLY surviving record of it. Ignoring it would let the next append REUSE
    a seq from the discarded prefix.

    A missing or malformed value counts as 0. That includes a non-finite number
    (JSON `Infinity`, or an overflowing literal such as `1e999`), whose
    `int()` conversion raises `OverflowError` rather than `ValueError`.

    Per the module's locking contract the caller is expected to hold the same
    lock it holds for the registry write, so two concurrent acquirers cannot
    mint the same seq; this function takes no lock itself.

    Args:
        path: Journal file to scan; defaults to the resolved journal path.

    Returns:
        A positive integer, 1 for an empty or missing journal.
    """

    def _as_int(value: Any) -> int:
        # Tolerant coercion: anything that is not a usable integer reads as 0.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    high = 0
    for e in read_all(path):
        high = max(high, _as_int(e.get("seq")), _as_int(e.get("seq_watermark")))
    return high + 1
307
+
308
+
309
def append(entry: dict, path: Path | None = None) -> dict:
    """Append one entry to the journal and `fsync` it to disk.

    `entry` is the caller's decision payload. A copy is stamped with `seq` and
    `ts` when those keys are absent, serialized as a single canonical-JSON line
    (sorted keys, trailing newline), written in full, and `os.fsync()`'d so the
    record is durable before the function returns (and thus before the caller
    mutates the registry).

    `next_seq` scans the whole journal, so it is only called when the caller did
    not supply a `seq`.

    The caller is responsible for holding the state lock — `append` does NOT
    lock, because journal order must equal registry-mutation order and only the
    caller knows the surrounding critical section.

    Args:
        entry: The decision payload; never mutated.
        path: Journal file to append to; defaults to the resolved journal path.
            Missing parent directories are created.

    Returns:
        The stamped entry (with `seq`/`ts` filled in) so the caller can log it.

    Raises:
        OSError: if the file cannot be opened, written, or synced.
    """
    p = path or _journal_path()
    e = dict(entry)
    if "seq" not in e:
        e["seq"] = next_seq(p)
    if "ts" not in e:
        e["ts"] = journal_now_iso()
    line = json.dumps(e, sort_keys=True, default=str, ensure_ascii=False) + "\n"
    data = line.encode("utf-8")
    p.parent.mkdir(parents=True, exist_ok=True)
    # O_APPEND makes the write atomic w.r.t. other appenders at the OS level;
    # the caller's lock already serializes our own callers, but O_APPEND is the
    # belt to that suspenders.
    fd = os.open(str(p), os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o644)
    try:
        # `os.write` may write fewer bytes than requested; keep writing until the
        # whole record is out so a short write cannot leave a torn line behind.
        offset = 0
        while offset < len(data):
            offset += os.write(fd, data[offset:])
        os.fsync(fd)
    finally:
        os.close(fd)
    return e
338
+
339
+
340
def _lease_identity(rec: dict) -> tuple[str, str]:
    """Return the `(loop_ts, lane)` pair that identifies a lease.

    A `loop_ts` is minute-resolution, so two disjoint-lane loops can share one;
    the lane disambiguates them. Missing or falsy fields become `""`. This is
    the same identity `journal_delta` scopes its liveness fold to.
    """
    loop_ts, lane = (str(rec.get(field) or "") for field in ("loop_ts", "lane"))
    return (loop_ts, lane)
345
+
346
+
347
def replay(entries: Iterable[dict]) -> list[dict]:
    """Fold the decision sequence into the authoritative live-lease set.

    Pure: entries in, lease list out (no disk). This is the WAL-recovery core —
    replaying the journal must reproduce the live-lease set the arbiter admits
    against. Entries are folded strictly in the order given; `seq` is not read.

    Per-op rules, keyed on the `(loop_ts, lane)` identity:

    * CHECKPOINT -> RESET the live set to the snapshot's `leases` payload (in
      payload order), then keep folding what follows. Checked BEFORE the
      state-mutating gate so it can never be skipped.
    * ACQUIRE / RECONCILE -> add or replace the lease. The payload is the nested
      `lease` dict, or, when that is absent, the known lease fields found inline
      on the entry.
    * RELEASE / SCAVENGE -> remove the lease.
    * HEARTBEAT -> refresh `heartbeat_at` on a live lease (no-op if absent).
    * ADOPT -> on a live lease only, overwrite `holder`/`pid`/`host_id` with the
      entry's non-None values and refresh `heartbeat_at`; everything else on the
      lease is kept. An ADOPT against a key that is not live is a no-op.
    * REFUSE / HALT / ENFORCE / ATTEMPT / SPAWN / _CORRUPT / unknown -> ignored
      for state; none of them grants or removes a lease.

    An entry (or checkpoint row) whose `loop_ts` and `lane` are both empty is
    skipped.

    Returns:
        The live leases in first-acquired order (a lease that is released and
        re-acquired moves to the end). Each lease is a fresh dict, so the input
        entries are not mutated by later HEARTBEAT/ADOPT folds.
    """
    inline_lease_fields = (
        "lane", "lane_kind", "tree", "loop_ts", "host_id",
        "pid", "acquired_at", "heartbeat_at", "ttl_minutes",
        "holder", "run_id",
    )
    ownership_fields = ("holder", "pid", "host_id")

    # A dict keeps first-insertion order and does not move a key whose value is
    # replaced, which is exactly the "first-acquired, stable" order required.
    held: dict[tuple[str, str], dict] = {}

    for entry in entries:
        op = str(entry.get("op") or "")

        if op == OP_CHECKPOINT:
            # Re-base the fold: drop everything reconstructed so far and start
            # again from the leases the snapshot carries.
            held.clear()
            snapshot = entry.get("leases")
            for row in (snapshot if isinstance(snapshot, list) else ()):
                if not isinstance(row, dict):
                    continue
                ident = _lease_identity(row)
                if ident[0] or ident[1]:
                    held[ident] = dict(row)
            continue

        if op not in _STATE_MUTATING_OPS:
            # REFUSE, HALT, ENFORCE, ATTEMPT, SPAWN, _CORRUPT, unknown —
            # recorded, not state.
            continue

        ident = _lease_identity(entry)
        if not (ident[0] or ident[1]):
            continue

        if op in (OP_ACQUIRE, OP_RECONCILE):
            nested = entry.get("lease")
            if isinstance(nested, dict):
                held[ident] = dict(nested)
            else:
                # Forward-compat: the lease fields may sit inline on the entry
                # instead of under "lease".
                held[ident] = {
                    name: entry[name] for name in inline_lease_fields if name in entry
                }
            continue

        if op in (OP_RELEASE, OP_SCAVENGE):
            held.pop(ident, None)
            continue

        current = held.get(ident)
        if current is None:
            # HEARTBEAT / ADOPT only ever touch a lease that is live.
            continue

        if op == OP_HEARTBEAT:
            beat = entry.get("heartbeat_at") or entry.get("ts")
            if beat:
                current["heartbeat_at"] = beat
        elif op == OP_ADOPT:
            # Ownership transfer: rewrite only the owner fields, then refresh the
            # heartbeat so the adopted lease is not immediately stale.
            for name in ownership_fields:
                if entry.get(name) is not None:
                    current[name] = entry[name]
            beat = entry.get("heartbeat_at") or entry.get("ts")
            if beat:
                current["heartbeat_at"] = beat

    return list(held.values())
452
+
453
+
454
+ # --------------------------------------------------------------------------
455
+ # Entry builders — the writer (`lane_lease` / the supervisor driver) uses these
456
+ # so the entry shape is defined HERE (one home), not duplicated at each call
457
+ # site. Pure constructors.
458
+ # --------------------------------------------------------------------------
459
+
460
+
461
def adopt_entry(lease: dict, *, new_holder: str, new_pid: Any = None,
                new_host_id: str = "", heartbeat_at: str = "", reason: str = "") -> dict:
    """Build an ADOPT entry that transfers a live lease to `new_holder` (C5).

    ADOPT is the eviction-free sibling of SCAVENGE. SCAVENGE removes a lease
    whose holder is gone AND whose work is done; ADOPT hands over a lease whose
    holder is gone but whose recorded children are STILL LIVE, so the lane keeps
    its in-flight grandchildren instead of being reclaimed from under them or
    wedged until TTL. On replay the live lease's `holder`/`pid`/`host_id` are
    rewritten and its heartbeat refreshed; identity, tree, ttl and children stay.

    The KERNEL never decides to adopt: telling orphaned-but-working from
    stalled-dead needs grandchild liveness, which is host boundary I/O (the
    proc-liveness rung). The host gathers that evidence, decides, and appends
    this op; the kernel supplies only the transfer mechanism and the
    child-identity anchor recorded at acquire time.

    Args:
        lease: The lease being adopted; supplies `lane`, `loop_ts`, the previous
            `holder`, and the fallback `host_id`.
        new_holder: The adopting holder.
        new_pid: The adopter's pid, if known.
        new_host_id: The adopter's host id; falls back to the lease's `host_id`
            when empty.
        heartbeat_at: Heartbeat stamp; defaults to now so the adopted lease is
            not instantly stale under the new owner.
        reason: Free-form explanation recorded with the entry.

    Returns:
        The ADOPT entry dict, ready to append.
    """
    adopter_host = new_host_id if new_host_id else lease.get("host_id")
    beat = heartbeat_at if heartbeat_at else journal_now_iso()
    return {
        "op": OP_ADOPT,
        "lane": lease.get("lane"),
        "loop_ts": lease.get("loop_ts"),
        "holder": new_holder,
        "pid": new_pid,
        "host_id": adopter_host,
        "prev_holder": lease.get("holder"),
        "heartbeat_at": beat,
        "reason": reason,
    }
490
+
491
+
492
def acquire_entry(lease: dict, *, reason: str = "", prev_holder: Any = None,
                  env_digest: str = "", children: Any = None,
                  run_id: Any = None) -> dict:
    """Build the ACQUIRE entry for a lease the writer has just minted.

    The top level carries the lease's identity fields for cheap reading, and
    the complete lease is NESTED under `lease` so replay can reconstruct it
    exactly. Three optional, purely ADDITIVE fields may ride along; an ACQUIRE
    without them replays unchanged (the lane-journal forward-compat contract),
    and none of them is ever adjudicated on here (the docs/76
    record-don't-decide line).

    `run_id` (docs/118 Size S / docs/137) is the CID spine id of the run that
    took the lease — the WAL↔spine join key. `refuse_entry` and `halt_entry`
    already carried one; the GRANT side did not, so a *held* lane could not be
    traced back to the run that wanted it (the gap docs/118 measured at `0`
    join-ready ACQUIREs). It is written onto the NESTED lease so replay carries
    it onto the live lease and a later ADOPT preserves it, where readers keyed
    on `run_id` (`decisions`, `trajectory_audit._lease_run_id`, `dos trace`)
    pick it up.

    `children` (C5) is the list of child identities the holder spawned —
    `[{"run_id": ..., "pid": ...}, ...]` — the non-forgeable ANCHOR that lets a
    later acquirer tell "holder gone, grandchildren still working" from "this
    lease is simply dead". The kernel records the anchor and never measures the
    children's liveness (host boundary I/O via the proc-liveness rung). It also
    rides on the nested lease.

    `env_digest` is the holder's environment-print digest
    (`env_print.EnvPrint.digest`, docs/115 primitive 1). The ACQUIRE is where a
    lease is born, so it is the entry that records *under what* the hold
    happened; only the cheap key rides here, the full print lands once per
    run-dir in the intent ledger's INTENT record. The `FLEET_ENV_MISMATCH` gate
    that compares a digest to a pin is a later phase and lives in the arbiter.

    Args:
        lease: the freshly minted lease dict.
        reason: free-text reason recorded on the entry.
        prev_holder: previous holder, recorded verbatim.
        env_digest: environment digest; added at the top level only if truthy.
        children: child-identity anchor; an explicit value wins, otherwise one
            already on `lease` is honored. Stored (as a list) only if truthy.
        run_id: CID spine id; an explicit value wins, otherwise one already on
            `lease` is honored. Stored (as a string) only if truthy.

    Returns:
        The ACQUIRE entry dict.
    """
    # Private copy of the lease: the optional fields below are stamped onto it
    # without touching the caller's dict.
    nested = dict(lease)

    entry = {"op": OP_ACQUIRE}
    for field in ("lane", "lane_kind", "tree", "loop_ts", "host_id", "pid",
                  "ttl_minutes"):
        entry[field] = lease.get(field)
    entry["prev_holder"] = prev_holder
    entry["reason"] = reason
    # Nest the full lease so replay reconstructs it exactly.
    entry["lease"] = nested

    if env_digest:
        entry["env_digest"] = env_digest

    # Join key: explicit argument first, else whatever the host stamped on the
    # lease at mint time. Absent => the nested lease gets no `run_id`.
    effective_run_id = lease.get("run_id") if run_id is None else run_id
    if effective_run_id:
        nested["run_id"] = str(effective_run_id)

    # Child-identity anchor: same precedence rule as `run_id`.
    anchor = lease.get("children") if children is None else children
    if anchor:
        nested["children"] = list(anchor)

    return entry
566
+
567
+
568
def release_entry(lease: dict, *, reason: str = "explicit") -> dict:
    """Build the RELEASE entry for a lease that is being dropped.

    Carries the lease's `lane`, `loop_ts` and `host_id` plus the `reason` for
    the drop (defaults to ``"explicit"``).
    """
    entry = {"op": OP_RELEASE}
    entry.update((key, lease.get(key)) for key in ("lane", "loop_ts", "host_id"))
    entry["reason"] = reason
    return entry
577
+
578
+
579
def heartbeat_entry(lease: dict, *, heartbeat_at: str = "") -> dict:
    """Build the HEARTBEAT entry that refreshes a live lease's liveness stamp.

    The HEARTBEAT path is complete end-to-end: this builder, the `replay` fold
    (which sets a live lease's `heartbeat_at` from this entry's `heartbeat_at`
    or its `ts`), the `journal_delta._HEARTBEAT_OPS` freshness rung, and the
    effectful writer (`lane_lease.heartbeat`, the verb behind
    `dos lease-lane heartbeat`). That writer is what makes liveness SPINNING
    reachable from real journal evidence; before it nothing emitted an
    OP_HEARTBEAT, so the newest beat was always the boundary ACQUIRE, which
    aged out to STALLED.

    A HEARTBEAT is a *beat*, not a state change. Replay keys it on the
    `(loop_ts, lane)` identity and updates the freshness of an already-live
    lease (a no-op when that lease is not held), which is why only identity
    plus the stamp is carried, not the lease body. It is deliberately excluded
    from `journal_delta._EVENT_OPS`, so a fresh beat proves life without
    counting as progress (the SPINNING rung).

    Args:
        lease: the lease being refreshed; `lane`, `loop_ts`, `host_id` are read.
        heartbeat_at: explicit stamp. When empty the key is omitted and the
            entry `ts` (filled by `append`) stands in; `lane_lease.heartbeat`
            passes the append instant explicitly so the fold trusts the
            writer's own clock.

    Returns:
        The HEARTBEAT entry dict.
    """
    identity = {name: lease.get(name) for name in ("lane", "loop_ts", "host_id")}
    beat = {"op": OP_HEARTBEAT, **identity}
    if not heartbeat_at:
        return beat
    beat["heartbeat_at"] = heartbeat_at
    return beat
609
+
610
+
611
def attempt_entry(
    unit_id: str,
    *,
    outcome: str,
    run_id: Any = None,
    lane: str = "",
    loop_ts: str = "",
    host_id: Any = None,
) -> dict:
    """Build an OP_ATTEMPT entry: a recorded PICK ATTEMPT on a unit (docs/207 §3).

    This is the cross-run anti-churn memory the bare loop lacked: once a claim
    TTL lapsed, a loop re-picked the same drained unit every iteration
    (measured ~5% of runs shipping). The event records that a pick was
    ATTEMPTED, with its ``outcome`` when known, so
    `cooldown.cooldown_verdict` can fold recent history and answer "have I
    already tried this unit and it didn't move?" — the `RECENTLY_ATTEMPTED`
    hold that skips a just-drained unit instead of re-dispatching it.

    Like REFUSE/HALT/ENFORCE this is a FORENSIC event. OP_ATTEMPT is not in
    `_STATE_MUTATING_OPS`, so `replay` ignores it when reconstructing lease
    state: journaling attempts can never lose or invent a live lease, it only
    adds the history the cooldown fold reads via `read_all`. The entry carries
    a `durable_schema` tag (the first lane-journal record to do so); `append`
    merges it if absent, keeping the fold version-forward-compatible.

    Args:
        unit_id: the unit that was picked; stored as a string.
        outcome: typed token read by the cooldown fold (closed set in
            `dos.cooldown.AttemptOutcome`, e.g. ``"shipped"`` / ``"drained"`` /
            ``"blocked"`` / ``"error"``); recorded verbatim as a string and
            interpreted only by the fold.
        run_id: CID spine id of the attempting run; the key is written (as a
            string) only when this is not None.
        lane: lane the attempt correlates to, when known.
        loop_ts: loop timestamp the attempt correlates to, when known.
        host_id: host the attempt correlates to, when known.

    Returns:
        The OP_ATTEMPT entry dict.
    """
    # Start from the schema tag so its keys lead the entry, then layer the
    # attempt fields on top.
    entry = dict(_schema.tag(SCHEMA_FAMILY, LANE_JOURNAL_SCHEMA))
    entry.update(
        op=OP_ATTEMPT,
        unit_id=str(unit_id),
        outcome=str(outcome),
        lane=lane,
        loop_ts=loop_ts,
        host_id=host_id,
    )
    if run_id is None:
        return entry
    entry["run_id"] = str(run_id)
    return entry
654
+
655
+
656
def spawn_entry(
    *,
    lane: str,
    loop_ts: str = "",
    holder: str = "",
    host_id: Any = None,
    pid: Any = None,
    run_id: Any = None,
    reason: str = "",
) -> dict:
    """Build an OP_SPAWN entry: a recorded INTENT TO TAKE A LANE (the dos-top gap).

    SPAWN is the acquire-side sibling of `halt_entry`. A HALT says "a held run
    is going to stop"; a SPAWN says "a run is *coming* to this lane". It is
    recorded the moment a launcher commits to a lane, BEFORE preflight and
    before the durable ACQUIRE, to close the SPAWN→ACQUIRE blind window the
    audit names: `dos top` reads only the WAL and a *successful* `arbitrate`
    persists nothing, so between launch and first ACQUIRE a loop is invisible
    on the only surface the watchdog reads.

    Like `halt_entry`/`refuse_entry`/`attempt_entry` it is a FORENSIC INTENT,
    not a grant. OP_SPAWN is not in `_STATE_MUTATING_OPS`, so `replay` ignores
    it for lease reconstruction — the whole safety argument: an intent that
    never acquires cannot strand a phantom hold (the docs/281 failure mode),
    and a not-yet-real run cannot double-book a region the arbiter admits
    against. The `dispatch_top` SPAWNING chip is a SEPARATE fold over recent
    SPAWNs (TTL-bounded, no-live-lease only), never the admission live set.

    Args:
        lane: the region being committed to (the only required field).
        loop_ts: loop timestamp of the eventual ACQUIRE, when known.
        holder: holder tag of the eventual ACQUIRE, when known.
        host_id: host of the launching run, when known.
        pid: pid of the launching run, when known.
        run_id: CID spine id; the key is written (as a string) only when this
            is not None.
        reason: free text for the operator (e.g. the launch context).

    Returns:
        The OP_SPAWN entry dict. `loop_ts`/`holder`/`host_id`/`pid`/`run_id`
        mirror the identity tuple `acquire_entry` stamps, so a reader can join
        SPAWN→ACQUIRE.
    """
    intent: dict = dict(
        op=OP_SPAWN,
        lane=lane,
        loop_ts=loop_ts,
        holder=holder,
        host_id=host_id,
        pid=pid,
        reason=reason,
    )
    if run_id is None:
        return intent
    intent["run_id"] = str(run_id)
    return intent
700
+
701
+
702
def scavenge_entry(
    lease: dict, *, reason: str = "scavenged", prev_holder: Any = None
) -> dict:
    """Build the SCAVENGE entry for an evicted (orphaned) lease.

    SCAVENGE is the eviction sibling of `release_entry`. Replay folds
    OP_SCAVENGE exactly like OP_RELEASE (it removes the `(loop_ts, lane)`
    lease), so the entry carries the same eviction key: `loop_ts`, `lane`,
    `host_id` and `reason`. Because this is an *eviction* rather than a
    voluntary drop, it additionally carries the forensic pair `pid` and
    `prev_holder` (the same two `acquire_entry` stamps), letting an operator
    see which process/holder was reclaimed and why without re-joining to the
    prior ACQUIRE. The supervisor driver writes this when `supervise()`
    returns a REAP for a STALLED lease.

    Args:
        lease: the lease being evicted; `lane`, `loop_ts`, `host_id` and `pid`
            are read from it.
        reason: why the lease was reclaimed (defaults to ``"scavenged"``).
        prev_holder: the holder that was evicted, recorded verbatim.

    Returns:
        The SCAVENGE entry dict.
    """
    evicted = {"op": OP_SCAVENGE}
    for key in ("lane", "loop_ts", "host_id", "pid"):
        evicted[key] = lease.get(key)
    evicted["prev_holder"] = prev_holder
    evicted["reason"] = reason
    return evicted
725
+
726
+
727
def halt_entry(
    handle: str,
    *,
    reason: str = "",
    lane: str = "",
    loop_ts: str = "",
    host_id: Any = None,
    run_id: Any = None,
    command: Any = None,
) -> dict:
    """Build a HALT entry: a recorded STOP DECISION for an in-flight run (docs/99).

    The contract is DOMAIN-FREE. `handle` is an **opaque** identifier the HOST
    supplies for the thing to stop — a pid string, a container id, a
    remote-task token, a harness `Workflow` id. The kernel records it verbatim
    and interprets nothing about it (it never learns "a run is a pid on this
    host"; that is domain knowledge a substrate must not carry, docs/99 §3).

    Unlike `scavenge_entry`, HALT carries no lease payload and removes no lease
    in `replay` (it is not in `_STATE_MUTATING_OPS`). It is the *intent* to
    stop, decoupled from the *fact* of the lease ending, which only a later
    RELEASE/SCAVENGE appended by the driver (once the stop is confirmed)
    records.

    Args:
        handle: opaque host-supplied identifier of the thing to stop.
        reason: free-text reason for the stop decision.
        lane: lane of the targeted lease, when known.
        loop_ts: loop timestamp of the targeted lease, when known.
        host_id: host of the targeted lease, when known.
        run_id: run the decision concerns, recorded verbatim.
        command: host-supplied stop command, echoed onto the spine for
            forensics; the kernel records the proposed command and never
            runs it.

    Returns:
        The HALT entry dict. `lane`/`loop_ts`/`host_id` are present purely so
        an operator can correlate the HALT to its lease without re-joining to
        the ACQUIRE.
    """
    correlation = {
        "lane": lane,
        "loop_ts": loop_ts,
        "host_id": host_id,
        "run_id": run_id,
    }
    return {
        "op": OP_HALT,
        "handle": handle,
        **correlation,
        "command": command,
        "reason": reason,
    }
764
+
765
+
766
def refuse_entry(
    decision: Any,
    *,
    owner: str,
    lane: str = "",
    loop_ts: str = "",
    host_id: Any = None,
    run_id: Any = None,
    reason_class: str = "",
) -> dict:
    """Build a REFUSE entry: a recorded DENIED lane request (LJ2 / docs/82).

    REFUSE is the forensic sibling of `acquire_entry`: an ACQUIRE records that
    someone GOT a lane, a REFUSE records that someone WANTED one and was
    turned away. Without it the journal cannot answer "why was I refused at
    14:03?", because a denied `arbitrate` leaves no trace. Three readers
    already CONSUME `OP_REFUSE` (the decisions queue, the central-index home,
    the trajectory audit); this is the missing PRODUCER.

    `OP_REFUSE` is not in `_STATE_MUTATING_OPS`, so `replay` ignores it for
    state reconstruction (a denied request grants nothing): journaling every
    refusal can never lose or invent a live lease, it only adds history.

    Args:
        decision: duck-typed off the pure `arbiter.LaneDecision` (or any object
            exposing `.reason` / `.lane`). Only those two attributes are read,
            which keeps this builder a pure stdlib leaf with no import of the
            arbiter.
        owner: the requester tag, recorded as `holder` (mirroring how
            `acquire_entry` threads the lease holder).
        lane: explicit lane; when empty, `decision.lane` is used, else ``""``.
        loop_ts: loop timestamp of the request, when known.
        host_id: host of the requester, when known.
        run_id: run that made the request, recorded verbatim.
        reason_class: the *typed* refusal token for a future arbiter surface
            that carries one (`AdmissionVerdict.reason_class`); defaults to
            ``""`` and readers degrade an empty token gracefully.

    Returns:
        The REFUSE entry dict.
    """
    # Explicit lane wins; otherwise borrow the decision's, never leaving None.
    resolved_lane = lane
    if not resolved_lane:
        resolved_lane = getattr(decision, "lane", "") or ""
    why = getattr(decision, "reason", "") or ""
    return dict(
        op=OP_REFUSE,
        lane=resolved_lane,
        loop_ts=loop_ts,
        host_id=host_id,
        run_id=run_id,
        holder=owner,
        reason=why,
        reason_class=reason_class,
    )
807
+
808
+
809
def enforce_entry(
    proposal: Any,
    *,
    owner: str = "",
    lane: str = "",
    loop_ts: str = "",
    host_id: Any = None,
    run_id: Any = None,
    tool: str = "",
) -> dict:
    """Build an OP_ENFORCE entry: a recorded ENFORCEMENT OUTCOME (docs/189 §C4).

    ENFORCE is the forensic sibling of `refuse_entry` for the actuation seam
    (`dos.enforce`). A REFUSE records that a lane request was denied; an
    ENFORCE records that a handler PROPOSED an effect on an intervention
    decision — observe / warn / block (with a synthetic substitute) / defer.
    Without it, a handler that withholds a tool call leaves no trace on the
    spine, so an auditor (or a `resume`) cannot answer "which call was blocked
    at 14:03, by which handler, and what was substituted?" — the
    ARIES-recovery gap docs/189 names.

    `OP_ENFORCE` is not in `_STATE_MUTATING_OPS`, so `replay` ignores it for
    state reconstruction (an enforcement proposal grants/removes no lease):
    journaling every enforcement outcome can never lose or invent a live
    lease, it only adds history.

    Args:
        proposal: duck-typed off `dos.enforce.EffectProposal`. Accepted shapes:
            an object with `.to_dict()`, a raw mapping, ``None``, or an object
            exposing only bare attributes (`intervention`, `dispatch_call`,
            `handler`, `reason`, `reason_class`). Reading it this way keeps the
            builder a pure stdlib leaf with no import of the enforce module —
            the same discipline `refuse_entry` uses for a `LaneDecision`.
        owner: the requester/actor tag, recorded as `holder` (mirroring
            `acquire_entry`/`refuse_entry`).
        lane: lane the decision concerns, when known.
        loop_ts: loop timestamp the decision concerns, when known.
        host_id: host the decision concerns, when known.
        run_id: run the decision concerns, recorded verbatim.
        tool: host-supplied name of the tool call the decision was about
            (opaque to the kernel, echoed for correlation).

    Returns:
        The OP_ENFORCE entry dict. The proposal body is stored under
        `proposal`; `intervention`, `reason_class`, `handler`, `reason`,
        `dispatch_call` and `withheld` are lifted to the top level so "did the
        real call fire?" is answerable without re-reading the body.
        `dispatch_call`/`withheld` are ``None`` when the proposal does not say.
    """
    if hasattr(proposal, "to_dict"):
        body = proposal.to_dict()
    else:
        try:
            body = dict(proposal or {})
        except TypeError:
            # An attribute-only proposal (no `to_dict`, not a mapping) cannot
            # be turned into a dict. Previously this raised and made the
            # documented bare-attribute fallback unreachable; store an empty
            # body and let the `getattr` lifts below read the attributes.
            body = {}

    # Lift the rung + dispatch flag to the top level for cheap forensic
    # filtering, tolerating either an EffectProposal (`.intervention` is an
    # enum, hence the `.value` unwrap) or a raw dict.
    rung = body.get("intervention", getattr(proposal, "intervention", ""))
    rung = getattr(rung, "value", rung) or ""
    dispatch = body.get("dispatch_call")
    if dispatch is None:
        dispatch = getattr(proposal, "dispatch_call", None)

    # The TYPED refusal token is lifted for the same reason `refuse_entry`
    # writes it at the top level: the decisions queue and the cause-resolution
    # fold (`decisions._refusal_kind`, `picker_oracle.resolve_cause`) read the
    # top-level `reason_class`, NOT the nested `proposal` body. Without the
    # lift an ENFORCE-recorded refusal would be less recoverable than a
    # REFUSE-recorded one (a SELF_MODIFY block would read as UNCLASSIFIED).
    # An absent token degrades to "" exactly as `refuse_entry`'s does.
    reason_class = (
        body.get("reason_class", getattr(proposal, "reason_class", "")) or ""
    )

    return {
        "op": OP_ENFORCE,
        "lane": lane,
        "loop_ts": loop_ts,
        "host_id": host_id,
        "run_id": run_id,
        "holder": owner,
        "tool": tool,
        "intervention": str(rung),
        "dispatch_call": bool(dispatch) if dispatch is not None else None,
        "withheld": (not dispatch) if dispatch is not None else None,
        "handler": body.get("handler", getattr(proposal, "handler", "")) or "",
        "reason": body.get("reason", getattr(proposal, "reason", "")) or "",
        "reason_class": reason_class,
        "proposal": body,
    }
885
+
886
+
887
def checkpoint_entry(leases: list[dict], *, seq_watermark: int) -> dict:
    """Build an OP_CHECKPOINT snapshot of the authoritative live-lease set.

    The checkpoint sits at the HEAD of a compacted journal (`compact`). It
    holds the full live set folded from the discarded history, so `replay` can
    reconstitute it without the original ACQUIRE lines, together with
    `seq_watermark` — the max `seq` seen in the discarded history — so
    `next_seq` stays monotonic across a rewrite that deleted the lines holding
    the previous high-water mark. Pure constructor.

    Args:
        leases: the live leases to snapshot; each is shallow-copied.
        seq_watermark: highest `seq` of the discarded history; coerced to int.

    Returns:
        The OP_CHECKPOINT entry dict.
    """
    snapshot = list(map(dict, leases))
    return dict(
        op=OP_CHECKPOINT,
        leases=snapshot,
        seq_watermark=int(seq_watermark),
    )
901
+
902
+
903
def compact(entries: Iterable[dict]) -> list[dict]:
    """Fold a journal down to a single CHECKPOINT (+ preserved corrupt sentinels).

    PURE — entries in, a SHORTER entry list out, no disk, no clock. This is the
    compaction core that the I/O shell (`lane_lease.compact_journal`) writes
    back over the WAL crash-safely. Discarding the long history of dead leases
    is safe for the same reason `replay` is: fold to the authoritative live
    set, then SNAPSHOT it, so a still-live ACQUIRE older than any cutoff
    survives in the checkpoint payload. A naive "delete old lines" would forget
    a held lane and the kernel would false-ADMIT a colliding tree — the
    lost-live-lease bug this fold-to-snapshot design forecloses.

    The DIFFERENTIAL-EQUIVALENCE invariant (pinned by a test):
        replay(compact(E)) == replay(E)
    holds because `replay`'s CHECKPOINT branch RESETS its live set to exactly
    the payload written here. This is equivalence for the ARBITER's live set,
    NOT for the liveness fold: a CHECKPOINT carries no `ts` and is in neither
    `journal_delta._EVENT_OPS` nor `_HEARTBEAT_OPS`, so a still-live run's beat
    anchor is dropped and it reads STALLED to the liveness oracle until its
    next ACQUIRE/HEARTBEAT. That is always the SAFE direction (compaction can
    never fabricate an event or beat), but it is why compaction is an operator
    verb for a quiet window, not an automatic per-append rotation.

    Args:
        entries: the journal entries to fold; consumed once and materialized.

    Returns:
        ``[checkpoint, *corrupt_sentinels]``. The checkpoint's `seq_watermark`
        is derived from the input only — the max of every entry's `seq` and of
        any pre-existing checkpoint's `seq_watermark` — so `next_seq` over the
        compacted journal is `>=` `next_seq` over the original, never a reused
        seq. Every `_CORRUPT` sentinel in the input is copied into the output
        after the checkpoint: a mid-file integrity breach is real signal an
        audit must still see, never silently erased by a rewrite.
    """

    def _seq_value(raw: Any) -> int:
        # Tolerant coercion: missing, null, unparseable or non-finite values
        # count as 0. OverflowError covers a float infinity, which
        # `json.loads` accepts by default and `int()` cannot convert; without
        # it one malformed line would abort the whole compaction.
        try:
            return int(raw or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    history = list(entries)
    live = replay(history)
    watermark = 0
    corrupt: list[dict] = []
    for entry in history:
        # A pre-existing checkpoint's watermark also bounds the next seq.
        watermark = max(
            watermark,
            _seq_value(entry.get("seq")),
            _seq_value(entry.get("seq_watermark")),
        )
        if str(entry.get("op") or "") == "_CORRUPT":
            corrupt.append(dict(entry))
    return [checkpoint_entry(live, seq_watermark=watermark), *corrupt]
952
+
953
+
954
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for inspecting the lane journal.

    Subcommands:
        tail [n] [--json]  print the last `n` entries (default 20), either as
                           one formatted line per entry or as raw JSONL.
        replay             print the reconstructed live-lease set as JSON.
        seq                print the current max seq (`next_seq() - 1`).

    Args:
        argv: argument list to parse; ``None`` lets argparse read `sys.argv`.

    Returns:
        0 after a handled subcommand, 2 if no known subcommand matched.
    """
    import argparse

    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    sub = ap.add_subparsers(dest="cmd", required=True)
    p_tail = sub.add_parser("tail", help="print the last N entries")
    p_tail.add_argument("n", nargs="?", type=int, default=20)
    p_tail.add_argument("--json", action="store_true", help="raw JSONL")
    sub.add_parser("replay", help="print the reconstructed live-lease set")
    sub.add_parser("seq", help="print the current max seq")
    args = ap.parse_args(argv)

    if args.cmd == "tail":
        entries = tail(args.n)
        if args.json:
            for e in entries:
                print(json.dumps(e, sort_keys=True, default=str))
        else:
            if not entries:
                print("(journal empty)")
            for e in entries:
                seq = e.get("seq", "?")
                ts = e.get("ts", "?")
                op = e.get("op", "?")
                lane = e.get("lane", "")
                extra = e.get("reason") or ""
                loop = e.get("loop_ts") or ""
                # `seq` and `op` are stringified before the width specs are
                # applied, as `lane` and `loop` already are: a null or
                # non-string value on a journal line would otherwise raise
                # TypeError in __format__ and abort the listing. Output is
                # unchanged for the normal int/str values.
                print(f"#{str(seq):<5} {ts} {str(op):9} {str(lane):14} "
                      f"{str(loop):16} {extra}")
        return 0

    if args.cmd == "replay":
        leases = replay(read_all())
        print(json.dumps(leases, indent=2, sort_keys=True, default=str))
        return 0

    if args.cmd == "seq":
        print(next_seq() - 1)
        return 0

    return 2
998
+
999
+
1000
if __name__ == "__main__":
    # Script entry: surface main()'s return code as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)