dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/lane_lease.py
ADDED
|
@@ -0,0 +1,952 @@
|
|
|
1
|
+
"""Lane-lease write-back — the durable cross-process surface over the pure arbiter.
|
|
2
|
+
|
|
3
|
+
`arbiter.arbitrate` is the **pure** admission kernel: state in (`request`,
|
|
4
|
+
`live_leases`), decision out, no I/O (`arbiter.py`). That purity is load-bearing
|
|
5
|
+
— a verdict you can replay a year later from the journal and get byte-identical —
|
|
6
|
+
so the arbiter deliberately does **not** persist the lease it grants. In a single
|
|
7
|
+
process that is fine: the caller holds `live_leases` as an in-memory list and
|
|
8
|
+
threads it into the next `arbitrate` call (`benchmark/fleet_horizon/closed_loop.py`
|
|
9
|
+
does exactly this).
|
|
10
|
+
|
|
11
|
+
But an **ephemeral, multi-process** orchestrator — a harness `Workflow` whose
|
|
12
|
+
`parallel()` branches are separate `dos` invocations — has no shared in-memory
|
|
13
|
+
list. Each branch's `dos arbitrate` would see an empty `--leases`, both would
|
|
14
|
+
ADMIT a colliding tree, and the collision would be **detected after the fact by a
|
|
15
|
+
later `verify`, not PREVENTED at contention**. That is strictly weaker than the
|
|
16
|
+
in-process loop, and it is the one real gap between "harness orchestrates, DOS
|
|
17
|
+
adjudicates" and DOS owning its own dispatch (see `docs/98`).
|
|
18
|
+
|
|
19
|
+
This module closes that gap **without touching the arbiter's purity**. It is the
|
|
20
|
+
thin, I/O-bearing shell the lane journal's docstring already anticipates ("the
|
|
21
|
+
writer is the caller, under the lock that serializes it"): it runs the pure
|
|
22
|
+
`arbitrate`, and — on `acquire` — appends an `ACQUIRE` record to the lane-journal
|
|
23
|
+
WAL (`lane_journal.acquire_entry`), all inside an `O_EXCL` mutex so two
|
|
24
|
+
cross-process acquirers cannot both win a contended lane. A sibling branch then
|
|
25
|
+
reconstructs `live_leases` by folding the WAL (`live()` → `lane_journal.replay`)
|
|
26
|
+
*before* its own `arbitrate`, so the second branch sees the first's grant and is
|
|
27
|
+
correctly refused.
|
|
28
|
+
|
|
29
|
+
The split mirrors `liveness` vs its CLI boundary, and `arbitrate` vs
|
|
30
|
+
`cmd_arbitrate`: **the verdict stays pure; the durability lives at the edge.**
|
|
31
|
+
|
|
32
|
+
Layer: this is a Layer-3 helper (`CLAUDE.md`) — a thin shell over the kernel
|
|
33
|
+
(`arbiter` + `lane_journal`) carrying **no policy of its own**. It names no host,
|
|
34
|
+
reads its lock/journal paths from the injected `SubstrateConfig`, and adds no new
|
|
35
|
+
admission rule. The CLI verb is `dos lease-lane`.
|
|
36
|
+
"""
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import dataclasses
|
|
40
|
+
import datetime as dt
|
|
41
|
+
import json
|
|
42
|
+
import os
|
|
43
|
+
import time
|
|
44
|
+
from pathlib import Path
|
|
45
|
+
from typing import Optional
|
|
46
|
+
|
|
47
|
+
from dos import _filelock
|
|
48
|
+
from dos import arbiter, lane_journal
|
|
49
|
+
from dos import admission as _admission
|
|
50
|
+
from dos.config import SubstrateConfig
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# The lane-lease mutex is a SECOND, distinct lock from the archive lock
|
|
54
|
+
# (`archive_lock.py`, `dos lease`): the archive lock serializes the Step-9.5
|
|
55
|
+
# archive ceremony; this one serializes a lane-lease grant's read-arbitrate-append
|
|
56
|
+
# critical section. Keeping them separate is deliberate — conflating two locks
|
|
57
|
+
# under one owner-namespace invites a deadlock where the archive holder blocks a
|
|
58
|
+
# lane acquire. The lock lives beside the journal it guards.
|
|
59
|
+
# Defaults for the lane-lease mutex (`_Mutex`) and for `acquire`'s lock knobs.
DEFAULT_TTL_SECONDS = 300  # lock age (seconds) at which a foreign hold counts as stale
DEFAULT_RETRIES = 5  # extra attempts after the first one (retries + 1 tries in total)
DEFAULT_RETRY_INTERVAL = 0.2  # seconds slept between attempts on a busy, non-stale lock
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _now() -> dt.datetime:
    """Return the current instant as a timezone-aware UTC datetime."""
    utc = dt.timezone.utc
    return dt.datetime.now(utc)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _now_iso() -> str:
    """Return the current UTC time as a second-resolution `YYYY-MM-DDTHH:MM:SSZ` stamp."""
    current = _now()
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _journal_path(config: SubstrateConfig) -> Path:
    """Resolve the lane-journal (WAL) path this module appends to and folds from.

    An environment override wins over the injected config:
    `DISPATCH_LANE_JOURNAL_PATH` is consulted first, then its `JOB_` alias
    `JOB_LANE_JOURNAL_PATH`; the first one holding a non-empty value is used.
    With no override the path is `config.paths.lane_journal` — the explicitly
    passed config, not a globally active one.

    The resolution order is meant to mirror `lane_journal`'s own path lookup so
    that writer and readers agree on where the WAL lives.
    """
    for env_name in ("DISPATCH_LANE_JOURNAL_PATH", "JOB_LANE_JOURNAL_PATH"):
        override = os.environ.get(env_name)
        if override:
            return Path(override)
    return config.paths.lane_journal
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _lock_path(config: SubstrateConfig) -> Path:
    """Resolve the lane-lease mutex file path.

    A non-empty `DISPATCH_LANE_LEASE_LOCK_PATH` environment variable overrides
    everything (a test seam). Otherwise the lock is the file `.lane-lease.lock`
    in the same directory as the lane journal, so the path always derives from
    the injected config rather than from this module's location.
    """
    override = os.environ.get("DISPATCH_LANE_LEASE_LOCK_PATH")
    if override:
        return Path(override)
    journal_dir = _journal_path(config).parent
    return journal_dir / ".lane-lease.lock"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _read_lock(config: SubstrateConfig) -> dict | None:
    """Read the lane-lease lock body via the shared `_filelock.read_lock` parser.

    Returns the parsed lock as a dict, or None when there is no lock to read
    (callers treat None as "not held").
    """
    lock_file = _lock_path(config)
    return _filelock.read_lock(lock_file)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _write_lock(config: SubstrateConfig, owner: str) -> None:
    """Create the lane-lease lock for `owner` via the shared `_filelock.write_lock`.

    The shared primitive is an exclusive create: when the lock file already
    exists it raises `FileExistsError`, which `_Mutex.__enter__` relies on to
    detect a held lock.
    """
    lock_file = _lock_path(config)
    _filelock.write_lock(lock_file, owner)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _release_lock(config: SubstrateConfig, owner: str) -> None:
    """Remove the lane-lease lock file if it is ours (or records no owner).

    Does nothing when no lock exists, or when the lock names a different owner —
    another holder's mutex is never removed. A lock file that vanishes between
    the read and the unlink is tolerated silently.
    """
    held = _read_lock(config)
    if held is None:
        return

    recorded_owner = held.get("owner")
    if recorded_owner is not None and recorded_owner != owner:
        # Held (or stolen) by someone else: leave their mutex alone.
        return

    lock_file = _lock_path(config)
    try:
        lock_file.unlink()
    except FileNotFoundError:
        pass
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _age_seconds(info: dict) -> float | None:
    """Age in seconds of a lock body, measured from its `acquired_at` stamp.

    Returns None when the stamp is missing or unparseable.
    """
    return _stamp_age_seconds(info.get("acquired_at", ""))
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _stamp_age_seconds(raw) -> float | None:
    """Seconds elapsed since a `YYYY-MM-DDTHH:MM:SSZ` UTC stamp, or None.

    `raw` is expected to be a second-resolution UTC stamp string. Anything that
    does not parse with that exact format — including a non-string value —
    yields None, meaning "no credible stamp"; callers take the conservative
    branch on None.
    """
    try:
        parsed = dt.datetime.strptime(raw, "%Y-%m-%dT%H:%M:%SZ")
    except (ValueError, TypeError):
        return None
    # The stamp is written in UTC, so attach the zone before subtracting.
    moment = parsed.replace(tzinfo=dt.timezone.utc)
    elapsed = _now() - moment
    return elapsed.total_seconds()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class _Mutex:
    """Scoped hold of the lane-lease lock file, with stale-lock stealing.

    Intended as a context manager around a read-arbitrate-append critical
    section, so the journal append happens while the lock is held. Entering
    raises `TimeoutError` when the lock cannot be taken within the retry budget;
    callers map that to a non-acquire outcome.

    NOTE(review): entering while a lock with the SAME owner already exists is
    treated as re-entrant, and `__exit__` always releases a lock owned by
    `owner` — so the exit of a nested hold also drops the outer hold. Confirm
    that callers never nest holds under one owner.
    """

    def __init__(self, config: SubstrateConfig, owner: str, *,
                 retries: int = DEFAULT_RETRIES,
                 retry_interval: float = DEFAULT_RETRY_INTERVAL,
                 ttl_seconds: int = DEFAULT_TTL_SECONDS) -> None:
        """Store the lock parameters; no I/O happens until `__enter__`.

        `retries` is the number of additional attempts after the first,
        `retry_interval` the sleep (seconds) between attempts on a busy lock,
        and `ttl_seconds` the lock age at which a foreign hold may be stolen.
        """
        self.config = config
        self.owner = owner
        self.retries = retries
        self.retry_interval = retry_interval
        self.ttl_seconds = ttl_seconds

    def __enter__(self) -> "_Mutex":
        """Take the lock, retrying and stealing a stale hold when allowed."""
        for attempt in range(self.retries + 1):
            try:
                _write_lock(self.config, self.owner)
            except FileExistsError:
                pass  # somebody holds it; inspect the holder below
            else:
                return self

            holder = _read_lock(self.config)
            if holder is None:
                # The lock vanished between the failed create and the read.
                continue
            if holder.get("owner") == self.owner:
                return self  # re-entrant: the existing lock is already ours

            lock_age = _age_seconds(holder)
            is_stale = lock_age is not None and lock_age >= self.ttl_seconds
            if is_stale:
                # Value-keyed compare-and-swap steal: only the exact stale lock
                # body we observed is displaced, so of several racing stealers
                # exactly one wins. A plain unlink-then-create would let two
                # stealers each knock out the other's fresh lock and both
                # proceed to admit colliding lanes.
                stolen = _filelock.steal_stale(
                    _lock_path(self.config), self.owner, holder)
                if stolen:
                    return self
                continue  # a racer won the steal; go around again

            if attempt < self.retries:
                time.sleep(self.retry_interval)

        busy_owner = (_read_lock(self.config) or {}).get('owner')
        raise TimeoutError(f"lane-lease lock busy (owner={busy_owner})")

    def __exit__(self, *exc) -> None:
        """Release the lock if it is ours; exceptions are never suppressed."""
        _release_lock(self.config, self.owner)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@dataclasses.dataclass(frozen=True)
class LaneLeaseResult:
    """Immutable outcome of an `acquire` call.

    Pairs the arbiter's pure decision with the durability fact. `journaled` is
    True only when the decision was an acquire AND the grant was appended to the
    WAL, letting a caller distinguish "admitted and recorded" from "admitted but
    not recorded" — the latter should be treated as not held.
    """

    # The decision returned by `arbiter.arbitrate`.
    decision: arbiter.LaneDecision
    # True only when the grant's ACQUIRE record landed in the lane journal.
    journaled: bool
    # The owner string the request was made under.
    owner: str
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# The heartbeat-freshness grace added on top of a lease's own `ttl_minutes` before
|
|
214
|
+
# the live-set fold treats it as expired. A lease is dropped only when its age
|
|
215
|
+
# exceeds `ttl_minutes + grace`, so a lease that is merely a beat-or-two late (the
|
|
216
|
+
# eventual-consistency window of a healthy-but-busy worker) is NEVER elided — only a
|
|
217
|
+
# lease that has gone quiet well past its own declared TTL. The default backstop TTL
|
|
218
|
+
# (`_DEFAULT_LIVE_TTL_MINUTES`) is the hard ceiling for a lease that declared none, so
|
|
219
|
+
# a malformed/legacy ACQUIRE with no `ttl_minutes` still cannot be immortal.
|
|
220
|
+
# Backstop TTL for a lease that declares no usable `ttl_minutes` of its own.
_DEFAULT_LIVE_TTL_MINUTES = 50.0  # matches lease_health.LeaseHealthPolicy.ttl_minutes
# Slack added on top of the TTL; also the window inside which a lease is "fresh".
_LIVE_TTL_GRACE_MINUTES = 5.0
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _lease_is_dead(lease: dict, *, now: dt.datetime, this_host: str) -> bool:
    """Return True only when a folded lease is confidently dead. PURE-ish.

    The structural WAL fold reports every un-RELEASEd ACQUIRE as live with no
    regard for the clock, so a holder that crashes without releasing leaves a
    lease that would be enforced forever (docs/281 Defect 1). This predicate is
    the read-time self-heal for that case.

    Decision ladder, in evaluation order:

    1. FRESH — the newest stamp (`heartbeat_at`, else `acquired_at`, parsed with
       `lease_health.parse_iso`) is at most `_LIVE_TTL_GRACE_MINUTES` old. The
       lease is kept regardless of its PID. This protects the ephemeral-acquirer
       model (docs/283): a `dos lease-lane acquire` subprocess journals its
       grant and exits, so its recorded `pid` is dead almost immediately while
       the reservation is still valid. Treating a dead PID as outright death let
       a racing acquirer double-book the region
       (`test_coord_demo_k4_serializes_writes`).
    2. NO CREDIBLE STAMP — staleness cannot be proven by time, so the lease is
       kept (the collision-protecting direction).
    3. DEAD AND SILENT — past the grace window and `proc_delta.probe` reports
       `alive is False` for the recorded PID: reclaim now instead of waiting out
       the full TTL. Only an explicit False counts; any other probe answer
       (the original notes list a foreign host, a `pid<=0` sentinel, or probe
       uncertainty) is not death, leaving cross-host orphans to rung 4.
    4. TTL BACKSTOP — otherwise dead only when the age exceeds the lease's own
       `ttl_minutes` (or `_DEFAULT_LIVE_TTL_MINUTES` when that is missing,
       non-numeric, or not positive) plus the grace.

    The predicate can only shrink the live set by removing provably/long-stale
    leases; it never hides a fresh one.
    """
    from dos import lease_health, proc_delta

    # Age of the newest credible stamp, in minutes (None when there is none).
    newest_stamp = lease.get("heartbeat_at", "") or lease.get("acquired_at", "")
    if newest_stamp:
        last_beat = lease_health.parse_iso(newest_stamp)
    else:
        last_beat = None
    if last_beat is None:
        age_min = None
    else:
        age_min = (now - last_beat).total_seconds() / 60.0

    # The lease's own TTL, or the backstop when it declares nothing usable.
    ttl_minutes = lease.get("ttl_minutes")
    if not isinstance(ttl_minutes, (int, float)) or ttl_minutes <= 0:
        ttl_minutes = _DEFAULT_LIVE_TTL_MINUTES

    # Rung 1: a lease beaten within the grace window is kept, whatever its PID.
    if age_min is not None and age_min <= _LIVE_TTL_GRACE_MINUTES:
        return False

    # The probe runs for every non-fresh lease, with or without a stamp.
    recorded_pid = lease.get("pid")
    holder_probe = proc_delta.probe(
        recorded_pid if isinstance(recorded_pid, int) else None,
        host_id=lease.get("host_id", "") or "",
        this_host=this_host,
    )
    holder_gone = holder_probe.alive is False

    # Rung 2: without a stamp nothing is provable, even with a dead PID.
    if age_min is None:
        return False
    # Rung 3: silent past the grace AND the holder process is confirmed gone.
    if holder_gone:
        return True
    # Rung 4: the hard TTL backstop.
    return age_min > (ttl_minutes + _LIVE_TTL_GRACE_MINUTES)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _expire_dead(leases: list[dict], *, now: dt.datetime, this_host: str) -> list[dict]:
    """Return the leases that are NOT provably dead, in their original order.

    A read-time filter applied on top of the structural fold; it builds a new
    list and never writes to the WAL. The check is best-effort per lease: if
    `_lease_is_dead` raises for one entry, that entry is kept, so a single
    malformed lease (or a probe/parse fault) can never blank the whole set or
    drop a real lease.
    """

    def _survives(lease: dict) -> bool:
        try:
            return not _lease_is_dead(lease, now=now, this_host=this_host)
        except Exception:
            return True  # fail-safe: an unjudgeable lease keeps its claim

    return [lease for lease in leases if _survives(lease)]
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def live_leases(config: SubstrateConfig, *, expire_dead: bool = False) -> list[dict]:
    """Rebuild the current live-lease set from the lane journal.

    This is the cross-process channel: a sibling orchestrator branch calls it to
    learn which lanes are already held before it arbitrates. The journal is read
    in full and folded with `lane_journal.replay`. No lock is taken here; the
    design relies on the journal being append-only and tolerant of a torn final
    record (a half-written trailing ACQUIRE folds as "did not happen").

    `expire_dead` selects the view:

    * False (default) — the plain structural fold. Reclaim-style consumers
      (`adopt()`, the orphan sweep) need dead orphans to stay VISIBLE so they
      can transfer or scavenge them.
    * True — the admission/contention view: leases that `_lease_is_dead` judges
      confidently dead are filtered out, so a crashed worker's orphan stops
      being enforced without waiting for an external SCAVENGE (docs/281
      Defect 1).

    In both modes the WAL itself is untouched; expiry is only a read-time view.
    """
    wal_path = _journal_path(config)
    structural = lane_journal.replay(lane_journal.read_all(wal_path))
    if not expire_dead:
        return structural
    return _expire_dead(structural, now=_now(), this_host=_hostname())
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def acquire(
    config: SubstrateConfig,
    *,
    lane: str,
    kind: str,
    tree: list[str],
    owner: str,
    loop_ts: str = "",
    extra_leases: list[dict] | None = None,
    retries: int = DEFAULT_RETRIES,
    retry_interval: float = DEFAULT_RETRY_INTERVAL,
    ttl_seconds: int = DEFAULT_TTL_SECONDS,
    run_id: str = "",
) -> LaneLeaseResult:
    """Arbitrate a lane request and, when granted, journal the lease durably.

    The read of the live set, the pure `arbiter.arbitrate` call, and the journal
    append all happen under the lane-lease mutex, so two cross-process acquirers
    serialize: the later one sees the earlier one's ACQUIRE and is refused.

    Args:
        config: the injected substrate config (lock and journal paths).
        lane / kind / tree: the request handed to the arbiter.
        owner: lock owner and recorded `holder` of a granted lease.
        loop_ts: the lease's `(loop_ts, lane)` identity key; when empty a
            second-resolution UTC stamp is generated.
        extra_leases: caller-supplied live leases unioned with the journal's
            (test injection, or leases a host tracks out-of-band).
        retries / retry_interval / ttl_seconds: mutex knobs, see `_Mutex`.
        run_id: optional spine id (docs/118 S / docs/137) stored on the lease
            and passed to the ACQUIRE/REFUSE entry; empty leaves both without it.

    Returns:
        `LaneLeaseResult` whose `journaled` is True only for a grant whose
        ACQUIRE was appended. A refusal always reports `journaled=False`.

    Raises:
        TimeoutError: the mutex could not be taken; nothing is journaled in that
            case (a contended LOCK is not a refused LANE).
        Any error from appending the ACQUIRE record propagates to the caller.
    """
    if not loop_ts:
        loop_ts = _now_iso()
    injected = list(extra_leases or [])
    predicates = _admission.active_predicates(config=config)
    mutex = _Mutex(config, owner, retries=retries, retry_interval=retry_interval,
                   ttl_seconds=ttl_seconds)

    with mutex:
        # Read INSIDE the lock: this is the serialization point that makes a
        # racing acquirer's already-journaled grant visible to us.
        #
        # Deliberately the STRUCTURAL fold (expire_dead left False), not the
        # dead-eliding admission view (docs/283). The lease records this
        # process's pid, and an ephemeral `dos lease-lane acquire` exits right
        # after journaling, so dead-PID elision could drop a reservation that is
        # still meant to hold and let a racer double-book the region. We are
        # already serialized against other acquirers here, so the genuine live
        # set is the right contention view; the phantom-orphan self-heal belongs
        # to the long-lived admission read instead. Coupling the two through one
        # flag is what regressed `test_coord_demo_k4_serializes_writes`.
        contenders = live_leases(config) + injected
        decision = arbiter.arbitrate(
            requested_lane=lane,
            requested_kind=kind,
            requested_tree=tree,
            live_leases=contenders,
            config=config,
            predicates=predicates,
        )

        if decision.outcome != "acquire":
            # Genuine-collision refusal: leave a REFUSE record so the journal
            # can explain the denial later. Best-effort by design — a failed
            # record must never turn a refusal into anything else, so OSError
            # is swallowed. `granted` stays False: it means "the GRANT was
            # durably recorded", never "the refuse-record landed".
            granted = False
            try:
                lane_journal.append(
                    lane_journal.refuse_entry(
                        decision,
                        owner=owner,
                        lane=lane,
                        loop_ts=loop_ts,
                        host_id=os.environ.get("DISPATCH_HOST_ID") or _hostname(),
                        run_id=run_id or None,
                    ),
                    _journal_path(config),
                )
            except OSError:
                pass
        else:
            lease = {
                "lane": decision.lane or lane,
                "lane_kind": kind,
                "tree": list(decision.tree or tree),
                "loop_ts": loop_ts,
                "host_id": os.environ.get("DISPATCH_HOST_ID") or _hostname(),
                "pid": os.getpid(),
                "holder": owner,
                "acquired_at": _now_iso(),
            }
            if run_id:
                lease["run_id"] = run_id  # the WAL↔spine join key (docs/137)
            grant_entry = lane_journal.acquire_entry(
                lease, reason=f"lane-lease:{owner}", run_id=run_id or None)
            lane_journal.append(grant_entry, _journal_path(config))
            granted = True

        return LaneLeaseResult(decision=decision, journaled=granted, owner=owner)
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def release(
    config: SubstrateConfig,
    *,
    lane: str,
    owner: str,
    loop_ts: str = "",
) -> bool:
    """Append a RELEASE to the WAL for a live lease on ``lane``.

    The lease to release is chosen from the current live set:

    * it must sit on ``lane``;
    * when ``loop_ts`` is given, its ``loop_ts`` must equal it; when omitted,
      any ``loop_ts`` qualifies (the common case of a branch that acquired
      without tracking its own stamp);
    * it must be our own lease -- its recorded ``holder`` equals ``owner`` --
      unless the record carries no holder at all, or ``owner`` is empty.

    When several leases qualify, the last one in the live set wins (the
    newest on the lane). The lookup and the append both happen while the
    lease mutex is held, so they are atomic with respect to a concurrent
    acquire.

    Returns:
        True when a matching live lease was found and its RELEASE appended;
        False (nothing written) when no live lease matched.
    """

    def _eligible(lease) -> bool:
        if lease.get("lane") != lane:
            return False
        if loop_ts and str(lease.get("loop_ts") or "") != loop_ts:
            return False
        holder = lease.get("holder")
        # An empty requester or a holder-less record bypasses the ownership
        # check; otherwise the stringified holder must be exactly `owner`.
        return owner == "" or holder is None or str(holder or "") == owner

    with _Mutex(config, owner):
        candidates = [lease for lease in live_leases(config) if _eligible(lease)]
        if not candidates:
            return False
        newest = candidates[-1]  # last in the live set -> newest on the lane
        record = lane_journal.release_entry(newest, reason=f"lane-lease:{owner}")
        lane_journal.append(record, _journal_path(config))
        return True
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def adopt(
    config: SubstrateConfig,
    *,
    lane: str,
    new_owner: str,
    loop_ts: str = "",
    new_pid: int | None = None,
) -> bool:
    """Journal an ADOPT that hands a live lease on ``lane`` to ``new_owner``.

    This is the C5 ownership transfer. The decision that the lease is an
    adoptable orphan (holder gone, recorded children still live) belongs to
    the CALLER, typically a host supervisor; this function only performs the
    durable ownership rewrite. The lease keeps its identity, tree, ttl and
    children -- only holder/pid/host_id move to the adopter. Nothing is ever
    killed here.

    ``loop_ts`` disambiguates a same-minute sibling; when omitted, the newest
    live lease on ``lane`` (the last one in the live set) is adopted. The
    adopter's host id is ``DISPATCH_HOST_ID`` from the environment, falling
    back to the local hostname.

    The lookup and the append run under the lease mutex, so they are atomic
    with respect to a concurrent acquire/release, and two adopters serialize
    (the second sees the first's ADOPT in the fold).

    Returns:
        True when a matching live lease was found and the ADOPT journaled;
        False (a no-op) when no live lease matched -- a lease nobody holds
        cannot be adopted.
    """
    with _Mutex(config, new_owner):
        candidates = [
            lease
            for lease in live_leases(config)
            if lease.get("lane") == lane
            and (not loop_ts or str(lease.get("loop_ts") or "") == loop_ts)
        ]
        if not candidates:
            return False
        newest = candidates[-1]  # last in the live set -> newest on the lane
        adopter_host = os.environ.get("DISPATCH_HOST_ID") or _hostname()
        record = lane_journal.adopt_entry(
            newest,
            new_holder=new_owner,
            new_pid=new_pid,
            new_host_id=adopter_host,
            reason=f"adopt:{new_owner}",
        )
        lane_journal.append(record, _journal_path(config))
        return True
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
def heartbeat(
    config: SubstrateConfig,
    *,
    lane: str,
    owner: str,
    loop_ts: str = "",
    coalesce_within_s: float = 0.0,
) -> bool:
    """Refresh a HELD lane lease by appending a HEARTBEAT to the WAL.

    A held worker calling this on a cadence is what lets the liveness oracle
    reach SPINNING (alive but not progressing): the fresh beat proves the
    lease is alive now, while the absence of state-mutating events keeps it
    short of ADVANCING. Without a heartbeat writer, the newest beat for a held
    lane is always its boundary ACQUIRE, which simply ages out to STALLED.

    **Which lease is beaten.** Only a lease that is currently LIVE, sits on
    ``lane``, matches ``loop_ts`` when one is given, and whose recorded
    ``holder`` exactly equals a non-empty ``owner``. With ``loop_ts`` omitted
    the newest such lease is beaten (mirrors ``release``). Pass the SAME
    ``loop_ts`` the acquire used to beat a specific lease; a freshly minted
    stamp names a different identity and would not match the real lease.

    **Why the match is load-bearing.** ``journal_delta.fold_since`` credits a
    beat by ``(loop_ts, lane)`` identity plus ``ts`` with no held-lease check,
    so a stray HEARTBEAT written after a RELEASE/SCAVENGE, or by someone other
    than the holder, would let the fold read a dead run as alive. Beating only
    a currently-live lease, and carrying that lease's own identity, is the
    writer-side defense. Running under the same ``_Mutex`` as acquire/release
    keeps the read-which-lease + append atomic with respect to a concurrent
    eviction.

    **Beat coalescing (docs/106 §3.1a -- the WAL-drain brake).** When
    ``coalesce_within_s`` is positive and the matched lease's freshest beat
    (``heartbeat_at``, else ``acquired_at``) is younger than that many
    seconds, the beat is ELIDED: nothing is appended and the call still
    returns True. Eliding can only let an existing beat age, never fabricate a
    fresher one, so it cannot cause a false ADVANCING/SPINNING. The default
    ``0.0`` elides nothing. Choose the floor well under ``spin_ms``: a floor
    at or above it could let the only beat age past the alive bound between
    writes and flip a live lease to STALLED. That bound is the caller's
    concern, not a kernel constant.

    Returns:
        True when a matching live lease was found (beaten, or coalesced);
        False, writing nothing, when no live lease on ``lane`` is held by
        ``owner``.
    """

    def _beatable(lease) -> bool:
        if lease.get("lane") != lane:
            return False
        if loop_ts and str(lease.get("loop_ts") or "") != loop_ts:
            return False
        # HOLDER AUTHENTICATION -- deliberately stricter than release's
        # filter. Over-release only FORGETS a lease (-> STALLED, the safe
        # direction); over-beat REVIVES one. So:
        #   * an empty `owner` can never authenticate a beat;
        #   * a lease with no recorded holder is unattributable, so nobody
        #     may beat it -- it can only age out and be scavenged.
        return bool(owner) and str(lease.get("holder") or "") == owner

    with _Mutex(config, owner):
        beatable = [lease for lease in live_leases(config) if _beatable(lease)]
        if not beatable:
            return False
        target = beatable[-1]  # last in the live set -> newest on the lane

        # Coalescing: `fold_since` keeps only the newest beat, so re-stamping
        # an already-fresh lease changes no verdict. A missing or unparseable
        # stamp yields a None age and therefore never elides (writing the
        # beat is the safe direction).
        if coalesce_within_s > 0.0:
            freshest = target.get("heartbeat_at") or target.get("acquired_at")
            beat_age = _stamp_age_seconds(freshest)
            if beat_age is not None and beat_age < coalesce_within_s:
                return True  # live and recently beaten -- nothing written

        record = lane_journal.heartbeat_entry(target, heartbeat_at=_now_iso())
        lane_journal.append(record, _journal_path(config))
        return True
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
@dataclasses.dataclass(frozen=True)
class HaltResult:
    """What a `halt` call produced: a recorded stop DECISION, not a kill.

    The kernel only records the decision and proposes a command; it never
    executes that command (docs/99 §5).
    """

    # Opaque host-supplied identifier, echoed back unchanged.
    handle: str
    # True iff the `OP_HALT` WAL append succeeded -- the only thing the
    # kernel actually did.
    recorded: bool
    # Host-supplied stop command, echoed for a driver/operator to run.
    command: Optional[str] = None
    # Lane of the matched live lease when the handle correlated to one,
    # otherwise the value passed in (forensic correlation only).
    lane: str = ""
    # `loop_ts` of the matched live lease, or the value passed in; same
    # forensic-only role as `lane`.
    loop_ts: str = ""
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def halt(
    config: SubstrateConfig,
    *,
    handle: str,
    lane: str = "",
    owner: str = "",
    loop_ts: str = "",
    reason: str = "",
    run_id: str = "",
    command: Optional[str] = None,
) -> HaltResult:
    """Record a STOP DECISION for an in-flight run on the WAL -- nothing more.

    Per docs/99 §5 the kernel records a stop decision and proposes a command,
    but deliberately delivers no signal: doing so would require knowing WHAT
    ``handle`` is (a pid? a container? a remote task?), which is a driver's
    knowledge, not the domain-free kernel's. Concretely this function:

    1. appends an ``OP_HALT`` entry to the lane journal while holding the
       lease mutex, so the optional live-lease correlation read and the
       append are atomic with respect to a concurrent acquire/release. The
       entry carries the opaque ``handle``, the reason, the forensically
       correlated lane/loop_ts/host_id, ``run_id`` and the proposed
       ``command``;
    2. returns a ``HaltResult`` echoing the proposed ``command`` for a driver
       or operator to run.

    It never calls ``os.kill``, ``subprocess`` or any other process API. A
    host that wants the stop enacted writes a driver that consumes the
    ``OP_HALT`` record; the lease itself only ends when that driver appends
    the confirming RELEASE/SCAVENGE.

    Correlation is best-effort. The first live lease that matches the given
    ``lane``/``loop_ts`` -- or, when neither is given, whose recorded pid
    stringifies to ``handle`` -- lends its lane, loop_ts and host_id to the
    entry. With no match the HALT is still recorded against the bare handle.

    Args:
        handle: required opaque identifier, recorded verbatim.
        lane, loop_ts: optional filters for the correlation lookup; also the
            fallback values stamped when no live lease matches.
        owner: used for the mutex identity (falling back to ``handle``) and
            for the default reason ``halt:<owner>``.
        reason: overrides the default reason when non-empty.
        run_id: recorded on the entry when non-empty.
        command: proposed stop command, recorded and echoed back.

    Returns:
        A ``HaltResult``; ``recorded`` is False when the append raised
        ``OSError`` (a failed record never raises out of here).
    """

    def _is_target(lease) -> bool:
        if lane and str(lease.get("lane") or "") != lane:
            return False
        if loop_ts and str(lease.get("loop_ts") or "") != loop_ts:
            return False
        if lane or loop_ts:
            return True
        # No lane/loop_ts supplied: correlate opaque handle == recorded pid.
        # The handle is compared as text purely to fill forensic fields; it
        # is never acted on as a pid, so domain-freedom holds.
        return str(lease.get("pid") or "") == handle

    with _Mutex(config, owner or handle):
        stamped_lane = lane
        stamped_loop_ts = loop_ts
        stamped_host = None
        try:
            hit = next(
                (lease for lease in live_leases(config) if _is_target(lease)),
                None,
            )
            if hit is not None:
                stamped_lane = str(hit.get("lane") or "") or stamped_lane
                stamped_loop_ts = str(hit.get("loop_ts") or "") or stamped_loop_ts
                stamped_host = hit.get("host_id")
        except Exception:
            # Deliberately broad: correlation is optional, and a fold failure
            # must never block the decision record (degrade, don't raise).
            pass

        if reason:
            entry_reason = reason
        elif owner:
            entry_reason = f"halt:{owner}"
        else:
            entry_reason = "halt"

        entry = lane_journal.halt_entry(
            handle,
            reason=entry_reason,
            lane=stamped_lane,
            loop_ts=stamped_loop_ts,
            host_id=stamped_host,
            run_id=run_id or None,
            command=command,
        )
        try:
            lane_journal.append(entry, _journal_path(config))
        except OSError:
            recorded = False
        else:
            recorded = True

        return HaltResult(
            handle=handle,
            recorded=recorded,
            command=command,
            lane=stamped_lane,
            loop_ts=stamped_loop_ts,
        )
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
@dataclasses.dataclass(frozen=True)
class SpawnResult:
    """What a `spawn` call produced: a recorded INTENT to take a lane.

    A SPAWN grants NO lease (the eventual `acquire` does), which is why there
    is no `journaled`/lease field here. It is a forensic intent that the
    dos-top SPAWNING chip folds, not a grant the arbiter admits against.
    """

    # The region the launcher committed to.
    lane: str
    # True iff the `OP_SPAWN` WAL append succeeded -- the only thing `spawn`
    # does.
    recorded: bool
    # Echoed back for the SPAWN->ACQUIRE join.
    loop_ts: str = ""
    # Echoed back for the SPAWN->ACQUIRE join.
    holder: str = ""
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
def spawn(
    config: SubstrateConfig,
    *,
    lane: str,
    owner: str = "",
    loop_ts: str = "",
    run_id: str = "",
    reason: str = "",
) -> SpawnResult:
    """Record an INTENT TO TAKE A LANE on the WAL -- nothing more.

    The acquire-side sibling of ``halt``: where ``acquire`` durably GRANTS a
    lane and ``halt`` records a STOP intent, ``spawn`` records a START intent
    ("a run is coming to this lane"). It is meant to be appended the instant
    a launcher commits to a lane, before the heavy preflight and before the
    durable ACQUIRE, so the launch leaves a trace in the WAL that ``dos top``
    projects from. That closes the SPAWN->ACQUIRE blind window.

    It grants NO lease. ``lane_journal.OP_SPAWN`` is not a state-mutating op,
    so ``replay`` ignores it and the arbiter never admits against it; an
    intent that never acquires strands no phantom hold, and a not-yet-real
    run can never double-book a region. ``dispatch_top`` folds recent SPAWNs
    into the ``SPAWNING`` chip as a separate fold.

    Defaults: an empty ``loop_ts`` becomes the current ISO timestamp; an
    empty ``owner`` makes the holder ``<hostname>:<pid>``; an empty
    ``reason`` becomes ``spawn:<owner>`` (or plain ``spawn``). The host id is
    ``DISPATCH_HOST_ID`` from the environment, falling back to the hostname.

    The append runs under the lane-lease ``_Mutex`` so it serializes with
    concurrent acquire/release/halt appends (journal order = decision order).

    Returns:
        A ``SpawnResult`` echoing lane/loop_ts/holder. ``recorded`` is False
        when the append raised ``OSError``; mirroring ``halt``, a failed
        forensic record must never block a launch, so it does not raise.
    """
    stamp = loop_ts or _now_iso()
    holder = owner or f"{_hostname()}:{os.getpid()}"

    with _Mutex(config, owner or f"spawn:{lane}"):
        if reason:
            entry_reason = reason
        elif owner:
            entry_reason = f"spawn:{owner}"
        else:
            entry_reason = "spawn"

        entry = lane_journal.spawn_entry(
            lane=lane,
            loop_ts=stamp,
            holder=holder,
            host_id=os.environ.get("DISPATCH_HOST_ID") or _hostname(),
            pid=os.getpid(),
            run_id=run_id or None,
            reason=entry_reason,
        )
        try:
            lane_journal.append(entry, _journal_path(config))
        except OSError:
            recorded = False
        else:
            recorded = True

        return SpawnResult(
            lane=lane, recorded=recorded, loop_ts=stamp, holder=holder
        )
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
@dataclasses.dataclass(frozen=True)
class CompactResult:
    """What a `compact_journal` call produced: the WAL's before/after size.

    For the ARBITER a compaction is purely a size operation: `replay` over
    the compacted journal reconstructs a byte-identical live-lease set (the
    differential invariant), so admission decisions are unchanged.

    It is NOT liveness-fold-preserving, though. A CHECKPOINT carries no `ts`
    and is in neither `journal_delta._EVENT_OPS` nor `_HEARTBEAT_OPS`, so a
    mid-flight compaction drops the beat anchor of a still-live run; that run
    reads STALLED to the liveness oracle until its next ACQUIRE/HEARTBEAT
    lands. The drift is always toward less-alive (compaction can never
    fabricate an event or beat), so no false ADVANCING/SPINNING can result --
    but it is the reason `dos journal compact` is meant for a quiet window.
    """

    # Line count of the journal before compaction.
    entries_before: int
    # Line count afterwards: the single CHECKPOINT plus any preserved
    # `_CORRUPT` sentinels.
    entries_after: int
    # How many bytes the file shrank by.
    bytes_reclaimed: int
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
def compact_journal(
    config: SubstrateConfig,
    *,
    owner: str = "journal-compact",
) -> CompactResult:
    """Compact the lane-journal WAL in place, crash-safely, under the lease mutex.

    The WAL is append-only with no auto-rotation, so on a long-lived fleet it
    grows unbounded and every lease op pays O(file) to `read_all`/`replay`/
    `next_seq`. This operator-invoked verb bounds it: fold the whole journal
    to a single OP_CHECKPOINT snapshot of the authoritative live set (the pure
    `lane_journal.compact`) and rewrite the file to that snapshot.

    The correctness rail is `lane_journal.compact`'s differential invariant --
    `replay(compact(E)) == replay(E)` -- so a still-live lease older than any
    cutoff SURVIVES in the snapshot and the kernel can never false-ADMIT a
    colliding tree after a compaction. A `_CORRUPT` sentinel is preserved, and
    `next_seq` stays monotonic via the checkpoint's `seq_watermark`.

    Crash-safety: the new content is written IN FULL to a tmp sibling,
    `fsync`'d, then `_filelock.atomic_replace`'d over the journal, so a crash
    leaves either the full old WAL or the full new one, never a torn rewrite.
    `os.write` may write fewer bytes than requested, so the write is looped
    until the whole payload is on disk; swapping in a short tmp file would
    silently drop live leases. If the write phase fails, the partial tmp file
    is removed (best-effort) and the error propagates with the journal
    untouched.

    The whole read-fold-rewrite runs under the same `_Mutex` that
    acquire/release/heartbeat take, so no concurrent lease append races the
    rewrite. (NOTE: `drivers/supervisor` serializes its SCAVENGE appends under
    its OWN `.supervisor.lock`, not this mutex -- so compaction does not
    serialize against a concurrent supervisor reap; run compaction in a quiet
    window. Unifying the two write-locks is a noted hardening follow-on.)

    Args:
        config: substrate configuration locating the journal.
        owner: identity used when taking the lease mutex.

    Returns:
        A `CompactResult` with the entry counts before/after and the bytes
        reclaimed (never negative). An empty journal is left untouched and
        reports all zeros.

    Raises:
        OSError: if the tmp file cannot be created, fully written or synced
            (whatever `_filelock.atomic_replace` raises also propagates).
    """
    jp = _journal_path(config)
    with _Mutex(config, owner):
        entries = lane_journal.read_all(jp)
        before = len(entries)
        try:
            size_before = jp.stat().st_size
        except OSError:
            size_before = 0
        if before == 0:
            # Nothing to compact -- short-circuit so an empty journal stays
            # empty (a bare `compact` would otherwise materialize a spurious
            # 1-line CHECKPOINT of an empty live set, "growing" a 0-byte file).
            return CompactResult(entries_before=0, entries_after=0,
                                 bytes_reclaimed=0)
        compacted = lane_journal.compact(entries)
        payload = "".join(
            json.dumps(e, sort_keys=True, default=str, ensure_ascii=False) + "\n"
            for e in compacted
        ).encode("utf-8")
        jp.parent.mkdir(parents=True, exist_ok=True)
        tmp = jp.with_suffix(jp.suffix + ".compact.tmp")
        fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o644)
        try:
            try:
                # os.write returns the number of bytes actually written and
                # may stop short; keep writing until the payload is exhausted.
                remaining = memoryview(payload)
                while len(remaining) > 0:
                    written = os.write(fd, remaining)
                    if written <= 0:
                        raise OSError(
                            "short write while compacting the lane journal"
                        )
                    remaining = remaining[written:]
                os.fsync(fd)
            finally:
                os.close(fd)
        except OSError:
            # Never leave a partial snapshot lying next to the journal.
            try:
                os.unlink(str(tmp))
            except OSError:
                pass
            raise
        _filelock.atomic_replace(tmp, jp)
        try:
            size_after = jp.stat().st_size
        except OSError:
            size_after = 0
        return CompactResult(
            entries_before=before,
            entries_after=len(compacted),
            bytes_reclaimed=max(0, size_before - size_after),
        )
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
def _hostname() -> str:
    """Return this machine's hostname, or ``"unknown"`` if it cannot be read.

    Both the ``socket`` import and the lookup are guarded, so this helper
    never raises an ``Exception`` to its caller.
    """
    name = "unknown"
    try:
        import socket

        name = socket.gethostname()
    except Exception:
        # Any failure (import or lookup) falls back to the placeholder.
        pass
    return name
|