dos-kernel 0.22.0__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
@@ -0,0 +1,363 @@
1
+ """dos.drivers.watchdog — the push-model supervisor that polls `liveness()`.
2
+
3
+ `liveness.classify` (docs/82) mints the in-flight verdict — is THIS run ADVANCING,
4
+ SPINNING, or STALLED? — but it is a *pull* verdict: something has to ask it. The
5
+ self-stop seam (`loop_decide.StopReason.SPINNING`) lets a loop ask it about itself;
6
+ the stop-recorder (`lane_lease.halt`) lets a verb record a stop decision. What was
7
+ still missing is the actor that asks the question **on a timer, from outside the
8
+ watched run's own process** — and acts on the answer. This driver is that actor.
9
+
10
+ It directly answers the most expensive incident in the historical record
11
+ (docs/99 §2.1): eight jobs hung ~4.4 h each because the wall-clock budget fired
12
+ 2.2 h late — the orchestrator loop stalled inside a long poll, so the timer meant
13
+ to kill the stuck run never got a turn. The fix is structural: a poller in its OWN
14
+ process, whose clock keeps ticking no matter what the watched runs do. That is why
15
+ the watchdog is a separate long-lived process, not a callback the dispatch loop
16
+ runs on itself (the thing that already failed).
17
+
18
+ ## Why this is a DIFFERENT driver from `drivers/supervisor.py`
19
+
20
+ Two axes, deliberately kept apart (docs/101 §1):
21
+
22
+ * `supervisor.py` — the POPULATION axis. `supervise()` → is the roster full?
23
+ SPAWN free lanes / REAP STALLED *leases* / FLAG spinners. It frees a lane so a
24
+ replacement can take it; it does NOTHING about a spinner beyond FLAG, because a
25
+ spinner still holds a live lease and the supervisor has no standing to halt a
26
+ peer's control flow (docs/99 §3.1).
27
+ * `watchdog.py` (THIS) — the PER-RUN-HEALTH axis. `liveness.classify` → is THIS
28
+ run moving? A SPINNING / hung-past-budget run → record an `OP_HALT` and propose
29
+ the stop command. The operator delegated the watchdog to watch a NAMED set of
30
+ runs, so (unlike the supervisor over a peer) it has standing to record the stop
31
+ decision and propose the kill.
32
+
33
+ The §2.1 incident is a per-run-health failure, not a population one: the roster was
34
+ *full* (eight workers alive); a supervisor would have reported AT_TARGET. Each of
35
+ those runs was hung, and the timer was asleep inside their loop. The watchdog,
36
+ independent by construction, is immune to that.
37
+
38
+ ## The actuation boundary holds (docs/99 §3, §5)
39
+
40
+ "Auto-halt-record" means the watchdog itself calls `lane_lease.halt` to RECORD the
41
+ `OP_HALT` and EMIT the host-supplied stop command — so the proposed stop is one
42
+ paste away (in the journal + the `dos decisions` queue). It does NOT mean the
43
+ watchdog kills anything: `lane_lease.halt` records intent and proposes a command
44
+ and NEVER signals a process, because *delivering* the signal requires knowing what
45
+ the opaque `handle` IS (a pid? a container? a remote task?), and that domain
46
+ knowledge is a driver's, never a domain-free kernel's. The watchdog (a driver)
47
+ *could* in principle carry that knowledge — but it deliberately does not: it stops
48
+ at the propose line, exactly where the supervisor stops (journal the decision, let
49
+ a human/driver enact). Enacting the kill is a separate, even-more-host-specific
50
+ act left to the operator's paste or a further driver that consumes `OP_HALT`. This
51
+ driver NEVER calls `os.kill`/`subprocess`/`TaskStop` (pinned by
52
+ `test_watchdog_proposes_does_not_signal`).
53
+
54
+ ## Structure (testable without real I/O — the supervisor-driver idiom)
55
+
56
+ `assess_run(cfg, tracked, *, now_ms)` is NEAR-PURE: it gathers this run's evidence
57
+ by calling the SAME boundary helpers `cmd_liveness` uses (`cli._git_delta_count`,
58
+ `cli._journal_delta`, `run_id.ts_ms_of`) and returns `liveness.classify(...)` — NO
59
+ effects, and no re-implementation of the git/journal rungs (the LVN-1b no-drift
60
+ rule: the watchdog's verdict can never diverge from `dos liveness`). `tick(...)`
61
+ calls `assess_run` per run, applies the verdict→action map, and records an
62
+ `OP_HALT` (via the injectable `halt`) for each run that warrants one. `run(...)`
63
+ loops `tick` + sleep on a long cadence. Tests drive `assess_run`/`tick` with the
64
+ evidence helpers and `halt` monkeypatched, so no real git, no real journal, no
65
+ real `claude`, and `os.kill`/`Popen` can be made to raise to prove they are never
66
+ called.
67
+ """
68
+
69
+ from __future__ import annotations
70
+
71
+ import subprocess # noqa: F401 — imported so a test can monkeypatch it to prove we never Popen
72
+ import time
73
+ from dataclasses import dataclass, field
74
+ from typing import Optional
75
+
76
+ from dos import config as _config
77
+ from dos import lane_lease, liveness, run_id
78
+
79
# Seconds between two ticks. Deliberately long: the watchdog wakes rarely
# instead of busy-polling.
DEFAULT_INTERVAL_S = 5 * 60.0
# Minimum gap, in milliseconds, between two halt proposals for the same run.
# A SPINNING run keeps classifying as SPINNING tick after tick; without this
# window every tick would record another OP_HALT for it. `tick` forgets a run
# once it is seen ADVANCING again, so a later re-spin is proposed afresh
# regardless of this window.
DEFAULT_REPROPOSE_MS = 30 * 60 * 1000  # 30 min
86
+
87
+
88
@dataclass(frozen=True)
class TrackedRun:
    """Immutable description of a single run the watchdog keeps an eye on.

    It bundles what the liveness assessment reads with the opaque stop
    information that is forwarded on a halt proposal.

    Attributes:
        run_id: CID token of the run. Its start timestamp is decoded from the
            token; a run whose token does not decode is skipped.
        start_sha: Git SHA the run started at. An empty string keeps the
            commit rung silent (0 commits), leaving the journal rung and the
            run's age to decide.
        lane: Lane half of the lease identity. Both ``lane`` and ``loop_ts``
            must be non-empty for journal events to be attributed to this
            run; both are also forwarded on the halt record.
        loop_ts: Loop-timestamp half of the lease identity (see ``lane``).
        handle: Opaque stop handle (pid string, container id, task token...).
            Passed through untouched; discovery fills it from the lease pid.
        budget_ms: Wall-clock budget in milliseconds. A STALLED run younger
            than the budget is not halted yet; ``None`` means no budget, so
            every STALLED run counts as past budget.
        stop_command: Host-supplied stop command echoed on the halt proposal.
            An empty string records the proposal without a command.
    """

    run_id: str
    start_sha: str = ""
    lane: str = ""
    loop_ts: str = ""
    handle: str = ""
    budget_ms: Optional[int] = None
    stop_command: str = ""
120
+
121
+
122
@dataclass
class WatchActions:
    """Per-tick audit record: which run-ids landed in which outcome bucket.

    Every bucket starts as its own empty list, so instances never share state.
    """

    # Run-ids for which the halt call completed and an OP_HALT was recorded.
    proposed_halts: list[str] = field(default_factory=list)
    # Run-ids whose verdict was ADVANCING.
    advancing: list[str] = field(default_factory=list)
    # Run-ids whose verdict was SPINNING.
    spinning: list[str] = field(default_factory=list)
    # Run-ids that are STALLED but still younger than their budget.
    stalled_within_budget: list[str] = field(default_factory=list)
    # Run-ids that could not be classified (e.g. an invalid run-id).
    skipped: list[str] = field(default_factory=list)
131
+
132
+
133
def assess_run(cfg, tracked: TrackedRun, *, now_ms: int) -> Optional[liveness.LivenessVerdict]:
    """Return the liveness verdict for one tracked run, or None if it cannot be timed.

    The evidence is collected through the CLI's own helpers
    (`cli._git_delta_count`, `cli._journal_delta`) and `run_id.ts_ms_of`, then
    handed to `liveness.classify`; nothing is recorded or mutated here.

    Args:
        cfg: Substrate configuration forwarded to the evidence helpers.
        tracked: The run to assess.
        now_ms: Current time in milliseconds, supplied by the caller.

    Returns:
        Whatever `liveness.classify` returns for the gathered evidence, or
        None when `tracked.run_id` does not decode to a start timestamp.
    """
    # Imported at call time, as a driver reaching into the CLI layer.
    from dos import cli

    run_started_ms = run_id.ts_ms_of(tracked.run_id)
    if run_started_ms is None:
        return None

    # Commit rung: only meaningful with a start SHA. Without one the rung is
    # silent (0) and the journal rung plus the run's age carry the verdict.
    if tracked.start_sha:
        commit_count = cli._git_delta_count(tracked.start_sha, cfg)
    else:
        commit_count = 0

    # Journal rung: attributed to this run only when BOTH halves of the lease
    # identity are present; otherwise no lease key is passed.
    lease_key = None
    if tracked.lane and tracked.loop_ts:
        lease_key = (tracked.loop_ts, tracked.lane)
    journal = cli._journal_delta(
        cfg,
        started_ms=run_started_ms,
        now_ms=now_ms,
        lease_key=lease_key,
    )

    return liveness.classify(
        liveness.ProgressEvidence(
            run_started_ms=run_started_ms,
            now_ms=now_ms,
            commits_since_start=commit_count,
            journal_events_since=journal.events_since_start,
            last_heartbeat_age_ms=journal.newest_heartbeat_age_ms,
            tokens_spent_since=None,
        )
    )
175
+
176
+
177
def _run_age_ms(tracked: TrackedRun, now_ms: int) -> Optional[int]:
    """Return the run's age in milliseconds, never negative.

    The start time is decoded from `tracked.run_id`; None is returned when the
    run-id does not decode.
    """
    run_started_ms = run_id.ts_ms_of(tracked.run_id)
    if run_started_ms is None:
        return None
    elapsed = now_ms - run_started_ms
    # A start time later than `now_ms` is clamped to an age of zero.
    return elapsed if elapsed > 0 else 0
183
+
184
+
185
def _warrants_halt(tracked: TrackedRun, verdict: liveness.Liveness, *, now_ms: int) -> bool:
    """Decide whether this verdict calls for an OP_HALT on this tick.

    The verdict-to-action map:

        SPINNING                          -> True
        STALLED, no budget declared       -> True
        STALLED, age >= budget_ms         -> True
        STALLED, age <  budget_ms         -> False (too young to halt)
        anything else (incl. ADVANCING)   -> False
    """
    if verdict == liveness.Liveness.SPINNING:
        return True
    if verdict != liveness.Liveness.STALLED:
        # ADVANCING, or a verdict this map does not know: never auto-halt.
        return False

    budget = tracked.budget_ms
    if budget is None:
        # No declared budget: any STALLED run counts as past budget.
        return True

    age = _run_age_ms(tracked, now_ms)
    # An age that cannot be computed fails toward halting.
    return age is None or age >= budget
204
+
205
+
206
def tick(
    cfg,
    tracked_runs,
    *,
    now_ms: int,
    proposed: dict,
    repropose_ms: int = DEFAULT_REPROPOSE_MS,
    halt=lane_lease.halt,
) -> "tuple[dict, WatchActions]":
    """Run one watchdog pass: assess every run and record a halt where warranted.

    Args:
        cfg: Substrate configuration, forwarded to `assess_run` and `halt`.
        tracked_runs: Iterable of `TrackedRun` to assess.
        now_ms: Current time in milliseconds.
        proposed: Mutable map ``run_id -> ms of the last recorded proposal``.
            Mutated in place: a successful proposal stores `now_ms`; a run
            seen ADVANCING is removed so a later re-spin is proposed afresh.
        repropose_ms: Minimum gap between two proposals for the same run.
        halt: Callable that records the halt. Injectable so callers (tests in
            particular) can substitute their own recorder.

    Returns:
        ``(verdicts, actions)`` where `verdicts` maps each classifiable
        run-id to the object `assess_run` returned, and `actions` tallies
        what this tick did.

    A failure inside `halt` is non-fatal: it is logged as a warning (with the
    traceback) and the run is left out of `proposed`, so the next tick retries.
    """
    # Function-scope import: keeps this block free of new module-level imports.
    import logging

    log = logging.getLogger(__name__)
    actions = WatchActions()
    verdicts: dict = {}

    for tracked in tracked_runs:
        verdict = assess_run(cfg, tracked, now_ms=now_ms)
        if verdict is None:
            # Not a timeable run (invalid run-id): tally and move on.
            actions.skipped.append(tracked.run_id)
            continue
        verdicts[tracked.run_id] = verdict
        v = verdict.verdict

        # 1. Tally the verdict; a recovered (ADVANCING) run loses its proposal
        #    memory so that a later re-spin can be proposed again.
        if v == liveness.Liveness.ADVANCING:
            actions.advancing.append(tracked.run_id)
            proposed.pop(tracked.run_id, None)
            continue
        if v == liveness.Liveness.SPINNING:
            actions.spinning.append(tracked.run_id)

        # 2. Warrant decision. A STALLED run still within its budget is
        #    tallied separately; any other non-warranting verdict just skips.
        if not _warrants_halt(tracked, v, now_ms=now_ms):
            if v == liveness.Liveness.STALLED:
                actions.stalled_within_budget.append(tracked.run_id)
            continue

        # 3. Idempotence: at most one proposal per run per repropose window.
        last = proposed.get(tracked.run_id)
        if last is not None and (now_ms - last) < repropose_ms:
            continue

        detail = (
            "no forward delta"
            if v == liveness.Liveness.SPINNING
            else "hung past budget"
        )
        reason = f"watchdog: {v.value} ({detail})"
        try:
            halt(
                cfg,
                handle=tracked.handle,
                lane=tracked.lane,
                loop_ts=tracked.loop_ts,
                owner="watchdog",
                reason=reason,
                run_id=tracked.run_id,
                command=tracked.stop_command or None,
            )
        except Exception:  # noqa: BLE001 — a failed record is non-fatal; retry next tick
            # Previously swallowed silently, which made a persistently failing
            # recorder invisible. Surface it, but keep the watchdog ticking.
            log.warning(
                "watchdog: failed to record OP_HALT for run %s; will retry next tick",
                tracked.run_id,
                exc_info=True,
            )
        else:
            # Only a successful record counts as a proposal.
            proposed[tracked.run_id] = now_ms
            actions.proposed_halts.append(tracked.run_id)

    return verdicts, actions
283
+
284
+
285
def discover_tracked_runs(cfg, *, budget_ms: Optional[int] = None) -> "list[TrackedRun]":
    """Build the tracked-run list from the currently live leases.

    Each lease returned by `lane_lease.live_leases(cfg)` becomes a `TrackedRun`
    carrying the lease's lane, loop_ts and pid (as the handle). Discovered runs
    have no start SHA and no stop command. A lease is kept only if it offers an
    identity that `run_id.ts_ms_of` can decode: its own ``run_id`` first, else
    its ``loop_ts``; leases with neither are left out.

    Args:
        cfg: Substrate configuration passed to `lane_lease.live_leases`.
        budget_ms: Budget applied to every discovered run (None = no budget).

    Returns:
        The discovered runs; an empty list if reading the leases raises.
    """
    try:
        live = lane_lease.live_leases(cfg)
    except Exception:  # noqa: BLE001 — a bad journal yields no discovered runs
        return []

    discovered: list[TrackedRun] = []
    for lease in live:
        loop_ts = str(lease.get("loop_ts") or "")

        # Prefer the lease's explicit run_id; fall back to loop_ts only when
        # that decodes as a run-id, otherwise there is nothing to time.
        rid = str(lease.get("run_id") or "")
        if run_id.ts_ms_of(rid) is None:
            if run_id.ts_ms_of(loop_ts) is not None:
                rid = loop_ts
            else:
                rid = ""
        if not rid:
            continue

        discovered.append(
            TrackedRun(
                run_id=rid,
                start_sha="",  # a lease records no start SHA
                lane=str(lease.get("lane") or ""),
                loop_ts=loop_ts,
                handle=str(lease.get("pid") or ""),
                budget_ms=budget_ms,
                stop_command="",
            )
        )
    return discovered
323
+
324
+
325
def run(
    config=None,
    *,
    tracked_runs,
    interval: float = DEFAULT_INTERVAL_S,
    max_ticks: Optional[int] = None,
    repropose_ms: int = DEFAULT_REPROPOSE_MS,
    clock_ms=None,
    sleep=time.sleep,
    halt=lane_lease.halt,
) -> int:
    """Tick the watchdog repeatedly until `max_ticks` is reached or Ctrl-C.

    Args:
        config: Substrate configuration; `_config.active()` is used when None.
        tracked_runs: Iterable of runs to watch. It is copied into a list once
            at start-up, so the watched set is fixed for the life of the call.
        interval: Value handed to `sleep` between two ticks.
        max_ticks: Stop after this many ticks; None runs until interrupted.
        repropose_ms: Forwarded to `tick`.
        clock_ms: Zero-argument callable returning the current time in ms;
            defaults to the wall clock (`time.time()` in milliseconds).
        sleep: Callable used to wait between ticks.
        halt: Halt recorder forwarded to `tick`.

    Returns:
        Always 0, whether the loop ran out of ticks or was interrupted.
    """
    cfg = _config.active() if config is None else config
    watched = list(tracked_runs)
    proposed: dict = {}  # proposal memory shared across all ticks of this call

    if clock_ms is None:
        def read_clock():
            return int(time.time() * 1000)
    else:
        read_clock = clock_ms

    completed = 0
    try:
        while max_ticks is None or completed < max_ticks:
            tick(
                cfg,
                watched,
                now_ms=read_clock(),
                proposed=proposed,
                repropose_ms=repropose_ms,
                halt=halt,
            )
            completed += 1
            # Leave right after the final tick rather than sleeping once more.
            if max_ticks is not None and completed >= max_ticks:
                break
            sleep(interval)
    except KeyboardInterrupt:
        # An operator interrupt is a clean stop.
        pass
    return 0
@@ -0,0 +1,160 @@
1
+ """dos.drivers.workshop — a generic, self-contained reference host policy pack.
2
+
3
+ This is the **copy-me template** for adding a new host to DOS. It is a driver
4
+ (layer 4): the *policy* a particular host workload supplies on top of the kernel
5
+ *mechanism*. Where `dos.drivers.job` (the kernel's first userland app) delegates
6
+ its taxonomy back to `dos.config` for backward-compatibility, `workshop` declares
7
+ everything it needs **inline, in this one file** — so a new host can read a single
8
+ module and see the whole shape of "what a driver is."
9
+
10
+ The "workshop" frame: a shop where two benches build distinct parts of one product
11
+ *concurrently*, and a single release bench *exclusively* ships it. It names no
12
+ company, no challenge, no real product — it is a deliberately generic stand-in
13
+ whose lanes are evocative enough to host real-looking trees.
14
+
15
+ A driver is two things, the same two `job` has:
16
+
17
+ * a `LaneTaxonomy` constant (`WORKSHOP_LANE_TAXONOMY`) — the concurrency policy
18
+ as pure data, and
19
+ * a `<name>_config(workspace)` factory (`workshop_config`) — binds that taxonomy
20
+ to a workspace root and returns a `SubstrateConfig`.
21
+
22
+ The factory name matches the module stem (`workshop` → `workshop_config`), which
23
+ is the **by-convention contract** the generic `dos --driver <name>` CLI loader
24
+ resolves (`dos.drivers.<name>.<name>_config`), exactly as `job` → `job_config`.
25
+ Adding a host = a module like this one; the kernel/CLI never learns its name.
26
+
27
+ ## The lane taxonomy — why these lanes, and the four things it teaches
28
+
29
+ Two **concurrent** cluster lanes, `frontend` and `backend`, plus an **exclusive**
30
+ `release` lane and the catch-all exclusive `global` (the same escape hatch the
31
+ generic `default_config` and `job_config` carry — keeping the taxonomy a clean
32
+ superset of the default).
33
+
34
+ 1. **Concurrent + tree-disjoint.** `frontend` (`app/`, `web/`, `ui/`) and
35
+ `backend` (`service/`, `api/`, `worker/`) touch provably disjoint file trees,
36
+ so the arbiter (`dos.arbiter` + `dos.lane_overlap`) admits a `backend` request
37
+ *alongside* a live `frontend` lease — two build agents run at once. No prefix of
38
+ one tree is a prefix of the other, which is the whole disjointness rule.
39
+
40
+ 2. **The docs-prefix distinction trick.** Both clusters also own a doc tree under
41
+ the SAME `docs/` directory, kept disjoint by FILENAME PREFIX: `frontend` owns
42
+ `docs/UI-*`, `backend` owns `docs/SVC-*`. `dos._tree.norm_tree_prefix` truncates
43
+ a glob at its first `*` but keeps the literal before it — so `docs/UI-*` →
44
+ `docs/UI-` and `docs/SVC-*` → `docs/SVC-`, which do NOT collide (neither
45
+ `startswith` the other). A bare `docs/` would normalize to `docs/` and collide,
46
+ defeating concurrency — so this is the load-bearing teaching point: two lanes can
47
+ share a parent directory and still run concurrently if their globs discriminate.
48
+
49
+ 3. **Exclusive `release`.** While `release` is held, every other request refuses;
50
+ a deploy / version-cut never races a build. NOTE the honesty of its tree:
51
+ `**/VERSION` normalizes to the *universal* (empty) prefix, so `release`'s blast
52
+ radius really is the whole repo — which is exactly WHY it must run alone. An
53
+ exclusive lane is admitted/refused on liveness (is another lease live?), never on
54
+ tree-disjointness, so this whole-repo glob is correct, not a bug. (One consequence
55
+ worth knowing: because `**/VERSION` collides with the kernel's own source files,
56
+ a `release` request arbitrated through the workspace-blind PURE path would trip
57
+ the SELF_MODIFY guard; the CLI's `dos arbitrate` scopes the guard to files that
58
+ actually exist under the served workspace, so in a foreign repo `release` admits.)
59
+
60
+ 4. **`--lane` keyword aliases.** A request can say `--lane ui` / `--lane api` /
61
+ `--lane ship` and reach the canonical lane; `aliases` routes keyword → named lane.
62
+
63
+ The lane trees are the discriminating *path prefixes* the kernel normalizes a glob
64
+ to (`dos._tree.norm_tree_prefix`), so `docs/UI-` and `docs/SVC-` stay distinct even
65
+ though both live under `docs/`.
66
+ """
67
+
68
+ from __future__ import annotations
69
+
70
+ from pathlib import Path
71
+
72
+ from dos.config import (
73
+ LaneTaxonomy,
74
+ PathLayout,
75
+ SubstrateConfig,
76
+ gather_workspace_facts,
77
+ resolve_workspace_root,
78
+ )
79
+
80
+ # The workshop's concurrency policy, as data. `frontend` ∩ `backend` is provably
81
+ # tree-disjoint (`app/` vs `service/`; `docs/UI-` vs `docs/SVC-`), so the two build
82
+ # agents run concurrently; `release`/`global` are exclusive so a deploy/version-cut
83
+ # runs alone.
84
WORKSHOP_LANE_TAXONOMY = LaneTaxonomy(
    concurrent=("frontend", "backend"),
    exclusive=("release", "global"),
    autopick=("frontend", "backend"),
    trees={
        # UI bench: its source trees plus its own docs, told apart by the
        # `docs/UI-` filename prefix.
        "frontend": ("app/**/*", "web/**/*", "ui/**/*", "docs/UI-*"),
        # Service bench: API + workers plus its own docs under `docs/SVC-`.
        "backend": ("service/**/*", "api/**/*", "worker/**/*", "docs/SVC-*"),
        # Deploy / version-cut bench. `**/VERSION` spans the whole repo, which
        # is the reason this lane is declared exclusive.
        "release": (
            "deploy/**/*",
            ".github/workflows/**/*",
            "docs/REL-*",
            "**/VERSION",
        ),
        # Catch-all exclusive lane, mirroring the kernel default's escape hatch.
        "global": ("**/*",),
    },
    aliases={
        # `--lane <keyword>` routing: each keyword maps to its canonical lane.
        # -> frontend
        "ui": "frontend",
        "web": "frontend",
        "frontend": "frontend",
        # -> backend
        "svc": "backend",
        "api": "backend",
        "service": "backend",
        "backend": "backend",
        # -> release
        "ship": "release",
        "deploy": "release",
        "release": "release",
    },
)
130
+
131
+
132
def workshop_config(workspace: Path | str | None = None) -> SubstrateConfig:
    """Build the workshop reference configuration for ``workspace``.

    The workspace root is resolved with `resolve_workspace_root`, then three
    pieces are bound together into a `SubstrateConfig`:

    * ``lanes`` — this module's `WORKSHOP_LANE_TAXONOMY`;
    * ``paths`` — the default layout for that root (`PathLayout.for_root`);
    * ``workspace`` — the facts gathered for that root
      (`gather_workspace_facts`).

    The workspace facts are gathered on purpose: they are what lets the
    SELF_MODIFY guard be scoped to the served workspace, so a driver factory
    should not omit them.

    Args:
        workspace: Explicit workspace location, or None to let
            `resolve_workspace_root` pick one.

    Returns:
        The assembled `SubstrateConfig`.
    """
    workspace_root = resolve_workspace_root(workspace)
    layout = PathLayout.for_root(workspace_root)
    facts = gather_workspace_facts(workspace_root)
    return SubstrateConfig(
        lanes=WORKSHOP_LANE_TAXONOMY,
        paths=layout,
        workspace=facts,
    )
158
+
159
+
160
+ __all__ = ["WORKSHOP_LANE_TAXONOMY", "workshop_config"]