dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/home.py ADDED
@@ -0,0 +1,934 @@
1
+ """The state-home — DOS's per-project `.dos/` scaffolding and the machine-local
2
+ projection store (docs/75_state-home-plan.md).
3
+
4
+ DOS was lifted out of the reference userland app and inherited its body-plan:
5
+ the generic default scattered DOS's own emissions across the served repo's
6
+ `docs/` tree. This module
7
+ is the generic default's *own* body — a per-project **`.dos/`** home (auto-created
8
+ on the first write, gitignored-by-default) plus a machine-local **DOS_HOME**
9
+ (`~/.dos`) holding a rebuildable projection over every workspace DOS has served.
10
+
11
+ Two hard properties this module exists to guarantee (both pinned by tests):
12
+
13
+ * **Read-only syscalls write nothing.** Nothing here runs on a `verify` / `man`
14
+ / `doctor` / `decisions` / `judge` / `journal-read` path. `ensure_project_home`
15
+ is invoked ONLY by the CLI's persisting handlers (`dos lease`,
16
+ `dos arbitrate --force`-on-capture). So `dos verify` in a stranger's repo
17
+ creates no `.dos/`, no `~/.dos` row.
18
+ * **The central store is a projection, never a source of truth.** Per-project
19
+ `.dos/project.json` is authoritative; `~/.dos/{projects/index.jsonl,
20
+ decisions.jsonl}` are rebuildable digests that `dos reindex` regenerates by
21
+ walking the `.dos/` dirs. A corrupt or deleted central index is never a
22
+ data-loss event.
23
+
24
+ Layering (CLAUDE.md): this is layer-1 kernel — it imports only `dos.config` and
25
+ `dos._filelock` (a *downward* edge — the shared lock primitive that
26
+ `archive_lock` also uses — so the graph stays a DAG) plus stdlib. No kernel module imports
27
+ `home`; only the CLI (layer 3) wires it in. It names no host.
28
+
29
+ Determinism (Law 5): `project_id` is a pure function of the resolved path (no
30
+ clock, no randomness). The two genuinely time-sourced fields (`created_at`,
31
+ `ts_ms`) are *event* stamps and take an injectable `clock=` for reproducible
32
+ tests. Central-store writes reuse `lane_journal`'s fsync/torn-tail discipline AND
33
+ take a real `O_CREAT|O_EXCL` cross-process lock — `O_APPEND` alone is not atomic
34
+ on win32 (the platform), and unlike `lane_journal` the central store has no
35
+ surrounding `_StateFileLock`.
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import datetime as dt
41
+ import hashlib
42
+ import json
43
+ import os
44
+ import sys
45
+ import tempfile
46
+ import time
47
+ from contextlib import contextmanager
48
+ from pathlib import Path
49
+ from typing import Callable, Iterator
50
+
51
+ from dos import _filelock
52
+ from dos import config as _config
53
+
54
+ # Re-derivable identity-card / index schema version. Bumped only on a
55
+ # breaking shape change; readers tolerate older rows (best-effort projection).
56
+ SCHEMA = 1
57
+
58
+ # The shipped `.dos/.gitignore` — a self-ignoring directory, so a host repo needs
59
+ # zero `.gitignore` edits of its own. `*` ignores everything under `.dos/` from
60
+ # the host repo's view; `!.gitignore` keeps this marker visible.
61
+ _DOT_DOS_GITIGNORE = """\
62
+ # DOS per-project state — re-derivable emissions (runs, leases, verdicts,
63
+ # lane journal, soak index). DOS auto-created this directory and ignores its own
64
+ # contents so they never enter your repo's history. Safe to delete; DOS rebuilds
65
+ # with `dos reindex`. See dos/CLAUDE.md.
66
+ *
67
+ !.gitignore
68
+ """
69
+
70
+ _COURTESY = (
71
+ "dos: created .dos/ for this workspace ({dot_dos}) — gitignored DOS state; "
72
+ "`dos reindex` rebuilds central indices"
73
+ )
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # Time — event stamps only (injectable for deterministic tests).
78
+ # ---------------------------------------------------------------------------
79
+
80
+
81
def _now_iso(clock: Callable[[], int] | None = None) -> str:
    """Render an event timestamp as a second-resolution UTC string.

    ``clock`` is an optional zero-argument callable returning epoch
    milliseconds; when it is omitted the wall clock (``time.time()``) is
    sampled instead. The result has the form ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    if clock is None:
        epoch_ms = int(time.time() * 1000)
    else:
        epoch_ms = clock()
    moment = dt.datetime.fromtimestamp(epoch_ms / 1000, dt.timezone.utc)
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")
87
+
88
+
89
def _now_ms(clock: Callable[[], int] | None = None) -> int:
    """Return the event time in epoch milliseconds.

    An injected ``clock`` is called and its value returned as-is; without one,
    ``time.time()`` is sampled and truncated to whole milliseconds.
    """
    if clock is not None:
        return clock()
    return int(time.time() * 1000)
91
+
92
+
93
+ # ---------------------------------------------------------------------------
94
+ # project_id — deterministic, path-derived (no clock, no random). The id is
95
+ # minted ONCE into `.dos/project.json` and read back thereafter; the card is
96
+ # authoritative (so a re-mint under the SAME path view always agrees, and a
97
+ # cross-OS-path-view divergence is a known, out-of-scope limitation — §5.6).
98
+ # ---------------------------------------------------------------------------
99
+
100
+
101
def project_id_for(workspace_root: Path | str) -> str:
    """Derive the project id: the first 16 hex chars of a SHA-256 digest.

    The digest input is the UTF-8 encoding of the workspace path after
    ``Path.resolve()`` and conversion to POSIX form, so the same resolved path
    always produces the same id and no clock or randomness is involved.

    Two different path *views* of one directory (for example a Windows drive
    path and its ``/mnt/...`` WSL view) resolve to different strings and
    therefore get different ids; that is explicitly out of scope here.
    """
    resolved_posix = Path(workspace_root).resolve().as_posix()
    digest = hashlib.sha256(resolved_posix.encode("utf-8"))
    return digest.hexdigest()[:16]
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # The cross-process lock for the central JSONL writes. A tiny re-implementation
116
+ # of archive_lock's O_CREAT|O_EXCL primitive (we do NOT import archive_lock's CLI
117
+ # surface; we only need the bare mutex). Serializes every central-store write —
118
+ # both the hot `ensure_project_home` append and reindex's whole-file rewrite —
119
+ # so an append can't land between reindex's read and its os.replace.
120
+ # ---------------------------------------------------------------------------
121
+
122
# Tuning knobs for `_home_lock` (the DOS_HOME write mutex).
_HOME_LOCK_TTL_S = 60.0  # a central write is sub-second; older = a dead holder.
_HOME_LOCK_RETRIES = 50  # max acquire attempts before proceeding WITHOUT the lock
_HOME_LOCK_INTERVAL_S = 0.05  # seconds slept after an attempt that found a live lock
125
+
126
+
127
@contextmanager
def _home_lock(home_lock: Path) -> Iterator[None]:
    """Hold the DOS_HOME write mutex for the duration of the block.

    Atomic O_CREAT|O_EXCL acquire with bounded retry; steals a lock older than the
    TTL (a crashed holder must not wedge the store forever) through the shared
    value-keyed CAS (`_filelock.steal_stale`) — the SAME primitive archive_lock and
    lane_lease use, so the naive unlink-then-create steal (two stealers both win, a
    lost/duplicated central row) cannot be re-introduced here. Best-effort: if the
    lock can't be acquired within the retry budget we proceed anyway rather than
    fail a telemetry write (the central store is rebuildable; a lost row is
    recoverable by `dos reindex`, a hung CLI is not).

    Args:
        home_lock: Path of the lock file guarding central-store writes.

    Yields:
        None. The body runs whether or not the lock was actually acquired;
        the lock file is only removed on exit when this call acquired it.
    """
    # Owner tag written into the lock body; keyed on the pid only.
    owner = f"home-{os.getpid()}"
    acquired = False
    for _ in range(_HOME_LOCK_RETRIES):
        try:
            # `write_lock` signals "already held" by raising FileExistsError.
            _filelock.write_lock(home_lock, owner)
            acquired = True
            break
        except FileExistsError:
            info = _filelock.read_lock(home_lock)
            if info is None:
                continue  # unlinked between EEXIST and read — retry the create
            age = _filelock_age_seconds(info)
            if age is not None and age >= _HOME_LOCK_TTL_S:
                # Value-keyed steal of the EXACT stale lock observed (not a bare
                # unlink) so two concurrent stealers can't both win and clobber the
                # store's read-modify-append.
                if _filelock.steal_stale(home_lock, owner, info):
                    acquired = True
                    break
                continue  # lost the steal — retry
        # Live (or unparseable-age) lock held by someone else: back off briefly.
        time.sleep(_HOME_LOCK_INTERVAL_S)
    try:
        yield
    finally:
        if acquired:
            # Release only OUR lock — a stealer past the TTL may now hold it.
            info = _filelock.read_lock(home_lock)
            # An unreadable lock (None) or one without an owner field is also
            # unlinked; only a lock naming a DIFFERENT owner is left alone.
            if info is None or info.get("owner") in (owner, None):
                try:
                    home_lock.unlink()
                except FileNotFoundError:
                    pass
172
+
173
+
174
def _filelock_age_seconds(info: dict) -> float | None:
    """Age of a lock in seconds, measured from its ``acquired_at`` stamp.

    ``info`` is the parsed lock body (``None`` or an empty mapping is
    tolerated). The stamp must match ``%Y-%m-%dT%H:%M:%SZ`` and is interpreted
    as UTC; a missing or unparseable stamp yields ``None`` so the caller can
    treat the age as unknown.
    """
    stamp = str((info or {}).get("acquired_at", ""))
    try:
        parsed = dt.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")
    except (ValueError, TypeError):
        return None
    acquired_at = parsed.replace(tzinfo=dt.timezone.utc)
    elapsed = dt.datetime.now(dt.timezone.utc) - acquired_at
    return elapsed.total_seconds()
183
+
184
+
185
+ # ---------------------------------------------------------------------------
186
+ # JSONL append/read — lane_journal's fsync + torn-tail discipline, minus the
187
+ # `seq` (the central store has no replay-order invariant that needs a monotonic
188
+ # seq, and computing max+1 would reintroduce a read-modify-write race). The
189
+ # CALLER holds `_home_lock` around the append; O_APPEND is the belt to that
190
+ # suspenders, the way lane_journal frames it.
191
+ # ---------------------------------------------------------------------------
192
+
193
+
194
def _append_jsonl(path: Path, row: dict) -> dict:
    """Append ``row`` as one JSON line to ``path`` and fsync it.

    The parent directory is created on demand. The row is serialized with
    sorted keys, ``default=str`` for non-JSON values and non-ASCII characters
    left unescaped, then written through a raw ``O_APPEND`` descriptor.
    Locking is the caller's job. Returns ``row`` unchanged.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(row, sort_keys=True, default=str, ensure_ascii=False)
    open_flags = os.O_WRONLY | os.O_APPEND | os.O_CREAT
    handle = os.open(str(path), open_flags, 0o644)
    try:
        os.write(handle, (serialized + "\n").encode("utf-8"))
        # Force the bytes to disk before the descriptor is released.
        os.fsync(handle)
    finally:
        os.close(handle)
    return row
205
+
206
+
207
def read_jsonl(path: Path) -> list[dict]:
    """Load every dict row from a JSONL store, tolerating a torn final line.

    * A missing file, or one that cannot be read (``OSError``), yields ``[]``.
    * Blank lines are ignored; parsed values that are not dicts are dropped.
    * An unparseable line in the very last position is treated as a crash
      mid-append and ends the read silently.
    * An unparseable line anywhere else is reported in place as
      ``{"_CORRUPT": True, "_raw": <text>, "_line": <0-based index>}`` so an
      audit can notice it instead of losing a row from the middle.
    """
    store = Path(path)
    if not store.exists():
        return []
    try:
        text = store.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    physical_lines = text.splitlines()
    final_index = len(physical_lines) - 1
    rows: list[dict] = []
    for lineno, candidate in enumerate(physical_lines):
        stripped = candidate.strip()
        if not stripped:
            continue
        try:
            parsed = json.loads(stripped)
        except json.JSONDecodeError:
            if lineno == final_index:
                break  # torn final line — "didn't happen"
            rows.append({"_CORRUPT": True, "_raw": stripped, "_line": lineno})
        else:
            if isinstance(parsed, dict):
                rows.append(parsed)
    return rows
237
+
238
+
239
def _atomic_write_jsonl(path: Path, rows: list[dict]) -> None:
    """Replace a JSONL store wholesale (the reindex compaction path).

    All rows are serialized with the same settings the appender uses (sorted
    keys, ``default=str``, non-ASCII unescaped), written to a sibling
    ``<name>.tmp`` file and then swapped over ``path`` through
    ``_filelock.atomic_replace``. The caller must hold ``_home_lock``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    staging = path.with_suffix(path.suffix + ".tmp")
    encoded_rows = [
        json.dumps(entry, sort_keys=True, default=str, ensure_ascii=False) + "\n"
        for entry in rows
    ]
    staging.write_text("".join(encoded_rows), encoding="utf-8")
    _filelock.atomic_replace(staging, path)
250
+
251
+
252
+ # ---------------------------------------------------------------------------
253
+ # DOS_HOME creation (the only writer to ~/.dos that creates the tree).
254
+ # ---------------------------------------------------------------------------
255
+
256
+
257
def ensure_dos_home(home: Path | str | None = None) -> _config.HomeLayout:
    """Resolve the machine-local DOS_HOME layout and create its directories.

    The layout comes from ``_config.HomeLayout.for_home(home)``. Both the home
    directory and the parent directory of the projects index are created with
    ``exist_ok=True``, so repeated calls are harmless. This function writes to
    disk, so it belongs on persisting paths only. Returns the layout.
    """
    resolved = _config.HomeLayout.for_home(home)
    for directory in (resolved.home, resolved.projects_index.parent):
        directory.mkdir(parents=True, exist_ok=True)
    return resolved
267
+
268
+
269
+ # ---------------------------------------------------------------------------
270
+ # The per-project identity card.
271
+ # ---------------------------------------------------------------------------
272
+
273
+
274
def _read_card(card_path: Path) -> dict | None:
    """Load the per-project identity card, or ``None`` if there is no usable one.

    Returns the parsed JSON object when ``card_path`` holds a JSON *dict*.
    Returns ``None`` when the file is missing or unreadable, is not valid
    UTF-8, is not valid JSON, or parses to something other than a dict. The
    caller then treats the project as having no card and mints a fresh one.
    """
    try:
        data = json.loads(card_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file (no separate exists()
        # probe, so there is no check-then-read window). ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError: a card containing
        # undecodable bytes must read as "no card", not abort the persist.
        return None
    return data if isinstance(data, dict) else None
282
+
283
+
284
def _write_card(card_path: Path, card: dict) -> None:
    """Persist the identity card via a temp file and an atomic replace.

    The card is serialized as indented JSON with sorted keys into a sibling
    file whose suffix is swapped for ``.json.tmp``, and that file is then moved
    over ``card_path`` with ``_filelock.atomic_replace``. The parent directory
    is created if needed.
    """
    card_path.parent.mkdir(parents=True, exist_ok=True)
    staging = card_path.with_suffix(".json.tmp")
    serialized = json.dumps(card, indent=2, sort_keys=True)
    staging.write_text(serialized, encoding="utf-8")
    _filelock.atomic_replace(staging, card_path)
290
+
291
+
292
def _projects_row(cfg, card: dict, *, clock=None) -> dict:
    """Assemble the central ``projects/index.jsonl`` row for one project.

    Identity fields come from ``card`` and ``cfg.paths``; the three counters
    are derived by inspecting artifacts on disk, and a directory or journal
    that does not exist counts as zero:

    * ``run_count`` — sub-directories of ``cfg.paths.fanout_runs``;
    * ``wedge_count`` — ``.verdict-*.json`` files in ``cfg.paths.next_packets``;
    * ``refusal_count`` — lane-journal rows whose ``op`` equals ``"REFUSE"``.

    ``clock`` is forwarded to ``_now_iso`` for the ``last_indexed`` stamp.
    """
    paths = cfg.paths
    root = paths.root
    runs_dir = paths.fanout_runs  # == chained_runs == dispatch_loops under .dos/
    verdicts_dir = paths.next_packets
    journal = paths.lane_journal

    if runs_dir.exists():
        run_count = len([child for child in runs_dir.iterdir() if child.is_dir()])
    else:
        run_count = 0

    if verdicts_dir.exists():
        wedge_count = len(list(verdicts_dir.glob(".verdict-*.json")))
    else:
        wedge_count = 0

    if journal.exists():
        refusal_count = sum(
            1 for entry in read_jsonl(journal) if entry.get("op") == "REFUSE"
        )
    else:
        refusal_count = 0

    return {
        "schema": SCHEMA,
        "project_id": card["project_id"],
        "root": str(root),
        "dos_dir": str(cfg.paths.dot_dos),
        "label": root.name,
        "status": "active",
        "first_seen": card.get("created_at"),
        "last_indexed": _now_iso(clock),
        "run_count": run_count,
        "wedge_count": wedge_count,
        "refusal_count": refusal_count,
    }
328
+
329
+
330
+ # ---------------------------------------------------------------------------
331
+ # ensure_project_home — the auto-create-on-first-write entry point.
332
+ # ---------------------------------------------------------------------------
333
+
334
+
335
def ensure_project_home(
    cfg,
    *,
    home: Path | str | None = None,
    clock: Callable[[], int] | None = None,
    _stderr=None,
) -> Path:
    """Lazily scaffold `<root>/.dos/` and register the project centrally.

    Idempotent and safe to call on every persisting syscall. Invoked ONLY from
    the CLI's persisting handlers — never from a read-only path (so the
    read-only-writes-nothing property holds). Steps:

    1. atomic `os.mkdir(.dos)` — the process that wins is `first_time` (this is
       the exactly-once signal across concurrent first-persists, NOT a
       check-then-act `.exists()` which races);
    2. write `.dos/.gitignore` if absent (never overwrite a host's edit);
    3. write/update `.dos/project.json` (preserve project_id + created_at);
    4. under the DOS_HOME write-lock, fold the project's row into
       `~/.dos/projects/index.jsonl` (best-effort — a central-store failure is
       logged, never raised: the card is truth, the index is rebuildable);
    5. if `first_time`, emit exactly one stderr courtesy line.

    Args:
        cfg: Config object; this function reads `cfg.paths.dot_dos`,
            `cfg.paths.project_card` and `cfg.paths.root`.
        home: Optional DOS_HOME override passed to `ensure_dos_home`. Passing
            one also disables the temp-root skip in step 4.
        clock: Optional epoch-milliseconds callable used for every timestamp
            written here (injectable for deterministic tests).
        _stderr: Stream for the warning/courtesy lines; defaults to
            `sys.stderr`.

    Returns the `.dos/` path.
    """
    stderr = _stderr if _stderr is not None else sys.stderr
    dot_dos = cfg.paths.dot_dos

    # (1) Atomic first-time detection: only the winner of the create is first.
    first_time = False
    try:
        os.mkdir(dot_dos)
        first_time = True
    except FileExistsError:
        pass
    except OSError:
        # Parent missing (shouldn't happen — root exists) — fall back to makedirs.
        pass
    # Unconditional and idempotent: guarantees the directory exists whichever
    # branch above ran (and creates missing parents in the OSError case).
    dot_dos.mkdir(parents=True, exist_ok=True)

    # (2) Self-ignoring marker (write-if-absent).
    gitignore = dot_dos / ".gitignore"
    if not gitignore.exists():
        gitignore.write_text(_DOT_DOS_GITIGNORE, encoding="utf-8")

    # (3) The identity card — mint id+created_at once, refresh last_seen.
    card_path = cfg.paths.project_card or (dot_dos / "project.json")
    card = _read_card(card_path)
    if card is None:
        # No usable card on disk: mint a fresh identity for this workspace.
        card = {
            "schema": SCHEMA,
            "project_id": project_id_for(cfg.paths.root),
            "root": str(cfg.paths.root),
            "created_at": _now_iso(clock),
        }
    # These two fields are refreshed on every call; everything else is kept.
    card["last_seen"] = _now_iso(clock)
    card["dos_version"] = _dos_version()
    _write_card(card_path, card)

    # (4) Central registration — best-effort, never fatal. SKIPPED for a
    # throwaway workspace that would pollute the REAL machine-global index: a
    # root under the OS temp dir with no explicit home override is a test/tmp
    # workspace whose row would outlive the workspace forever (the index is
    # append-only; the 2026-06-10 audit found 87% of the live index was dead
    # pytest tmp dirs — the same disease docs/139 fixed on the lane journal).
    # An explicit `home=` arg or a `DISPATCH_HOME` env override means the caller
    # ALREADY redirected the central store (the hermetic-test idiom), so a
    # temp-rooted project still registers there. The per-project `.dos/` above
    # is always scaffolded either way — only the machine-global projection skips.
    skip_central = (
        home is None
        and not os.environ.get(_config.ENV_DOS_HOME)
        and _is_temp_root(cfg.paths.root)
    )
    if not skip_central:
        try:
            h = ensure_dos_home(home)
            # The row (including on-disk counts) is built BEFORE taking the
            # lock, so the lock is held only for the two appends below.
            row = _projects_row(cfg, card, clock=clock)
            with _home_lock(h.home_lock):
                _register_root(h.roots_log, str(cfg.paths.root))
                _fold_projects_row(h.projects_index, row)
        except Exception as exc:  # noqa: BLE001 — telemetry must never break a persist
            print(f"dos: warning: could not update central index: {exc}",
                  file=stderr)

    # (5) One-time courtesy line.
    if first_time:
        print(_COURTESY.format(dot_dos=dot_dos), file=stderr)

    return dot_dos
425
+
426
+
427
def _is_temp_root(root: Path | str, tempdir: Path | str | None = None) -> bool:
    """Report whether ``root`` is located inside the OS temporary directory.

    Both paths are resolved and then compared by pure path containment; the
    temp directory is ``tempfile.gettempdir()`` unless ``tempdir`` overrides it
    (used by tests). Any ``OSError`` or ``ValueError`` raised along the way
    answers ``False``, so an odd or unresolvable root is treated as a real
    workspace and is still registered instead of being silently skipped.
    """
    try:
        if tempdir is None:
            anchor = tempfile.gettempdir()
        else:
            anchor = tempdir
        resolved_base = Path(anchor).resolve()
        resolved_root = Path(root).resolve()
        return resolved_root.is_relative_to(resolved_base)
    except (OSError, ValueError):
        return False
441
+
442
+
443
def _register_root(roots_log: Path, root: str) -> None:
    """Record ``root`` in the durable path registry unless it is already there.

    `roots.log` is a plain newline-delimited list of project roots — the one
    central file a PLAIN `reindex` does NOT rewrite, so it survives an index
    deletion and lets reindex rebuild the rich index purely from the live
    `.dos/` cards. (Only `reindex --prune` compacts it — see `_rewrite_roots`.)

    Existing entries are compared after stripping whitespace; a registry that
    cannot be read (``OSError``) is treated as empty, so the root is appended.
    The append goes through a raw ``O_APPEND`` descriptor and is fsynced.
    Caller holds `_home_lock`.
    """
    roots_log.parent.mkdir(parents=True, exist_ok=True)

    known: set[str] = set()
    if roots_log.exists():
        try:
            content = roots_log.read_text(encoding="utf-8")
        except OSError:
            content = ""
        for entry in content.splitlines():
            cleaned = entry.strip()
            if cleaned:
                known.add(cleaned)

    if root in known:
        return

    open_flags = os.O_WRONLY | os.O_APPEND | os.O_CREAT
    handle = os.open(str(roots_log), open_flags, 0o644)
    try:
        os.write(handle, (root + "\n").encode("utf-8"))
        os.fsync(handle)
    finally:
        os.close(handle)
468
+
469
+
470
def _read_roots(roots_log: Path) -> list[str]:
    """Return the registered project roots, one per non-blank line.

    Entries are whitespace-stripped and kept in file order. A missing or
    unreadable registry yields ``[]``.

    The file is decoded with ``errors="replace"`` (the same tolerance
    ``read_jsonl`` applies) so a single corrupt byte degrades only the line it
    sits on, rather than raising ``UnicodeDecodeError`` and hiding every healthy
    root from the caller.
    """
    try:
        text = roots_log.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Covers a missing file too, so no separate exists() probe is needed.
        return []
    return [entry.strip() for entry in text.splitlines() if entry.strip()]
478
+
479
+
480
def _rewrite_roots(roots_log: Path, roots: list[str]) -> None:
    """Replace the whole path registry with ``roots`` — the `--prune` path ONLY.

    Outside a prune, reindex never touches `roots.log` (it is the spine that
    survives an index deletion). The new content, one root per line, is written
    to a sibling ``<name>.tmp`` file and swapped in with
    ``_filelock.atomic_replace``, the same discipline as the index rewrite.
    Caller holds `_home_lock`.
    """
    roots_log.parent.mkdir(parents=True, exist_ok=True)
    staging = roots_log.with_suffix(roots_log.suffix + ".tmp")
    lines = [entry + "\n" for entry in roots]
    staging.write_text("".join(lines), encoding="utf-8")
    _filelock.atomic_replace(staging, roots_log)
490
+
491
+
492
def _fold_projects_row(index_path: Path, row: dict) -> None:
    """Append the row, keeping last-write-wins-by-project_id semantics.

    The index is an append-only log folded on read (so the hot path is a cheap
    append, no rewrite); reindex compacts it. We append unconditionally — the
    reader/`reindex` keeps only the last row per `project_id` — but preserve the
    original `first_seen` by carrying forward the earliest seen for this id.

    Only non-empty *string* stamps take part in the "earliest" comparison. An
    existing row without a stamp falls back to the incoming row's own
    `first_seen`; if that is missing too, the row contributes nothing. The
    card's `created_at` is optional, so `first_seen` can be `None`: without the
    filter `min()` would compare `None` against `None` or a string, raise
    `TypeError`, and block every later index update for the project.

    Args:
        index_path: The central `projects/index.jsonl` file.
        row: The freshly built row; must carry `project_id`. It is not mutated.
    """
    project_id = row["project_id"]
    own_first_seen = row.get("first_seen")
    prior_rows = [
        r for r in read_jsonl(index_path)
        if r.get("project_id") == project_id and not r.get("_CORRUPT")
    ]
    candidates = [
        stamp
        for stamp in ((r.get("first_seen") or own_first_seen) for r in prior_rows)
        if isinstance(stamp, str) and stamp
    ]
    if candidates:
        # The stamps are fixed-width UTC ISO strings, so lexical min is the
        # chronological earliest.
        row = {**row, "first_seen": min(candidates)}
    _append_jsonl(index_path, row)
507
+
508
+
509
def _dos_version() -> str:
    """Return the installed ``dos`` package's ``__version__``, or ``"0"``.

    The fallback ``"0"`` is used both when the package has no ``__version__``
    attribute and when importing or inspecting it raises any exception.
    """
    version = "0"
    try:
        import dos
        version = getattr(dos, "__version__", "0")
    except Exception:  # pragma: no cover
        # Deliberately broad: the version stamp is best-effort metadata.
        pass
    return version
515
+
516
+
517
+ # ---------------------------------------------------------------------------
518
+ # Resolved-decision capture (docs/75 §5.7). A decision is "resolved" only on a
519
+ # genuinely-persisting operator act — today `dos arbitrate --force` producing an
520
+ # acquire a non-forced call would have refused. `dos judge` is read-only and is
521
+ # NOT a capture point (running it N times must not multiply rows). The digest is
522
+ # written by this function, called from the CLI ACTION layer — never by
523
+ # `decisions.py` (which stays a pure read-only projection, Law 3).
524
+ #
525
+ # To honor projection-not-sync exactly, the resolution is mirrored to the
526
+ # project's OWN `.dos/decisions/resolved.jsonl` (local truth) AND projected to
527
+ # `~/.dos/decisions.jsonl`; `dos reindex` rebuilds the central log from the local
528
+ # mirrors. The append is deduped by (project_id, lane, run_ts, action) so a
529
+ # repeated identical force is one logical resolution.
530
+ # ---------------------------------------------------------------------------
531
+
532
+
533
def _decision_identity(row: dict) -> tuple:
    """Build the dedupe key for a resolved-decision row.

    The key is ``(project_id, lane, run_ts, action)``, where ``action`` is read
    from the nested ``resolution`` mapping. Missing fields default to ``""``,
    and a ``resolution`` that is absent, falsy or not a dict gives an empty
    action.
    """
    resolution = row.get("resolution") or {}
    if isinstance(resolution, dict):
        action = resolution.get("action", "")
    else:
        action = ""
    leading = tuple(row.get(field, "") for field in ("project_id", "lane", "run_ts"))
    return leading + (action,)
541
+
542
+
543
def append_decision(
    cfg,
    row: dict,
    *,
    home: Path | str | None = None,
    clock: Callable[[], int] | None = None,
) -> dict | None:
    """Record a resolved-decision digest locally and project it centrally.

    ``row`` carries the digest fields (kind, resolver_kind, lane, reason_token,
    reason_category, run_ts, resolution). It is stamped with ``schema``,
    ``project_id`` (from the identity card, else derived from the root path),
    ``label`` and ``ts_ms``; keys already present in ``row`` override the first
    three, while ``ts_ms`` is always set here.

    The stamped row is first deduplicated against the project's own
    ``.dos/decisions/resolved.jsonl`` using ``_decision_identity`` — a match
    returns ``None`` and writes nothing. Otherwise it is appended to that local
    mirror and then, under the home write-lock, to the central decisions log.
    A failure of the central write is reported on stderr and never raised.

    Returns the stamped row, or ``None`` for a duplicate.
    """
    paths = cfg.paths
    # The card is authoritative for the id; derive it only when no card exists.
    card = _read_card(paths.project_card or (paths.dot_dos / "project.json")) or {}
    project_id = card.get("project_id") or project_id_for(paths.root)

    stamped = {
        "schema": SCHEMA,
        "project_id": project_id,
        "label": paths.root.name,
    }
    stamped.update(row)
    stamped["ts_ms"] = _now_ms(clock)

    local_mirror = paths.dot_dos / "decisions" / "resolved.jsonl"
    wanted = _decision_identity(stamped)
    for prior in read_jsonl(local_mirror):
        if prior.get("_CORRUPT"):
            continue
        if _decision_identity(prior) == wanted:
            return None  # already recorded — idempotent

    _append_jsonl(local_mirror, stamped)  # local truth first
    try:
        layout = ensure_dos_home(home)
        with _home_lock(layout.home_lock):
            _append_jsonl(layout.decisions_log, stamped)
    except Exception as exc:  # noqa: BLE001 — central projection is best-effort
        print(f"dos: warning: could not project decision centrally: {exc}",
              file=sys.stderr)
    return stamped
588
+
589
+
590
+ # ---------------------------------------------------------------------------
591
+ # reindex — rebuild the central store from the per-project `.dos/` dirs. This is
592
+ # the projection-not-sync authority: the central index is DERIVED, never the
593
+ # source of truth. It reads the existing index for the known-project PATH LIST
594
+ # (the registry), re-stats each `.dos/`, marks active/stale/moved, and rewrites
595
+ # the index atomically under the home write-lock. Rebuilds decisions.jsonl from
596
+ # each project's local `.dos/decisions/resolved.jsonl` mirror.
597
+ # ---------------------------------------------------------------------------
598
+
599
+
600
+ def _fold_latest(rows: list[dict]) -> dict[str, dict]:
601
+ """Last-row-wins per project_id (the index is an append log folded on read)."""
602
+ latest: dict[str, dict] = {}
603
+ earliest_seen: dict[str, str] = {}
604
+ for r in rows:
605
+ if r.get("_CORRUPT"):
606
+ continue
607
+ pid = r.get("project_id")
608
+ if not pid:
609
+ continue
610
+ fs = r.get("first_seen")
611
+ if fs and (pid not in earliest_seen or fs < earliest_seen[pid]):
612
+ earliest_seen[pid] = fs
613
+ latest[pid] = r
614
+ for pid, r in latest.items():
615
+ if earliest_seen.get(pid):
616
+ r["first_seen"] = earliest_seen[pid]
617
+ return latest
618
+
619
+
620
def reindex(
    home: Path | str | None = None,
    *,
    prune: bool = False,
    clock: Callable[[], int] | None = None,
) -> dict:
    """Regenerate the central index and decision log from each project's `.dos/`.

    The central files are treated as derived data (docs/75 §7, Phase 4):

      1. Read the projects index, fold it to the latest row per project_id,
         and union the roots it records with the roots listed in
         ``roots.log`` — either file alone is enough to find the projects.
      2. Visit every root in sorted order and classify it: ``stale`` when
         its `.dos/` directory is missing or `_read_card` returns ``None``;
         ``moved`` when the card records a root that resolves to a different
         path; otherwise ``active``.
      3. An active project gets a freshly built row (`_projects_row`) and
         contributes the non-corrupt rows of its local
         ``.dos/decisions/resolved.jsonl``. A stale or moved project keeps
         its previous row, re-stamped with id, status and ``last_indexed``.
      4. Write the index and the decision log with `_atomic_write_jsonl`
         while holding the home lock.

    With ``prune`` set, stale roots are left out of the rewritten index and
    ``roots.log`` is rewritten to the surviving roots, so a dropped root is
    not rediscovered by the next run. In addition, roots for which
    `_is_temp_root` is true are skipped entirely — but only when the call
    targets the default home (no ``home`` argument and no home override in
    the environment), because a redirected home may legitimately hold
    temp-rooted projects.

    Two distinct active roots that yield the same project id are reported
    under ``id_collisions`` rather than merged (docs/75 §5.6).

    Returns a summary dict: counts for ``active``, ``stale``, ``moved`` and
    ``throwaway``, the ``id_collisions`` list, and the totals ``projects``
    and ``decisions`` that were written.
    """
    from dos.config import PathLayout

    h = ensure_dos_home(home)
    folded = _fold_latest(read_jsonl(h.projects_index))

    # Temp-root pruning is armed only for the machine-default home: an
    # explicit home= argument or an environment override disables it.
    prune_throwaway = (
        prune and home is None and not os.environ.get(_config.ENV_DOS_HOME)
    )

    # root-string -> folded index row (or a bare placeholder for a root that
    # is known only from roots.log).
    roots: dict[str, dict] = {
        r["root"]: r for r in folded.values() if r.get("root")
    }
    for logged_root in _read_roots(h.roots_log):
        if logged_root not in roots:
            roots[logged_root] = {"root": logged_root}

    def _status_of(layout, card, root: Path) -> str:
        """Classify one project root as stale, moved or active."""
        if not layout.dot_dos.exists() or card is None:
            return "stale"
        recorded = card.get("root")
        if recorded and Path(recorded).resolve() != root.resolve():
            # The card names a different home than where it was found.
            return "moved"
        return "active"

    def _by_project_id(r: dict):
        return r.get("project_id", "")

    def _by_time_then_project(r: dict):
        return (r.get("ts_ms", 0), r.get("project_id", ""))

    rebuilt_rows: list[dict] = []
    kept_roots: list[str] = []  # what roots.log is compacted to under prune
    decisions: list[dict] = []
    summary = {"active": 0, "stale": 0, "moved": 0, "throwaway": 0,
               "id_collisions": []}
    seen_ids: dict[str, str] = {}  # project_id -> first-class root string

    for root_str in sorted(roots):
        row = roots[root_str]
        root = Path(root_str)
        if prune_throwaway and _is_temp_root(root):
            summary["throwaway"] += 1
            continue

        layout = PathLayout.for_dos_dir(root)
        card = _read_card(layout.project_card)
        # Id precedence: live card, then the previous index row, then a value
        # derived from the path.
        pid = (
            (card or {}).get("project_id")
            or row.get("project_id")
            or project_id_for(root)
        )
        status = _status_of(layout, card, root)
        is_active = status == "active"

        if is_active:
            prior = seen_ids.get(pid)
            if prior is not None and prior != str(root):
                summary["id_collisions"].append(
                    {"project_id": pid, "roots": [prior, str(root)]}
                )
            seen_ids[pid] = str(root)

        summary[status] += 1
        if prune and status == "stale":
            continue
        kept_roots.append(root_str)

        if not is_active:
            rebuilt_rows.append({
                **row,
                "project_id": pid,
                "status": status,
                "last_indexed": _now_iso(clock),
            })
            continue

        fresh = _projects_row(_FakeCfg(layout), card, clock=clock)
        fresh["status"] = "active"
        rebuilt_rows.append(fresh)
        # Pull in this project's local resolved-decision rows.
        mirror = layout.dot_dos / "decisions" / "resolved.jsonl"
        decisions += [d for d in read_jsonl(mirror) if not d.get("_CORRUPT")]

    rebuilt_rows.sort(key=_by_project_id)
    decisions.sort(key=_by_time_then_project)

    with _home_lock(h.home_lock):
        _atomic_write_jsonl(h.projects_index, rebuilt_rows)
        _atomic_write_jsonl(h.decisions_log, decisions)
        if prune:
            # Without this a pruned root would re-enter the union above on
            # the next run and come back as a `stale` row.
            _rewrite_roots(h.roots_log, kept_roots)

    summary["projects"] = len(rebuilt_rows)
    summary["decisions"] = len(decisions)
    return summary
750
+
751
+
752
+ class _FakeCfg:
753
+ """A minimal cfg shim exposing `.paths` so `_projects_row` can re-stat a
754
+ project during reindex without constructing a full SubstrateConfig (reindex
755
+ only needs the path layout, never lanes/reasons/stamp)."""
756
+
757
+ __slots__ = ("paths",)
758
+
759
+ def __init__(self, layout):
760
+ self.paths = layout
761
+
762
+
763
+ # ---------------------------------------------------------------------------
764
+ # Cross-project read-only queries (docs/75 §7, Phase 4) — pure group-bys over
765
+ # the central store. These WRITE NOTHING; they are the home-tier read syscalls.
766
+ # ---------------------------------------------------------------------------
767
+
768
+
769
def list_projects(home: Path | str | None = None) -> list[dict]:
    """Return the registry rows, one per project id, ordered by label.

    Reads the projects index under ``home``, folds it with `_fold_latest`,
    and sorts the surviving rows by ``(label, project_id)``; a missing or
    falsy value sorts as the empty string. Nothing is written.
    """
    layout = _config.HomeLayout.for_home(home)
    folded = _fold_latest(read_jsonl(layout.projects_index))

    def _label_then_id(entry: dict) -> tuple:
        return (entry.get("label") or "", entry.get("project_id") or "")

    return sorted(folded.values(), key=_label_then_id)
774
+
775
+
776
+ def learn(axis: str, home: Path | str | None = None) -> list[dict]:
777
+ """Aggregate the resolved-decision log along one of three closed axes:
778
+
779
+ * ``wedge-hotspots`` — which projects accrue the most decisions (by label);
780
+ * ``lane-refusals`` — which lanes get force-overridden most (by lane);
781
+ * ``oracle-calibration`` — resolved decisions grouped by reason_category,
782
+ the signal for whether a deterministic oracle owns the right categories.
783
+
784
+ Pure read-only group-by; returns sorted (descending count) tally rows.
785
+ """
786
+ h = _config.HomeLayout.for_home(home)
787
+ rows = [r for r in read_jsonl(h.decisions_log) if not r.get("_CORRUPT")]
788
+ key = {
789
+ "wedge-hotspots": lambda r: r.get("label") or r.get("project_id") or "?",
790
+ "lane-refusals": lambda r: r.get("lane") or "(none)",
791
+ "oracle-calibration": lambda r: r.get("reason_category") or "(uncategorized)",
792
+ }.get(axis)
793
+ if key is None:
794
+ raise ValueError(
795
+ f"unknown learn axis {axis!r}; known: "
796
+ f"wedge-hotspots, lane-refusals, oracle-calibration"
797
+ )
798
+ tally: dict[str, int] = {}
799
+ for r in rows:
800
+ tally[key(r)] = tally.get(key(r), 0) + 1
801
+ return [{"group": g, "count": c}
802
+ for g, c in sorted(tally.items(), key=lambda kv: (-kv[1], kv[0]))]
803
+
804
+
805
+ # ---------------------------------------------------------------------------
806
+ # Scratch reaping (docs/106 §3.4) — keep-last-N over the per-project `.dos/`
807
+ # scratch classes the kernel never auto-reaped: verdict sidecars and audit
808
+ # reports (recency-floored, no liveness — "a point-in-time artifact"), plus
809
+ # run-dirs (recency fallback until the lease-liveness join lands — §3.4).
810
+ #
811
+ # The DECISION of what to drop is the pure `retention.plan_reap` (kernel leaf);
812
+ # this is the I/O half — the scandir + unlink — so it lives here in the home tier,
813
+ # never in the pure leaf. Every drop is RETURNED in the report (the CLI prints it):
814
+ # the docs/106 §3.4 "no silent caps — log() what you dropped and why" discipline,
815
+ # because a reaper that quietly eats a report an operator needed is the disease,
816
+ # not the cure. Dry-run (`apply=False`) is the default-safe mode: it computes the
817
+ # exact same plan and reports it, deleting nothing.
818
+ # ---------------------------------------------------------------------------
819
+
820
+ # The scratch classes this reaper knows, each: (report-key, the dir, a predicate
821
+ # over its entries, the cap field on RetentionPolicy, whether liveness-gating is REQUIRED-but-unwired).
822
+ # A run-dir genuinely has a liveness (its lease may still be live); §3.4 says fall
823
+ # back to keep-last-N by mtime until the (loop_ts, lane)->run_id join exists, and
824
+ # announce that the gate is not yet applied. Verdicts/audits have no liveness, so
825
+ # recency is the honest-and-complete rule for them.
826
+ def _scratch_classes(cfg):
827
+ p = cfg.paths
828
+ return [
829
+ # (key, dir, child-predicate, cap-attr, liveness_unwired)
830
+ ("audits", p.dot_dos / "audits",
831
+ lambda e: e.is_file() and e.name.startswith("trajectory-audit-"),
832
+ "audits_keep_last", False),
833
+ ("verdicts", p.verdicts_dir,
834
+ lambda e: e.is_file() and ".verdict-" in e.name,
835
+ "verdicts_keep_last", False),
836
+ ("runs", p.fanout_runs,
837
+ lambda e: e.is_dir(),
838
+ "runs_keep_last", True),
839
+ ]
840
+
841
+
842
def reap_scratch(cfg, *, apply: bool = False) -> dict:
    """Trim each `.dos/` scratch class down to its configured retention cap.

    For every class from `_scratch_classes` the cap is read from
    ``cfg.retention``. The class directory is scanned for entries matching
    the class predicate, and the ``(name, mtime)`` pairs are passed to
    ``retention.plan_reap``, which decides which names to drop. When
    ``apply`` is true each dropped entry is deleted through `_reap_one`;
    when it is false (the default) the same plan is computed and reported
    but nothing is deleted.

    The returned dict maps each class key to a report holding:

      * ``dir`` — the scanned directory, as a string;
      * ``data_class`` — the ``cfg.data_class.classify`` result for that
        directory, an annotation that has no effect on what is reaped;
      * ``cap`` — the cap that was applied (``None`` means no cap);
      * ``kept`` — the number of scanned entries not selected for dropping;
      * ``dropped`` — every dropped entry name, sorted;
      * ``liveness_unwired`` — present and ``True`` for the class flagged so
        by `_scratch_classes` (the run dirs, whose lease-liveness gate is
        not applied here; they are reaped by recency only — docs/106 §3.4);
      * ``unbounded`` — present and ``True`` when the cap is ``None``; such
        a class is not scanned at all.

    A class whose directory does not exist is reported with zero kept and
    nothing dropped. The extra key ``_applied`` echoes the ``apply`` flag.
    """
    from dos import retention as _retention

    def _class_of(d: Path) -> str:
        """Classify a scratch dir by its path relative to the project root,
        falling back to the path as given. Annotation only."""
        try:
            rel = d.relative_to(cfg.paths.root).as_posix()
        except (ValueError, OSError):
            rel = d.as_posix()
        return cfg.data_class.classify(rel)

    def _scan(directory: Path, accept):
        """Collect (name, mtime) pairs and the matching DirEntry objects."""
        found: list[tuple[str, float]] = []
        index: dict[str, os.DirEntry] = {}
        with os.scandir(directory) as listing:
            for entry in listing:
                if not accept(entry):
                    continue
                try:
                    stamp = entry.stat().st_mtime
                except OSError:
                    continue  # vanished mid-scan; skip
                found.append((entry.name, stamp))
                index[entry.name] = entry
        return found, index

    report: dict[str, dict] = {}
    for key, d, pred, cap_attr, liveness_unwired in _scratch_classes(cfg):
        cap = getattr(cfg.retention, cap_attr)
        cls: dict = {
            "dir": str(d),
            "data_class": _class_of(d),
            "cap": cap,
            "kept": 0,
            "dropped": [],
        }
        if liveness_unwired:
            cls["liveness_unwired"] = True
        # Registered up front; the branches below only fill in details.
        report[key] = cls

        if cap is None:
            cls["unbounded"] = True
            continue
        if not d.is_dir():
            continue  # nothing to reap (dir not created yet)

        # The entry NAME is the identifier; mtime drives recency.
        entries, by_name = _scan(d, pred)
        drop = _retention.plan_reap(entries, cap)
        cls["kept"] = len(entries) - len(drop)
        doomed = sorted(drop)
        cls["dropped"].extend(doomed)
        if apply:
            for name in doomed:
                victim = by_name[name]
                _reap_one(Path(victim.path), is_dir=victim.is_dir())

    report["_applied"] = apply
    return report
921
+
922
+
923
+ def _reap_one(path: Path, *, is_dir: bool) -> None:
924
+ """Delete one scratch entry (a file, or a run-dir tree). Best-effort: a
925
+ permission/race error on one entry never aborts the sweep — it is logged by its
926
+ ABSENCE from a later report, and the next sweep retries it."""
927
+ import shutil
928
+ try:
929
+ if is_dir:
930
+ shutil.rmtree(path, ignore_errors=True)
931
+ else:
932
+ path.unlink(missing_ok=True)
933
+ except OSError:
934
+ pass