dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/enumerate.py ADDED
@@ -0,0 +1,776 @@
1
+ """`enumerate` — the phase-list producer (docs/168 Concept 1, the unbuilt third).
2
+
3
+ The kernel owns `oracle` ("did *this id* ship?") and `completion`
4
+ ("residual = declared − verified"). Both need a `declared` set as INPUT — and
5
+ neither produces it. The host did, in its own code
6
+ (`job/scripts/plan_phases.py::derive_phase_universe`), and every bug in that
7
+ re-implementation was a fleet-wide wedge: the **picker-invisibility gap** — on
8
+ 2026-06-05 the `job` registry held 62 ACTIVE plans but only 14 carried a
9
+ machine-readable `remaining:[…]` list; the other ~38, several with real work and
10
+ rich phase tables, were SILENTLY DROPPED by the auto-pick ladder because a plan
11
+ with no nameable next phase has no pick. That is the operator's "losing plans"
12
+ bug, and the `ladder read slot not priority` class is its sibling (the ladder
13
+ read an obsolete field with a prose-digit regex and ranked a done plan top).
14
+
15
+ This module is that missing producer, lifted to the kernel and made generic:
16
+
17
+ > Given a plan-doc's BYTES and a declared GRAMMAR, enumerate the unit ids it
18
+ > declares, in document order, with a typed DriftNote where the doc disagrees
19
+ > with itself — never a silently-empty universe, never a raise.
20
+
21
+ It composes INTO `completion` (it is the producer of the `declared` set the
22
+ residual is measured against), it does not stand beside it (docs/168 §1).
23
+
24
+ Relocate, don't relax (Design Law 6, docs/207 §3)
25
+ =================================================
26
+
27
+ The `job` deriver is battle-scarred against a 38-invisible-plan corpus. Every
28
+ piece of its correctness moves here BYTE-FOR-BYTE; only the GRAMMAR (which
29
+ heading/table/bare shape declares a unit, which prefix anchors it) is lifted to
30
+ `[enumerate]` data (`EnumerateGrammar`), exactly as `[stamp]` lifted the ship
31
+ subject grammar:
32
+
33
+ * **Series-anchored token regex** — the anti-brittleness core. A unit id is a
34
+ declared `series` prefix then a digit / sub-phase / word-suffix tail. The
35
+ series anchor is the ONE rule that rejects every data-table trap (`| Class |
36
+ Count |`, `| (c) | 25 |`, sibling-plan rows, the literal `Phase`/`#`/`---`
37
+ header/separator cells) — none start with the series, so none enumerate.
38
+ * **Three id shapes** with range guards (`IFR4-IFR5` is a range, not a phase).
39
+ * **Code-fence stripping** — a phase id inside a ``` sample never enumerates.
40
+ * **Heading + table + bare-`Phase N` families**, UNION'd (the hybrid plan).
41
+ * **Sibling-clause masking** — the `(CD8 shipped this slot)` row trap.
42
+ * **Structural-stamp gate** — a prose "all-SHIPPED" must not read as a ship.
43
+ * **Parent/child rollup** to a fixpoint, with the not-done guard.
44
+ * **Degrade-never-crash** — a malformed body yields an empty `Enumeration` +
45
+ a typed `DriftNote`, never a raise (the picker-invisibility cure: a typed
46
+ refusal the picker can always produce, never the old silent `[]`).
47
+
48
+ The generic grammar default (a repo that declares nothing): markdown `### N.
49
+ NAME` / `### N — NAME` headings + bare `Phase N` headings (numbered `| N | … |`
50
+ table first-cells only when `generic_numeric_table` is on). A repo with the reference series-anchored shape declares its grammar
51
+ in `dos.toml [enumerate]`.
52
+
53
+ ⚓ Pure; host gathers state. `enumerate_units(source_bytes, *, grammar)` makes no
54
+ file/git/clock call — the CLI reads the file and hands in the bytes, the same
55
+ seam as `liveness.classify` reading a `git_delta`. So the byte-parity gate
56
+ (docs/207 Phase 2, `test_enumerate_byte_parity_job`) replays on the `job` repo's
57
+ committed plan docs offline, at $0.
58
+
59
+ ⚓ The module is named ``enumerate`` so the CLI verb reads ``dos enumerate``, but
60
+ its public function is ``enumerate_units`` — NOT ``enumerate`` — and consumers
61
+ import it as ``from dos import enumerate as _enumerate`` / ``import
62
+ dos.enumerate``, NEVER the bare ``from dos import enumerate`` (which would shadow
63
+ the builtin in that scope; the kernel uses the builtin at 20+ call sites). See
64
+ docs/207-seam-ledger §4.1.
65
+ """
66
+
67
+ from __future__ import annotations
68
+
69
+ import re
70
+ from dataclasses import dataclass, field
71
+ from typing import Iterable, Literal, Optional
72
+
73
+ # READ-ONLY reuse of the sibling ship-verdict kernel — the SAME internals the
74
+ # `job` deriver reused, but they live HERE, so the shipped-state decision is one
75
+ # implementation, not a second heuristic. Guarded so a refactor of
76
+ # `phase_shipped`'s privates degrades the deriver to its own scans rather than
77
+ # crashing the picker that imports us (the durability seam, byte-for-byte from the
78
+ # job deriver's own fallback).
79
+ try: # pragma: no cover - the fallback IS the durability seam
80
+ from dos.phase_shipped import (
81
+ _phase_variants as _dos_phase_variants,
82
+ _section_says_shipped as _dos_section_says_shipped,
83
+ )
84
+ _DOS_OK = True
85
+ except Exception: # pragma: no cover - defensive
86
+ _DOS_OK = False
87
+
88
+ def _dos_phase_variants(phase: str, series: str = "") -> list[str]: # type: ignore[misc]
89
+ return [re.escape(phase)]
90
+
91
+ def _dos_section_says_shipped(section: str) -> Optional[bool]: # type: ignore[misc]
92
+ return True if "SHIPPED" in (section or "") else False
93
+
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # Where a unit id was discovered / how its shipped-state was decided.
97
+ # ---------------------------------------------------------------------------
98
+ UnitSource = Literal["header", "table-row", "header+table-row", "generic-header"]
99
+ ShippedBy = Literal["stamp", "child-rollup", "meta-shipped", "none"]
100
+
101
+
102
+ # ---------------------------------------------------------------------------
103
+ # The grammar — the per-workspace data the parser reads (the `[enumerate]` table).
104
+ # Modelled on `dos.stamp.StampConvention`: carries DATA, exposes the compiled
105
+ # patterns the scan interpolates. Declared in `dos.toml [enumerate]`, defaulting
106
+ # to a generic markdown grammar; the reference series-anchored shape is opt-in.
107
+ # ---------------------------------------------------------------------------
108
+
109
+
110
@dataclass(frozen=True)
class EnumerateGrammar:
    """Immutable bundle of the shape parameters `enumerate_units` reads.

    This is pure data, declared by a host in `dos.toml [enumerate]`:

    * ``series`` — the unit-id prefix for a series-anchored scan (e.g.
      ``"AUTH"``). When empty, no series anchor is used and enumeration relies
      on the generic numbered-heading and bare-``Phase N`` families.
    * ``heading_levels`` — the markdown heading depths that may declare a unit
      (``(2, 3, 4, 5, 6)`` means `##` through `######`).
    * ``scan_tables`` — whether table rows whose FIRST cell is a unit id
      contribute units.
    * ``generic_numeric_table`` — in no-series mode, whether a leading numeric
      first cell (`| N | … |`) is read as unit `N`. Off by default.
    * ``bare_phase_fallback`` — whether bare `### Phase N` headings enumerate
      when the other scans found nothing. With a series the minted id is
      series-prefixed; without one it is `Phase N`.
    * ``rollup_parents`` — whether an unstamped parent whose children all
      shipped is rolled up to shipped. Off by default.
    * ``style`` — a human-readable label (``"series"`` / ``"generic"``); the
      scan does not branch on it.
    """

    series: str = ""
    heading_levels: tuple[int, ...] = (2, 3, 4, 5, 6)
    scan_tables: bool = True
    generic_numeric_table: bool = False
    bare_phase_fallback: bool = True
    rollup_parents: bool = False
    style: str = "generic"

    def to_dict(self) -> dict:
        """Return the grammar as a plain dict (``heading_levels`` as a list)."""
        names = (
            "series",
            "heading_levels",
            "scan_tables",
            "generic_numeric_table",
            "bare_phase_fallback",
            "rollup_parents",
            "style",
        )
        payload = {name: getattr(self, name) for name in names}
        # Overwrite in place: the key keeps its position, the tuple becomes a list.
        payload["heading_levels"] = list(self.heading_levels)
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "EnumerateGrammar":
        """Build a grammar from a mapping, filling defaults for missing keys.

        A falsy ``data`` yields the all-defaults grammar. A missing or empty
        ``heading_levels`` falls back to ``(2, 3, 4, 5, 6)``; a falsy
        ``series`` becomes ``""`` and a falsy ``style`` becomes ``"generic"``.
        """
        raw = dict(data or {})
        declared_levels = raw.get("heading_levels")
        if declared_levels:
            levels = tuple(int(depth) for depth in declared_levels)
        else:
            levels = (2, 3, 4, 5, 6)
        series = str(raw.get("series", "") or "")
        style = str(raw.get("style", "generic") or "generic")
        return cls(
            series=series,
            heading_levels=levels,
            scan_tables=bool(raw.get("scan_tables", True)),
            generic_numeric_table=bool(raw.get("generic_numeric_table", False)),
            bare_phase_fallback=bool(raw.get("bare_phase_fallback", True)),
            rollup_parents=bool(raw.get("rollup_parents", False)),
            style=style,
        )
179
+
180
+
181
+ # The two named conventions that ship in the package — the `[stamp]` twin pattern.
182
+ GENERIC_GRAMMAR = EnumerateGrammar()
183
+ # The reference series-anchored grammar (the `job` deriver's shape), opt-in: a
184
+ # caller passes `series=<plan id>` to get it (so it stays per-plan, since the
185
+ # series differs per plan — unlike `[stamp]`'s repo-wide grammar).
186
+ JOB_GRAMMAR = EnumerateGrammar(
187
+ scan_tables=True, bare_phase_fallback=True, rollup_parents=True, style="series"
188
+ )
189
+
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # Compiled patterns (built from a grammar at scan time — none host-specific).
193
+ # ---------------------------------------------------------------------------
194
+
195
+ # A markdown heading at any level `# …` through `###### …` (callers filter by `heading_levels`).
196
+ _HEADING_RE = re.compile(r"(?m)^(#{1,6})\s+(.*)$")
197
+ # A markdown table data row; only the FIRST cell is inspected for a unit id.
198
+ _TABLE_ROW_RE = re.compile(r"(?m)^[ \t]*\|(?P<first>[^|]*)\|(?P<rest>.*)$")
199
+ # A table separator row `|---|:--:|` — never a unit.
200
+ _TABLE_SEP_RE = re.compile(r"^[ \t]*\|?[\s:|.\-]+$")
201
+ # A fenced code block delimiter.
202
+ _FENCE_RE = re.compile(r"^[ \t]*(?:```|~~~)")
203
+ # Bare generic `### Phase N` header (the OBS/DLO fallback). Ordinal w/ letter/decimal.
204
+ _GENERIC_PHASE_RE = re.compile(r"(?i)^phase\s+(\d+[a-z]?(?:\.\d+)*)\b")
205
+ # A generic numbered heading `### 1. NAME` / `### 2 — NAME` (the no-series default).
206
+ _GENERIC_NUM_HEADING_RE = re.compile(r"^(\d+(?:\.\d+)*)[.)]?(?:\s|—|–|-|$)")
207
+ # Lowercase-tolerant shipped marker for table-row prose (the `(shipped 2026-…)` form).
208
+ # `done` is excluded (false-trips on "not done"); `shipped` is word-bounded + not
209
+ # preceded by `not `. ✅/✓/[x] are unambiguous completion marks.
210
+ _ROW_SHIPPED_RE = re.compile(r"(?i)(?:(?<!not )(?<!not yet )\bshipped\b|✅|✓|\[x\])")
211
+ # A *structural* uppercase-SHIPPED STAMP (the operator's `— SHIPPED 2026-…` mark),
212
+ # distinguished from the word buried in spec prose. `SHIPPED` preceded by ws/`*`/`(`
213
+ # or a separator glyph that itself follows whitespace.
214
+ _STRUCTURAL_STAMP_RE = re.compile(r"(?:[\s*(]|(?<=\s)[—·\-])SHIPPED\b")
215
+ # An explicit not-done marker that BLOCKS a parent/child roll-up (the MC2/MC2.1 guard).
216
+ _NOT_DONE_RE = re.compile(
217
+ r"(?i)\b(pending|not[- ]shipped|not[- ]started|draft|tomb(?:ed|stone)?|"
218
+ r"deferred|blocked|in[- ]progress|abort)\b"
219
+ )
220
+
221
+
222
+ def _phase_token_re(series: str) -> "re.Pattern[str]":
223
+ """The series-anchored unit-id token regex (the anti-brittleness core).
224
+
225
+ Byte-for-byte the `job` deriver's `_phase_token_re`. Three id shapes after the
226
+ series, all requiring the series prefix:
227
+ * numeric / sub-phase: `TF0`, `MAS2.5`, `AFR1.1.0`, `HS1a`, `SF1.2-port`,
228
+ `PLA6.4-C`, `SVP-2` (leading `-?` lets the series hyphen-join a number).
229
+ * word-suffix satellites: `AFR-FQ282`, `WD-CREATE-ACCT`, `SV-FQ57`.
230
+ `(?<![A-Za-z0-9])` stops `XTF0`; the trailing boundary stops `MAS1`→`MAS10`. A
231
+ sub-suffix that re-opens with `series+digit` is a RANGE (`IFR4-IFR5`), guarded out.
232
+ """
233
+ s = re.escape(series)
234
+ num_arm = rf"-?\d+[a-z]?(?:\.\d+)*(?:-(?!{s}\d)[A-Za-z0-9.]+)?"
235
+ word_arm = rf"-(?!{s}\d)[A-Za-z0-9][A-Za-z0-9.\-]*"
236
+ return re.compile(
237
+ rf"(?<![A-Za-z0-9]){s}(?:{num_arm}|{word_arm})(?![A-Za-z0-9.\-])",
238
+ re.IGNORECASE,
239
+ )
240
+
241
+
242
+ # ---------------------------------------------------------------------------
243
+ # Observable result types.
244
+ # ---------------------------------------------------------------------------
245
+
246
+
247
@dataclass(frozen=True)
class DriftNote:
    """A typed record that the doc disagrees with itself or could not be read.

    ``kind`` names the class of problem:

    * ``unparseable`` — the grammar could not read a heading/region; reported
      as a note rather than raised.
    * ``list_table_mismatch`` — the plan-meta cached list and the doc body
      name different units; ``detail`` says which ids diverged.
    * ``empty`` — the body declared no unit ids at all.

    ``detail`` is the human-readable explanation and ``span`` an optional
    quote of the offending text.
    """

    kind: str
    detail: str
    span: str = ""

    def to_dict(self) -> dict:
        """Return the note as a plain ``kind`` / ``detail`` / ``span`` dict."""
        return dict(kind=self.kind, detail=self.detail, span=self.span)
270
+
271
+
272
@dataclass(frozen=True)
class UnitSpan:
    """The per-unit record: where one unit id was found and how it was judged.

    ``source`` says which surface declared the unit, ``shipped`` /
    ``shipped_by`` give the shipped verdict and what decided it, and
    ``evidence`` is a short quote of the deciding line.
    """

    unit: str
    source: UnitSource
    shipped: bool
    shipped_by: ShippedBy
    evidence: str  # a short quote of the deciding line

    def to_dict(self) -> dict:
        """Return the record as a plain dict, keys in field order."""
        keys = ("unit", "source", "shipped", "shipped_by", "evidence")
        return {key: getattr(self, key) for key in keys}
294
+
295
+
296
@dataclass(frozen=True)
class Enumeration:
    """Everything one enumeration pass over a plan-doc body produced.

    * ``series`` — the grammar's series label, echoed back.
    * ``units`` — every declared unit id, in document order.
    * ``remaining`` / ``shipped`` — the partition of those units.
    * ``by_unit`` — map from unit id to its `UnitSpan`.
    * ``drift`` — the typed `DriftNote`s raised along the way.
    """

    series: str
    units: tuple[str, ...] = ()
    remaining: tuple[str, ...] = ()
    shipped: tuple[str, ...] = ()
    by_unit: dict = field(default_factory=dict)
    drift: tuple[DriftNote, ...] = ()

    def to_dict(self) -> dict:
        """Return a plain-dict view: tuples become lists, nested records dicts."""
        payload: dict = {"series": self.series}
        for name in ("units", "remaining", "shipped"):
            payload[name] = list(getattr(self, name))
        payload["by_unit"] = {
            unit_id: span.to_dict() for unit_id, span in self.by_unit.items()
        }
        payload["drift"] = [note.to_dict() for note in self.drift]
        return payload
322
+
323
+
324
+ # ---------------------------------------------------------------------------
325
+ # Scan helpers (pure, no I/O) — relocated from the `job` deriver.
326
+ # ---------------------------------------------------------------------------
327
+
328
+
329
def _strip_code_fences(body: str) -> str:
    """Blank every fence delimiter line and every line inside a fenced block.

    Each blanked line becomes an empty string, so the line count of ``body``
    is preserved. Any line matching `_FENCE_RE` toggles the in-fence state.
    """
    kept: list[str] = []
    inside = False
    for raw in body.split("\n"):
        is_delimiter = _FENCE_RE.match(raw) is not None
        if is_delimiter:
            inside = not inside
        # The delimiter itself is blanked too, whether it opens or closes.
        kept.append("" if (is_delimiter or inside) else raw)
    return "\n".join(kept)
340
+
341
+
342
+ def _heading_id(title: str, tok_re: "re.Pattern[str]") -> Optional[str]:
343
+ """The series unit id a heading LEADS with (within 2 chars), or None."""
344
+ t = title.lstrip("*# ").strip()
345
+ tm = tok_re.search(t)
346
+ if tm and tm.start() <= 2:
347
+ return tm.group(0)
348
+ return None
349
+
350
+
351
def _iter_levels(body: str, heading_levels: tuple[int, ...]):
    """Yield ``(level, match)`` for each heading whose depth is permitted.

    Headings are found with `_HEADING_RE` in document order; ``level`` is the
    number of leading `#` characters and must be in ``heading_levels``.
    """
    for hit in _HEADING_RE.finditer(body):
        depth = len(hit.group(1))
        if depth not in heading_levels:
            continue
        yield depth, hit
357
+
358
+
359
+ def _section_of(body: str, headings: list, idx: int, level: int) -> str:
360
+ """The bounded section of heading `idx`: to the next heading of ≤ level."""
361
+ end = len(body)
362
+ for _lvl, nxt in headings[idx + 1:]:
363
+ if nxt and len(nxt.group(1)) <= level:
364
+ end = nxt.start()
365
+ break
366
+ return body[headings[idx][1].start():end]
367
+
368
+
369
+ def _mask_sibling_clauses(text: str, own_id: str, tok_re: "re.Pattern[str]") -> str:
370
+ """Blank short clauses naming a DIFFERENT unit id (the CD9 `(CD8 shipped)` trap)."""
371
+ own = own_id.lower()
372
+ out = list(text)
373
+ for m in tok_re.finditer(text):
374
+ if m.group(0).lower() == own:
375
+ continue
376
+ end = m.end()
377
+ while end < len(text) and text[end] not in ".;)|\n":
378
+ end += 1
379
+ for i in range(m.start(), end):
380
+ out[i] = " "
381
+ return "".join(out)
382
+
383
+
384
def _section_shipped(
    text: str, own_id: str = "", tok_re: "Optional[re.Pattern[str]]" = None
) -> Optional[bool]:
    """Decide whether a section / row line marks its unit as shipped.

    The kernel detector is consulted on the raw ``text``; if it raises, its
    verdict is treated as unknown. When both ``own_id`` and ``tok_re`` are
    given, the marker searches run on a copy with sibling-unit clauses masked.
    Returns True when the kernel said True AND a structural uppercase stamp is
    present, or when the row-level shipped marker matches. Otherwise returns
    None, so the unit stays remaining.
    """
    try:
        kernel_verdict = _dos_section_says_shipped(text)
    except Exception:  # pragma: no cover - defensive
        kernel_verdict = None

    if own_id and tok_re is not None:
        candidate = _mask_sibling_clauses(text, own_id, tok_re)
    else:
        candidate = text

    has_structural_stamp = (
        kernel_verdict is True
        and _STRUCTURAL_STAMP_RE.search(candidate) is not None
    )
    if has_structural_stamp or _ROW_SHIPPED_RE.search(candidate):
        return True
    return None
402
+
403
+
404
+ def _norm_meta_shipped(shipped: Optional[Iterable[str]], series: str) -> set[str]:
405
+ """The plan-meta `shipped:[]` set normalised to comparable unit ids."""
406
+ out: set[str] = set()
407
+ for entry in shipped or []:
408
+ s = str(entry or "").strip()
409
+ if not s:
410
+ continue
411
+ m = re.match(r"\s*([A-Za-z][A-Za-z0-9.+\-]*?)(?:\s|—|-{2,}|:|$)", s)
412
+ tok = (m.group(1) if m else s).strip()
413
+ pm = re.match(r"(?i)^phase\s*(\d+(?:\.\d+)?)$", tok)
414
+ out.add(f"{series}{pm.group(1)}" if pm else tok)
415
+ return out
416
+
417
+
418
def _meta_says_shipped(unit: str, series: str, meta_shipped: set[str]) -> bool:
    """Report whether the meta shipped set names ``unit`` or one of its variants.

    A direct hit wins immediately. Otherwise the variants produced by
    `_dos_phase_variants` are regex-unescaped (each backslash-escaped
    character is replaced by the character itself) and intersected with
    ``meta_shipped``. If variant generation raises, only ``unit`` itself is
    considered.
    """
    if unit in meta_shipped:
        return True
    try:
        synonyms = set()
        for escaped in _dos_phase_variants(unit, series):
            synonyms.add(re.sub(r"\\(.)", r"\1", escaped))
    except Exception:  # pragma: no cover - defensive
        synonyms = {unit}
    return len(synonyms & meta_shipped) > 0
427
+
428
+
429
def _parent_all_children_shipped(
    parent: str, shipped_flag: dict, section_of: dict
) -> bool:
    """Decide whether ``parent`` may be rolled up to shipped from its children.

    A child is any other key of ``shipped_flag`` that starts with
    ``parent + "."`` or ``parent + "-"``. Returns True only when at least one
    child exists, every child's flag is truthy, and the parent's own section
    text (``""`` if absent) has no `_NOT_DONE_RE` marker.
    """
    child_prefixes = (parent + ".", parent + "-")
    kids = [
        unit
        for unit in shipped_flag
        if unit != parent and unit.startswith(child_prefixes)
    ]
    if not kids:
        return False
    if any(not shipped_flag[kid] for kid in kids):
        return False
    # An explicit not-done marker on the parent vetoes the roll-up.
    return _NOT_DONE_RE.search(section_of.get(parent, "")) is None
442
+
443
+
444
+ # ---------------------------------------------------------------------------
445
+ # Public API.
446
+ # ---------------------------------------------------------------------------
447
+
448
+
449
+ def enumerate_units(
450
+ source_bytes: Optional[str],
451
+ *,
452
+ grammar: EnumerateGrammar = GENERIC_GRAMMAR,
453
+ meta_shipped: Optional[Iterable[str]] = None,
454
+ ) -> Enumeration:
455
+ """Enumerate the unit ids a plan-doc body declares. PURE — no I/O, never raises.
456
+
457
+ ``source_bytes`` is the full plan-doc body (the CLI reads the file). ``grammar``
458
+ is the `EnumerateGrammar` (from `dos.toml [enumerate]` or `series=<id>` for the
459
+ series-anchored shape). ``meta_shipped`` is the plan-meta `shipped:[]` cache, if
460
+ any — an authoritative POSITIVE cache (ids here are forced shipped). The doc
461
+ body is the AUTHORITY for the unit *universe*; the cached list is only a
462
+ shipped-state hint and a drift signal (the PPG "table is authority" rule).
463
+
464
+ Returns an `Enumeration`. A body that declares nothing yields an empty one with
465
+ an `empty` `DriftNote` (the picker reads "no pickable unit," surfaced — never
466
+ the old silent `[]`). A parse error yields an empty one with an `unparseable`
467
+ `DriftNote`. The verdict is always a typed object, never an exception.
468
+ """
469
+ series = (grammar.series or "").strip().upper()
470
+ meta_set = _norm_meta_shipped(meta_shipped, series)
471
+
472
+ if not source_bytes:
473
+ return Enumeration(
474
+ series=series,
475
+ drift=(DriftNote("empty", "empty body — no unit ids declared"),),
476
+ shipped=tuple(sorted(meta_set)),
477
+ )
478
+
479
+ try:
480
+ scan = _strip_code_fences(source_bytes)
481
+ tok_re = _phase_token_re(series) if series else None
482
+
483
+ order: list[str] = []
484
+ seen: set[str] = set()
485
+ source_set: dict[str, set[str]] = {}
486
+ decide_texts: dict[str, list[str]] = {}
487
+ section_of: dict[str, str] = {}
488
+ evidence_line: dict[str, str] = {}
489
+
490
+ def _record(unit: str, source: str, decide: str, evidence: str, section: str):
491
+ if unit not in seen:
492
+ seen.add(unit)
493
+ order.append(unit)
494
+ source_set.setdefault(unit, set()).add(source)
495
+ decide_texts.setdefault(unit, []).append(decide)
496
+ section_of.setdefault(unit, section)
497
+ evidence_line.setdefault(unit, evidence)
498
+
499
+ headings = list(_iter_levels(scan, grammar.heading_levels))
500
+
501
+ # Headers — series-anchored OR generic numbered (`### 1. NAME`).
502
+ for idx, (level, m) in enumerate(headings):
503
+ title = m.group(2).lstrip("*# ").strip()
504
+ section = _section_of(scan, headings, idx, level)
505
+ unit: Optional[str] = None
506
+ if tok_re is not None:
507
+ unit = _heading_id(m.group(2), tok_re)
508
+ if unit is None and not series:
509
+ gm = _GENERIC_NUM_HEADING_RE.match(title)
510
+ if gm:
511
+ unit = gm.group(1)
512
+ if unit is not None:
513
+ _record(unit, "header", section, m.group(0).strip(), section)
514
+
515
+ # Table rows — first-cell is a unit id.
516
+ if grammar.scan_tables:
517
+ for m in _TABLE_ROW_RE.finditer(scan):
518
+ line = m.group(0)
519
+ if _TABLE_SEP_RE.match(line):
520
+ continue
521
+ first = m.group("first").strip().strip("*").strip("`").strip()
522
+ if tok_re is not None:
523
+ tm = tok_re.search(first)
524
+ if tm and (first == tm.group(0) or first.startswith(tm.group(0) + " ")):
525
+ _record(tm.group(0), "table-row", line, line.strip(), line)
526
+ elif grammar.generic_numeric_table:
527
+ gm = _GENERIC_NUM_HEADING_RE.match(first)
528
+ if gm:
529
+ _record(gm.group(1), "table-row", line, line.strip(), line)
530
+
531
+ # Fallback: bare `Phase N` headers only when nothing else matched.
532
+ if not order and grammar.bare_phase_fallback:
533
+ for idx, (level, m) in enumerate(headings):
534
+ gm = _GENERIC_PHASE_RE.match(m.group(2).lstrip("*# ").strip())
535
+ if not gm:
536
+ continue
537
+ pid = f"{series}{gm.group(1)}" if series else f"Phase {gm.group(1)}"
538
+ section = _section_of(scan, headings, idx, level)
539
+ _record(pid, "generic-header", section, m.group(0).strip(), section)
540
+ except Exception as exc: # pragma: no cover - defensive
541
+ return Enumeration(
542
+ series=series,
543
+ drift=(DriftNote("unparseable", f"parse error: {type(exc).__name__}"),),
544
+ shipped=tuple(sorted(meta_set)),
545
+ )
546
+
547
+ # Pass 1: per-unit shipped from meta OR own stamp (OR'd across surfaces).
548
+ shipped_flag: dict[str, bool] = {}
549
+ shipped_by: dict[str, ShippedBy] = {}
550
+ for unit in order:
551
+ if _meta_says_shipped(unit, series, meta_set):
552
+ shipped_flag[unit] = True
553
+ shipped_by[unit] = "meta-shipped"
554
+ elif any(
555
+ _section_shipped(txt, unit, tok_re) is True
556
+ for txt in decide_texts.get(unit, [])
557
+ ):
558
+ shipped_flag[unit] = True
559
+ shipped_by[unit] = "stamp"
560
+ else:
561
+ shipped_flag[unit] = False
562
+ shipped_by[unit] = "none"
563
+
564
+ # Pass 2: parent/child rollup to a fixpoint.
565
+ if grammar.rollup_parents:
566
+ changed = True
567
+ while changed:
568
+ changed = False
569
+ for unit in order:
570
+ if not shipped_flag[unit] and _parent_all_children_shipped(
571
+ unit, shipped_flag, section_of
572
+ ):
573
+ shipped_flag[unit] = True
574
+ shipped_by[unit] = "child-rollup"
575
+ changed = True
576
+
577
+ by_unit: dict[str, UnitSpan] = {}
578
+ remaining: list[str] = []
579
+ shipped_out: list[str] = []
580
+ for unit in order:
581
+ srcs = source_set.get(unit, set())
582
+ if {"header", "table-row"} <= srcs:
583
+ src_label: UnitSource = "header+table-row"
584
+ elif "generic-header" in srcs:
585
+ src_label = "generic-header"
586
+ elif "header" in srcs:
587
+ src_label = "header"
588
+ else:
589
+ src_label = "table-row"
590
+ ev = {
591
+ "meta-shipped": "(plan-meta shipped:[])",
592
+ "child-rollup": "all child sub-units shipped",
593
+ }.get(shipped_by[unit], evidence_line.get(unit, ""))
594
+ is_shipped = shipped_flag[unit]
595
+ by_unit[unit] = UnitSpan(unit, src_label, is_shipped, shipped_by[unit], ev)
596
+ (shipped_out if is_shipped else remaining).append(unit)
597
+
598
+ # Drift: a cached meta-shipped id the body never declared (cache names a ghost),
599
+ # and the empty-universe note. The body is authority; the cache is a hint, so a
600
+ # cache id absent from the body is a list↔table mismatch worth surfacing.
601
+ drift: list[DriftNote] = []
602
+ if not order:
603
+ drift.append(DriftNote("empty", "no unit ids found in body"))
604
+ ghosts = sorted(meta_set - set(order))
605
+ if ghosts and order:
606
+ drift.append(DriftNote(
607
+ "list_table_mismatch",
608
+ f"plan-meta shipped:[] names {len(ghosts)} unit(s) the doc body does "
609
+ f"not declare: {ghosts} — the cached list disagrees with the doc "
610
+ f"(the doc table/headings are authority, the list is cache)",
611
+ ))
612
+
613
+ return Enumeration(
614
+ series=series,
615
+ units=tuple(order),
616
+ remaining=tuple(remaining),
617
+ shipped=tuple(shipped_out),
618
+ by_unit=by_unit,
619
+ drift=tuple(drift),
620
+ )
621
+
622
+
623
+ # ---------------------------------------------------------------------------
624
+ # The `[enumerate]` config seam — the data-attachment, modelled on `dos.stamp`.
625
+ #
626
+ # The repo-wide `[enumerate]` table declares the STYLE knobs (heading levels,
627
+ # table scan, bare-Phase fallback, rollup). The `series` is supplied PER-PLAN at
628
+ # the call boundary (it differs per plan, unlike `[stamp]`'s repo-wide grammar),
629
+ # so it is deliberately NOT a TOML key — a caller layers it via `with_series`.
630
+ # ---------------------------------------------------------------------------
631
+
632
+
633
def grammar_from_table(
    table: dict, *, base: EnumerateGrammar = GENERIC_GRAMMAR
) -> EnumerateGrammar:
    """Turn a parsed `[enumerate]` TOML table into an `EnumerateGrammar`. PURE.

    Every field named in ``table`` replaces the matching field of ``base``;
    fields the table omits are inherited from ``base``. ``series`` is always
    copied from ``base``: it is per-plan and layered at the call boundary, so it
    is deliberately not an accepted table key.

    Raises:
        ValueError: ``table`` is not a dict, contains a key outside the known
            set (a typo'd field is a host mistake worth surfacing), or maps a
            known key to a value of the wrong type.
    """
    if not isinstance(table, dict):
        raise ValueError(f"[enumerate] must be a table, got {type(table).__name__}")

    known = {
        "heading_levels", "scan_tables", "generic_numeric_table",
        "bare_phase_fallback", "rollup_parents", "style",
    }
    unknown = {key for key in table if key not in known}
    if unknown:
        raise ValueError(
            f"[enumerate] has unknown key(s) {sorted(unknown)}; "
            f"known keys are {sorted(known)} (series is per-plan, not a table key)"
        )

    def _is_plain_int(value: object) -> bool:
        # bool is a subclass of int; `true`/`false` must not pass as a level.
        return isinstance(value, int) and not isinstance(value, bool)

    def _flag(name: str) -> bool:
        # Inherit the base value unless the table supplies a genuine boolean.
        inherited = getattr(base, name)
        if name not in table:
            return inherited
        value = table[name]
        if isinstance(value, bool):
            return value
        raise ValueError(
            f"[enumerate].{name} must be a boolean, got {type(value).__name__}"
        )

    # Validation order is heading_levels, then style, then the boolean flags in
    # field order, so the first error reported for a bad table stays the same.
    heading_levels = base.heading_levels
    if "heading_levels" in table:
        raw_levels = table["heading_levels"]
        is_sequence = isinstance(raw_levels, (list, tuple))
        if not (is_sequence and all(_is_plain_int(x) for x in raw_levels)):
            raise ValueError("[enumerate].heading_levels must be a list of ints")
        heading_levels = tuple(map(int, raw_levels))

    style = base.style
    if "style" in table:
        declared_style = table["style"]
        if not isinstance(declared_style, str):
            raise ValueError(
                f"[enumerate].style must be a string, got {type(declared_style).__name__}"
            )
        style = declared_style

    series = base.series  # per-plan; layered at the boundary, never from TOML
    flags = {
        name: _flag(name)
        for name in (
            "scan_tables",
            "generic_numeric_table",
            "bare_phase_fallback",
            "rollup_parents",
        )
    }
    return EnumerateGrammar(
        series=series,
        heading_levels=heading_levels,
        style=style,
        **flags,
    )
688
+
689
+
690
def load_from_toml(
    path, *, base: EnumerateGrammar = GENERIC_GRAMMAR
) -> EnumerateGrammar:
    """Build an `EnumerateGrammar` from a `dos.toml`'s `[enumerate]` table.

    Returns ``base`` unchanged when the file is absent (including a file that
    disappears between the existence check and the read), when it has no
    non-empty `[enumerate]` table, or when neither `tomllib` nor `tomli` can be
    imported — the declarative path is purely additive.

    The file is decoded as ``utf-8-sig`` so a leading BOM is stripped before
    parsing. The original note describes this loader as modelled on
    `stamp.load_from_toml`.

    Args:
        path: Location of the TOML file; anything `pathlib.Path` accepts.
        base: Grammar to return as-is, or to inherit omitted fields from.

    Raises:
        ValueError: the `[enumerate]` table is present but malformed (raised by
            `grammar_from_table`).
        Exception: TOML syntax errors from the parser and read errors other
            than a missing file propagate unchanged.
    """
    from pathlib import Path

    p = Path(path)
    if not p.exists():
        return base
    try:
        import tomllib
    except ModuleNotFoundError:  # pragma: no cover - py<3.11 fallback
        try:
            import tomli as tomllib  # type: ignore
        except ModuleNotFoundError:
            return base
    try:
        text = p.read_text(encoding="utf-8-sig")
    except FileNotFoundError:
        # The file vanished after the exists() check (e.g. a concurrent
        # delete/rename). That is still "absent", so honour the contract
        # instead of leaking the race to the caller.
        return base
    table = tomllib.loads(text).get("enumerate")
    if not isinstance(table, dict) or not table:
        return base
    return grammar_from_table(table, base=base)
716
+
717
+
718
def with_series(grammar: EnumerateGrammar, series: str) -> EnumerateGrammar:
    """Return a copy of ``grammar`` carrying a per-plan ``series`` (the call-boundary seam).

    The repo-wide `[enumerate]` table declares STYLE; the SERIES differs per
    plan, so the CLI/host supplies it here. ``series`` is stripped and
    upper-cased; a falsy value becomes the empty string. When the resulting
    series is non-empty and the grammar's ``style`` label is still
    ``"generic"``, the label is switched to ``"series"`` for legibility; any
    other ``style`` value is left untouched.
    """
    from dataclasses import replace

    normalized = (series or "").strip().upper()
    current_style = grammar.style
    # Only the "generic" label is relabelled, and only for a real series.
    relabel = bool(normalized) and current_style == "generic"
    return replace(
        grammar,
        series=normalized,
        style="series" if relabel else current_style,
    )
733
+
734
+
735
+ # ---------------------------------------------------------------------------
736
+ # Phase 2c — `enumerate` as the doc-side producer of the `declared` extent.
737
+ #
738
+ # CORRECTION (docs/207-seam-ledger §4.5): `completion.classify` reads `declared`
739
+ # from `state.declared_steps` (the INTENT LEDGER), NOT a host callback. There is
740
+ # no callback to remove. The honest Phase-2c task is to make `enumerate` an
741
+ # ALTERNATIVE producer of the `declared` extent for a workspace that declares its
742
+ # units in PLAN DOCS rather than minting intent-ledger steps — the two are
743
+ # different sources of "declared" (doc-enumeration vs ledger-fossils). This bridge
744
+ # is the doc-side producer; a host hands its output to whatever consumes a declared
745
+ # set (the picker's residual, or a `LedgerState` it mints from the doc universe).
746
+ # The kernel keeps `completion` pure and ledger-grounded; this just closes the
747
+ # closed concept (oracle → enumerate → completion) on the doc side.
748
+ # ---------------------------------------------------------------------------
749
+
750
+
751
def declared_extent(enumeration: Enumeration) -> tuple[str, ...]:
    """Return the ordered `declared` unit ids of a plan doc.

    PURE accessor over ``enumeration.units``: the whole unit universe, shipped
    and remaining alike, in document order. This is the doc-side producer of the
    extent that `completion`/the picker measures a residual against. A host that
    declares its units in plan docs reads this value; a host that mints
    intent-ledger steps reads `state.declared_steps` instead. Both feed the same
    `declared` contract, so `completion`'s residual arithmetic is unchanged.
    """
    declared = enumeration.units
    return declared
763
+
764
+
765
def residual_from_enumeration(enumeration: Enumeration) -> tuple[str, ...]:
    """Return the doc-derived residual (declared − verified) of an `Enumeration`.

    PURE accessor over ``enumeration.remaining``: the not-yet-shipped unit ids
    in document order. It is the `enumerate`-side analogue of `completion`'s
    residual, letting a doc-declared workspace with no intent ledger compute
    "what is left" from the plan doc plus the ship verdicts already folded into
    the enumeration, with no host callback. Where an intent ledger exists, the
    ledger-grounded `completion.classify` remains the authority; this is the
    floor for the doc-only case.
    """
    residual = enumeration.remaining
    return residual