dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/stamp.py ADDED
@@ -0,0 +1,1002 @@
1
+ """The ship-stamp convention — the grep rung's subject grammar, *as data*.
2
+
3
+ This is the hackability seam for the kernel's single most-important syscall:
4
+ `verify()` (the truth syscall). The registry-first path and the ancestry check
5
+ are domain-free already; the one piece that was NOT was the **grep fallback's
6
+ grammar** — what a commit subject has to look like to count as a direct ship.
7
+ `phase_shipped.py` hardcoded the *reference userland app's* convention:
8
+
9
+ _DIRECT_PREFIX = r"(?:docs|go|agents|job_search|scripts)" # the host's own top-level dirs
10
+
11
+ so a direct ship had to read `docs/<SERIES>: <PHASE>` / `go/<SERIES>:`. A foreign
12
+ repo committing `AUTH: AUTH2 — ship token refresh` (the `<SERIES>: <PHASE>` shape
13
+ with no dir prefix) resolved to `NOT_SHIPPED (via none)` even though the subject
14
+ literally names the phase. The North Star claims `verify` works against *any* git
15
+ repo from history alone; that was true only for the reference userland app's own
16
+ subject convention.
17
+
18
+ This module lifts that grammar into per-workspace data, exactly the way
19
+ `LaneTaxonomy` lifted the lane clusters and `ReasonRegistry` lifted the refusal
20
+ vocabulary. The *mechanism* (grep `git log`, ancestry-check, registry-first,
21
+ the progress-marker / bookkeeping demotions) stays in `phase_shipped.py`; the
22
+ *grammar of a ship subject* moves here as a `StampConvention` a host declares.
23
+
24
+ The shape
25
+ =========
26
+
27
+ A `StampConvention` is the closed set of subject-shape parameters the matcher
28
+ needs. It carries no regex itself — it carries the *data* (which dir prefixes,
29
+ which summary-bundle prefixes, which bookkeeping prefixes) and exposes the three
30
+ **regex fragments** `phase_shipped` interpolates into its compiled patterns:
31
+
32
+ * ``direct_prefix_re()`` — the alternation that anchors a direct ship.
33
+ Job: ``(?:docs|go|agents|job_search|scripts)/``. Generic (no ``subject_dirs``):
34
+ an *optional* prefix so a bare ``<SERIES>: <PHASE>`` matches with no dir at all.
35
+ * ``summary_subject_re()`` — the ``vX.Y.Z:`` release shape OR an allowlisted
36
+ standalone-summary prefix (job: ``docs/HYG:``). Gates the release-prefix and
37
+ body scans.
38
+ * ``bookkeeping_subject_re()`` — the ship-SHAPED-but-not-a-ship exclusion
39
+ (soft-claims, archive rollups, bulk snapshots). A subject matching this is
40
+ never counted as a ship on any scan path. (Returned as a compiled pattern, not a string fragment.)
41
+
42
+ Two named constants ship in the package:
43
+
44
+ * ``JOB_STAMP_CONVENTION`` — the current hardcoded grammar, lifted verbatim, so
45
+ the reference userland app and the existing kernel suite are byte-for-byte
46
+ unchanged. It is a plain default the kernel falls back to (NOT an import from
47
+ ``drivers.job``) — the same pattern as the ``main``/``global`` lanes in ``config.py``.
48
+ * ``GENERIC_STAMP_CONVENTION`` — no dir prefix, no host-specific bundle/
49
+ bookkeeping prefixes (only the universal ``vX.Y.Z:`` release shape and the
50
+ universal ``... snapshot:`` bulk-commit guard). This is what an external
51
+ repo's subjects look like: a bare ``<SERIES>: <PHASE>`` / ``<SERIES><PHASE>``.
52
+
53
+ Pure stdlib — no third-party imports, no I/O — so `phase_shipped` imports it as a
54
+ leaf, the same way it would have used the module-level constants it replaces.
55
+ """
56
+
57
+ from __future__ import annotations
58
+
59
+ import re
60
+ from dataclasses import dataclass
61
+ from pathlib import Path
62
+
63
+
64
# Release-cut anchor: a `vX.Y.Z:` version commit bundles several ships into one
# free-form summary line. Every repo that tags releases uses this shape, so it is
# NOT host-specific: the fragment builder bakes it in rather than having each
# workspace declare it, and a convention contributes only its *extra*
# standalone-summary prefixes (job's `docs/HYG:`) on top of it.
_RELEASE_VERSION_ANCHOR = r"v\d+\.\d+\.\d+:"

# Bulk-snapshot guard: a `working-dir snapshot:` / `... snapshot:` commit sweeps
# hundreds of files and quotes phase ids descriptively, never as a ship
# attribution. Host-agnostic like the release anchor, so every bookkeeping regex
# carries it regardless of the declared prefixes.
_SNAPSHOT_BOOKKEEPING_FRAGMENT = r"[^:]*\bsnapshot:"

# Run-archive-rollup guard: a `<prefix>: archive <RUN-ID> …` commit is a fan-out /
# dispatch run rollup that QUOTES the phase ids of the runs it archives; it never
# ships any one of them. (Live false-positive: a foreign repo's
# `docs/fanout: archive 20260530T093407Z chain (vllm-p2p3, …)` resolved as a ship
# of a `fanout`/`archive` phase under the generic dir-free grammar.)
#
# The discriminator is host-agnostic and TIGHT: the word `archive` (or `rollup`)
# immediately followed by a run-id-shaped timestamp token. Requiring that token is
# what keeps a legitimately-named `archive` PHASE from being excluded — a real
# `… : archive` ship has no run-id tail.
#
# Piece by piece:
#   * An OPTIONAL `<prefix>:` may precede the verb (`docs/fanout: archive …`,
#     `chore: archive …`, or a bare `archive …`): one non-colon prefix segment,
#     then one colon. The guard therefore fires whether or not the host declared
#     that prefix as bookkeeping. It is the zero-config safety net BENEATH the
#     declared `bookkeeping_prefixes`: a host that names its rollup prefix
#     (`docs/fanout:`) still gets the precise exclusion, and a host that declares
#     nothing is still safe against the universal `archive <run-id>` shape.
#   * The run-id tail is `<YYYYMMDD>`, optionally followed by a `T`-separated
#     time of VARIABLE width and an optional trailing `Z`. Observed run-ids are
#     `20260530T093407Z` (full HHMMSS), `20260529T0233Z` (shortened) and a bare
#     `20260530`, so the time part is `t\d+z?` (one-or-more digits) rather than a
#     fixed `\d{6}`. The leading 8-digit date is the load-bearing discriminator.
#   * The fragment is written in lowercase; `bookkeeping_subject_re` compiles it
#     case-insensitively.
_RUN_ARCHIVE_BOOKKEEPING_FRAGMENT = (
    r"(?:[^:]*:\s*)?"      # optional single `<prefix>:` segment
    r"(?:archive|rollup)"  # the rollup verb
    r"\s+\d{8}"            # 8-digit date (the discriminator)
    r"(?:t\d+z?)?"         # optional variable-width time, optional `z`
    r"\b"
)

# Shared-infra hub basenames: files nearly every phase touches, so a coincidental
# edit to one is never the *distinctive* ship evidence the file-path rung relies
# on (`_check_phase_by_filepath`'s 2-file overlap rule). A plan section naming two
# of these alone must NOT let an unrelated commit false-ship the phase. The set is
# host-AGNOSTIC — `config.py`/`__init__.py`/`cli.py`/`conftest.py` are hub files in
# *any* Python repo — so every convention gets it. A host with its OWN hub file
# (the reference app's `fanout_state.py`) declares it as an EXTRA through
# `infra_basenames`, layered on top of this base (the same additive discipline as
# the release anchor).
_UNIVERSAL_INFRA_BASENAMES = frozenset((
    "config.py",
    "__init__.py",
    "models.py",
    "cli.py",
    "utils.py",
    "constants.py",
    "settings.py",
    "conftest.py",
))

# Bulk-regenerated documentation guard: any rendered diagram under `docs/` (`.mmd`
# source or `.png` render) is co-regenerated wholesale by unrelated release
# commits, which makes it shared-infra for the same reason the hub code files are
# (AAR-FQ-DL4). The *suffix* rule (any `docs/…*.mmd`/`*.png`) is host-agnostic; a
# host's SPECIFIC named reference hubs (the reference app's `architecture.mmd`,
# `00_subsystems-reference.md`) are declared as EXTRAS via `infra_doc_basenames`.
_UNIVERSAL_DIAGRAM_SUFFIXES = (".mmd", ".png")
127
+
128
+
129
# A phase label is `P<digits>` or `Phase <digits>`, optionally followed by one
# sub-letter and/or a `.<digits>` decimal: `P3`, `P4.6`, `P1c`, `P3b.2`,
# `Phase 1c`, `Phase 1`. Because a digit must come straight after `P` (or
# `Phase `), prose such as "Python", "PR" or "GPT-3" never matches; the closing
# `\b` rejects `P3xyz`. Pure-stdlib leaf primitive (no I/O) — part of the subject
# grammar this module owns, lifted UP from bench's
# scripts/next_context.py:_PHASE_LABEL_RE.
_PHASE_LABEL_RE = re.compile(
    r"\b(?:Phase\s+|P)\d+[a-z]?(?:\.\d+)?\b", re.IGNORECASE
)


def parse_phase_labels(subject: str | None) -> list[str]:
    """Return the normalized phase ids named in a commit subject.

    Examples::

        "SGLang-Metrics P3 …"       -> ["P3"]
        "exec-sweep P4.6 done"      -> ["P4.6"]
        "exec-sweep P3b.2 …"        -> ["P3b.2"]  (letter-then-decimal sub-phase)
        "L3 busy device Phase 1c"   -> ["P1c"]    (Phase N -> PN)
        "close out all P0s"         -> ["P0"]     (plural artifact stripped)
        "fix typo in readme"        -> []         (no false positives on prose)
        None                        -> []         (None-safe)

    The result is de-duplicated and sorted (plain string order). No I/O.
    """

    def _normalize(raw: str) -> str:
        # "Phase 1c" / "phase  1c" -> "P1c"
        label = re.sub(r"(?i)^phase\s+", "P", raw)
        # A lowercase leading "p" becomes "P"; the rest is left untouched.
        label = label[:1].upper() + label[1:]
        # "P0s" -> "P0": an "s" directly after a digit is treated as a plural.
        return re.sub(r"(?<=\d)s$", "", label)

    text = subject if subject else ""
    return sorted({_normalize(hit.group(0)) for hit in _PHASE_LABEL_RE.finditer(text)})
161
+
162
+
163
+ @dataclass(frozen=True)
164
+ class StampConvention:
165
+ """How a workspace stamps a shipped phase in its commit subjects — as data.
166
+
167
+ Every field is the *data* behind one regex fragment the grep rung compiles;
168
+ no field is a regex itself (a host declares dir names, not patterns). The
169
+ matcher in `phase_shipped` reads the three ``*_re()`` accessors and never the
170
+ raw constants it used to hardcode.
171
+
172
+ Fields:
173
+ subject_dirs
174
+ The top-level dirs a *direct* ship subject may carry before
175
+ ``<SERIES>:`` — the reference userland app's ``docs``/``go``/``agents``/
176
+ ``job_search``/``scripts``. An **empty** tuple means "no dir prefix": a bare
177
+ ``<SERIES>: <PHASE>`` (the generic external-repo shape). The accessor
178
+ makes the prefix optional in that case rather than impossible.
179
+ summary_bundle_prefixes
180
+ Standalone-summary subject prefixes (besides the universal ``vX.Y.Z:``)
181
+ that may bundle several phases into one free-form line — job's
182
+ ``docs/HYG:``. A foreign repo usually declares none and relies on the
183
+ release anchor alone.
184
+ bookkeeping_prefixes
185
+ Subject prefixes that NAME phase ids without shipping them (soft-claims,
186
+ run-archive rollups): job's ``docs/_plans:`` / ``docs/fanout:`` / …. A
187
+ subject matching one of these (or the universal ``snapshot:`` guard) is
188
+ excluded from ship-detection on every scan path. Matched
189
+ case-insensitively, anchored at subject start.
190
+ style
191
+ The detection style. Only ``"grep"`` is implemented (scan git-log
192
+ subjects). Kept as the forward extension point for a future tag- or
193
+ trailer-based style; a non-``"grep"`` value is accepted as data but the
194
+ kernel still runs the grep rung (the field is declarative-only for now).
195
+
196
+ code_dirs
197
+ The top-level dirs whose files count as a phase's *load-bearing
198
+ deliverables* for the **file-path backstop** rung
199
+ (`phase_shipped._check_phase_by_filepath`). That rung harvests the file
200
+ paths a phase's plan-doc section names, then asks "did one commit touch
201
+ ≥2 of them together?" — an artefact match that catches a ship whose
202
+ commit *subject* drifted off the phase token. To harvest a path the rung
203
+ must first RECOGNISE the token as a repo-file path, which it does by
204
+ rooting it at a known top-level dir. The reference app hardcoded its own
205
+ dirs (``agents|job_search|go|scripts|templates|config|docs|tests``); a
206
+ foreign repo whose deliverables live under ``engine/``/``models/``/
207
+ ``commands/`` saw the rung harvest **nothing**, so the artefact backstop
208
+ was dead and every subject-drifted ship resolved ``via none``.
209
+
210
+ This lifts that allowlist to data. An **empty** tuple (the generic
211
+ default) means "any plausible top-level dir": a path token rooted at any
212
+ ``<segment>/…<ext>`` is harvested. That is SOUND — the dir allowlist was
213
+ only ever a *recognition* narrowing, never a false-positive gate (those
214
+ are the 2-file-overlap, distinctive-file, bookkeeping-exclusion, and
215
+ cross-series guards downstream, all preserved). A host that wants the
216
+ tight allowlist (the reference app) declares its dirs here.
217
+ infra_basenames
218
+ EXTRA shared-infra hub *code* file basenames, layered ON TOP of the
219
+ universal set (`_UNIVERSAL_INFRA_BASENAMES`: ``config.py``/``cli.py``/
220
+ ``conftest.py``/…). A file whose basename is shared-infra is excluded
221
+ from the file-path rung's *distinctive*-overlap count — a coincidental
222
+ edit to a hub file is not ship evidence. The universal set covers any
223
+ Python repo; a host's OWN hub (the reference app's ``fanout_state.py``)
224
+ is declared here. Additive, never replace — you cannot un-declare a
225
+ universal hub (it is shared-infra by nature).
226
+ infra_doc_basenames
227
+ EXTRA bulk-regenerated documentation hub basenames, layered on top of
228
+ the universal diagram rule (any ``docs/…*.mmd``/``*.png`` is shared-infra
229
+ regardless). A host's named cross-cutting reference docs (the reference
230
+ app's ``architecture.mmd``/``00_subsystems-reference.md``) go here.
231
+ Additive, same discipline as ``infra_basenames``.
232
+ progress_markers
233
+ Words that, immediately after the phase id with a bare space (no
234
+ ``:``/``—``/``-`` separator), mark a commit as *progress on* a multi-step
235
+ phase rather than a *ship of* it — the reference app's soak/observation
236
+ vocabulary (``week-1``/``audit``/``baseline``/``soak``/…). The grep rung
237
+ DEMOTES a ``<dir>/<SERIES>: <PHASE> <marker>`` subject so an incremental
238
+ commit on a long-running phase is not mistaken for its close-out ship.
239
+
240
+ This was a hardcoded module frozenset, so it fired on EVERY repo — a
241
+ foreign repo's genuine direct ship ``cache: Phase 0 audit of …`` was
242
+ silently demoted to NOT_SHIPPED because ``audit`` followed the id (a real
243
+ Benchmark false-negative). An **empty** tuple (the generic default) means
244
+ "no progress vocabulary" → a foreign repo's real ships are never demoted;
245
+ the worst failure mode (a *lost* ship) cannot happen out of the box. The
246
+ reference app declares its markers here; a host with its own soak
247
+ vocabulary declares its own.
248
+ sub_phase_parent_fallback
249
+ Whether a hyphen-suffixed query (``RS4-port``) that misses every direct
250
+ pass should fall back to checking the bare PARENT phase (``RS4``) and
251
+ accept it if the suffix slug appears in the matched commit's subject — a
252
+ reference-app convenience for its sub-phase id habit. It was gated purely
253
+ on the QUERY shape (``if "-" in phase``), so it fired on any repo: a
254
+ fabricated ``P2-CLI`` false-resolved to a real ``P2`` ship whose subject
255
+ merely contained ``CLI`` (a real Benchmark false-positive). Lifting it to
256
+ a per-convention FLAG (default ``False``) makes the behaviour declared,
257
+ not inferred from a query the kernel doesn't control — the closed-enum
258
+ discipline applied to a feature toggle. The reference app sets it
259
+ ``True``; a generic repo never runs the fallback.
260
+ trailer_stamp
261
+ Whether a subject whose TAIL is ``(<PLAN> <PHASE>)`` — also
262
+ ``(<PLAN>: <PHASE>)`` and ``(refs <PLAN> <PHASE>)`` — counts as a
263
+ direct ship of that ``(plan, phase)`` (docs/289). The
264
+ Conventional-Commits shape: ``feat(pypi): … (docs/286 Phase 3)``
265
+ carries the stamp as a parenthesized trailer at the END of the
266
+ subject, which no start-anchored grammar can see. Opt-in (default
267
+ ``False``) because it widens what is *recognized*; the tightness the
268
+ start anchor provided comes from the end anchor + required parens
269
+ instead (`trailer_ship_core`). The trailer is exactly as forgeable as
270
+ the start-anchored subject, so the rung grades `grep-subject` like
271
+ the direct rung it mirrors.
272
+ """
273
+
274
+ subject_dirs: tuple[str, ...] = ()
275
+ summary_bundle_prefixes: tuple[str, ...] = ()
276
+ bookkeeping_prefixes: tuple[str, ...] = ()
277
+ style: str = "grep"
278
+ code_dirs: tuple[str, ...] = ()
279
+ infra_basenames: tuple[str, ...] = ()
280
+ infra_doc_basenames: tuple[str, ...] = ()
281
+ progress_markers: tuple[str, ...] = ()
282
+ sub_phase_parent_fallback: bool = False
283
+ trailer_stamp: bool = False
284
+
285
+ # -- serialization (crosses the grep-rung subprocess boundary) ----------
286
def to_dict(self) -> dict:
    """Return the convention as plain, JSON-serializable data.

    Tuple-valued fields are emitted as lists; ``style`` and the two boolean
    flags are emitted unchanged. Keys appear in field-declaration order.

    This is how the active convention reaches the `phase_shipped`
    SUBPROCESS: the grep rung shells out to a fresh Python process whose
    `config.active()` would otherwise re-derive the DEFAULT (job)
    convention and lose a caller-installed or `dos.toml`-declared one. The
    parent serializes the active convention into an env var and the child
    rebuilds it with `from_dict`, so an in-process `set_active(cfg)` stays
    authoritative across the process boundary (design-law 2 — one
    convention, every path, even the shelled-out one).
    """
    payload: dict = {}
    for name in ("subject_dirs", "summary_bundle_prefixes", "bookkeeping_prefixes"):
        payload[name] = list(getattr(self, name))
    payload["style"] = self.style
    for name in (
        "code_dirs",
        "infra_basenames",
        "infra_doc_basenames",
        "progress_markers",
    ):
        payload[name] = list(getattr(self, name))
    payload["sub_phase_parent_fallback"] = self.sub_phase_parent_fallback
    payload["trailer_stamp"] = self.trailer_stamp
    return payload
310
+
311
@classmethod
def from_dict(cls, data: dict) -> "StampConvention":
    """Rebuild a convention from its `to_dict` form.

    Missing keys are tolerated so a partial / forward-compatible payload
    does not crash the child; it degrades to the generic shape:

    * a sequence field that is absent, ``None`` or empty becomes ``()``;
    * ``style`` that is absent or falsy becomes ``"grep"``;
    * an absent boolean flag becomes ``False``.

    A sequence field given as a bare string (``"docs"`` instead of
    ``["docs"]``) is read as ONE entry. Passing it straight to ``tuple()``
    would split it into single characters and yield a nonsense alternation.
    """

    def _as_tuple(key: str) -> tuple:
        value = data.get(key, ()) or ()
        if isinstance(value, str):
            # A string is iterable, but here it means a single declared name.
            return (value,)
        return tuple(value)

    return cls(
        subject_dirs=_as_tuple("subject_dirs"),
        summary_bundle_prefixes=_as_tuple("summary_bundle_prefixes"),
        bookkeeping_prefixes=_as_tuple("bookkeeping_prefixes"),
        style=str(data.get("style", "grep") or "grep"),
        code_dirs=_as_tuple("code_dirs"),
        infra_basenames=_as_tuple("infra_basenames"),
        infra_doc_basenames=_as_tuple("infra_doc_basenames"),
        progress_markers=_as_tuple("progress_markers"),
        sub_phase_parent_fallback=bool(data.get("sub_phase_parent_fallback", False)),
        trailer_stamp=bool(data.get("trailer_stamp", False)),
    )
328
+
329
+ # -- the three regex fragments the grep rung interpolates ---------------
330
def direct_prefix_re(self) -> str:
    r"""Return the regex fragment for a direct-ship subject's dir prefix.

    * ``subject_dirs`` declared → ``(?:docs|go|…)/``: the prefix is REQUIRED
      (the job grammar). Each dir name is regex-escaped.
    * ``subject_dirs`` empty → ``(?:\w[\w.\-]*/)?``: an OPTIONAL,
      SINGLE-component path prefix, so both a bare ``AUTH: AUTH2`` (no dir)
      and ``src/AUTH: AUTH2`` (one dir) match. This is what makes the truth
      syscall domain-free: an external repo committing ``AUTH2: …`` with no
      dir prefix is recognised, while a repo that scopes ships under a dir
      still works.

    The trailing ``<SERIES>:`` is NOT included — the caller appends the
    series + phase alternation, exactly as it did with the old
    ``_DIRECT_PREFIX`` constant.
    """
    declared = self.subject_dirs
    if not declared:
        # One optional leading path component, deliberately with NO embedded
        # `/`: not a greedy `.*` and not multi-segment. Allowing `/` in the
        # class let `docs/notes/sub/AUTH2:` (a deep, unrelated note that
        # merely *names* the id) false-match a direct ship — the
        # adversarial-review correctness finding. A single segment keeps the
        # direct anchor tight to the subject start; release / bookkeeping
        # subjects are handled by their own guards.
        return r"(?:\w[\w.\-]*/)?"
    alternation = "|".join(map(re.escape, declared))
    return "(?:" + alternation + ")/"
356
+
357
def direct_ship_core(self, series_re: str, phase_alt: str) -> str:
    r"""Return the full direct-ship regex core (what follows ``<sha>\s+``).

    ``series_re`` and ``phase_alt`` are already-escaped fragments (the caller
    built them from `_phase_variants`). The caller anchors a boundary after
    the returned core and compiles it case-insensitively.

    Two ship-subject shapes exist, and the generic convention needs both:

    * **Prefixed** — ``<dir>/<SERIES>:?\s+<PHASE>``: series, optional colon,
      whitespace, then the phase token (`docs/AUTH: AUTH2`, or the spaced
      generic `AUTH: 2`). It is the ONLY shape the job convention emits, so
      with ``subject_dirs`` set it is returned alone — byte-identical to the
      pre-SCV `{_DIRECT_PREFIX}/{series}:?\s+{phase}` pattern.
    * **Glued** — ``<SERIES><PHASE>:``: the concatenated phase id at subject
      start followed by a colon (the North-Star `AUTH2: ship token refresh`,
      where `AUTH2` = series + phase). Only a no-dir convention adds it, as
      a second alternative, so a foreign repo naming the phase id directly
      is recognised.

    So a no-dir (generic) convention matches EITHER shape, and a dir-scoped
    (job) convention matches only the prefixed one — nothing about the job
    grep rung changes.
    """
    dir_prefix = self.direct_prefix_re()
    phase_group = f"(?:{phase_alt})"
    spaced_form = dir_prefix + series_re + r":?\s+" + phase_group
    if not self.subject_dirs:
        # Gluing the series in front of a bare phase token yields the full id
        # (`AUTH` + `2` → `AUTH2`). The trailing colon is required so the
        # match is unambiguously a ship attribution (`AUTH2:`) rather than an
        # incidental substring. The series is optional so a query that
        # already passes the full id as the phase (`AUTH2`) still matches
        # without doubling the series.
        glued_form = f"{dir_prefix}(?:{series_re})?{phase_group}:"
        return f"(?:{spaced_form}|{glued_form})"
    return spaced_form
397
+
398
def trailer_ship_core(self, series_alt: str, phase_alt: str) -> str | None:
    r"""Return the trailer-form direct-ship fragment, or ``None`` when the
    convention has not opted in via ``trailer_stamp`` (docs/289).

    The fragment matches a parenthesized ``(<PLAN> <PHASE>)`` stamp at the
    END of a subject — the Conventional-Commits shape (``feat(pypi): …
    (docs/286 Phase 3)``), which the start-anchored `direct_ship_core` can
    never see. Three spellings are accepted: ``(<PLAN> <PHASE>)``,
    ``(<PLAN>: <PHASE>)`` and ``(refs <PLAN> <PHASE>)``.

    Unlike the other fragments this one carries its OWN anchor
    (``\)\s*$``), so the caller searches instead of appending a boundary.
    The close paren right after the phase token IS the right boundary: a
    ``Phase 3`` query cannot match ``(… Phase 30)`` or ``(… Phase 3 audit)``
    (a progress-marked trailer is not a ship — fail-closed). The end anchor
    keeps a subject that merely NAMES an id in prose, or in a mid-subject
    paren, from matching.

    ``series_alt`` is an already-escaped alternation of plan-id spellings
    (the caller bridges ``docs/286_<slug>`` ↔ ``docs/286`` — see
    `phase_shipped._series_variants`); ``phase_alt`` is the same
    `_phase_variants` alternation every other rung uses. The convention's
    dir prefix is admitted OPTIONALLY before the series — even when
    ``subject_dirs`` makes it required at subject start — because a trailer
    names the plan as written in the plan registry (``docs/286``), not as a
    ship-subject prefix; the parens + end anchor supply the tightness the
    required prefix used to.

    Bookkeeping / summary exclusion stays the CALLER's job (the same
    post-match guards as the direct pass — `phase_shipped` Pass 1a′), just
    as for `direct_ship_core`.
    """
    if self.trailer_stamp:
        dir_prefix = self.direct_prefix_re()
        pieces = (
            r"\(\s*",             # opening paren
            r"(?:refs\s+)?",      # optional `refs ` spelling
            f"(?:{dir_prefix})?",  # dir prefix, always optional in a trailer
            f"(?:{series_alt})",  # plan id
            r":?\s+",             # optional colon, then whitespace
            f"(?:{phase_alt})",   # phase token
            r"\s*\)\s*$",         # close paren at the end of the subject
        )
        return "".join(pieces)
    return None
434
+
435
def summary_subject_re(self) -> str:
    """Return the regex fragment that matches a summary-bundle subject.

    The fragment is a non-capturing alternation of the universal release
    anchor (``_RELEASE_VERSION_ANCHOR``) followed by every declared
    ``summary_bundle_prefixes`` entry, each regex-escaped so it matches
    literally. With no declared prefixes the alternation holds only the
    release anchor.
    """
    declared = (re.escape(prefix) for prefix in self.summary_bundle_prefixes)
    alternatives = [_RELEASE_VERSION_ANCHOR, *declared]
    return r"(?:" + "|".join(alternatives) + r")"
446
+
447
def bookkeeping_subject_re(self) -> "re.Pattern[str]":
    """Compile the start-anchored, case-insensitive bookkeeping matcher.

    A subject matching this pattern only NAMES phase ids as narrative and
    must never count as a ship. The alternation is built from:

    * every declared ``bookkeeping_prefixes`` entry, regex-escaped;
    * the universal bulk-snapshot guard (``_SNAPSHOT_BOOKKEEPING_FRAGMENT``);
    * the universal run-archive guard (``_RUN_ARCHIVE_BOOKKEEPING_FRAGMENT``).

    The two universal guards are always present, so a convention that
    declares no prefixes still excludes those two shapes.
    """
    alternatives = [
        *(re.escape(prefix) for prefix in self.bookkeeping_prefixes),
        _SNAPSHOT_BOOKKEEPING_FRAGMENT,
        _RUN_ARCHIVE_BOOKKEEPING_FRAGMENT,
    ]
    body = "|".join(alternatives)
    return re.compile(r"^(?:" + body + r")", re.IGNORECASE)
467
+
468
+ # -- the file-path backstop rung (artefact match, see phase_shipped) ----
469
def repo_path_re(self) -> "re.Pattern[str]":
    """Compile the regex that harvests repo-file paths from plan-doc text.

    Group 1 of every match is the repo-relative path: any run of leading
    ``./`` / ``../`` link-relative segments is consumed outside the group.
    A file extension is required, so a bare directory mention such as
    ``engine/`` is never harvested.

    Two shapes, selected by ``self.code_dirs``:

    * declared ``code_dirs`` -> a tight allowlist: the path must start with
      one of the (escaped) declared directory names. This branch has no
      left boundary, deliberately, so it stays identical to the historical
      allowlist pattern.
    * empty ``code_dirs`` (generic) -> any single top-level segment that
      contains NO dot, guarded on the left so the match cannot begin in the
      middle of a longer token. The dot-free first segment keeps URL hosts
      (``github.com``) and version roots (``v1.2.3``) out; the left
      boundary stops the matcher from sliding right and lifting
      ``com/user/repo.git`` out of a URL instead.
    """
    if not self.code_dirs:
        # Generic: one dot-free leading segment, then the rest of the path
        # and a required extension, with a left boundary against sliding.
        return re.compile(
            r"(?<![\w./-])(?:\.\.?/)*(\w[\w\-]*/[\w./-]+\.[A-Za-z0-9]+)"
        )
    # Tight allowlist: a closed set of literal directory names.
    allowed = "|".join(map(re.escape, self.code_dirs))
    return re.compile(
        rf"(?:\.\.?/)*((?:{allowed})/[\w./-]+\.[A-Za-z0-9]+)"
    )
519
+
520
def infra_basename_set(self) -> frozenset[str]:
    """Return the shared-infra *code* basenames: universal plus declared.

    The result is the union of ``_UNIVERSAL_INFRA_BASENAMES`` and this
    convention's ``infra_basenames``. Declared names only ever add to the
    universal set; they never replace it.
    """
    declared = frozenset(self.infra_basenames)
    return _UNIVERSAL_INFRA_BASENAMES | declared
530
+
531
def infra_doc_basename_set(self) -> frozenset[str]:
    """Return the shared-infra *doc* basenames: the declared extras only.

    This is just ``infra_doc_basenames`` frozen into a set. No universal
    names are mixed in here; the diagram-suffix rule is applied separately
    by ``is_shared_infra``.
    """
    declared = self.infra_doc_basenames
    return frozenset(declared)
540
+
541
def is_shared_infra(self, path: str) -> bool:
    """Report whether ``path`` is a hub file excluded from the overlap count.

    A path counts as shared infrastructure when either:

    * its basename (the text after the last ``/``) is in
      ``infra_basename_set()`` or ``infra_doc_basename_set()``; or
    * it lives under ``docs/`` and its basename ends with one of
      ``_UNIVERSAL_DIAGRAM_SUFFIXES``.

    Every comparison is case-folded (``str.casefold``) on both sides, so a
    mis-cased path such as ``agents/Config.py`` still matches a declared
    ``config.py``, and a capitalized declared name still matches a
    lower-case path. The function performs no I/O.
    """
    folded = path.casefold()
    basename = folded.rsplit("/", 1)[-1]
    # Fold the declared names too: they are lowercase by convention only.
    hub_names = {
        name.casefold()
        for names in (self.infra_basename_set(), self.infra_doc_basename_set())
        for name in names
    }
    if basename in hub_names:
        return True
    # Any diagram under docs/ is treated as a regenerated hub.
    return folded.startswith("docs/") and basename.endswith(
        _UNIVERSAL_DIAGRAM_SUFFIXES
    )
576
+
577
+ # -- progress-marker demotion + bundle-slug fallback (see phase_shipped) --
578
def progress_marker_set(self) -> frozenset[str]:
    """Return this convention's progress-marker words, lowercased.

    The words come from ``progress_markers``. They are lowercased here so a
    caller can compare an already-lowercased token against the set. An
    empty ``progress_markers`` yields an empty set.
    """
    lowered = {marker.lower() for marker in self.progress_markers}
    return frozenset(lowered)
588
+
589
def bundle_slugs(self) -> frozenset[str]:
    """Return the UPPERCASED series slugs named by ``summary_bundle_prefixes``.

    For each declared prefix (for example ``docs/HYG:``) the surrounding
    whitespace and trailing colons are stripped, everything up to and
    including the last ``/`` is dropped, and the remainder is uppercased
    (``HYG``). Prefixes that reduce to an empty string are skipped. With no
    declared prefixes the result is an empty set.
    """
    tails = (
        prefix.strip().rstrip(":").rsplit("/", 1)[-1]
        for prefix in self.summary_bundle_prefixes
    )
    return frozenset(tail.upper() for tail in tails if tail)
609
+
610
def recognizes_direct_ship(self, subject: str) -> bool:
    """Heuristically decide whether ``subject`` looks like a ship to this convention.

    The concrete series/phase ids are unknown here, so permissive
    placeholders are fed to ``direct_ship_core`` (anchored at subject start)
    and then to ``trailer_ship_core`` (searched; it may return ``None``).

    Returns ``False`` up front for:

    * an empty / whitespace-only / ``None`` subject;
    * a subject matched by ``bookkeeping_subject_re()``;
    * a release-cut subject ``vX.Y:`` / ``vX.Y.Z:`` (two or more numeric
      components), which bundles phases rather than shipping one.

    Placeholders:

    * series (start-anchored): letter-led, may contain word characters,
      spaces, dots and hyphens, and ends on an alphanumeric so it does not
      swallow the separator;
    * series (trailer): the same but may also be digit-led;
    * phase: the ``Phase N`` / ``P N`` keyword form or a bare id, and in
      either case the token must contain a digit. That digit is what keeps
      ordinary ``chore: refactor`` subjects from looking ship-shaped.

    Each placeholder is a self-contained ``(?:...)`` group so that
    interpolating it into a larger pattern cannot re-associate its
    alternation.
    """
    text = (subject or "").strip()
    if not text:
        return False
    if self.bookkeeping_subject_re().match(text):
        return False
    # Release cuts occur with two or three version components.
    if re.match(r"^v\d+(?:\.\d+)+:", text):
        return False

    digit_phase = (
        r"(?:(?:Phase|P)\s*\d+[A-Za-z0-9.\-+]*"  # `Phase 1`, `P3.4`, `P1+P2`
        r"|[A-Za-z]*\d[A-Za-z0-9.\-+]*)"  # `AUTH2`, `3b.2`, `RS4`
    )
    start_series = r"(?:[A-Za-z][\w .\-]*[A-Za-z0-9]|[A-Za-z])"
    start_core = self.direct_ship_core(start_series, digit_phase)
    if re.match(rf"^{start_core}", text, re.IGNORECASE):
        return True

    # Trailer probe: the series may be digit-led inside the parentheses.
    tail_series = r"(?:[A-Za-z0-9][\w .\-]*[A-Za-z0-9]|[A-Za-z0-9])"
    tail_core = self.trailer_ship_core(tail_series, digit_phase)
    return bool(tail_core and re.search(tail_core, text, re.IGNORECASE))
681
+
682
+
683
def ship_shaped_under_generic(subject: str) -> bool:
    """Report whether ``subject`` is ship-shaped under the generic probe.

    This is the "would SOME convention recognize this as a ship?" predicate.
    It delegates to ``recognizes_direct_ship`` on the module-level
    ``_GENERIC_TRAILER_PROBE`` convention, so end-of-subject
    ``(<PLAN> <PHASE>)`` trailers count as well as the start-anchored form.
    The active convention's own ``recognizes_direct_ship`` answers the
    narrower "would MY grammar catch it?" question.
    """
    probe = _GENERIC_TRAILER_PROBE
    return probe.recognizes_direct_ship(subject)
702
+
703
+
704
def convention_coverage_finding(
    convention: StampConvention, subjects: list[str], *, declared: bool
) -> str | None:
    """Return the completeness finding for a declared grammar, or None.

    A one-line finding is produced only when all three hold:

    * ``declared`` is true (the workspace declared its own ``[stamp]``);
    * at least one entry of ``subjects`` is ship-shaped under the generic
      grammar (``ship_shaped_under_generic``);
    * ``convention.recognizes_direct_ship`` accepts NONE of those entries.

    Otherwise ``None`` is returned. The function takes the subjects as a
    plain list and does no I/O of its own.
    """
    if not declared:
        return None
    candidates = list(filter(ship_shaped_under_generic, subjects))
    if not candidates:
        # Nothing ship-shaped to judge the grammar against.
        return None
    for candidate in candidates:
        if convention.recognizes_direct_ship(candidate):
            # The declared grammar catches at least one real ship.
            return None
    declared_dirs = ", ".join(convention.subject_dirs) or "(none — generic)"
    example = candidates[0]
    return (
        f"declared [stamp] (subject_dirs={declared_dirs}) recognizes none of this repo's "
        f"{len(candidates)} recent ship-shaped commit(s) — e.g. {example!r}. "
        f"verify will resolve `via none` for real ships; reconcile [stamp] to how "
        f"this repo stamps (see `dos doctor` / HACKING.md)."
    )
742
+
743
+
744
# ---------------------------------------------------------------------------
# The reference userland app's convention — the current hardcoded grammar, lifted
# VERBATIM from `phase_shipped.py`'s module constants so the existing
# kernel suite is byte-for-byte unchanged. This is a plain default the kernel
# falls back to (the `stamp` field on SubstrateConfig defaults to it), NOT an
# import from `drivers.job` — same pattern as the `main`/`global` lane default.
#
# Provenance of each tuple (the constants this replaces, all in phase_shipped):
#   subject_dirs            <- _DIRECT_PREFIX = (docs|go|agents|job_search|scripts)
#   summary_bundle_prefixes <- _SUMMARY_BUNDLE_PREFIXES = ("docs/HYG:",)
#   bookkeeping_prefixes    <- _BOOKKEEPING_SUBJECT_PREFIXES
#   code_dirs               <- _REPO_PATH_RE allowlist
#                              (agents|job_search|go|scripts|templates|config|docs|tests)
#   infra_basenames         <- the reference app's OWN hub beyond the universal set
#                              (_SHARED_INFRA_BASENAMES minus the universal ones)
#   infra_doc_basenames     <- _SHARED_INFRA_DOC_BASENAMES (the named diagram/ref hubs)
#   progress_markers        <- _PROGRESS_MARKER_WORDS
# ---------------------------------------------------------------------------
JOB_STAMP_CONVENTION = StampConvention(
    subject_dirs=("docs", "go", "agents", "job_search", "scripts"),
    summary_bundle_prefixes=("docs/HYG:",),
    bookkeeping_prefixes=(
        "docs/_plans:",
        "docs/fanout:",
        "docs/dispatch:",
        "docs/dispatch-loop:",
        "docs/_soaks:",
    ),
    style="grep",
    # The file-path backstop allowlist (`_REPO_PATH_RE`), lifted verbatim so the
    # reference app's artefact rung is byte-for-byte unchanged.
    code_dirs=(
        "agents", "job_search", "go", "scripts",
        "templates", "config", "docs", "tests",
    ),
    # The reference app's OWN hub file beyond the universal set. `config.py` etc.
    # are now universal (`_UNIVERSAL_INFRA_BASENAMES`); `fanout_state.py` is the
    # one host-specific addition. The resolved set (`infra_basename_set()`) is the
    # original `_SHARED_INFRA_BASENAMES` exactly.
    infra_basenames=("fanout_state.py",),
    # The reference app's named bulk-regenerated doc hubs (`_SHARED_INFRA_DOC_BASENAMES`).
    infra_doc_basenames=(
        "00_subsystems-reference.md", "architecture.mmd", "data-flow.mmd",
        "pipeline-flow.mmd", "state-machine.mmd", "scoring-model.mmd",
        "model-tiering.mmd",
    ),
    # The reference app's soak/observation progress vocabulary, lifted verbatim
    # from `phase_shipped._PROGRESS_MARKER_WORDS` so the demotion is byte-for-byte
    # unchanged for the reference app. A `<PHASE> <marker>` subject is incremental
    # progress on a multi-step phase, not its ship.
    progress_markers=(
        "week-1", "week-2", "week-3", "week-4",
        "day-1", "day-2", "day-3", "day-4", "day-5", "day-6", "day-7",
        "audit", "re-audit", "baseline", "re-baseline", "rebaseline",
        "read", "reading", "snapshot", "obs", "observation", "measurement",
        "progress", "soak", "wip", "partial",
        "§why", "todo",
    ),
    # The reference app uses hyphen-suffixed sub-phase ids (`RS4-port`) and wants
    # the parent-phase fallback; a generic repo does not (it false-resolves a
    # fabricated `P2-CLI` against a real `P2`). Declared on, off-by-default.
    sub_phase_parent_fallback=True,
)
806
+
807
+
808
# ---------------------------------------------------------------------------
# The generic convention — what an EXTERNAL repo's ship subjects look like: a
# bare `<SERIES>: <PHASE>` / `<SERIES><PHASE>` with no dir prefix and no
# host-specific bundle/bookkeeping prefixes. Only the universal release anchor
# (`vX.Y.Z:`) and the universal bookkeeping guards (the bulk-snapshot guard and
# the run-archive rollup guard, both always appended by
# `bookkeeping_subject_re`) apply. This is the value a foreign workspace gets by
# default once it has no `[stamp]` table of its own beyond `style="grep"` — and
# the value `test_verify_no_plan` exercises to prove `verify` is domain-free.
# ---------------------------------------------------------------------------
GENERIC_STAMP_CONVENTION = StampConvention(
    subject_dirs=(),
    summary_bundle_prefixes=(),
    bookkeeping_prefixes=(),
    style="grep",
)
823
+
824
# The breadth-probe convention behind `ship_shaped_under_generic` (docs/289):
# generic, with the trailer rung ON. NOT a default any workspace inherits —
# `verify` still recognizes trailers only where `[stamp] trailer_stamp = true`
# is declared. This probe only widens what the completeness rail / verifiability
# headline can SEE as ship-shaped, so a trailer-stamping repo is told to declare
# the flag instead of being told it has nothing checkable.
# NOTE(review): every field not named here takes `StampConvention`'s own default;
# presumably those defaults equal the generic (empty) values — confirm against
# the class definition.
_GENERIC_TRAILER_PROBE = StampConvention(style="grep", trailer_stamp=True)
831
+
832
+
833
+ # ---------------------------------------------------------------------------
834
+ # The declarative on-ramp: read a `[stamp]` table out of a workspace's dos.toml.
835
+ #
836
+ # `dos init` already scaffolds `[stamp] style="grep"`; these turn that table into
837
+ # a `StampConvention`. Mirrors `reasons.specs_from_table` / `reasons.load_from_toml`
838
+ # exactly: a present table OVERRIDES the base (a host declaring `subject_dirs`
839
+ # means "these are MY dirs", not "these plus job's"); absent/empty degrades to the
840
+ # base; present-but-malformed raises (surfaced, not swallowed).
841
+ #
842
+ # TOML shape (every key optional; the omitted ones fall back to `base`'s value):
843
+ #
844
+ # [stamp]
845
+ # style = "grep"
846
+ # subject_dirs = ["src", "lib", "app"] # this repo's top-level dirs
847
+ # summary_bundle_prefixes = ["docs/HYG:"] # extra standalone-summary prefixes
848
+ # bookkeeping_prefixes = ["docs/_plans:"]# subjects that NAME but don't ship
849
+ # trailer_stamp = true # also ship via a `(<PLAN> <PHASE>)`
850
+ # # end-of-subject trailer (docs/289)
851
+ # ---------------------------------------------------------------------------
852
+
853
+
854
def _str_tuple(value: object, key: str) -> tuple[str, ...]:
    """Coerce a TOML value into a tuple of strings, or raise naming ``key``.

    Args:
        value: a single string (returned as a one-element tuple) or a
            list/tuple whose elements are all strings.
        key: the ``[stamp]`` key being read; used only in error messages.

    Returns:
        The strings as a tuple, in their original order.

    Raises:
        ValueError: if ``value`` is neither a string nor a list/tuple, or if
            any element of the sequence is not a string (the first offending
            element is the one reported).
    """
    if isinstance(value, str):
        return (value,)
    if not isinstance(value, (list, tuple)):
        raise ValueError(
            f"[stamp].{key} must be a string or list of strings, "
            f"got {type(value).__name__}"
        )
    # Validate first so the error names the first bad element, then convert.
    for element in value:
        if not isinstance(element, str):
            raise ValueError(
                f"[stamp].{key} must be a list of strings; got a "
                f"{type(element).__name__} element ({element!r})"
            )
    return tuple(value)
878
+
879
+
880
def convention_from_table(
    table: dict, *, base: StampConvention = JOB_STAMP_CONVENTION
) -> StampConvention:
    """Build a ``StampConvention`` from a parsed ``[stamp]`` TOML table.

    Each key present in ``table`` overrides the matching field of ``base``;
    absent keys inherit ``base``'s value. List fields are OVERRIDDEN, not
    merged: declaring ``subject_dirs = ["src"]`` yields exactly ``("src",)``.
    The function performs no I/O.

    Args:
        table: the parsed ``[stamp]`` table.
        base: the convention supplying every field the table omits.

    Raises:
        ValueError: if ``table`` is not a dict, names an unknown key, gives
            a non-string ``style``, gives a non-boolean
            ``sub_phase_parent_fallback`` / ``trailer_stamp``, or gives a
            list field that ``_str_tuple`` rejects.
    """
    if not isinstance(table, dict):
        raise ValueError(f"[stamp] must be a table, got {type(table).__name__}")

    # Field groups, in the order they are validated.
    flag_fields = ("sub_phase_parent_fallback", "trailer_stamp")
    list_fields = (
        "subject_dirs", "summary_bundle_prefixes", "bookkeeping_prefixes",
        "code_dirs", "infra_basenames", "infra_doc_basenames",
        "progress_markers",
    )
    known = {"style", *list_fields, *flag_fields}
    unknown = set(table) - known
    if unknown:
        raise ValueError(
            f"[stamp] has unknown key(s) {sorted(unknown)}; "
            f"known keys are {sorted(known)}"
        )

    resolved = {"style": base.style}
    if "style" in table:
        declared_style = table["style"]
        if not isinstance(declared_style, str):
            raise ValueError(
                f"[stamp].style must be a string, got {type(declared_style).__name__}"
            )
        resolved["style"] = declared_style

    for flag in flag_fields:
        resolved[flag] = getattr(base, flag)
        if flag in table:
            declared_flag = table[flag]
            if not isinstance(declared_flag, bool):
                raise ValueError(
                    f"[stamp].{flag} must be a boolean, got "
                    f"{type(declared_flag).__name__}"
                )
            resolved[flag] = declared_flag

    for name in list_fields:
        if name in table:
            resolved[name] = _str_tuple(table[name], name)
        else:
            resolved[name] = getattr(base, name)

    return StampConvention(**resolved)
971
+
972
+
973
def load_from_toml(
    path: Path | str, *, base: StampConvention = JOB_STAMP_CONVENTION
) -> StampConvention:
    """Build a ``StampConvention`` from the ``[stamp]`` table of a ``dos.toml``.

    ``base`` is returned unchanged when the file does not exist, when
    neither ``tomllib`` nor ``tomli`` can be imported, or when the parsed
    document has no non-empty ``stamp`` table. A present, non-empty table is
    handed to ``convention_from_table``, whose ``ValueError`` for a
    malformed table propagates to the caller.

    Args:
        path: location of the TOML file.
        base: the convention to fall back to and to inherit omitted fields
            from.
    """
    toml_path = Path(path)
    if not toml_path.exists():
        return base
    try:
        import tomllib  # py3.11+
    except ModuleNotFoundError:  # pragma: no cover - py<3.11 fallback
        try:
            import tomli as tomllib  # type: ignore
        except ModuleNotFoundError:
            return base
    # Decode as `utf-8-sig` so a leading UTF-8 BOM is stripped before parsing.
    text = toml_path.read_text(encoding="utf-8-sig")
    stamp_table = tomllib.loads(text).get("stamp")
    if isinstance(stamp_table, dict) and stamp_table:
        return convention_from_table(stamp_table, base=base)
    return base