dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/stamp.py
ADDED
|
@@ -0,0 +1,1002 @@
|
|
|
1
|
+
"""The ship-stamp convention — the grep rung's subject grammar, *as data*.
|
|
2
|
+
|
|
3
|
+
This is the hackability seam for the kernel's single most-important syscall:
|
|
4
|
+
`verify()` (the truth syscall). The registry-first path and the ancestry check
|
|
5
|
+
are domain-free already; the one piece that was NOT was the **grep fallback's
|
|
6
|
+
grammar** — what a commit subject has to look like to count as a direct ship.
|
|
7
|
+
`phase_shipped.py` hardcoded the *reference userland app's* convention:
|
|
8
|
+
|
|
9
|
+
_DIRECT_PREFIX = r"(?:docs|go|agents|job_search|scripts)" # the host's own top-level dirs
|
|
10
|
+
|
|
11
|
+
so a direct ship had to read `docs/<SERIES>: <PHASE>` / `go/<SERIES>:`. A foreign
|
|
12
|
+
repo committing `AUTH: AUTH2 — ship token refresh` (the `<SERIES>: <PHASE>` shape
|
|
13
|
+
with no dir prefix) resolved to `NOT_SHIPPED (via none)` even though the subject
|
|
14
|
+
literally names the phase. The North Star claims `verify` works against *any* git
|
|
15
|
+
repo from history alone; that was true only for the reference userland app's own
|
|
16
|
+
subject convention.
|
|
17
|
+
|
|
18
|
+
This module lifts that grammar into per-workspace data, exactly the way
|
|
19
|
+
`LaneTaxonomy` lifted the lane clusters and `ReasonRegistry` lifted the refusal
|
|
20
|
+
vocabulary. The *mechanism* (grep `git log`, ancestry-check, registry-first,
|
|
21
|
+
the progress-marker / bookkeeping demotions) stays in `phase_shipped.py`; the
|
|
22
|
+
*grammar of a ship subject* moves here as a `StampConvention` a host declares.
|
|
23
|
+
|
|
24
|
+
The shape
|
|
25
|
+
=========
|
|
26
|
+
|
|
27
|
+
A `StampConvention` is the closed set of subject-shape parameters the matcher
|
|
28
|
+
needs. It carries no regex itself — it carries the *data* (which dir prefixes,
|
|
29
|
+
which summary-bundle prefixes, which bookkeeping prefixes) and exposes the three
|
|
30
|
+
**regex fragments** `phase_shipped` interpolates into its compiled patterns:
|
|
31
|
+
|
|
32
|
+
* ``direct_prefix_re()`` — the alternation that anchors a direct ship.
|
|
33
|
+
Job: ``(?:docs|go|agents|job_search|scripts)/``. Generic (no ``subject_dirs``):
|
|
34
|
+
an *optional* prefix so a bare ``<SERIES>: <PHASE>`` matches with no dir at all.
|
|
35
|
+
* ``summary_subject_re()`` — the ``vX.Y.Z:`` release shape OR an allowlisted
|
|
36
|
+
standalone-summary prefix (job: ``docs/HYG:``). Gates the release-prefix and
|
|
37
|
+
body scans.
|
|
38
|
+
* ``bookkeeping_subject_re()`` — the ship-SHAPED-but-not-a-ship exclusion
|
|
39
|
+
(soft-claims, archive rollups, bulk snapshots). A subject matching this is
|
|
40
|
+
never counted as a ship on any scan path.
|
|
41
|
+
|
|
42
|
+
Two named constants ship in the package:
|
|
43
|
+
|
|
44
|
+
* ``JOB_STAMP_CONVENTION`` — the current hardcoded grammar, lifted verbatim, so
|
|
45
|
+
the reference userland app and the existing kernel suite are byte-for-byte
|
|
46
|
+
unchanged. It is a plain default the kernel falls back to (NOT an import from
|
|
47
|
+
``drivers.job``) — the same pattern as the ``main``/``global`` lanes in ``config.py``.
|
|
48
|
+
* ``GENERIC_STAMP_CONVENTION`` — no dir prefix, no host-specific bundle/
|
|
49
|
+
bookkeeping prefixes (only the universal ``vX.Y.Z:`` release shape and the
|
|
50
|
+
universal ``... snapshot:`` bulk-commit guard). This is what an external
|
|
51
|
+
repo's subjects look like: a bare ``<SERIES>: <PHASE>`` / ``<SERIES><PHASE>``.
|
|
52
|
+
|
|
53
|
+
Pure stdlib — no third-party imports, no I/O — so `phase_shipped` imports it as a
|
|
54
|
+
leaf, the same way it would have used the module-level constants it replaces.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
from __future__ import annotations
|
|
58
|
+
|
|
59
|
+
import re
|
|
60
|
+
from dataclasses import dataclass
|
|
61
|
+
from pathlib import Path
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# The universal release-subject anchor — a `vX.Y.Z:` version-cut commit that
|
|
65
|
+
# bundles several ships into one free-form summary line. This shape is NOT
|
|
66
|
+
# host-specific (every repo that tags releases uses it), so it is baked into the
|
|
67
|
+
# fragment builder rather than declared per-workspace; a convention adds only its
|
|
68
|
+
# *extra* standalone-summary prefixes (job's `docs/HYG:`) on top of it.
|
|
69
|
+
# A plain regex *fragment* (a str, not a compiled pattern, and carrying no `^`
# anchor of its own): a literal `v`, three dot-separated digit runs, then the
# trailing colon.
_RELEASE_VERSION_ANCHOR = r"v\d+\.\d+\.\d+:"
|
|
70
|
+
|
|
71
|
+
# The universal bulk-snapshot guard — a `working-dir snapshot:` / `... snapshot:`
|
|
72
|
+
# commit sweeps hundreds of files and quotes phase ids descriptively, never as a
|
|
73
|
+
# ship attribution. Like the release anchor this is host-agnostic, so it is part
|
|
74
|
+
# of every bookkeeping regex regardless of the declared prefixes.
|
|
75
|
+
# A plain regex *fragment* (a str, not a compiled pattern): any run of non-colon
# characters, then the whole word `snapshot` immediately followed by a colon. The
# literal is lower-case, so it only catches `Snapshot:` when the consumer
# compiles it case-insensitively.
_SNAPSHOT_BOOKKEEPING_FRAGMENT = r"[^:]*\bsnapshot:"
|
|
76
|
+
|
|
77
|
+
# The universal run-archive-rollup guard — a `<prefix>: archive <RUN-ID> …` commit
|
|
78
|
+
# is a fan-out / dispatch run rollup that QUOTES the phase ids of the runs it
|
|
79
|
+
# archives, never a ship of any one of them (live false-positive: a foreign repo's
|
|
80
|
+
# `docs/fanout: archive 20260530T093407Z chain (vllm-p2p3, …)` resolved as a ship
|
|
81
|
+
# of a `fanout`/`archive` phase under the generic dir-free grammar). The
|
|
82
|
+
# discriminator is host-agnostic and TIGHT: the word `archive` (or `rollup`)
|
|
83
|
+
# immediately followed by a run-id-shaped timestamp token (`20260530T093407Z` or a
|
|
84
|
+
# bare `20260530`). Requiring the timestamp is what keeps it from excluding a
|
|
85
|
+
# legitimately-named `archive` PHASE — a real `… : archive` ship has no run-id
|
|
86
|
+
# tail.
|
|
87
|
+
#
|
|
88
|
+
# An OPTIONAL `<prefix>:` is allowed before `archive` (`docs/fanout: archive …`,
|
|
89
|
+
# `chore: archive …`, or a bare `archive …`) — a single non-colon prefix segment
|
|
90
|
+
# then one colon — so the guard fires regardless of whether the host declared the
|
|
91
|
+
# prefix as bookkeeping. This is the zero-config safety net BENEATH the declared
|
|
92
|
+
# `bookkeeping_prefixes`: a host that names its rollup prefix (`docs/fanout:`)
|
|
93
|
+
# still gets the precise exclusion; a host that declares nothing is still safe
|
|
94
|
+
# against the universal `archive <run-id>` shape.
|
|
95
|
+
#
|
|
96
|
+
# The run-id tail is `<YYYYMMDD>` optionally followed by a `T`-separated time of
|
|
97
|
+
# VARIABLE width and an optional trailing `Z` — real fan-out run-ids occur as
|
|
98
|
+
# `20260530T093407Z` (full HHMMSS), `20260529T0233Z` (shortened), and bare
|
|
99
|
+
# `20260530`. The time component is `t\d+z?` (one-or-more digits) rather than a
|
|
100
|
+
# fixed `\d{6}` so every observed run-id shape is caught; the leading 8-digit date
|
|
101
|
+
# is the load-bearing discriminator (a real `archive` phase ship has no date tail).
|
|
102
|
+
# Assembled from adjacent raw-string pieces (implicit concatenation) so each part
# of the fragment can carry its own note; the resulting value is one plain str.
# The `t`/`z` literals are lower-case, so matching an upper-case run-id such as
# `20260530T093407Z` relies on the consumer compiling case-insensitively.
_RUN_ARCHIVE_BOOKKEEPING_FRAGMENT = (
    r"(?:[^:]*:\s*)?"       # optional single `<prefix>:` segment (no embedded colon)
    r"(?:archive|rollup)"   # the rollup verb
    r"\s+\d{8}"             # whitespace, then the 8-digit date (the discriminator)
    r"(?:t\d+z?)?\b"        # optional variable-width time tail, optional `z`
)
|
|
105
|
+
|
|
106
|
+
# The universal shared-infra basenames — hub files nearly every phase touches, so
|
|
107
|
+
# a coincidental edit to them is never the *distinctive* ship evidence the
|
|
108
|
+
# file-path rung counts on (`_check_phase_by_filepath`'s 2-file overlap rule). A
|
|
109
|
+
# section that names two of these alone must NOT let an unrelated commit
|
|
110
|
+
# false-ship the phase. This set is host-AGNOSTIC — `config.py`/`__init__.py`/
|
|
111
|
+
# `cli.py`/`conftest.py` are hub files in *any* Python repo — so it is baked into
|
|
112
|
+
# every convention. A host with its OWN hub file (the reference app's
|
|
113
|
+
# `fanout_state.py`) declares it as an EXTRA via `infra_basenames`, layered on top
|
|
114
|
+
# of this base (the additive discipline, same as the release anchor).
|
|
115
|
+
# Immutable set of hub-file basenames; membership is an exact, case-sensitive
# match on the basename string.
_UNIVERSAL_INFRA_BASENAMES = frozenset(
    (
        "__init__.py",
        "cli.py",
        "config.py",
        "conftest.py",
        "constants.py",
        "models.py",
        "settings.py",
        "utils.py",
    )
)
|
|
119
|
+
|
|
120
|
+
# The universal bulk-regenerated documentation guard — any rendered diagram under
|
|
121
|
+
# `docs/` (`.mmd` source or `.png` render) is co-regenerated wholesale by unrelated
|
|
122
|
+
# release commits, so it is shared-infra for the same reason the hub code files are
|
|
123
|
+
# (AAR-FQ-DL4). The *suffix* rule (any `docs/…*.mmd`/`*.png`) is host-agnostic; a
|
|
124
|
+
# host's SPECIFIC named reference hubs (the reference app's `architecture.mmd`,
|
|
125
|
+
# `00_subsystems-reference.md`) are declared as EXTRAS via `infra_doc_basenames`.
|
|
126
|
+
# A tuple of lower-case suffixes, each including its leading dot.
# NOTE(review): presumably consumed via `str.endswith(tuple)` — confirm at the
# call site, which is outside this view.
_UNIVERSAL_DIAGRAM_SUFFIXES = (".mmd", ".png")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Phase-label tokens: `P3`, `P4.6`, `P1c`, `P3b.2`, or `Phase 1c` / `Phase 1`.
|
|
130
|
+
# The digit must immediately follow `P` (or `Phase `) so prose like "Python",
|
|
131
|
+
# "PR", or "GPT-3" never matches. Body = `<digit>[<sub-letter>][.<digit>]`, so a
|
|
132
|
+
# letter-then-decimal sub-phase (`P3b.2`) is captured; the trailing `\b` rejects
|
|
133
|
+
# `P3xyz`. Pure-stdlib leaf primitive (no I/O) — the subject grammar this module
|
|
134
|
+
# already owns, lifted UP from bench's scripts/next_context.py:_PHASE_LABEL_RE.
|
|
135
|
+
_PHASE_LABEL_RE = re.compile(
    r"\b(?:Phase\s+|P)\d+[a-z]?(?:\.\d+)?\b",
    re.IGNORECASE,
)


def parse_phase_labels(subject: str | None) -> list[str]:
    """Return the distinct phase labels named in a commit subject, sorted.

    Each match of ``_PHASE_LABEL_RE`` is normalized in three steps:

    1. a leading ``Phase <ws>`` spelling (any case) collapses to ``P``;
    2. the first character is upper-cased (``p3`` becomes ``P3``);
    3. a trailing lower-case ``s`` directly after a digit is dropped, so the
       plural ``P0s`` reads as ``P0``.

    Examples::

        "SGLang-Metrics P3 ..."    -> ["P3"]
        "exec-sweep P4.6 done"     -> ["P4.6"]
        "exec-sweep P3b.2 ..."     -> ["P3b.2"]
        "L3 busy device Phase 1c"  -> ["P1c"]
        "close out all P0s"        -> ["P0"]
        "fix typo in readme"       -> []
        None                       -> []

    Args:
        subject: The commit subject line; ``None`` or an empty string yields
            an empty list.

    Returns:
        The normalized labels, de-duplicated and in sorted order.
    """

    def _normalize(raw: str) -> str:
        # "Phase 1c" / "phase   1c" -> "P1c"
        label = re.sub(r"(?i)^phase\s+", "P", raw)
        # "p3" -> "P3" (only the leading letter is touched)
        label = label[:1].upper() + label[1:]
        # "P0s" -> "P0": the regex's optional sub-letter swallowed a plural "s"
        return re.sub(r"(?<=\d)s$", "", label)

    text = subject if subject else ""
    found = {_normalize(hit.group(0)) for hit in _PHASE_LABEL_RE.finditer(text)}
    return sorted(found)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@dataclass(frozen=True)
|
|
164
|
+
class StampConvention:
|
|
165
|
+
"""How a workspace stamps a shipped phase in its commit subjects — as data.
|
|
166
|
+
|
|
167
|
+
Every field is the *data* behind one regex fragment the grep rung compiles;
|
|
168
|
+
no field is a regex itself (a host declares dir names, not patterns). The
|
|
169
|
+
matcher in `phase_shipped` reads the three ``*_re()`` accessors and never the
|
|
170
|
+
raw constants it used to hardcode.
|
|
171
|
+
|
|
172
|
+
Fields:
|
|
173
|
+
subject_dirs
|
|
174
|
+
The top-level dirs a *direct* ship subject may carry before
|
|
175
|
+
``<SERIES>:`` — the reference userland app's ``docs``/``go``/``agents``/
|
|
176
|
+
``job_search``/``scripts``. An **empty** tuple means "no dir prefix": a bare
|
|
177
|
+
``<SERIES>: <PHASE>`` (the generic external-repo shape). The accessor
|
|
178
|
+
makes the prefix optional in that case rather than impossible.
|
|
179
|
+
summary_bundle_prefixes
|
|
180
|
+
Standalone-summary subject prefixes (besides the universal ``vX.Y.Z:``)
|
|
181
|
+
that may bundle several phases into one free-form line — job's
|
|
182
|
+
``docs/HYG:``. A foreign repo usually declares none and relies on the
|
|
183
|
+
release anchor alone.
|
|
184
|
+
bookkeeping_prefixes
|
|
185
|
+
Subject prefixes that NAME phase ids without shipping them (soft-claims,
|
|
186
|
+
run-archive rollups): job's ``docs/_plans:`` / ``docs/fanout:`` / …. A
|
|
187
|
+
subject matching one of these (or the universal ``snapshot:`` guard) is
|
|
188
|
+
excluded from ship-detection on every scan path. Matched
|
|
189
|
+
case-insensitively, anchored at subject start.
|
|
190
|
+
style
|
|
191
|
+
The detection style. Only ``"grep"`` is implemented (scan git-log
|
|
192
|
+
subjects). Kept as the forward extension point for a future tag- or
|
|
193
|
+
trailer-based style; a non-``"grep"`` value is accepted as data but the
|
|
194
|
+
kernel still runs the grep rung (the field is declarative-only for now).
|
|
195
|
+
|
|
196
|
+
code_dirs
|
|
197
|
+
The top-level dirs whose files count as a phase's *load-bearing
|
|
198
|
+
deliverables* for the **file-path backstop** rung
|
|
199
|
+
(`phase_shipped._check_phase_by_filepath`). That rung harvests the file
|
|
200
|
+
paths a phase's plan-doc section names, then asks "did one commit touch
|
|
201
|
+
≥2 of them together?" — an artefact match that catches a ship whose
|
|
202
|
+
commit *subject* drifted off the phase token. To harvest a path the rung
|
|
203
|
+
must first RECOGNISE the token as a repo-file path, which it does by
|
|
204
|
+
rooting it at a known top-level dir. The reference app hardcoded its own
|
|
205
|
+
dirs (``agents|job_search|go|scripts|templates|config|docs|tests``); a
|
|
206
|
+
foreign repo whose deliverables live under ``engine/``/``models/``/
|
|
207
|
+
``commands/`` saw the rung harvest **nothing**, so the artefact backstop
|
|
208
|
+
was dead and every subject-drifted ship resolved ``via none``.
|
|
209
|
+
|
|
210
|
+
This lifts that allowlist to data. An **empty** tuple (the generic
|
|
211
|
+
default) means "any plausible top-level dir": a path token rooted at any
|
|
212
|
+
``<segment>/…<ext>`` is harvested. That is SOUND — the dir allowlist was
|
|
213
|
+
only ever a *recognition* narrowing, never a false-positive gate (those
|
|
214
|
+
are the 2-file-overlap, distinctive-file, bookkeeping-exclusion, and
|
|
215
|
+
cross-series guards downstream, all preserved). A host that wants the
|
|
216
|
+
tight allowlist (the reference app) declares its dirs here.
|
|
217
|
+
infra_basenames
|
|
218
|
+
EXTRA shared-infra hub *code* file basenames, layered ON TOP of the
|
|
219
|
+
universal set (`_UNIVERSAL_INFRA_BASENAMES`: ``config.py``/``cli.py``/
|
|
220
|
+
``conftest.py``/…). A file whose basename is shared-infra is excluded
|
|
221
|
+
from the file-path rung's *distinctive*-overlap count — a coincidental
|
|
222
|
+
edit to a hub file is not ship evidence. The universal set covers any
|
|
223
|
+
Python repo; a host's OWN hub (the reference app's ``fanout_state.py``)
|
|
224
|
+
is declared here. Additive, never replace — you cannot un-declare a
|
|
225
|
+
universal hub (it is shared-infra by nature).
|
|
226
|
+
infra_doc_basenames
|
|
227
|
+
EXTRA bulk-regenerated documentation hub basenames, layered on top of
|
|
228
|
+
the universal diagram rule (any ``docs/…*.mmd``/``*.png`` is shared-infra
|
|
229
|
+
regardless). A host's named cross-cutting reference docs (the reference
|
|
230
|
+
app's ``architecture.mmd``/``00_subsystems-reference.md``) go here.
|
|
231
|
+
Additive, same discipline as ``infra_basenames``.
|
|
232
|
+
progress_markers
|
|
233
|
+
Words that, immediately after the phase id with a bare space (no
|
|
234
|
+
``:``/``—``/``-`` separator), mark a commit as *progress on* a multi-step
|
|
235
|
+
phase rather than a *ship of* it — the reference app's soak/observation
|
|
236
|
+
vocabulary (``week-1``/``audit``/``baseline``/``soak``/…). The grep rung
|
|
237
|
+
DEMOTES a ``<dir>/<SERIES>: <PHASE> <marker>`` subject so an incremental
|
|
238
|
+
commit on a long-running phase is not mistaken for its close-out ship.
|
|
239
|
+
|
|
240
|
+
This was a hardcoded module frozenset, so it fired on EVERY repo — a
|
|
241
|
+
foreign repo's genuine direct ship ``cache: Phase 0 audit of …`` was
|
|
242
|
+
silently demoted to NOT_SHIPPED because ``audit`` followed the id (a real
|
|
243
|
+
Benchmark false-negative). An **empty** tuple (the generic default) means
|
|
244
|
+
"no progress vocabulary" → a foreign repo's real ships are never demoted;
|
|
245
|
+
the worst failure mode (a *lost* ship) cannot happen out of the box. The
|
|
246
|
+
reference app declares its markers here; a host with its own soak
|
|
247
|
+
vocabulary declares its own.
|
|
248
|
+
sub_phase_parent_fallback
|
|
249
|
+
Whether a hyphen-suffixed query (``RS4-port``) that misses every direct
|
|
250
|
+
pass should fall back to checking the bare PARENT phase (``RS4``) and
|
|
251
|
+
accept it if the suffix slug appears in the matched commit's subject — a
|
|
252
|
+
reference-app convenience for its sub-phase id habit. It was gated purely
|
|
253
|
+
on the QUERY shape (``if "-" in phase``), so it fired on any repo: a
|
|
254
|
+
fabricated ``P2-CLI`` false-resolved to a real ``P2`` ship whose subject
|
|
255
|
+
merely contained ``CLI`` (a real Benchmark false-positive). Lifting it to
|
|
256
|
+
a per-convention FLAG (default ``False``) makes the behaviour declared,
|
|
257
|
+
not inferred from a query the kernel doesn't control — the closed-enum
|
|
258
|
+
discipline applied to a feature toggle. The reference app sets it
|
|
259
|
+
``True``; a generic repo never runs the fallback.
|
|
260
|
+
trailer_stamp
|
|
261
|
+
Whether a subject whose TAIL is ``(<PLAN> <PHASE>)`` — also
|
|
262
|
+
``(<PLAN>: <PHASE>)`` and ``(refs <PLAN> <PHASE>)`` — counts as a
|
|
263
|
+
direct ship of that ``(plan, phase)`` (docs/289). The
|
|
264
|
+
Conventional-Commits shape: ``feat(pypi): … (docs/286 Phase 3)``
|
|
265
|
+
carries the stamp as a parenthesized trailer at the END of the
|
|
266
|
+
subject, which no start-anchored grammar can see. Opt-in (default
|
|
267
|
+
``False``) because it widens what is *recognized*; the tightness the
|
|
268
|
+
start anchor provided comes from the end anchor + required parens
|
|
269
|
+
instead (`trailer_ship_core`). The trailer is exactly as forgeable as
|
|
270
|
+
the start-anchored subject, so the rung grades `grep-subject` like
|
|
271
|
+
the direct rung it mirrors.
|
|
272
|
+
"""
|
|
273
|
+
|
|
274
|
+
subject_dirs: tuple[str, ...] = ()
|
|
275
|
+
summary_bundle_prefixes: tuple[str, ...] = ()
|
|
276
|
+
bookkeeping_prefixes: tuple[str, ...] = ()
|
|
277
|
+
style: str = "grep"
|
|
278
|
+
code_dirs: tuple[str, ...] = ()
|
|
279
|
+
infra_basenames: tuple[str, ...] = ()
|
|
280
|
+
infra_doc_basenames: tuple[str, ...] = ()
|
|
281
|
+
progress_markers: tuple[str, ...] = ()
|
|
282
|
+
sub_phase_parent_fallback: bool = False
|
|
283
|
+
trailer_stamp: bool = False
|
|
284
|
+
|
|
285
|
+
# -- serialization (crosses the grep-rung subprocess boundary) ----------
|
|
286
|
+
def to_dict(self) -> dict:
    """Return the convention as a plain, JSON-serializable mapping.

    Every tuple-valued field is copied into a fresh ``list``; ``style`` and
    the two boolean flags are passed through as-is. Keys appear in field
    declaration order.

    This is the form that carries the active convention into the
    `phase_shipped` subprocess: the parent serializes it (into an env var)
    and the child rebuilds it with `from_dict`, so a caller-installed or
    `dos.toml`-declared convention survives the process boundary instead of
    the child re-deriving the default one.
    """
    payload: dict = {}
    # The sequence fields declared ahead of `style`.
    for name in ("subject_dirs", "summary_bundle_prefixes", "bookkeeping_prefixes"):
        payload[name] = list(getattr(self, name))
    payload["style"] = self.style
    # The sequence fields declared after `style`.
    for name in (
        "code_dirs",
        "infra_basenames",
        "infra_doc_basenames",
        "progress_markers",
    ):
        payload[name] = list(getattr(self, name))
    payload["sub_phase_parent_fallback"] = self.sub_phase_parent_fallback
    payload["trailer_stamp"] = self.trailer_stamp
    return payload
|
|
310
|
+
|
|
311
|
+
@classmethod
def from_dict(cls, data: dict) -> "StampConvention":
    """Rebuild a convention from its `to_dict` form.

    Tolerant of missing keys: an absent or falsy sequence field becomes
    ``()``, an absent or falsy ``style`` becomes ``"grep"``, and an absent
    flag becomes ``False`` — so a partial or forward-compatible payload
    degrades to the generic shape rather than crashing the child.

    A sequence field given as a bare string (``"subject_dirs": "docs"``) is
    treated as a single entry. Passing it straight to ``tuple()`` would
    split it into characters (``('d', 'o', 'c', 's')``) and silently corrupt
    every regex alternation built from the field.

    Args:
        data: Mapping in the shape `to_dict` produces; keys may be missing.

    Returns:
        A new instance of ``cls`` built from keyword arguments.
    """

    def as_tuple(key: str) -> tuple:
        value = data.get(key)
        if not value:
            # Missing, None, or empty: the generic (empty) default.
            return ()
        if isinstance(value, str):
            # A scalar string is one entry, not an iterable of characters.
            return (value,)
        return tuple(value)

    return cls(
        subject_dirs=as_tuple("subject_dirs"),
        summary_bundle_prefixes=as_tuple("summary_bundle_prefixes"),
        bookkeeping_prefixes=as_tuple("bookkeeping_prefixes"),
        style=str(data.get("style", "grep") or "grep"),
        code_dirs=as_tuple("code_dirs"),
        infra_basenames=as_tuple("infra_basenames"),
        infra_doc_basenames=as_tuple("infra_doc_basenames"),
        progress_markers=as_tuple("progress_markers"),
        sub_phase_parent_fallback=bool(data.get("sub_phase_parent_fallback", False)),
        trailer_stamp=bool(data.get("trailer_stamp", False)),
    )
|
|
328
|
+
|
|
329
|
+
# -- the three regex fragments the grep rung interpolates ---------------
|
|
330
|
+
def direct_prefix_re(self) -> str:
    """Return the regex fragment for a direct-ship subject's dir prefix.

    * ``subject_dirs`` declared: ``(?:docs|go|...)/`` — each name is
      regex-escaped and the prefix is REQUIRED (the job grammar).
    * ``subject_dirs`` empty: ``(?:\\w[\\w.\\-]*/)?`` — an OPTIONAL,
      SINGLE-component prefix, so both a bare ``AUTH: AUTH2`` and a
      one-dir ``src/AUTH: AUTH2`` match.

    The fragment stops before ``<SERIES>:``; the caller appends the series
    and phase alternation itself.
    """
    declared = self.subject_dirs
    if not declared:
        # Exactly one optional path component. `/` is deliberately absent
        # from the character class: admitting it would let a deep path such
        # as `docs/notes/sub/AUTH2:` (a note that merely names the id) pass
        # as a direct ship. Release and bookkeeping subjects are handled by
        # their own guards.
        return r"(?:\w[\w.\-]*/)?"
    alternation = "|".join(map(re.escape, declared))
    return "(?:" + alternation + ")/"
|
|
356
|
+
|
|
357
|
+
def direct_ship_core(self, series_re: str, phase_alt: str) -> str:
    """Return the direct-ship regex core (everything after the `<sha>\\s+`).

    Two subject shapes are involved:

    * **Spaced / prefixed** — ``<dir>/<SERIES>:?\\s+<PHASE>``: the series,
      an optional colon, whitespace, then the phase token (``docs/AUTH:
      AUTH2``, or the spaced generic ``AUTH: 2``).
    * **Glued** — ``<SERIES><PHASE>:``: the concatenated phase id followed
      by a REQUIRED colon (``AUTH2: ship token refresh``). The series is
      optional here, so a query whose phase already is the full id
      (``AUTH2``) matches without doubling the series.

    A convention that declares ``subject_dirs`` gets the spaced shape alone;
    a convention with no ``subject_dirs`` gets ``(?:spaced|glued)``.

    Args:
        series_re: Already-escaped series fragment.
        phase_alt: Already-escaped ``a|b|c`` alternation of phase spellings.

    Returns:
        An uncompiled fragment; the caller adds the trailing boundary and
        compiles it case-insensitively.
    """
    dir_part = self.direct_prefix_re()
    phases = "(?:" + phase_alt + ")"
    spaced_form = dir_part + series_re + r":?\s+" + phases
    if not self.subject_dirs:
        # The trailing colon is what makes a glued hit an attribution
        # (`AUTH2:`) rather than an incidental substring.
        glued_form = dir_part + "(?:" + series_re + ")?" + phases + ":"
        return "(?:" + spaced_form + "|" + glued_form + ")"
    return spaced_form
|
|
397
|
+
|
|
398
|
+
def trailer_ship_core(self, series_alt: str, phase_alt: str) -> str | None:
|
|
399
|
+
"""The trailer-form direct-ship fragment, or None when the convention
|
|
400
|
+
doesn't opt in (docs/289).
|
|
401
|
+
|
|
402
|
+
Matches a parenthesized ``(<PLAN> <PHASE>)`` stamp at the END of a
|
|
403
|
+
subject — the Conventional-Commits shape (``feat(pypi): …
|
|
404
|
+
(docs/286 Phase 3)``), which the start-anchored `direct_ship_core` can
|
|
405
|
+
never see. Three spellings: ``(<PLAN> <PHASE>)``, ``(<PLAN>: <PHASE>)``,
|
|
406
|
+
``(refs <PLAN> <PHASE>)``. Unlike the other fragments this one carries
|
|
407
|
+
its OWN anchor (``\\)\\s*$``) — the caller searches rather than appending
|
|
408
|
+
a boundary; the close paren immediately after the phase token IS the
|
|
409
|
+
right boundary (a ``Phase 3`` query cannot match ``(… Phase 30)`` or
|
|
410
|
+
``(… Phase 3 audit)`` — a progress-marked trailer is not a ship,
|
|
411
|
+
fail-closed), and the end anchor is what keeps a subject that merely
|
|
412
|
+
NAMES an id in prose (or in a mid-subject paren) from matching.
|
|
413
|
+
|
|
414
|
+
``series_alt`` is an already-escaped alternation of plan-id spellings
|
|
415
|
+
(the caller bridges ``docs/286_<slug>`` ↔ ``docs/286`` — see
|
|
416
|
+
`phase_shipped._series_variants`); ``phase_alt`` is the same
|
|
417
|
+
`_phase_variants` alternation every other rung uses. The convention's
|
|
418
|
+
dir prefix is admitted OPTIONALLY before the series — even when
|
|
419
|
+
``subject_dirs`` makes it required at subject start — because a trailer
|
|
420
|
+
names the plan as written in the plan registry (``docs/286``), not as a
|
|
421
|
+
ship-subject prefix; the parens + end anchor carry the tightness the
|
|
422
|
+
required prefix used to.
|
|
423
|
+
|
|
424
|
+
Bookkeeping/summary exclusion is the CALLER's job (the same post-match
|
|
425
|
+
guards as the direct pass — `phase_shipped` Pass 1a′), exactly as it is
|
|
426
|
+
for `direct_ship_core`.
|
|
427
|
+
"""
|
|
428
|
+
if not self.trailer_stamp:
|
|
429
|
+
return None
|
|
430
|
+
prefix = self.direct_prefix_re()
|
|
431
|
+
return (
|
|
432
|
+
rf"\(\s*(?:refs\s+)?(?:{prefix})?(?:{series_alt}):?\s+(?:{phase_alt})\s*\)\s*$"
|
|
433
|
+
)
|
|
434
|
+
|
|
435
|
+
def summary_subject_re(self) -> str:
    """Return the regex fragment that matches a summary-bundle subject.

    The fragment is a non-capturing alternation of the universal release
    anchor (``_RELEASE_VERSION_ANCHOR``, the ``vX.Y.Z:`` shape) and every
    declared ``summary_bundle_prefixes`` entry (job: ``docs/HYG:``), each
    escaped so it matches literally. It is used in place of the bare release
    anchor in the release-prefix scan and the body-scan's in-summary gate.

    Empty prefixes are skipped. An empty string escapes to an empty
    alternative, which matches at every position and would therefore
    classify EVERY subject as a summary bundle. Non-empty prefixes produce
    exactly the same fragment as before.

    Returns:
        The regex source string (not compiled); the caller embeds it in a
        larger pattern.
    """
    alternatives = [_RELEASE_VERSION_ANCHOR]
    # `if p` drops "" so the alternation never gains a match-everything arm.
    alternatives.extend(re.escape(p) for p in self.summary_bundle_prefixes if p)
    return r"(?:" + "|".join(alternatives) + r")"
|
|
446
|
+
|
|
447
|
+
def bookkeeping_subject_re(self) -> "re.Pattern[str]":
    """Return the compiled, case-insensitive, start-anchored bookkeeping matcher.

    A subject matching this pattern NAMES phase ids as narrative
    (soft-claims, archive rollups, bulk snapshots) and must never count as a
    ship. The alternation is built from:

    * every declared ``bookkeeping_prefixes`` entry, escaped to match
      literally (job's ``docs/_plans:`` and friends);
    * ``_SNAPSHOT_BOOKKEEPING_FRAGMENT`` — the universal bulk-snapshot guard;
    * ``_RUN_ARCHIVE_BOOKKEEPING_FRAGMENT`` — the universal run-archive
      rollup guard.

    The two universal fragments are always appended, so a convention that
    declares no prefixes still excludes those two shapes.

    Empty prefixes are skipped. An empty string escapes to an empty
    alternative, and because the pattern is start-anchored that alternative
    matches EVERY subject — every commit would be treated as bookkeeping and
    no ship could ever be recognized. Non-empty prefixes compile to exactly
    the same pattern as before.

    Returns:
        A pattern compiled with ``re.IGNORECASE``, anchored at subject start.
    """
    # `if p` drops "" so the alternation never gains a match-everything arm.
    alternatives = [re.escape(p) for p in self.bookkeeping_prefixes if p]
    alternatives.append(_SNAPSHOT_BOOKKEEPING_FRAGMENT)
    alternatives.append(_RUN_ARCHIVE_BOOKKEEPING_FRAGMENT)
    return re.compile(r"^(?:" + "|".join(alternatives) + r")", re.IGNORECASE)
|
|
467
|
+
|
|
468
|
+
# -- the file-path backstop rung (artefact match, see phase_shipped) ----
|
|
469
|
+
def repo_path_re(self) -> "re.Pattern[str]":
    """Compile the matcher that harvests repo-file paths from plan-doc text.

    The file-path backstop (`phase_shipped._extract_phase_files`) scans a
    phase's plan-doc section for the files it names, both as markdown link
    targets and as inline backtick paths. Either spelling reduces to
    ``<top-level-dir>/<path>.<ext>``; any run of leading ``./`` or ``../``
    segments is consumed outside the capture group, so group(1) is the
    repo-relative path. The extension is mandatory, which keeps a bare
    directory mention (``engine/``) from being harvested.

    Two shapes, chosen by ``code_dirs``:

    * ``code_dirs`` empty (the generic default): the first segment is any
      single name WITHOUT a dot, and the match may not start right after a
      word character, ``.``, ``/`` or ``-``. A URL host (``github.com``) or
      a version root (``v1.2.3``) always carries a dot, so the no-dot rule
      rejects them. The left boundary stops the matcher from sliding
      rightward and lifting ``com/user/repo.git`` out of a URL instead.
    * ``code_dirs`` declared (the reference app): a closed allowlist of
      escaped dir names, with NO left boundary — kept identical to the
      pre-genericization ``_REPO_PATH_RE``.

    Returns:
        The compiled pattern; group(1) is the harvested path.
    """
    if not self.code_dirs:
        # Generic: one dot-free leading segment, guarded on the left.
        return re.compile(
            r"(?<![\w./-])(?:\.\.?/)*(\w[\w\-]*/[\w./-]+\.[A-Za-z0-9]+)"
        )
    # Declared: only paths rooted in one of the host's own dirs.
    allowed_dirs = "|".join(map(re.escape, self.code_dirs))
    return re.compile(
        rf"(?:\.\.?/)*((?:{allowed_dirs})/[\w./-]+\.[A-Za-z0-9]+)"
    )
|
|
519
|
+
|
|
520
|
+
def infra_basename_set(self) -> frozenset[str]:
    """Return every shared-infra *code* basename: universal plus declared.

    A file whose basename is in this set is left out of the file-path rung's
    *distinctive*-overlap count (see `is_shared_infra`). The result is the
    union of ``_UNIVERSAL_INFRA_BASENAMES`` with the host's own
    ``infra_basenames`` (the reference app adds ``fanout_state.py``).
    Declared names only ever ADD to the universal set; they never replace it.
    """
    declared = frozenset(self.infra_basenames)
    return _UNIVERSAL_INFRA_BASENAMES | declared
|
|
530
|
+
|
|
531
|
+
def infra_doc_basename_set(self) -> frozenset[str]:
    """Return the shared-infra *doc* basenames: the declared extras only.

    This is just ``infra_doc_basenames`` frozen into a set — the host's NAMED
    reference hubs (the reference app's ``architecture.mmd`` /
    ``00_subsystems-reference.md``). The universal diagram rule is not
    folded in here; `is_shared_infra` applies it separately, by suffix.
    """
    declared = self.infra_doc_basenames
    return frozenset(declared)
|
|
540
|
+
|
|
541
|
+
def is_shared_infra(self, path: str) -> bool:
    """Tell whether ``path`` is a hub file excluded from the overlap count.

    A path is shared infra when either of these holds:

    * its basename (the text after the last ``/``) is one of the hub *code*
      names (`infra_basename_set`) or one of the host's named *doc* hubs
      (`infra_doc_basename_set`);
    * it lies under ``docs/`` and its basename ends with one of
      ``_UNIVERSAL_DIAGRAM_SUFFIXES`` — any rendered diagram is a
      regenerated hub, not a distinctive deliverable.

    Both the path and the basename sets are compared after `str.casefold`,
    so ``agents/Config.py`` and ``agents/config.py`` are the same file here
    and a capitalized declared extra still matches a real edit. Without the
    fold a mis-cased hub file would be counted as a distinctive deliverable.

    Pure: no I/O, only string comparisons.
    """
    folded = path.casefold()
    basename = folded.rsplit("/", 1)[-1]
    # One folded pool of hub names: code hubs first, then doc hubs.
    hub_names = {
        name.casefold()
        for name in (*self.infra_basename_set(), *self.infra_doc_basename_set())
    }
    if basename in hub_names:
        return True
    # Otherwise only a diagram that lives under docs/ qualifies.
    return folded.startswith("docs/") and basename.endswith(
        _UNIVERSAL_DIAGRAM_SUFFIXES
    )
|
|
576
|
+
|
|
577
|
+
# -- progress-marker demotion + bundle-slug fallback (see phase_shipped) --
|
|
578
|
+
def progress_marker_set(self) -> frozenset[str]:
    """Return this convention's progress-marker words, lowercased.

    A subject shaped ``<dir>/<SERIES>: <PHASE> <marker>`` (bare space, no
    separator) is demoted from a ship to *progress on* the phase when
    ``<marker>`` is in this set. With no ``progress_markers`` declared the
    set is empty, so nothing is ever demoted. The words are lowercased here
    so the caller can compare case-insensitively without lowering again.
    """
    return frozenset({marker.lower() for marker in self.progress_markers})
|
|
588
|
+
|
|
589
|
+
def bundle_slugs(self) -> frozenset[str]:
    """Return the UPPERCASED series slugs named by ``summary_bundle_prefixes``.

    A standalone-summary prefix such as ``docs/HYG:`` carries a series slug
    (``HYG``). The grep rung runs its prose-slug fallback only for those
    series, and this method derives them from the DECLARED prefixes rather
    than from a hardcoded literal.

    For each prefix: surrounding whitespace is stripped, trailing colons are
    removed, everything up to and including the last ``/`` is dropped, and
    the remainder is uppercased. A prefix that reduces to nothing
    contributes no slug. A convention with no bundle prefixes yields an
    empty set, which leaves the fallback inert.
    """
    slugs: set[str] = set()
    for prefix in self.summary_bundle_prefixes:
        # rsplit returns the whole string when there is no "/", so the same
        # expression covers both the dir-scoped and the bare prefix.
        tail = prefix.strip().rstrip(":").rsplit("/", 1)[-1]
        if tail:
            slugs.add(tail.upper())
    return frozenset(slugs)
|
|
609
|
+
|
|
610
|
+
def recognizes_direct_ship(self, subject: str) -> bool:
    """Heuristically decide whether ``subject`` is a ship this grammar counts.

    This is the convention-aware "does this look like a ship I would
    count?" probe used by the completeness rail (SCV 3c). It does not know
    the repo's real series ids, so it feeds permissive series/phase
    placeholders into the convention's own fragments. It is meant only for
    flagging a declared-but-mismatched grammar, not for the verify path.

    Checks, in order:

    1. An empty or whitespace-only subject is not a ship.
    2. A subject matching `bookkeeping_subject_re` is not a ship.
    3. A ``vX.Y:`` / ``vX.Y.Z:`` release-cut subject is not a ship. The
       check is looser than the strict three-part release anchor because
       two-part versions occur too.
    4. `direct_ship_core`, anchored at subject start and matched
       case-insensitively, recognizes a ship.
    5. Otherwise `trailer_ship_core` (when the convention opts in) is
       searched for a ``(<PLAN> <PHASE>)`` tail.

    Placeholders:

    * Series: a letter-led run that may contain word characters, spaces,
      dots and hyphens and must end on an alphanumeric (``hardware-thing``,
      ``SGLang charts``), or a single letter.
    * Phase: ``Phase N`` / ``P N``, or a bare token — in both cases it must
      CONTAIN A DIGIT. The digit is what separates a ship from an ordinary
      ``chore: refactor`` commit.
    * Trailer series: as above but may also be digit-led (``286``).

    Each placeholder is wrapped in its own ``(?:…)`` group so that
    interpolating it into a larger pattern cannot re-associate a top-level
    ``|``.
    """
    text = (subject or "").strip()
    if not text:
        return False
    if self.bookkeeping_subject_re().match(text):
        return False
    # Release cuts bundle many phases into one line; never a direct ship.
    if re.match(r"^v\d+(?:\.\d+)+:", text):
        return False

    series_ph = r"(?:[A-Za-z][\w .\-]*[A-Za-z0-9]|[A-Za-z])"
    phase_ph = (
        r"(?:(?:Phase|P)\s*\d+[A-Za-z0-9.\-+]*"  # `Phase 1`, `P3.4`, `P1+P2`
        r"|[A-Za-z]*\d[A-Za-z0-9.\-+]*)"  # `AUTH2`, `3b.2`, `RS4` — has a digit
    )
    start_anchored = "^" + self.direct_ship_core(series_ph, phase_ph)
    if re.match(start_anchored, text, re.IGNORECASE):
        return True

    # Trailer probe (docs/289): plan ids inside the parens may be digit-led.
    trailer_series_ph = r"(?:[A-Za-z0-9][\w .\-]*[A-Za-z0-9]|[A-Za-z0-9])"
    trailer = self.trailer_ship_core(trailer_series_ph, phase_ph)
    return bool(trailer and re.search(trailer, text, re.IGNORECASE))
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
def ship_shaped_under_generic(subject: str) -> bool:
    """Tell whether ``subject`` looks like a ship under the broadest grammar.

    The completeness rail uses this to decide that a commit is a SHIP that
    SOME convention would recognize, independently of the active one. It
    delegates to ``_GENERIC_TRAILER_PROBE.recognizes_direct_ship`` — the
    generic convention with the trailer rung switched ON (docs/289). So
    both a start-of-subject ``<SERIES> <PHASE>`` stamp and a
    ``(<PLAN> <PHASE>)`` tail count, while bookkeeping and release-cut
    subjects do not.

    This is the "is this even a ship subject?" predicate; the active
    convention's `recognizes_direct_ship` is the "would MY grammar catch
    it?" predicate. A subject that passes here but not there is the
    declared-grammar-misses-its-own-commits finding (SCV 3c).
    """
    probe = _GENERIC_TRAILER_PROBE
    return probe.recognizes_direct_ship(subject)
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def convention_coverage_finding(
    convention: StampConvention, subjects: list[str], *, declared: bool
) -> str | None:
    """Return the SCV 3c completeness finding, or None if the grammar looks fine.

    If a workspace DECLARED a ``[stamp]`` table but the resulting convention
    recognizes NONE of the repo's own recent ship-shaped commits, the
    declared grammar almost certainly mismatches how the repo stamps ships,
    and `verify` would silently resolve ``via none`` for real ships.

    Args:
        convention: the active convention being judged.
        subjects: recent commit subjects (bare, no sha prefix). Taken as a
            list so the function stays pure and testable without git.
        declared: True only when the workspace declared its own ``[stamp]``
            table; an inherited default is never judged.

    Returns:
        A one-line finding when ``declared`` is True, at least one subject
        is ship-shaped under the generic grammar, and ``convention``
        recognizes none of those. None in every other case.
    """
    if not declared:
        return None
    candidates = [subj for subj in subjects if ship_shaped_under_generic(subj)]
    if not candidates:
        # Nothing ship-shaped to judge the grammar against.
        return None
    for candidate in candidates:
        if convention.recognizes_direct_ship(candidate):
            # The declared grammar catches at least one real ship — healthy.
            return None
    example = candidates[0]
    dirs = ", ".join(convention.subject_dirs) or "(none — generic)"
    return (
        f"declared [stamp] (subject_dirs={dirs}) recognizes none of this repo's "
        f"{len(candidates)} recent ship-shaped commit(s) — e.g. {example!r}. "
        f"verify will resolve `via none` for real ships; reconcile [stamp] to how "
        f"this repo stamps (see `dos doctor` / HACKING.md)."
    )
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
# ---------------------------------------------------------------------------
|
|
745
|
+
# The reference userland app's convention — the current hardcoded grammar, lifted
|
|
746
|
+
# VERBATIM from `phase_shipped.py`'s module constants so the existing
|
|
747
|
+
# kernel suite is byte-for-byte unchanged. This is a plain default the kernel
|
|
748
|
+
# falls back to (the `stamp` field on SubstrateConfig defaults to it), NOT an
|
|
749
|
+
# import from `drivers.job` — same pattern as the `main`/`global` lane default.
|
|
750
|
+
#
|
|
751
|
+
# Provenance of each tuple (the constants this replaces, all in phase_shipped):
|
|
752
|
+
# subject_dirs <- _DIRECT_PREFIX = (docs|go|agents|job_search|scripts)
|
|
753
|
+
# summary_bundle_prefixes <- _SUMMARY_BUNDLE_PREFIXES = ("docs/HYG:",)
|
|
754
|
+
# bookkeeping_prefixes <- _BOOKKEEPING_SUBJECT_PREFIXES
|
|
755
|
+
# ---------------------------------------------------------------------------
|
|
756
|
+
# code_dirs <- _REPO_PATH_RE allowlist
|
|
757
|
+
# (agents|job_search|go|scripts|templates|config|docs|tests)
|
|
758
|
+
# infra_basenames <- the reference app's OWN hub beyond the universal set
|
|
759
|
+
# (_SHARED_INFRA_BASENAMES minus the universal ones)
|
|
760
|
+
# infra_doc_basenames <- _SHARED_INFRA_DOC_BASENAMES (the named diagram/ref hubs)
|
|
761
|
+
# The reference app's stamp convention: every field below is declared
# explicitly, so nothing here relies on StampConvention's defaults except
# fields this call does not pass.
JOB_STAMP_CONVENTION = StampConvention(
    # Top-level dirs a ship subject may be prefixed with (was `_DIRECT_PREFIX`).
    subject_dirs=("docs", "go", "agents", "job_search", "scripts"),
    # Standalone-summary subject prefixes (was `_SUMMARY_BUNDLE_PREFIXES`).
    summary_bundle_prefixes=("docs/HYG:",),
    # Subject prefixes that NAME phase ids but never ship them
    # (was `_BOOKKEEPING_SUBJECT_PREFIXES`).
    bookkeeping_prefixes=(
        "docs/_plans:",
        "docs/fanout:",
        "docs/dispatch:",
        "docs/dispatch-loop:",
        "docs/_soaks:",
    ),
    style="grep",
    # The file-path backstop allowlist (`_REPO_PATH_RE`), lifted verbatim so the
    # reference app's artefact rung is byte-for-byte unchanged.
    code_dirs=(
        "agents", "job_search", "go", "scripts",
        "templates", "config", "docs", "tests",
    ),
    # The reference app's OWN hub file beyond the universal set. `config.py` etc.
    # are now universal (`_UNIVERSAL_INFRA_BASENAMES`); `fanout_state.py` is the
    # one host-specific addition. The resolved set (`infra_basename_set()`) is the
    # original `_SHARED_INFRA_BASENAMES` exactly.
    infra_basenames=("fanout_state.py",),
    # The reference app's named bulk-regenerated doc hubs (`_SHARED_INFRA_DOC_BASENAMES`).
    infra_doc_basenames=(
        "00_subsystems-reference.md", "architecture.mmd", "data-flow.mmd",
        "pipeline-flow.mmd", "state-machine.mmd", "scoring-model.mmd",
        "model-tiering.mmd",
    ),
    # The reference app's soak/observation progress vocabulary, lifted verbatim
    # from `phase_shipped._PROGRESS_MARKER_WORDS` so the demotion is byte-for-byte
    # unchanged for the reference app. A `<PHASE> <marker>` subject is incremental
    # progress on a multi-step phase, not its ship.
    progress_markers=(
        "week-1", "week-2", "week-3", "week-4",
        "day-1", "day-2", "day-3", "day-4", "day-5", "day-6", "day-7",
        "audit", "re-audit", "baseline", "re-baseline", "rebaseline",
        "read", "reading", "snapshot", "obs", "observation", "measurement",
        "progress", "soak", "wip", "partial",
        "§why", "todo",
    ),
    # The reference app uses hyphen-suffixed sub-phase ids (`RS4-port`) and wants
    # the parent-phase fallback; a generic repo does not (it false-resolves a
    # fabricated `P2-CLI` against a real `P2`). Declared on, off-by-default.
    sub_phase_parent_fallback=True,
)
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
# ---------------------------------------------------------------------------
|
|
809
|
+
# The generic convention — what an EXTERNAL repo's ship subjects look like: a
|
|
810
|
+
# bare `<SERIES>: <PHASE>` / `<SERIES><PHASE>` with no dir prefix and no
|
|
811
|
+
# host-specific bundle/bookkeeping prefixes. Only the universal release anchor
|
|
812
|
+
# (`vX.Y.Z:`) and the universal bulk-snapshot guard apply. This is the value a
|
|
813
|
+
# foreign workspace gets by default once it has no `[stamp]` table of its own
|
|
814
|
+
# beyond `style="grep"` — and the value `test_verify_no_plan` exercises to prove
|
|
815
|
+
# `verify` is domain-free.
|
|
816
|
+
# ---------------------------------------------------------------------------
|
|
817
|
+
GENERIC_STAMP_CONVENTION = StampConvention(
    subject_dirs=(),  # no dir prefix: bare `<SERIES>: <PHASE>` / `<SERIES><PHASE>:`
    summary_bundle_prefixes=(),  # only the universal `vX.Y.Z:` release anchor applies
    bookkeeping_prefixes=(),  # only the universal bookkeeping guards apply
    style="grep",
)
|
|
823
|
+
|
|
824
|
+
# The breadth-probe convention behind `ship_shaped_under_generic` (docs/289):
|
|
825
|
+
# generic, with the trailer rung ON. NOT a default any workspace inherits —
|
|
826
|
+
# `verify` still recognizes trailers only where `[stamp] trailer_stamp = true`
|
|
827
|
+
# is declared. This probe only widens what the completeness rail / verifiability
|
|
828
|
+
# headline can SEE as ship-shaped, so a trailer-stamping repo is told to declare
|
|
829
|
+
# the flag instead of being told it has nothing checkable.
|
|
830
|
+
# Only `style` and `trailer_stamp` are passed; every other field keeps
# StampConvention's own default.
_GENERIC_TRAILER_PROBE = StampConvention(style="grep", trailer_stamp=True)
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
# ---------------------------------------------------------------------------
|
|
834
|
+
# The declarative on-ramp: read a `[stamp]` table out of a workspace's dos.toml.
|
|
835
|
+
#
|
|
836
|
+
# `dos init` already scaffolds `[stamp] style="grep"`; these turn that table into
|
|
837
|
+
# a `StampConvention`. Mirrors `reasons.specs_from_table` / `reasons.load_from_toml`
|
|
838
|
+
# exactly: a present table OVERRIDES the base (a host declaring `subject_dirs`
|
|
839
|
+
# means "these are MY dirs", not "these plus job's"); absent/empty degrades to the
|
|
840
|
+
# base; present-but-malformed raises (surfaced, not swallowed).
|
|
841
|
+
#
|
|
842
|
+
# TOML shape (every key optional; the omitted ones fall back to `base`'s value):
|
|
843
|
+
#
|
|
844
|
+
# [stamp]
|
|
845
|
+
# style = "grep"
|
|
846
|
+
# subject_dirs = ["src", "lib", "app"] # this repo's top-level dirs
|
|
847
|
+
# summary_bundle_prefixes = ["docs/HYG:"] # extra standalone-summary prefixes
|
|
848
|
+
# bookkeeping_prefixes = ["docs/_plans:"]# subjects that NAME but don't ship
|
|
849
|
+
# trailer_stamp = true # also ship via a `(<PLAN> <PHASE>)`
|
|
850
|
+
# # end-of-subject trailer (docs/289)
|
|
851
|
+
# ---------------------------------------------------------------------------
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
def _str_tuple(value: object, key: str) -> tuple[str, ...]:
|
|
855
|
+
"""Coerce a TOML value to a tuple of strings, or raise naming the bad key.
|
|
856
|
+
|
|
857
|
+
Accepts a single string (wrapped) or a list of strings. Anything else — a
|
|
858
|
+
number, a nested table, a list with a non-string element — is a host mistake
|
|
859
|
+
worth surfacing loudly at load (the same posture `reasons.specs_from_table`
|
|
860
|
+
takes on a bad category).
|
|
861
|
+
"""
|
|
862
|
+
if isinstance(value, str):
|
|
863
|
+
return (value,)
|
|
864
|
+
if isinstance(value, (list, tuple)):
|
|
865
|
+
out: list[str] = []
|
|
866
|
+
for item in value:
|
|
867
|
+
if not isinstance(item, str):
|
|
868
|
+
raise ValueError(
|
|
869
|
+
f"[stamp].{key} must be a list of strings; got a "
|
|
870
|
+
f"{type(item).__name__} element ({item!r})"
|
|
871
|
+
)
|
|
872
|
+
out.append(item)
|
|
873
|
+
return tuple(out)
|
|
874
|
+
raise ValueError(
|
|
875
|
+
f"[stamp].{key} must be a string or list of strings, "
|
|
876
|
+
f"got {type(value).__name__}"
|
|
877
|
+
)
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
def convention_from_table(
    table: dict, *, base: StampConvention = JOB_STAMP_CONVENTION
) -> StampConvention:
    """Construct a `StampConvention` from an already-parsed `[stamp]` table.

    Performs no I/O. Every key present in ``table`` replaces the matching
    field of ``base``; every key that is absent keeps ``base``'s value.

    List-valued fields are *replaced*, never merged: declaring
    ``subject_dirs = ["src"]`` yields exactly ``("src",)`` regardless of what
    ``base`` carried.

    Args:
        table: The parsed ``[stamp]`` table.
        base: Convention supplying the value of every field ``table`` omits.

    Raises:
        ValueError: if ``table`` is not a dict, names a key outside the known
            set, gives ``style`` a non-string value, gives
            ``sub_phase_parent_fallback`` / ``trailer_stamp`` a non-boolean
            value, or gives a list field a value `_str_tuple` rejects.
    """
    if not isinstance(table, dict):
        raise ValueError(f"[stamp] must be a table, got {type(table).__name__}")

    # List-valued fields, in the order they are validated.
    list_fields = (
        "subject_dirs",
        "summary_bundle_prefixes",
        "bookkeeping_prefixes",
        "code_dirs",
        "infra_basenames",
        "infra_doc_basenames",
        "progress_markers",
    )
    # Scalar fields: (key, required type, noun used in the error message).
    scalar_fields = (
        ("style", str, "string"),
        ("sub_phase_parent_fallback", bool, "boolean"),
        ("trailer_stamp", bool, "boolean"),
    )

    known = {name for name, _, _ in scalar_fields} | set(list_fields)
    unknown = set(table) - known
    if unknown:
        # A typo'd key is reported rather than silently ignored.
        raise ValueError(
            f"[stamp] has unknown key(s) {sorted(unknown)}; "
            f"known keys are {sorted(known)}"
        )

    # Scalars are checked before any list field, so a bad scalar is the
    # first error reported when several fields are malformed.
    resolved = {}
    for name, required_type, noun in scalar_fields:
        chosen = getattr(base, name)
        if name in table:
            chosen = table[name]
            if not isinstance(chosen, required_type):
                raise ValueError(
                    f"[stamp].{name} must be a {noun}, got {type(chosen).__name__}"
                )
        resolved[name] = chosen

    for name in list_fields:
        if name in table:
            resolved[name] = _str_tuple(table[name], name)
        else:
            resolved[name] = getattr(base, name)

    return StampConvention(**resolved)
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
def load_from_toml(
    path: Path | str, *, base: StampConvention = JOB_STAMP_CONVENTION
) -> StampConvention:
    """Read the `[stamp]` table of a TOML file into a `StampConvention`.

    ``base`` is returned untouched when:

    * ``path`` does not exist;
    * neither `tomllib` nor `tomli` can be imported;
    * the file's ``stamp`` entry is missing, empty, or not a table.

    Otherwise the table is handed to `convention_from_table`, whose
    ``ValueError`` for a malformed table propagates to the caller.

    Args:
        path: Location of the TOML file.
        base: Convention returned as-is on the fallback paths, and used as
            the source of omitted fields otherwise.
    """
    toml_path = Path(path)
    if not toml_path.exists():
        return base

    try:
        import tomllib  # py3.11+
    except ModuleNotFoundError:  # pragma: no cover - py<3.11 fallback
        try:
            import tomli as tomllib  # type: ignore
        except ModuleNotFoundError:
            return base

    # Decode as `utf-8-sig` so a leading UTF-8 BOM is stripped before the
    # text reaches the TOML parser.
    text = toml_path.read_text(encoding="utf-8-sig")
    stamp_table = tomllib.loads(text).get("stamp")
    if isinstance(stamp_table, dict) and stamp_table:
        return convention_from_table(stamp_table, base=base)
    return base
|