dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/phase_shipped.py
ADDED
|
@@ -0,0 +1,1985 @@
|
|
|
1
|
+
"""Check whether a plan phase has already shipped, by scanning git log for
|
|
2
|
+
plan-doc commits.
|
|
3
|
+
|
|
4
|
+
Both /next-up (when listing next-unblocked phases) and
|
|
5
|
+
/fanout-true-headless-multi-agent (Step 1.5 at-launch re-validation) use this.
|
|
6
|
+
The whole point is to catch picks that shipped *before* the packet's own
|
|
7
|
+
"Last commit" SHA — pure generator-side staleness that the packet's own
|
|
8
|
+
freshness header cannot detect.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python scripts/check_phase_shipped.py <series> <phase> [<plan-doc-path>]
|
|
12
|
+
|
|
13
|
+
Examples:
|
|
14
|
+
python scripts/check_phase_shipped.py RS RS4 docs/resume-surfacing-plan.md
|
|
15
|
+
python scripts/check_phase_shipped.py TF TF11.1 docs/38_top-of-funnel-quality-plan.md
|
|
16
|
+
python scripts/check_phase_shipped.py UP UP2.1.4
|
|
17
|
+
python scripts/check_phase_shipped.py LP LP10c.1 docs/33_apply-driver-consolidation-plan.md
|
|
18
|
+
|
|
19
|
+
# JSON output for /next-up batch mode:
|
|
20
|
+
python scripts/check_phase_shipped.py --json AT AT5
|
|
21
|
+
|
|
22
|
+
Exit codes (single-pick / --batch / --json):
|
|
23
|
+
0 — phase has shipped (caller should drop this pick)
|
|
24
|
+
1 — phase has not shipped (caller can proceed)
|
|
25
|
+
2 — usage / error
|
|
26
|
+
3 — UNCERTAIN (text mode only): a WEAK git verdict was found but no
|
|
27
|
+
plan-doc path was given to cross-check it. Re-run with the plan doc.
|
|
28
|
+
|
|
29
|
+
Exit codes (--check-packet):
|
|
30
|
+
0 — found-shipped (any pick is already in git log → caller should re-run)
|
|
31
|
+
1 — not-shipped (all picks clean)
|
|
32
|
+
2 — no-coverage (file parsed but no recognisable `### N. ...` headers)
|
|
33
|
+
3 — parse-error (file IO / decode error)
|
|
34
|
+
|
|
35
|
+
--check-packet false-NEGATIVE backstop (AAR-FQ230, finding #230): when all
|
|
36
|
+
subject-token passes miss but a pick names a plan doc, the verdict is
|
|
37
|
+
re-derived from the FILE PATHS the doc's phase row names vs the file paths
|
|
38
|
+
recent commits touched (`git log -- <file>`). This catches DT2-shape ships
|
|
39
|
+
whose commit subject lacks the phase token — the subject-token lineage
|
|
40
|
+
(#62/#63/#77/#103/#136/#226) is subject-regex widening; this matches the
|
|
41
|
+
artefact instead. SHIPPED requires one commit touching >= 2 of the phase's
|
|
42
|
+
named load-bearing files (false-positive guard). See `_check_phase_by_filepath`.
|
|
43
|
+
|
|
44
|
+
Stdout (text mode):
|
|
45
|
+
SHIPPED <short-sha> <commit summary> # if shipped (exit 0)
|
|
46
|
+
NOT_SHIPPED # if not (exit 1)
|
|
47
|
+
UNCERTAIN <sha> <summary> — ... # WEAK verdict, no plan_doc (exit 3)
|
|
48
|
+
|
|
49
|
+
Pass the plan-doc path whenever you have it. A bare two-arg call cannot
|
|
50
|
+
cross-check a WEAK (release-prefix / body-mention) verdict against the plan
|
|
51
|
+
body's SHIPPED stamp, so a stale plan doc can fool it — the exact divergence
|
|
52
|
+
behind /next-up packet false-positives (a renderer pre-screen that passes
|
|
53
|
+
plan_doc said NOT_SHIPPED while a bare spot-check said SHIPPED). With the doc
|
|
54
|
+
path supplied the WEAK verdict is demoted when the stamp is absent.
|
|
55
|
+
|
|
56
|
+
Stdout (--json mode):
|
|
57
|
+
{"shipped": true, "sha": "8ea6ee8", "summary": "docs/RS: RS4 — ..."}
|
|
58
|
+
|
|
59
|
+
Detection:
|
|
60
|
+
Scans the most recent `_ONELINE_WINDOW` commits of `git log --oneline` (currently 4000; the window was 1500 before FQ-409) for commits whose summary matches either
|
|
61
|
+
1. `(docs|go)/<SERIES>:?\\s+<PHASE><not-suffix>` (direct ship; Go-side
|
|
62
|
+
handlers ship under `go/<SERIES>:` rather than `docs/<SERIES>:`), or
|
|
63
|
+
2. `<summary-bundle>:.*<PHASE><not-suffix>` (a summary commit bundles
|
|
64
|
+
several phases into its free-form summary line).
|
|
65
|
+
A *summary-bundle* subject is either a `vX.Y.Z:` release commit OR the
|
|
66
|
+
`docs/HYG:` hygiene-audit prefix. These are the only non-direct subjects a
|
|
67
|
+
real phase ship lands under: a release commit bundles several ships into one
|
|
68
|
+
`vX.Y.Z:` summary, and HYG phases ship under `docs/HYG:`. Also scans the
|
|
69
|
+
*bodies* of the most recent summary-bundle commits for a phase that appears
|
|
70
|
+
only inside an extended body; the summary-line mention is caught by the
|
|
71
|
+
cheaper oneline pass first.
|
|
72
|
+
|
|
73
|
+
⚓ Ship-shaped, not mentioned (FQ-77 — recurring-5, BOTH directions). The
|
|
74
|
+
pass condition is "this commit SHIPPED <PHASE>", not "this commit's subject
|
|
75
|
+
NAMES <PHASE>". Two distinct shapes carry phase ids without shipping them:
|
|
76
|
+
|
|
77
|
+
- **Bookkeeping subjects.** `docs/_plans:` (next-up soft-claims / replan
|
|
78
|
+
sweeps), `docs/fanout:` / `docs/dispatch:` / `docs/dispatch-loop:`
|
|
79
|
+
(run-archive rollups that QUOTE other runs' git-log history), and bulk
|
|
80
|
+
`working-dir snapshot:` commits all name phase ids as narrative. Every
|
|
81
|
+
such commit in this repo's history is bookkeeping — none is a real ship
|
|
82
|
+
attribution (verified 2026-05-19). OS-FQ63 wrongly allowlisted the first
|
|
83
|
+
three as summary-bundle ship-evidence, which produced the FQ-77
|
|
84
|
+
false-POSITIVE: `8d4d2851 docs/fanout: archive ... (FB0 shipped,
|
|
85
|
+
FB2/FB3 halted)` counted as an FB2 ship, culling the only live pick →
|
|
86
|
+
empty packet → `verdict=DRAIN`. FQ-77 supersedes #62/#63's coverage-gap
|
|
87
|
+
framing (it is a *specificity* defect, not a missing prefix): these
|
|
88
|
+
prefixes are EXCLUDED from counting as a ship — see
|
|
89
|
+
`_is_bookkeeping_subject` / `_BOOKKEEPING_SUBJECT_RE`.
|
|
90
|
+
|
|
91
|
+
- **Bulk-snapshot file-path coincidence.** A `working-dir snapshot:`
|
|
92
|
+
commit sweeps hundreds of files at once, so it incidentally touches a
|
|
93
|
+
phase's load-bearing files even when the phase's deliverables do not
|
|
94
|
+
exist (live false-positive 2026-05-19: `1647b0c0` flagged OC4 shipped).
|
|
95
|
+
The file-path backstop (`_check_phase_by_filepath`) excludes such
|
|
96
|
+
commits from the overlap count for the same reason.
|
|
97
|
+
|
|
98
|
+
The original FQ-77 false-NEGATIVE (short phase tags following a `vX.Y.Z:`
|
|
99
|
+
prefix, e.g. `a6b1a785 v0.307.0: AAR15.3 ..., TF9 ..., ACR1 ...`) stays
|
|
100
|
+
closed: `vX.Y.Z:` is a ship-shaped subject and its bundled tags resolve via
|
|
101
|
+
the release-prefix scan.
|
|
102
|
+
|
|
103
|
+
Phase-id alternation: queries containing `'` also try the `prime` spelling
|
|
104
|
+
(and vice versa), since plan docs use `MG3'-1` (canonical) but commits use
|
|
105
|
+
`MG3prime-1` (Windows-quoting workaround).
|
|
106
|
+
|
|
107
|
+
Free-form HYG fallback: HYG phase IDs are snake-case slugs but commit
|
|
108
|
+
subjects often carry prose form. After the literal passes miss, HYG
|
|
109
|
+
queries normalize subjects (lowercase + collapse `[\\s\\-_]+` → `_`) and
|
|
110
|
+
substring-match the slug. Catches `dropbox_zero_apply` ↔
|
|
111
|
+
`docs/HYG: Dropbox zero-apply picker audit (queue #20)`.
|
|
112
|
+
|
|
113
|
+
Phase id matching is **strict**: `RS4` matches `RS4` but not `RS40`, not
|
|
114
|
+
`RS4.1`, and not `RS4-port`. The boundary set disallows alnum, dot, AND
|
|
115
|
+
hyphen on either side of the token — so suffix variants like
|
|
116
|
+
`SF1.2-port` are recognised as distinct from base phase `SF1.2`.
|
|
117
|
+
Operators who want to verify a sub-phase (e.g. `RS4.1` or `SF1.2-port`)
|
|
118
|
+
should pass it exactly.
|
|
119
|
+
"""
|
|
120
|
+
from __future__ import annotations
|
|
121
|
+
|
|
122
|
+
import argparse
|
|
123
|
+
import json
|
|
124
|
+
import re
|
|
125
|
+
import subprocess
|
|
126
|
+
import sys
|
|
127
|
+
from dataclasses import dataclass
|
|
128
|
+
|
|
129
|
+
# The reference-app convention, imported once at the top so the back-compat
|
|
130
|
+
# aliases below (`_PROGRESS_MARKER_WORDS`, `_REPO_PATH_RE`, the infra sets) can be
|
|
131
|
+
# derived from it byte-identically. The lifted file-path / progress / fallback
|
|
132
|
+
# grammar all lives on `StampConvention` now (the genericization); these aliases
|
|
133
|
+
# exist only for `from dos.phase_shipped import *` consumers, never for live code.
|
|
134
|
+
from dos.stamp import ( # noqa: F401 — re-exported for `from dos.phase_shipped import *` back-compat
|
|
135
|
+
JOB_STAMP_CONVENTION as _JOB_STAMP_CONVENTION,
|
|
136
|
+
_UNIVERSAL_DIAGRAM_SUFFIXES as _DIAGRAM_SUFFIXES,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# Windows default cp1252 stdout raises UnicodeEncodeError on `→`/`—`/`±` chars
|
|
140
|
+
# that show up in commit summaries when this helper smoke-tests series-port
|
|
141
|
+
# phase ids. UTF-8 reconfigure is the canonical fix (Python 3.7+); guard with
|
|
142
|
+
# hasattr so older runtimes / detached stdout wrappers don't blow up.
|
|
143
|
+
# Switch stdout to UTF-8 when the stream object offers `reconfigure`; a stream
# without that method is left exactly as it is.
_stdout_stream = sys.stdout
if hasattr(_stdout_stream, "reconfigure"):
    _stdout_stream.reconfigure(encoding="utf-8")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _workspace_root():
    """Return `paths.root` of the currently active dos config.

    This is the directory `_git_log` runs git in. `dos.config` is imported at
    call time rather than at module import.
    """
    from dos import config as _config

    active_cfg = _config.active()
    return active_cfg.paths.root
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _git_log(args: list[str]) -> list[str]:
    """Run `git log <args>` in the workspace root and return its stdout lines.

    Args:
        args: extra command-line arguments appended after `git log`.

    Returns:
        The captured stdout split into lines (decoded as UTF-8, undecodable
        bytes replaced).

    Raises:
        RuntimeError: when git exits non-zero; the message carries git's
            stripped stderr.
    """
    command = ["git", "log"] + args
    proc = subprocess.run(
        command,
        cwd=str(_workspace_root()),
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        check=False,  # the exit status is inspected by hand just below
    )
    if proc.returncode == 0:
        return proc.stdout.splitlines()
    raise RuntimeError(f"git log failed: {proc.stderr.strip()}")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Global oneline-scan window (commit count). SIZED BY CADENCE, NOT TIME — a phase
|
|
175
|
+
# that genuinely shipped but fell out of this window reads as a false-NEGATIVE
|
|
176
|
+
# (`shipped=false`), which the picker then treats as a *live pick that no longer
|
|
177
|
+
# exists*, contributing to the apply-lane WEDGE-storm. FQ-409 root cause: the LF
|
|
178
|
+
# series shipped 2026-05-09..13 (commits `34f8032e`/`f60b8217`) but the
|
|
179
|
+
# dispatch-loop + concurrent-loop archive churn pushed them to position ~3000
|
|
180
|
+
# from HEAD in ~18 days — past the old -1500 window — so every LF phase
|
|
181
|
+
# false-NEGATIVED and re-appeared as a phantom "remaining" pick. The window must
|
|
182
|
+
# stay ahead of the real ship-to-now commit distance; -4000 ≈ the current
|
|
183
|
+
# ~1.5-month depth at today's cadence (cost: ~0.15s vs 0.12s for -1500,
|
|
184
|
+
# negligible). When cadence rises again, raise this — or move ship-truth to the
|
|
185
|
+
# durable plan-body SHIPPED stamp (the real fix; see `_consult_plan_body`).
|
|
186
|
+
_ONELINE_WINDOW = 4000
|
|
187
|
+
# Per-file backstop window. A phase's named files churn far slower than the
|
|
188
|
+
# global log, so an explicit-pathspec `-- <file>` scan reaches much further back
|
|
189
|
+
# in time than _ONELINE_WINDOW for the same commit budget — this is the
|
|
190
|
+
# cadence-resilient path. Kept generous so a slow-moving phase's artefact is
|
|
191
|
+
# still found.
|
|
192
|
+
_FILEPATH_WINDOW = 800
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# Phase-boundary character class.
|
|
196
|
+
# A phase id `SF1.2` must NOT match `SF1.2-port`, `SF1.2.1`, or `SF1.20`.
|
|
197
|
+
# The original regex used `\b`, which treats `-`/`.` as boundaries and
|
|
198
|
+
# yields a false-positive match against `SF1.2-port` and `RS4.1`.
|
|
199
|
+
# Disallow alnum, dot, AND hyphen on either side of the token.
|
|
200
|
+
_BOUNDARY_NEG = r"(?![A-Za-z0-9.\-])"
|
|
201
|
+
_BOUNDARY_PRE_NEG = r"(?<![A-Za-z0-9.\-])"
|
|
202
|
+
|
|
203
|
+
# FQ-326 — open-soak markers. A plan-doc phase section can carry a `SHIPPED`
|
|
204
|
+
# token (a pre-soak substrate stamp, or a heading stamp that drifted) while the
|
|
205
|
+
# phase's ACTUAL completion still gates on an UNCLOSED soak window. A naive
|
|
206
|
+
# `"SHIPPED" in section` then false-positives that phase as shipped (the #326
|
|
207
|
+
# surface: the picker thinks soak-gated work shipped, masking the live close-out
|
|
208
|
+
# pick). These structural phrases name a phase whose own close is soak-gated and
|
|
209
|
+
# still open — date-blind on purpose (the kernel never reads a clock; a future
|
|
210
|
+
# `closes <date>` is matched as text, the registry/picker owns the date math).
|
|
211
|
+
# Conservative: each phrase indicates the soak gates THIS phase's close, not a
|
|
212
|
+
# soak merely mentioned as separate downstream follow-up.
|
|
213
|
+
#
|
|
214
|
+
# The cross-clause alternatives bound their gap with `[^.\n]*` (NOT `.*`) so a
|
|
215
|
+
# match cannot cross a sentence/line boundary — "gates on the upstream commit.
|
|
216
|
+
# The soak audit …" must NOT match (two separate sentences), only a same-clause
|
|
217
|
+
# "gates on a 30d soak". This tightening is load-bearing now that the guard
|
|
218
|
+
# DEMOTES (returns False) rather than defers: an over-match would false-demote a
|
|
219
|
+
# genuinely-shipped section that merely mentions a soak follow-up downstream.
|
|
220
|
+
# One capturing group of `|`-joined alternatives, compiled case-insensitive and
# multiline via the inline `(?im)` flags. The `[^.\n]*` gaps keep a match inside
# a single sentence/line.
_OPEN_SOAK_MARKER_RE = re.compile(
    "(?im)("
    + "|".join(
        (
            r"status:\s*in_progress",  # a soak-ledger row pasted inline
            r"gate[sd]?\s+on\b[^.\n]*\bsoak",  # "gates on a ... soak" (same clause)
            r"soak\b[^.\n]*\bgate[sd]?\b",  # "soak ... gates the delete" (same clause)
            r"until\s+the\s+soak\s+(?:window\s+)?closes",
            r"soak\s+window\b[^.\n]*\bcloses\b",  # "soak window ... closes 2026-06-27"
            r"→\s*tomb\b",  # "### CRSn — 7d soak → tomb"
            r"->\s*tomb\b",  # ASCII-arrow spelling of the same heading
            r"\bzero-emit\s+(?:window|floor|read)\b",
        )
    )
    + ")"
)
|
|
231
|
+
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
# The ship-stamp grammar is now per-workspace DATA (the SCV genericization).
|
|
234
|
+
# What a commit subject must look like to count as a ship moved out of these
|
|
235
|
+
# module constants into `dos.stamp.StampConvention`; the active workspace's
|
|
236
|
+
# convention (`SubstrateConfig.stamp`, defaulting to `JOB_STAMP_CONVENTION`)
|
|
237
|
+
# supplies the three regex fragments the matchers below interpolate. This file
|
|
238
|
+
# keeps the *mechanism* (the scans, the demotions); the *grammar* is the seam.
|
|
239
|
+
#
|
|
240
|
+
# The historical why-it's-shaped-this-way notes that used to live on the raw
|
|
241
|
+
# constants now annotate `JOB_STAMP_CONVENTION` in `dos.stamp` — the one bit of
|
|
242
|
+
# curated prose lives beside the data it explains. The load-bearing facts, in
|
|
243
|
+
# brief, so a reader here isn't sent away:
|
|
244
|
+
#
|
|
245
|
+
# * direct-ship dirs (`docs|go|agents|job_search|scripts`) — ships land under
|
|
246
|
+
# the top-level dir the deliverable lives in (FQ-409 widened `(docs|go)` to
|
|
247
|
+
# include the code dirs; the bookkeeping exclusion is all `docs/…`, so the
|
|
248
|
+
# widening added no false-POSITIVE surface).
|
|
249
|
+
# * summary-bundle prefixes (`docs/HYG:`, plus the universal `vX.Y.Z:`) — the
|
|
250
|
+
# only NON-direct subjects a real ship lands under; an allowlist, never a
|
|
251
|
+
# relaxed `docs/<anything>:`, because the release/body scans are unanchored.
|
|
252
|
+
# * bookkeeping prefixes (`docs/_plans:` soft-claims, `docs/fanout|dispatch|
|
|
253
|
+
# dispatch-loop:` archive rollups, `docs/_soaks:` ledger rows, the universal
|
|
254
|
+
# `… snapshot:` bulk commit) — subjects that NAME phase ids as narrative and
|
|
255
|
+
# must never count as a ship, on ANY scan path (FQ-77, both directions).
|
|
256
|
+
#
|
|
257
|
+
# A foreign repo with no `[stamp]` table inherits the GENERIC convention (no dir
|
|
258
|
+
# prefix → a bare `<SERIES>: <PHASE>` ships), which is what makes `verify`
|
|
259
|
+
# domain-free (the SCV North Star). See `dos.stamp` for the full provenance.
|
|
260
|
+
# ---------------------------------------------------------------------------
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@dataclass(frozen=True)
class _Matchers:
    """Immutable bundle of subject-matching pieces for one stamp convention.

    Produced by `_subject_matchers` and handed to callers so they all work
    from the same resolved convention instead of each re-reading the active
    config.

    Fields:
        convention: the resolved convention object; `direct_ship_core` and
            `is_shared_infra` simply forward to the methods of the same name
            on it.
        direct_prefix: regex fragment from `convention.direct_prefix_re()`.
        summary_subject: regex fragment from `convention.summary_subject_re()`.
        bookkeeping: compiled pattern from `convention.bookkeeping_subject_re()`;
            applied with `.match`, i.e. anchored at the start of the subject.
        repo_path: compiled pattern from `convention.repo_path_re()`.
    """

    convention: object
    direct_prefix: str
    summary_subject: str
    bookkeeping: "re.Pattern[str]"
    repo_path: "re.Pattern[str]"

    def direct_ship_core(self, series_re: str, phase_alt: str) -> str:
        """Return the convention's direct-ship regex core for a series/phase.

        Pure delegation to `convention.direct_ship_core(series_re, phase_alt)`.
        """
        conv = self.convention
        return conv.direct_ship_core(series_re, phase_alt)

    def is_bookkeeping_subject(self, subject: str) -> bool:
        """Report whether `subject` matches the bookkeeping pattern.

        `subject` must be the bare commit summary with no leading sha. It is
        stripped of surrounding whitespace first; a falsy subject is treated
        as the empty string.
        """
        cleaned = subject.strip() if subject else ""
        return self.bookkeeping.match(cleaned) is not None

    def is_shared_infra(self, path: str) -> bool:
        """Report whether the convention classes `path` as shared infra.

        Pure delegation to `convention.is_shared_infra(path)`.
        """
        conv = self.convention
        return conv.is_shared_infra(path)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _subject_matchers(cfg: object | None = None) -> _Matchers:
    """Build the `_Matchers` bundle for a config's stamp convention.

    Args:
        cfg: a config object exposing a `stamp` attribute. When omitted, the
            active config from `dos.config.active()` is used.

    Returns:
        A `_Matchers` carrying the convention plus the four fragments it
        produces (`direct_prefix_re`, `summary_subject_re`,
        `bookkeeping_subject_re`, `repo_path_re`).

    If the config has no `stamp` attribute, or it is `None`, the convention
    falls back to `dos.stamp.JOB_STAMP_CONVENTION`.
    """
    if cfg is None:
        from dos import config as _config

        cfg = _config.active()

    stamp_conv = getattr(cfg, "stamp", None)
    if stamp_conv is None:
        # Defensive default for a config that carries no stamp convention.
        from dos.stamp import JOB_STAMP_CONVENTION

        stamp_conv = JOB_STAMP_CONVENTION

    fragments = {
        "direct_prefix": stamp_conv.direct_prefix_re(),
        "summary_subject": stamp_conv.summary_subject_re(),
        "bookkeeping": stamp_conv.bookkeeping_subject_re(),
        "repo_path": stamp_conv.repo_path_re(),
    }
    return _Matchers(convention=stamp_conv, **fragments)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _parse_batch_line(line: str) -> tuple[str, str, str | None]:
    """Split one `--batch` stdin line into `(series, phase, plan_doc)`.

    The wire format is chosen by whether the line contains a TAB:

    * Tab-delimited: `series TAB phase [TAB plan_doc]`. Each field is taken
      whole between tabs and stripped, so a series or phase containing spaces
      (e.g. `Phase 4`) survives intact. An empty or missing third field gives
      `plan_doc=None`.
    * Whitespace-delimited: `series phase [plan_doc]`, split on runs of
      whitespace at most twice. Multi-word series/phase ids cannot be
      expressed in this form.

    In both forms `plan_doc` is returned without surrounding whitespace, so an
    unstripped stdin line (trailing newline) does not leak into the path.

    Returns:
        `(series, phase, plan_doc)`; `("", "", None)` when the line does not
        yield both a non-empty series and a non-empty phase (blank or garbage
        line — the caller skips it).

    Pure: no I/O.
    """
    if "\t" in line:
        # A line containing a tab always splits into at least two fields.
        fields = [field.strip() for field in line.split("\t")]
        series, phase = fields[0], fields[1]
        if not series or not phase:
            # Half-empty tab lines are garbage, same as in the whitespace form.
            return "", "", None
        plan_doc = fields[2] if len(fields) > 2 and fields[2] else None
        return series, phase, plan_doc

    tokens = line.split(None, 2)
    if len(tokens) < 2:
        return "", "", None
    # With maxsplit, str.split keeps trailing whitespace on the last piece;
    # strip it so the plan-doc path is clean.
    plan_doc = tokens[2].strip() if len(tokens) > 2 else None
    return tokens[0], tokens[1], plan_doc or None
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _bootstrap_active_config() -> None:
    """Install a stamp convention into this process's active dos config.

    Resolution order, first hit wins:

    1. The environment variable named by `config.ENV_STAMP_CONVENTION`: its
       value is parsed as JSON and turned into a convention with
       `StampConvention.from_dict`.
    2. `dos.toml` under the active config's `paths.root`, loaded with
       `stamp.load_from_toml(..., base=<current stamp>)`. The active config is
       replaced only when the loader returns an object other than the current
       stamp.
    3. Otherwise the active config is left untouched.

    Best-effort by design: any exception raised in step 1 falls through to
    step 2, and any exception in step 2 is swallowed, so a malformed override
    never crashes the caller.
    """
    import dataclasses
    import json
    import os

    from dos import config as _config
    from dos import stamp as _stamp

    active_cfg = _config.active()

    # (1) explicit convention handed down through the environment.
    env_payload = os.environ.get(_config.ENV_STAMP_CONVENTION)
    if env_payload:
        try:
            env_conv = _stamp.StampConvention.from_dict(json.loads(env_payload))
            _config.set_active(dataclasses.replace(active_cfg, stamp=env_conv))
        except Exception:
            pass  # fall through to the dos.toml / default path
        else:
            return

    # (2) the workspace's own dos.toml [stamp] table.
    try:
        current_stamp = active_cfg.stamp
        toml_conv = _stamp.load_from_toml(
            active_cfg.paths.root / "dos.toml", base=current_stamp
        )
        if toml_conv is not current_stamp:
            _config.set_active(dataclasses.replace(active_cfg, stamp=toml_conv))
    except Exception:
        pass  # (3) leave the default convention in place
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _is_bookkeeping_subject(subject: str, matchers: "_Matchers | None" = None) -> bool:
|
|
442
|
+
"""True when `subject` is a bookkeeping commit that NAMES phases as narrative.
|
|
443
|
+
|
|
444
|
+
Backward-compatible shim over `_Matchers.is_bookkeeping_subject`: a caller
|
|
445
|
+
that already resolved `matchers` passes it (every in-module scan does, so the
|
|
446
|
+
convention is read once per entrypoint); a caller that doesn't resolves the
|
|
447
|
+
active convention on the spot. Soft-claims, replan sweeps, run-archive
|
|
448
|
+
rollups, and bulk working-dir snapshots quote phase ids without shipping them;
|
|
449
|
+
FQ-77 excludes them from every ship-detection path so a mention can never be
|
|
450
|
+
misread as a ship (the false-POSITIVE half of the recurring-5).
|
|
451
|
+
|
|
452
|
+
`subject` is the bare commit summary — NOT prefixed with the sha. Callers
|
|
453
|
+
that hold an `<sha> <summary>` oneline must strip the sha first.
|
|
454
|
+
"""
|
|
455
|
+
m = matchers if matchers is not None else _subject_matchers()
|
|
456
|
+
return m.is_bookkeeping_subject(subject)
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def _oneline_subject(line: str) -> str:
|
|
460
|
+
"""Return the bare summary from a `<sha> <summary>` oneline string.
|
|
461
|
+
|
|
462
|
+
`git log --oneline` emits `<short-sha> <summary>`; the bookkeeping filter
|
|
463
|
+
keys on the summary, so split off the leading sha token. A line with no
|
|
464
|
+
space (degenerate) yields ``""``.
|
|
465
|
+
"""
|
|
466
|
+
parts = line.split(None, 1)
|
|
467
|
+
return parts[1] if len(parts) > 1 else ""
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# The progress-marker vocabulary is now per-workspace DATA
# (`StampConvention.progress_markers`) — the L1 genericization. These are the
# words that, when they appear immediately after the phase id with a bare space
# (no `:`/`—`/`-`), mark a commit as PROGRESS on a multi-step phase rather than a
# SHIP of it (a `<PHASE> week-1` / `<PHASE> audit` soak commit).
#
# Hardcoding them fired on EVERY repo, so a foreign repo's genuine
# `cache: Phase 0 audit of …` direct ship was silently demoted to NOT_SHIPPED (a
# real Benchmark false-negative). The vocabulary moved to
# `JOB_STAMP_CONVENTION.progress_markers`; the generic convention declares none,
# so a foreign repo's ships are never demoted. The historical why-each-word notes
# (the CS6 `§why`/`todo` and the AAR10 `week-1` provenance) now annotate the
# tuple in `dos.stamp`, beside the data they explain.
#
# `_is_progress_only` reads the active convention through `matchers`, never
# this alias. The alias is kept purely for BACK-COMPAT, derived from
# `JOB_STAMP_CONVENTION`, so a `from dos.phase_shipped import *` consumer still
# sees the byte-identical frozenset.
_PROGRESS_MARKER_WORDS = _JOB_STAMP_CONVENTION.progress_marker_set()
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
# Generic "Phase N" token — a phase id that carries NO series prefix of its
# own (`Phase 6`, `Phase 2.6`). Plan docs whose phases use the bare-ordinal
# heading style (PSC, docs/09, the login subsystem) name phases this way.
# Such a token is NOT self-qualifying: `Phase 6` collides literally across
# every such plan. OS-FQ136 series-qualifies these — see `_phase_variants`
# (synonym half) and `_check_phase_with_cache` (release/body guard half).
# Group 1 captures the ordinal (`6`, `2.6`); the match is case-insensitive and
# must span the whole (already stripped) token.
_GENERIC_PHASE_RE = re.compile(r"(?i)^phase\s*(\d+(?:\.\d+)?)$")
# No-space series-prefixed form — `PSC5`, `AAR15.3`. The series prefix here
# IS the discriminator, so this form is self-qualifying. The OS-FQ136
# synonym pairs a generic `Phase N` with the `<SERIES>N` form and vice versa.
# Group 1 captures the alphabetic series prefix, group 2 the ordinal.
_SERIES_NUM_RE = re.compile(r"(?i)^([A-Za-z]+)\s*(\d+(?:\.\d+)?)$")
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _is_generic_phase_token(phase: str) -> bool:
    """Report whether `phase` is a bare `Phase N` token with no series prefix.

    Surrounding whitespace is ignored; the remainder must match
    `_GENERIC_PHASE_RE` in full.

    Generic tokens collide literally across plans — PSC `Phase 6` and the
    docs/09 pipeline-events `Phase 6` are unrelated work — so the
    release-prefix and body scans must series-qualify them (see
    `_check_phase_with_cache`).
    """
    token = phase.strip()
    return _GENERIC_PHASE_RE.match(token) is not None
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _release_body_alternation(series: str, phase: str) -> str:
    """Return the series-qualified phase alternation for the release/body scans.

    The release-prefix and body scans have no `docs/<SERIES>:` prefix to anchor
    them — a `.*?` jumps across the whole `vX.Y.Z:` summary/body. So any bare
    `Phase N` literal in their alternation matches an *unrelated* plan's phase
    (finding #226: `EV EV1` matched `v0.37.0: ... Login Subsystem Phase 1`).

    OS-FQ136 series-qualified the case where the *query itself* is a generic
    `Phase N`. This helper closes the symmetric leak: a series-prefixed query
    (`EV1`) whose `_phase_variants` *expands into* a bare `Phase N` synonym.
    That synonym must never reach the unanchored alternation bare.

    Each form returned by `_phase_variants(phase, series)` is classified by
    what it IS, not by what it happens to contain:

    * a generic `Phase N` form is rebuilt as the series-adjacent pattern
      `<SERIES>\\s*:?\\s+Phase\\s*N` (`EV Phase 1` / `EV: Phase 1`);
    * every other form (the query's own spelling, its apostrophe/`prime`
      twin, the `<SERIES>N` forms) is kept as-is.

    The generic check runs FIRST on purpose. A plain "does the variant contain
    the series text" substring test wrongly treats `Phase 1` as self-qualifying
    for any series whose letters occur inside the word "phase" (`SE`, `HA`,
    `AS`, `PH`, ...), which re-opens the cross-series leak.

    Args:
        series: The series token of the query (not regex-escaped).
        phase: The phase id being looked up.

    Returns:
        The forms joined with `|`, ready to be wrapped in a group by the caller.
    """
    series_re = re.escape(series)
    safe: list[str] = []
    for variant in _phase_variants(phase, series):
        # `variant` is already re.escape()'d (e.g. `Phase\ 1`). Drop the
        # escaping backslashes so the literal text can be tested for the
        # generic `Phase N` shape.
        literal = re.sub(r"\\(.)", r"\1", variant)
        gm = _GENERIC_PHASE_RE.match(literal)
        if gm is None:
            # Not a generic `Phase N` spelling — keep the escaped form bare.
            safe.append(variant)
            continue
        num_re = re.escape(gm.group(1))
        # Series-adjacent literal: `EV Phase 1` / `EV: Phase 1`.
        safe.append(rf"{series_re}\s*:?\s+Phase\s*{num_re}")
    return "|".join(safe)
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _phase_variants(phase: str, series: str = "") -> list[str]:
|
|
556
|
+
"""Return regex-escaped phase-id forms to try, covering known conventions.
|
|
557
|
+
|
|
558
|
+
Apostrophe ↔ `prime`: plan docs use `MG3'-1` (canonical) but commits use
|
|
559
|
+
`MG3prime-1` (Windows-quoting workaround). Either spelling may appear on
|
|
560
|
+
either side of the lookup. This keeps the script convention-blind:
|
|
561
|
+
callers do not need to know which form the commit happens to use.
|
|
562
|
+
|
|
563
|
+
OS-FQ136 — `Phase N` ↔ `<SERIES>N` synonym. Plans like PSC use bare
|
|
564
|
+
`Phase 6` headings, but ship commits write either form: `docs/PSC: Phase 6
|
|
565
|
+
TOMB` (spaced) and `v0.315.0: PSC5 score worker pool` (no-space, series-
|
|
566
|
+
prefixed). When `series` is supplied, a generic `Phase N` query also tries
|
|
567
|
+
`<SERIES>N` / `<SERIES> N`, and a `<SERIES>N` query also tries `Phase N` /
|
|
568
|
+
`Phase<space>N`. Closes the finding #136 false-negative (no-space form
|
|
569
|
+
missed). The release/body series-qualification guard (in the caller)
|
|
570
|
+
closes the matching false-positive.
|
|
571
|
+
"""
|
|
572
|
+
variants = {phase}
|
|
573
|
+
if "'" in phase:
|
|
574
|
+
variants.add(phase.replace("'", "prime"))
|
|
575
|
+
if "prime" in phase:
|
|
576
|
+
variants.add(phase.replace("prime", "'"))
|
|
577
|
+
if series:
|
|
578
|
+
gm = _GENERIC_PHASE_RE.match(phase.strip())
|
|
579
|
+
if gm:
|
|
580
|
+
num = gm.group(1)
|
|
581
|
+
# `Phase 6` → also try `PSC6` / `PSC 6` (the no-space series form).
|
|
582
|
+
variants.add(f"{series}{num}")
|
|
583
|
+
variants.add(f"{series} {num}")
|
|
584
|
+
else:
|
|
585
|
+
sm = _SERIES_NUM_RE.match(phase.strip())
|
|
586
|
+
if sm and sm.group(1).upper() == series.upper():
|
|
587
|
+
num = sm.group(2)
|
|
588
|
+
# `PSC5` → also try `Phase 5` / `Phase5` (the generic form).
|
|
589
|
+
variants.add(f"Phase {num}")
|
|
590
|
+
variants.add(f"Phase{num}")
|
|
591
|
+
return sorted(re.escape(v) for v in variants)
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def _series_variants(series: str) -> list[str]:
|
|
595
|
+
"""Regex-escaped plan-id spellings for the TRAILER rung (docs/289).
|
|
596
|
+
|
|
597
|
+
A trailer names the plan as registered, and the two spellings in the wild
|
|
598
|
+
differ: the QUERY usually carries the full plan id
|
|
599
|
+
(``docs/286_shipping-the-go-binary-through-pypi-per-platform-wheels``) while
|
|
600
|
+
the TRAILER carries its short series head (``docs/286``). The bridge is the
|
|
601
|
+
underscore convention of plan-doc basenames — ``<head>_<slug>`` with
|
|
602
|
+
``<head>`` ending in a digit run — so this returns the full id plus
|
|
603
|
+
``<head>`` when the query has that shape, and just the full id otherwise
|
|
604
|
+
(a hyphenated slug like ``RS4-port`` or a multi-word series gains NO extra
|
|
605
|
+
spelling; the sub-phase-parent fallback is a different, separately-gated
|
|
606
|
+
feature). Pure; the sibling of `_phase_variants`.
|
|
607
|
+
|
|
608
|
+
"docs/286_shipping-…-wheels" -> ["docs/286", "docs/286_shipping-…-wheels"]
|
|
609
|
+
"82_liveness-oracle-plan" -> ["82", "82_liveness-oracle-plan"]
|
|
610
|
+
"docs/286" -> ["docs/286"] (already the head)
|
|
611
|
+
"RS4-port" -> ["RS4-port"] (no underscore head)
|
|
612
|
+
"my_plan" -> ["my_plan"] (head has no digit)
|
|
613
|
+
"""
|
|
614
|
+
variants = {series}
|
|
615
|
+
m = re.match(r"^([^_\s]*\d[a-z0-9]*)_", series, re.IGNORECASE)
|
|
616
|
+
if m:
|
|
617
|
+
variants.add(m.group(1))
|
|
618
|
+
return sorted(re.escape(v) for v in variants)
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def _build_log_cache() -> tuple[list[str], list[str]]:
    """Pre-fetch the git log once for batch mode.

    Two `_git_log` reads are made:

    * the oneline view, `_ONELINE_WINDOW` commits deep. The window is kept
      wide to catch the CR2-class case where a phase shipped 330+ commits ago
      and a tighter window dropped it;
    * the body view, 50 commits deep, formatted as short sha / raw body /
      `--END--` sentinel. It stays narrow because release-summary mentions
      are already caught by the cheaper oneline pass.

    A `RuntimeError` from either read degrades that half to an empty list
    instead of propagating.

    Returns:
        `(oneline_lines, body_lines)`.
    """

    def _fetch(args: list[str]) -> list[str]:
        # Best-effort read: an unavailable log is treated as an empty one.
        try:
            return _git_log(args)
        except RuntimeError:
            return []

    oneline = _fetch(["--oneline", f"-{_ONELINE_WINDOW}"])
    body = _fetch(["-50", "--format=%h%n%B%n--END--"])
    return oneline, body
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
def _is_progress_only(line: str, match_end: int, matchers: "_Matchers | None" = None) -> bool:
|
|
641
|
+
"""True if what follows the matched phase id reads as a progress marker.
|
|
642
|
+
|
|
643
|
+
Called after `direct_pat` matches `^<sha> docs/<SERIES>: <PHASE>` at the
|
|
644
|
+
given char offset. If the next char is whitespace and the next token is
|
|
645
|
+
a known progress marker word (`week-1`, `audit`, `baseline`, …), the
|
|
646
|
+
commit is incremental progress on a multi-step phase, not a ship of it.
|
|
647
|
+
|
|
648
|
+
The marker vocabulary is per-workspace DATA (`StampConvention.progress_markers`,
|
|
649
|
+
read through `matchers`): the reference app declares its soak vocabulary, a
|
|
650
|
+
generic repo declares none so a real foreign-repo ship (`cache: Phase 0 audit
|
|
651
|
+
of …`) is NEVER demoted. Falls back to the active convention when a caller
|
|
652
|
+
didn't thread `matchers` in.
|
|
653
|
+
|
|
654
|
+
Separators (`:`, `—`, `-`, em-dashes) and end-of-line continue to read
|
|
655
|
+
as ship attributions — only the bare `<PHASE> <progress-word>` shape
|
|
656
|
+
triggers the demotion. Keeps the false-positive surface narrow.
|
|
657
|
+
"""
|
|
658
|
+
tail = line[match_end:]
|
|
659
|
+
if not tail or not tail[0].isspace():
|
|
660
|
+
return False # `:`, `—`, `-`, EOL — ship
|
|
661
|
+
markers = (
|
|
662
|
+
matchers.convention.progress_marker_set() if matchers is not None
|
|
663
|
+
else _subject_matchers().convention.progress_marker_set()
|
|
664
|
+
)
|
|
665
|
+
next_token = tail.lstrip().split(None, 1)[0] if tail.strip() else ""
|
|
666
|
+
return next_token.lower() in markers
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _check_phase_with_cache(
    series: str,
    phase: str,
    oneline_lines: list[str],
    body_lines: list[str],
    matchers: "_Matchers | None" = None,
) -> dict:
    """Check one phase against pre-fetched log caches (zero subprocess cost).

    The passes run strongest-evidence-first and the first hit wins:

    * 1a  direct ship — `<sha> docs/<SERIES>: <PHASE> ...`      (`via="direct"`)
    * 1a' trailer stamp, opt-in per convention                  (`via="trailer"`)
    * 1b  release-prefix / summary-bundle mention       (`via="release-prefix"`)
    * 1c  bundle-slug free-form subject fallback              (`via="hyg-slug"`)
    * 1d  sub-phase parent fallback, opt-in           (`via="sub-phase-parent"`)
    * body scan inside summary-bundle commit bodies       (`via="body-mention"`)

    `matchers` carries the active workspace's ship-stamp grammar (the SCV seam).
    It defaults to resolving the active convention, so a direct call (tests, the
    single-pick CLI) still works; the batch/check-packet/library entrypoints
    resolve it ONCE and thread it in, so every pick in a packet reads the same
    convention (design-law 2).

    Args:
        series: Series token of the plan the phase belongs to.
        phase: Phase id being looked up.
        oneline_lines: `<sha> <summary>` lines, newest first.
        body_lines: Lines of the body log: per commit a sha line, the subject,
            the body lines, then a `--END--` sentinel line.
        matchers: Resolved matchers, or None to resolve on the spot.

    Returns:
        A dict with keys `shipped` (bool), `sha`, `summary` and `via`; the last
        three are empty strings when nothing matched.
    """
    if matchers is None:
        matchers = _subject_matchers()
    series_re = re.escape(series)
    # Direct-ship variant set: includes the `Phase N` <-> `<SERIES>N` synonyms.
    # The direct-ship prefix (`docs/<SERIES>:`) IS the series qualifier, so the
    # bare `Phase N` literal is safe here — `docs/PSC: Phase 6 TOMB` is
    # unambiguously a PSC phase.
    phase_alt = "|".join(_phase_variants(phase, series))
    direct_pat = re.compile(
        rf"^([a-f0-9]+)\s+{matchers.direct_ship_core(series_re, phase_alt)}{_BOUNDARY_NEG}",
        re.IGNORECASE,
    )

    # Series-qualified alternation shared by the two WEAK scans (release-prefix
    # and body). Neither has a `docs/<SERIES>:` prefix to anchor it — `.*?`
    # jumps across the whole summary — so a bare `Phase 6` would match
    # `v0.62.0: docs/09 Phase 6 UI` even for a PSC query (finding #136, 5th
    # recurrence of the cross-series literal-collision class).
    generic = _GENERIC_PHASE_RE.match(phase.strip())
    if generic:
        # OS-FQ136 — GENERIC `Phase N` query: drop the bare literal and accept
        # only series-qualified forms:
        #   - the `<SERIES>N` / `<SERIES> N` synonym (self-qualifying), or
        #   - `<SERIES>` immediately preceding the literal (`PSC Phase 8` /
        #     `PSC: Phase 8`).
        num_re = re.escape(generic.group(1))
        synonym_alt = rf"{series_re}\s*{num_re}"
        adjacent_alt = rf"{series_re}\s*:?\s+Phase\s*{num_re}"
        weak_alt = rf"(?:{synonym_alt}|{adjacent_alt})"
    else:
        # OS-FQ226 — a series-prefixed query must be qualified too:
        # `_phase_variants("EV1", "EV")` expands to include the bare generic
        # synonym `Phase 1`, which left bare would match `v0.37.0: ... Login
        # Subsystem Phase 1` (finding #226). `_release_body_alternation` keeps
        # the self-qualifying `EV1`/`EV 1` forms bare and constrains the
        # generic synonym to a `<SERIES>`-adjacent shape.
        weak_alt = rf"(?:{_release_body_alternation(series, phase)})"

    # Release-prefix oneline pattern: catches phases bundled into the summary
    # line of a release commit (e.g. `cae674f v0.268.0: EC17.2 escalation + RS4
    # archetype HTML + ...`). OS-FQ63 — it also fires on the allowlisted
    # standalone-summary subjects (`docs/_plans:`, `docs/HYG:`, …), not only
    # `vX.Y.Z:` releases; the alternation above is already series-qualified,
    # so that widening introduces no cross-series leak.
    release_pat = re.compile(
        rf"^([a-f0-9]+)\s+{matchers.summary_subject}.*?{_BOUNDARY_PRE_NEG}{weak_alt}{_BOUNDARY_NEG}",
        re.IGNORECASE,
    )

    # Pass 1a: direct-ship lines win (`docs/<SERIES>: <PHASE> ...`). They are
    # the canonical attribution, so even if a release-prefix mention sits
    # higher in the log we want the direct ship's SHA.
    #
    # Demotion: if the matched suffix is a progress marker (`<PHASE> week-1`,
    # `<PHASE> audit`, …) this commit is incremental work on a multi-week
    # soak / observation phase, not a ship. Skip it and keep scanning — a
    # real ship may sit further back in the log.
    for line in oneline_lines:
        m = direct_pat.match(line)
        if not m:
            continue
        # FQ-77 — never count a bookkeeping subject as a ship, even if it
        # carries the `docs/<SERIES>:` shape. (A `docs/_plans:`/`docs/dispatch:`
        # subject cannot match the direct anchor for a real series, but a
        # series literally named `_plans`/`dispatch` would; the guard makes the
        # exclusion total rather than incidental.)
        if matchers.is_bookkeeping_subject(_oneline_subject(line)):
            continue
        if not _is_progress_only(line, m.end(), matchers):
            return {"shipped": True, "sha": m.group(1), "summary": line, "via": "direct"}

    # Pass 1a′ (docs/289): trailer-form stamp — a `(<PLAN> <PHASE>)` group at
    # the END of the subject, the Conventional-Commits shape (`feat(pypi): …
    # (docs/286 Phase 3)`), per-convention OPT-IN (`trailer_stamp`). Runs after
    # the direct pass (a start-anchored ship anywhere in the window stays the
    # canonical attribution) and before the release-prefix pass (the trailer is
    # the commit's OWN claim about itself; a release bundle is a weaker,
    # footprint-guarded mention). The shape constraints (end anchor, parens,
    # progress-marked trailers) live in `trailer_ship_core`.
    # Two guards mirror the sibling passes: a bookkeeping subject never ships
    # (FQ-77), and a summary/release subject is SKIPPED — it falls through to
    # Pass 1b where the release-bump footprint guards apply, so a version cut
    # ending in a phrase-shaped paren can never be promoted to a direct ship.
    if getattr(matchers.convention, "trailer_stamp", False):
        series_alt = "|".join(_series_variants(series))
        trailer_core = matchers.convention.trailer_ship_core(series_alt, phase_alt)
        trailer_pat = re.compile(
            rf"^([a-f0-9]+)\s+.*{trailer_core}", re.IGNORECASE
        )
        summary_start = re.compile(rf"^{matchers.summary_subject}", re.IGNORECASE)
        for line in oneline_lines:
            m = trailer_pat.match(line)
            if not m:
                continue
            subject = _oneline_subject(line)
            if matchers.is_bookkeeping_subject(subject):
                continue
            if summary_start.match(subject):
                continue
            return {"shipped": True, "sha": m.group(1), "summary": line, "via": "trailer"}

    # Pass 1b: release-prefix bundled mentions, only if no direct ship was
    # found above. Newest release commit wins (oneline is newest-first).
    # Same progress-marker demotion as Pass 1a — a release commit that
    # bundles `<PHASE> audit` or `<PHASE> dual-key attribution` (substrate
    # work on a multi-step phase) is not the close-out ship.
    for line in oneline_lines:
        m = release_pat.match(line)
        if not m:
            continue
        # FQ-77 — belt-and-suspenders: bookkeeping prefixes are meant to be
        # absent from the summary anchor already, but a future widening of that
        # anchor must still not let a quoted phase id in a soft-claim /
        # archive-rollup subject read as a ship.
        if matchers.is_bookkeeping_subject(_oneline_subject(line)):
            continue
        if not _is_progress_only(line, m.end(), matchers):
            return {
                "shipped": True,
                "sha": m.group(1),
                "summary": f"{line} (release-prefix mention)",
                "via": "release-prefix",
            }

    # Pass 1c: bundle-slug free-form subject fallback. A standalone-summary
    # series like job's HYG has snake-case phase IDs (`dropbox_zero_apply`) but
    # commit subjects in prose form (`docs/HYG: Dropbox zero-apply picker audit
    # (queue #20)`). Normalize both sides — lowercase + collapse [\s\-_]+ into
    # a single underscore — and substring-match the slug. Runs ONLY for a
    # series the active convention declared as a summary-bundle prefix
    # (`bundle_slugs()` — the L4 fix: no hardcoded `"HYG"` literal), to bound
    # the false-positive surface; a generic repo declares none so this never
    # runs, and non-bundle slugs would conflict with how phase ids are formed
    # in ordinary series (`SF1.2`, `RS4`, etc.).
    if series.upper() in matchers.convention.bundle_slugs():
        slug = re.sub(r"[\s\-_]+", "_", phase.lower())
        hyg_subject_pat = re.compile(
            rf"^([a-f0-9]+)\s+{matchers.direct_prefix}{series_re}:?\s+(.+)$",
            re.IGNORECASE,
        )
        for line in oneline_lines:
            m = hyg_subject_pat.match(line)
            if not m:
                continue
            normalized = re.sub(r"[\s\-_]+", "_", m.group(2).lower())
            if slug in normalized:
                return {
                    "shipped": True,
                    "sha": m.group(1),
                    "summary": f"{line} (HYG slug fallback)",
                    "via": "hyg-slug",
                }

    # Pass 1d (OS12, 2026-05-16, finding #167): sub-phase parent fallback.
    # When the query is a sub-phase tag (`<phase>-<suffix>`) and none of the
    # passes above hit, try the bare-parent form (`<phase>` without the
    # `-<suffix>`) and accept the match only if the matched commit's subject
    # contains a normalized substring match of `<suffix>` (case-insensitive,
    # `[\s\-_]+` collapsed to `_`). This catches the queue #167 class:
    # UP6-diagnostics was queried against commit `647c6eaf docs/UP: UP6 —
    # /ui/system/diagnostics tile fan-out`; the parent UP6 matches, and
    # "diagnostics" appears in the subject — so the ship is real.
    #
    # Guard against false positives: a `docs/AAR: AAR15 — apply lessons`
    # commit must NOT match a `AAR15-foo` query unless `foo` appears in the
    # subject. The suffix slug is the gate, not the parent match alone.
    #
    # L4-fix: this fallback is a per-convention FEATURE FLAG
    # (`sub_phase_parent_fallback`), not an unconditional `"-" in phase` query
    # test. The reference app declares it on (it uses hyphen-suffixed sub-phase
    # ids); a generic repo leaves it off, so a fabricated `P2-CLI` no longer
    # false-resolves against a real `P2` whose subject merely contains `CLI`.
    if matchers.convention.sub_phase_parent_fallback and "-" in phase:
        parent, _, suffix = phase.partition("-")
        suffix_slug = re.sub(r"[\s\-_]+", "_", suffix.lower())
        parent_alt = "|".join(_phase_variants(parent))
        parent_direct_pat = re.compile(
            rf"^([a-f0-9]+)\s+{matchers.direct_prefix}{series_re}:?\s+(?:{parent_alt}){_BOUNDARY_NEG}",
            re.IGNORECASE,
        )
        for line in oneline_lines:
            m = parent_direct_pat.match(line)
            if not m:
                continue
            subject = _oneline_subject(line)
            # FQ-77 — same bookkeeping exclusion as the direct pass.
            if matchers.is_bookkeeping_subject(subject):
                continue
            if _is_progress_only(line, m.end(), matchers):
                continue
            # Confirm the suffix slug appears in the SUBJECT (substring match
            # after normalization). Without this gate the bare parent match
            # would over-claim — every sub-phase id would resolve to the
            # parent's most recent ship regardless of topic. The leading sha
            # is deliberately excluded: it is hex noise, and a hex-looking
            # suffix (`1`, `fe`, `add`) must not be satisfied by the commit
            # hash instead of the commit's own words.
            normalized_subject = re.sub(r"[\s\-_]+", "_", subject.lower())
            if suffix_slug and suffix_slug in normalized_subject:
                return {
                    "shipped": True,
                    "sha": m.group(1),
                    "summary": f"{line} (sub-phase parent fallback: '{suffix}' in subject)",
                    "via": "sub-phase-parent",
                }

    # Body scan. OS-FQ136 / OS-FQ226 — it only fires inside summary-bundle
    # commit bodies (see `in_summary` below), so it shares the release-prefix
    # scan's series-blindness and reuses the same series-qualified alternation.
    body_pat = re.compile(rf"{_BOUNDARY_PRE_NEG}{weak_alt}{_BOUNDARY_NEG}", re.IGNORECASE)
    # NOTE(review): unlike the release-prefix and trailer scans, this subject
    # gate is matched case-sensitively (no IGNORECASE), as in the original —
    # confirm that asymmetry is intended.
    summary_head = re.compile(rf"^{matchers.summary_subject}")
    sha = ""
    summary = ""
    in_summary = False
    for line in body_lines:
        if line == "--END--":
            # Commit boundary: reset the per-commit state.
            sha = ""
            summary = ""
            in_summary = False
            continue
        if not sha:
            # First line of a commit record is its short sha.
            sha = line.strip()
            continue
        if not summary:
            summary = line.strip()
            # Gate the body-scan on a summary-bundle subject (`vX.Y.Z:` release
            # OR a standalone-summary prefix such as `docs/HYG:`). FQ-77 — a
            # bookkeeping subject (soft-claim / archive rollup / snapshot) is
            # never a summary bundle, so its body's quoted phase ids must not
            # resolve a ship; the explicit guard makes that total.
            in_summary = bool(summary_head.match(summary)) and not matchers.is_bookkeeping_subject(summary)
            continue
        if in_summary and body_pat.search(line):
            return {
                "shipped": True,
                "sha": sha,
                "summary": f"{summary} (body mention)",
                "via": "body-mention",
            }
    return {"shipped": False, "sha": "", "summary": "", "via": ""}
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def _plan_body_says_shipped(plan_doc: str, phase: str, series: str = "") -> bool | None:
|
|
946
|
+
"""Consult the plan body for an authoritative SHIPPED stamp on `phase`.
|
|
947
|
+
|
|
948
|
+
Returns:
|
|
949
|
+
True — phase has a section AND the section contains `SHIPPED`
|
|
950
|
+
False — phase has a section AND the section does NOT contain `SHIPPED`
|
|
951
|
+
None — plan body doesn't mention this phase as a section header, OR
|
|
952
|
+
the doc is unreadable. Caller should fall back to git log.
|
|
953
|
+
|
|
954
|
+
"Section" means either the top-level `### <PHASE>` heading (bounded by the
|
|
955
|
+
next `##`/`###` heading or EOF) OR a bullet sub-phase `- **<PHASE> — ...`
|
|
956
|
+
(bounded by the next sibling bullet or `##` heading). Both shapes are
|
|
957
|
+
common — AAR13.0-AAR13.4 use the bullet shape under `### AAR13`, while
|
|
958
|
+
AAR10 uses the `### AAR10` shape. The phase id boundary disallows
|
|
959
|
+
`[A-Za-z0-9.\\-]` on either side to avoid `AAR1` matching `AAR10`.
|
|
960
|
+
|
|
961
|
+
Used as a tiebreaker for WEAK git verdicts (release-prefix or body-mention)
|
|
962
|
+
— a release commit that bundles `<PHASE> dual-key attribution` is reading
|
|
963
|
+
substrate work as a ship; the plan body's stamp (or lack thereof) is the
|
|
964
|
+
operator's intent. Direct-ship verdicts are NOT cross-checked — stamp drift
|
|
965
|
+
is a known operator habit (`AAR6` shipped 2026-05-04 with no stamp; `AFR2`
|
|
966
|
+
shipped without one). Trusting direct-ship despite stamp drift was a
|
|
967
|
+
deliberate call.
|
|
968
|
+
"""
|
|
969
|
+
try:
|
|
970
|
+
with open(plan_doc, encoding="utf-8") as f:
|
|
971
|
+
text = f.read()
|
|
972
|
+
except (OSError, UnicodeDecodeError):
|
|
973
|
+
return None
|
|
974
|
+
# OS-FQ136 — `series` (when supplied) threads the `Phase N` ↔ `<SERIES>N`
|
|
975
|
+
# synonym in, so a `PSC6` query resolves against a `### Phase 6` heading
|
|
976
|
+
# and vice versa. Optional: pre-OS-FQ136 callers omit it harmlessly.
|
|
977
|
+
phase_re = "|".join(_phase_variants(phase, series))
|
|
978
|
+
# Try ### header section first.
|
|
979
|
+
h3_pat = re.compile(
|
|
980
|
+
rf"(?m)^###\s+(?:Phase\s+)?(?:{phase_re}){_BOUNDARY_NEG}",
|
|
981
|
+
)
|
|
982
|
+
m = h3_pat.search(text)
|
|
983
|
+
if m:
|
|
984
|
+
rest = text[m.end():]
|
|
985
|
+
nm = re.search(r"(?m)^##+\s+", rest)
|
|
986
|
+
section = text[m.start() : m.end() + (nm.start() if nm else len(rest))]
|
|
987
|
+
return _section_says_shipped(section)
|
|
988
|
+
# Try bullet sub-phase: `- **<PHASE> — ...` (optionally `Phase <PHASE>`).
|
|
989
|
+
bullet_pat = re.compile(
|
|
990
|
+
rf"(?m)^[\s]*-\s+\*\*(?:Phase\s+)?(?:{phase_re}){_BOUNDARY_NEG}[^\n]*",
|
|
991
|
+
)
|
|
992
|
+
m = bullet_pat.search(text)
|
|
993
|
+
if m:
|
|
994
|
+
rest = text[m.end():]
|
|
995
|
+
nm = re.search(r"(?m)^[\s]*-\s+\*\*|^##+\s+", rest)
|
|
996
|
+
section = text[m.start() : m.end() + (nm.start() if nm else len(rest))]
|
|
997
|
+
return _section_says_shipped(section)
|
|
998
|
+
return None
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
def _section_says_shipped(section: str) -> bool | None:
|
|
1002
|
+
"""The SHIPPED verdict for ONE bounded plan-doc phase section.
|
|
1003
|
+
|
|
1004
|
+
Returns True iff the section carries a `SHIPPED` token, False iff it has the
|
|
1005
|
+
section but no stamp — EXCEPT the FQ-326 soak guard: a section that contains
|
|
1006
|
+
`SHIPPED` but ALSO an OPEN-soak marker (the phase's own close gates on an
|
|
1007
|
+
unclosed soak — `_OPEN_SOAK_MARKER_RE`) returns **False**, not True. The
|
|
1008
|
+
`SHIPPED` there is a pre-soak substrate stamp (or a drifted heading stamp)
|
|
1009
|
+
for work that landed ahead of the soak the phase actually gates on; trusting
|
|
1010
|
+
it false-positives the soak-gated phase as done and masks the live close-out
|
|
1011
|
+
pick (#326).
|
|
1012
|
+
|
|
1013
|
+
Why False (the demote) and not None (defer): `_consult_plan_body` only
|
|
1014
|
+
challenges WEAK verdicts (`via != "direct"` early-returns before this is even
|
|
1015
|
+
reached) and only DEMOTES on `body_verdict is False`. Returning None would
|
|
1016
|
+
leave the already-`shipped=True` weak verdict un-demoted — i.e. the guard
|
|
1017
|
+
would *stop confirming* the false ship but not actually *kill* it, defeating
|
|
1018
|
+
the #326 fix (a release-prefix / body-mention match would ride the pre-soak
|
|
1019
|
+
stamp straight to SHIPPED). Returning False makes `_consult_plan_body` demote
|
|
1020
|
+
it, which is the whole point. Direct-ship is untouched (it never reaches
|
|
1021
|
+
here), so a genuinely direct-shipped soak phase is never false-demoted.
|
|
1022
|
+
|
|
1023
|
+
Date-blind: the marker is structural prose, never a clock read — the soak
|
|
1024
|
+
registry / picker owns the open-vs-closed date math.
|
|
1025
|
+
"""
|
|
1026
|
+
if "SHIPPED" not in section:
|
|
1027
|
+
return False
|
|
1028
|
+
if _OPEN_SOAK_MARKER_RE.search(section):
|
|
1029
|
+
return False
|
|
1030
|
+
return True
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
def _consult_plan_body(result: dict, plan_doc: str | None, phase: str, series: str = "") -> dict:
|
|
1034
|
+
"""Apply the plan-body cross-check to a `_check_phase_with_cache` result.
|
|
1035
|
+
|
|
1036
|
+
Only WEAK ship verdicts (`via != "direct"`) are challenged. Direct-ship
|
|
1037
|
+
verdicts win unconditionally because their attribution is unambiguous
|
|
1038
|
+
(the commit author named `<PHASE>` as the subject). Release-prefix and
|
|
1039
|
+
body-mention verdicts are reading bundled mentions of related work —
|
|
1040
|
+
when the plan body has the phase's section AND the section carries no
|
|
1041
|
+
SHIPPED stamp, treat git's verdict as a false positive and demote.
|
|
1042
|
+
|
|
1043
|
+
Returns the same shape dict; if demoted, `shipped=False`, `via=demoted`,
|
|
1044
|
+
and `summary` carries a one-line note for Course-corrections rendering.
|
|
1045
|
+
"""
|
|
1046
|
+
if not plan_doc or not result.get("shipped"):
|
|
1047
|
+
return result
|
|
1048
|
+
if result.get("via") == "direct":
|
|
1049
|
+
return result
|
|
1050
|
+
body_verdict = _plan_body_says_shipped(plan_doc, phase, series)
|
|
1051
|
+
if body_verdict is False:
|
|
1052
|
+
return {
|
|
1053
|
+
"shipped": False,
|
|
1054
|
+
"sha": "",
|
|
1055
|
+
"summary": (
|
|
1056
|
+
f"git matched {result.get('sha','')} via {result.get('via','?')} but "
|
|
1057
|
+
f"plan body has no SHIPPED stamp on {phase} — demoted"
|
|
1058
|
+
),
|
|
1059
|
+
"via": "demoted-by-plan-body",
|
|
1060
|
+
"git_match_sha": result.get("sha", ""),
|
|
1061
|
+
"git_match_summary": result.get("summary", ""),
|
|
1062
|
+
}
|
|
1063
|
+
return result
|
|
1064
|
+
|
|
1065
|
+
|
|
1066
|
+
# ---------------------------------------------------------------------------
|
|
1067
|
+
# The file-path backstop's grammar is now per-workspace DATA (the file-path-rung
|
|
1068
|
+
# genericization, sibling to the SCV subject-grammar lift). What a plan-doc token
|
|
1069
|
+
# must look like to count as a load-bearing FILE PATH — the top-level dir
|
|
1070
|
+
# allowlist — and which basenames count as shared-infra hubs moved out of these
|
|
1071
|
+
# module constants into `dos.stamp.StampConvention` (the `code_dirs` /
|
|
1072
|
+
# `infra_basenames` / `infra_doc_basenames` fields + the `repo_path_re()` /
|
|
1073
|
+
# `is_shared_infra()` accessors). The active workspace's convention supplies them
|
|
1074
|
+
# through `_Matchers`, exactly as it supplies the subject-grammar fragments.
|
|
1075
|
+
#
|
|
1076
|
+
# Why this was a leak: the reference app hardcoded its OWN top-level dirs
|
|
1077
|
+
# (`agents|job_search|go|…`) in `_REPO_PATH_RE`, so on a foreign repo whose
|
|
1078
|
+
# deliverables live under `engine/`/`models/`/`commands/` the rung harvested
|
|
1079
|
+
# NOTHING and the artefact backstop was dead — every subject-drifted ship
|
|
1080
|
+
# resolved `via none`. The generic convention (`code_dirs=()`) harvests a path
|
|
1081
|
+
# rooted at ANY top-level dir, which is sound because the dir allowlist was only
|
|
1082
|
+
# ever a recognition narrowing; the false-positive gates (2-file overlap,
|
|
1083
|
+
# distinctive-file, bookkeeping, cross-series) all live downstream and are
|
|
1084
|
+
# unchanged. A host that wants the tight allowlist declares `code_dirs` (the
|
|
1085
|
+
# reference app does, in `JOB_STAMP_CONVENTION`).
|
|
1086
|
+
#
|
|
1087
|
+
# The names below are kept as BACK-COMPAT aliases derived from
|
|
1088
|
+
# `JOB_STAMP_CONVENTION` so a `from dos.phase_shipped import *` consumer (the
|
|
1089
|
+
# reference app's thin shim) still sees byte-identical values; live code reads the
|
|
1090
|
+
# active convention through `matchers`, never these constants.
|
|
1091
|
+
# ---------------------------------------------------------------------------
|
|
1092
|
+
# (`_JOB_STAMP_CONVENTION` / `_DIAGRAM_SUFFIXES` are imported at the top of the
|
|
1093
|
+
# module.) These are the back-compat aliases for the file-path-rung constants.
|
|
1094
|
+
# Back-compat alias: whatever `_JOB_STAMP_CONVENTION.repo_path_re()` returns.
_REPO_PATH_RE = _JOB_STAMP_CONVENTION.repo_path_re()
|
|
1095
|
+
# Back-compat alias: whatever `_JOB_STAMP_CONVENTION.infra_basename_set()` returns.
_SHARED_INFRA_BASENAMES = _JOB_STAMP_CONVENTION.infra_basename_set()
|
|
1096
|
+
# Back-compat alias: whatever `_JOB_STAMP_CONVENTION.infra_doc_basename_set()` returns.
_SHARED_INFRA_DOC_BASENAMES = _JOB_STAMP_CONVENTION.infra_doc_basename_set()
|
|
1097
|
+
|
|
1098
|
+
# Non-source artifact suffixes — a harvested token ending in one of these is a
|
|
1099
|
+
# release tarball / archive / binary / vendored-repo URL, NEVER a load-bearing
|
|
1100
|
+
# SOURCE deliverable a phase ships. The generic harvester is deliberately loose
|
|
1101
|
+
# (match-any top-level dir), so without this a plan row that mentions a release
|
|
1102
|
+
# tarball (`v1.2.3/release.tar.gz`) or a git remote (`github.com/a/b.git`) lets a
|
|
1103
|
+
# series-attributed commit touching that committed artifact false-ship an
|
|
1104
|
+
# unshipped phase via the single-file gate (adversarial-review finding). Modeled
|
|
1105
|
+
# on `stamp._UNIVERSAL_DIAGRAM_SUFFIXES` (which excludes regenerated diagrams).
|
|
1106
|
+
# This is the artefact analogue: a phase's *distinctive* deliverable is source, so
|
|
1107
|
+
# an archive/binary/URL is dropped at harvest time on every scan path.
|
|
1108
|
+
# Kept as a tuple because `str.endswith` accepts a tuple of suffixes; callers
# lower-case the candidate path before testing it against these.
_NON_SOURCE_SUFFIXES = (
    # archives / packages
    ".tar",
    ".tar.gz",
    ".tgz",
    ".zip",
    ".gz",
    ".bz2",
    ".xz",
    ".whl",
    ".egg",
    # vendored-repo URLs
    ".git",
    # images / documents
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".pdf",
    # compiled binaries / objects
    ".bin",
    ".so",
    ".dll",
    ".dylib",
    ".exe",
    ".o",
    ".a",
    ".jar",
    ".class",
)
|
|
1113
|
+
|
|
1114
|
+
|
|
1115
|
+
def _is_shared_infra(path: str, matchers: "_Matchers | None" = None) -> bool:
    """Report whether `path` is a hub file excluded from the overlap count.

    The decision is delegated entirely to the matcher's `is_shared_infra`
    method. A caller that already resolved the active workspace's matchers (the
    SCV seam) passes them in; when `matchers` is None they are resolved here
    via `_subject_matchers()`.

    The classes the convention is documented to exclude — all too
    widely-touched for a coincidental edit to be ship evidence:
      - hub *code* files (universal ∪ the convention's `infra_basenames`);
      - the convention's named *documentation* hubs (`infra_doc_basenames`);
      - any `docs/…*.mmd`/`*.png` diagram (the universal regenerated-hub rule).
    """
    if matchers is None:
        matchers = _subject_matchers()
    return matchers.is_shared_infra(path)
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
def _extract_phase_files(
    plan_doc: str, phase: str, series: str = "", matchers: "_Matchers | None" = None
) -> list[str]:
    """Return the repo-relative file paths named in `phase`'s plan-doc section.

    The phase is located the same way `_plan_body_says_shipped` locates it:
    first as a `### <PHASE>` heading section (bounded by the next `##`-or-deeper
    heading), otherwise as a `- **<PHASE> — ...` bullet sub-phase (bounded by
    the next `- **` bullet or heading). Every token in that section matched by
    the repo-path matcher is harvested, in order of first appearance, without
    duplicates.

    These are the phase's *load-bearing* files — the ones its plan row names as
    added or edited — and are what the file-path verdict path matches ship
    commits against (AAR-FQ230, finding #230).

    Args:
        plan_doc: path of the plan document (read as UTF-8).
        phase: phase id; expanded through `_phase_variants(phase, series)`.
        series: optional series id for the `Phase N` ↔ `<SERIES>N` synonym.
        matchers: resolved workspace matchers; when None the repo-path matcher
            comes from `_subject_matchers()`.

    Returns:
        The de-duplicated path list, or `[]` when the doc is unreadable, the
        phase has no section, or the section names no repo-path-shaped token.
    """
    try:
        with open(plan_doc, encoding="utf-8") as handle:
            body = handle.read()
    except (OSError, UnicodeDecodeError):
        return []

    alternation = "|".join(_phase_variants(phase, series))

    # Heading form wins; the bullet form is only tried when no heading matches.
    anchor = re.search(
        rf"(?m)^###\s+(?:Phase\s+)?(?:{alternation}){_BOUNDARY_NEG}", body
    )
    stop_pattern = r"(?m)^##+\s+"
    if not anchor:
        anchor = re.search(
            rf"(?m)^[\s]*-\s+\*\*(?:Phase\s+)?(?:{alternation}){_BOUNDARY_NEG}[^\n]*",
            body,
        )
        stop_pattern = r"(?m)^[\s]*-\s+\*\*|^##+\s+"
    if not anchor:
        return []

    # The section runs from the anchor to the next sibling boundary (or EOF).
    tail = body[anchor.end():]
    boundary = re.search(stop_pattern, tail)
    section_end = anchor.end() + (boundary.start() if boundary else len(tail))
    section = body[anchor.start() : section_end]

    # Harvest with the ACTIVE workspace's repo-path matcher (the `code_dirs`
    # seam), falling back to the active convention when none was threaded in.
    path_finder = (matchers if matchers is not None else _subject_matchers()).repo_path
    harvested: list[str] = []
    for hit in path_finder.finditer(section):
        candidate = hit.group(1)
        # Tarballs, archives, binaries and vendored-repo URLs are never a
        # phase's distinctive SOURCE deliverable; letting the loose generic
        # harvester keep one could carry a false ship.
        is_artifact = candidate.lower().endswith(_NON_SOURCE_SUFFIXES)
        if is_artifact or candidate in harvested:
            continue
        harvested.append(candidate)
    return harvested
|
|
1189
|
+
|
|
1190
|
+
|
|
1191
|
+
def phase_deliverable_touched(
|
|
1192
|
+
plan: str,
|
|
1193
|
+
phase: str,
|
|
1194
|
+
plan_doc: str | None,
|
|
1195
|
+
touched_files: "set[str] | list[str] | None",
|
|
1196
|
+
*,
|
|
1197
|
+
series: str = "",
|
|
1198
|
+
drop_shared_infra: bool = True,
|
|
1199
|
+
matchers: "_Matchers | None" = None,
|
|
1200
|
+
) -> bool | None:
|
|
1201
|
+
"""The ONE shared deliverable-overlap ground-truth predicate.
|
|
1202
|
+
|
|
1203
|
+
Answers: "did `touched_files` touch any of `phase`'s declared *distinctive*
|
|
1204
|
+
deliverable files?" — the SAME question the read-side file-path verdict
|
|
1205
|
+
(`_check_phase_by_filepath`) and the write-side stamp guards (job's
|
|
1206
|
+
`_gh4_plandoc_only_lacks_deliverable` / `_gh4_subject_is_prelaunch_staging_only`)
|
|
1207
|
+
each re-implemented over their own footprint source. Both sides now feed this
|
|
1208
|
+
one predicate via a trivial footprint adapter — ending the recurring
|
|
1209
|
+
"build the deliverable check (it already exists, just duplicated)" loop that
|
|
1210
|
+
let the CRS3/#387/#365 false-stamps and the 84% zero-ship false-drains
|
|
1211
|
+
recur. It is footprint-source-AGNOSTIC (takes a touched-file SET, not a sha
|
|
1212
|
+
and not a committed-list) and PURE (no git, no clock) — so the two sources
|
|
1213
|
+
(a git-show-on-sha set on the read side, the committed pathspec on the write
|
|
1214
|
+
side) are thin adapters and this core is unit-testable without git.
|
|
1215
|
+
|
|
1216
|
+
Returns:
|
|
1217
|
+
* True — the phase declares >= 1 distinctive deliverable file AND the
|
|
1218
|
+
touched set hit at least one of them (a real deliverable shipped).
|
|
1219
|
+
* False — the phase declares >= 1 distinctive deliverable file AND the
|
|
1220
|
+
touched set hit NONE of them (coverage with zero deliverable —
|
|
1221
|
+
the CRS3 / plan-doc-only / prelaunch-staging shape: demote/refuse).
|
|
1222
|
+
* None — PERMISSIVE: the phase declares no distinctive deliverable file
|
|
1223
|
+
(a genuinely doc-only phase, or every declared file is a
|
|
1224
|
+
shared-infra hub), OR the inputs are unresolvable (no plan_doc,
|
|
1225
|
+
empty/None touched set). The caller must treat None as "no
|
|
1226
|
+
evidence to demote/refuse" — i.e. trust the prior verdict —
|
|
1227
|
+
so this can only ever ADD a refusal where there is zero
|
|
1228
|
+
distinctive evidence, never manufacture a false-negative on a
|
|
1229
|
+
real ship. This None=permissive posture is the load-bearing
|
|
1230
|
+
contract both call sites already honor; preserve it exactly.
|
|
1231
|
+
|
|
1232
|
+
`drop_shared_infra=True` (default) excludes hub files (`config.py`,
|
|
1233
|
+
`fanout_state.py`, doc hubs, regenerated diagrams — see `_is_shared_infra`)
|
|
1234
|
+
from the "distinctive" set, matching the read side. The write side historically
|
|
1235
|
+
counted hubs; converging on drop_shared_infra=True only ever makes the write
|
|
1236
|
+
guard refuse a stamp that had *no distinctive* evidence (a hub-only edit),
|
|
1237
|
+
never a real ship — but a caller that must preserve the legacy hub-counting
|
|
1238
|
+
behavior can pass drop_shared_infra=False.
|
|
1239
|
+
"""
|
|
1240
|
+
if not plan_doc or not touched_files:
|
|
1241
|
+
return None
|
|
1242
|
+
declared = _extract_phase_files(plan_doc, phase, series, matchers)
|
|
1243
|
+
# Drop the plan doc itself — editing the plan doc is the 3b coverage under
|
|
1244
|
+
# scrutiny, never a deliverable for this check (mirrors the write side's
|
|
1245
|
+
# plan-doc-self drop).
|
|
1246
|
+
pdp = plan_doc.replace("\\", "/")
|
|
1247
|
+
distinctive = [
|
|
1248
|
+
f for f in declared
|
|
1249
|
+
if f.replace("\\", "/") != pdp
|
|
1250
|
+
and not (drop_shared_infra and _is_shared_infra(f, matchers))
|
|
1251
|
+
]
|
|
1252
|
+
if not distinctive:
|
|
1253
|
+
return None # doc-only / hub-only phase → no distinctive evidence → permissive
|
|
1254
|
+
norm = {str(p).replace("\\", "/") for p in touched_files if p}
|
|
1255
|
+
for f in distinctive:
|
|
1256
|
+
ff = f.replace("\\", "/")
|
|
1257
|
+
# exact path, dir-prefix (declared "dir/"), or basename match — the union
|
|
1258
|
+
# of the read side's basename match and the write side's path/dir-prefix
|
|
1259
|
+
# match, so the merged predicate is at least as accepting as either source.
|
|
1260
|
+
if ff in norm:
|
|
1261
|
+
return True
|
|
1262
|
+
if ff.endswith("/") and any(p.startswith(ff) for p in norm):
|
|
1263
|
+
return True
|
|
1264
|
+
base = ff.rsplit("/", 1)[-1]
|
|
1265
|
+
if base and any(p == ff or p.rsplit("/", 1)[-1] == base for p in norm):
|
|
1266
|
+
return True
|
|
1267
|
+
return False # declares distinctive files, touched none → no deliverable shipped
|
|
1268
|
+
|
|
1269
|
+
|
|
1270
|
+
# #394 — cross-series guard for the file-path backstop.
|
|
1271
|
+
#
|
|
1272
|
+
# A `vX.Y.Z:` release commit (or a `docs/<OTHER>:` ship) bundles one plan
|
|
1273
|
+
# series' work, but its diff can incidentally touch >= 2 of a DIFFERENT,
|
|
1274
|
+
# genuinely-unshipped plan's load-bearing files when the two series share an
|
|
1275
|
+
# infra-adjacent file. Live recurrence (finding #394): RTN0 (drafted, zero
|
|
1276
|
+
# commits ever) false-flagged SHIPPED by `c7c87566`
|
|
1277
|
+
# ("v0.365.0: lane gardener — LG0 baseline + LG1 /lane-audit skill"), which
|
|
1278
|
+
# touches scripts/lane_gardener_audit.py + tests/test_lane_gardener.py — 2/5
|
|
1279
|
+
# of RTN0's named files — because the LG-series release shares those files.
|
|
1280
|
+
# A shared load-bearing file is a SIBLING-SERIES ship, not a ship of `series`.
|
|
1281
|
+
#
|
|
1282
|
+
# The discriminator: a commit subject that names a CONCRETE other plan-series
|
|
1283
|
+
# (a `docs/<SERIES>:` / `go/<SERIES>:` prefix whose series token != the queried
|
|
1284
|
+
# series, or a `vX.Y.Z: <other-series> …` release whose summary leads with a
|
|
1285
|
+
# different series id) must carry an explicit phase-id / series token for the
|
|
1286
|
+
# QUERIED series before the file-path overlap counts as a ship. The conservative
|
|
1287
|
+
# direction: when the subject does NOT clearly name another series (a generic
|
|
1288
|
+
# release summary, an unparseable subject), the guard does NOT fire — it only
|
|
1289
|
+
# refuses when it can prove the commit belongs to a different series.
|
|
1290
|
+
# `docs/<SERIES>:` / `go/<SERIES>:` subject prefix; group 1 is the series token.
_OTHER_SERIES_PREFIX_RE = re.compile(
    r"^(?:docs|go)/([A-Za-z][A-Za-z0-9]*)\s*:",
    flags=re.IGNORECASE,
)
# A `vX.Y.Z:` release whose summary names a series-prefixed phase id
# (`v0.365.0: ... LG0 ...` / `v0.378.0: ... GBA6 ...`). Group 1 is the first
# such series token (>= 2 letters directly followed by a digit) after the
# version tag.
_RELEASE_SERIES_TOKEN_RE = re.compile(
    r"^v\d+\.\d+\.\d+:\s.*?\b([A-Za-z]{2,})\d",
    flags=re.IGNORECASE,
)


def _subject_names_other_series(subject: str, series: str) -> bool:
    """True iff `subject` clearly attributes to a plan-series OTHER than `series`.

    This is the discriminator for the file-path backstop's cross-series guard
    (#394): a file-path overlap whose commit subject names a *different* series
    is a sibling-series ship, not a ship of `series`.

    Two subject shapes can prove a different series (comparison is
    case-insensitive, both inputs are stripped first):

    1. a `docs/<OTHER>:` / `go/<OTHER>:` prefix whose token differs from
       `series`;
    2. a `vX.Y.Z: ... <OTHER><digit> ...` release whose first series-prefixed
       phase id differs from `series` AND whose subject nowhere carries the
       `series` token itself — a release bundling both series is not refused.

    Conservative: an empty subject or series, or a subject matching neither
    shape, returns False (the guard does not fire), so an ambiguous or generic
    subject is treated as "could be this series".
    """
    text = (subject or "").strip()
    queried = (series or "").strip().lower()
    if not (text and queried):
        return False

    # Shape 1 — direct prefix naming a different series.
    prefix_hit = _OTHER_SERIES_PREFIX_RE.match(text)
    if prefix_hit is not None and prefix_hit.group(1).lower() != queried:
        return True

    # Shape 2 — release summary leading with a different series' phase id.
    release_hit = _RELEASE_SERIES_TOKEN_RE.match(text)
    if release_hit is None or release_hit.group(1).lower() == queried:
        return False

    # Cross-series only when the queried series token is absent from the WHOLE
    # subject; the caller's same-series token check covers the bundled case.
    own_token = re.compile(
        rf"{_BOUNDARY_PRE_NEG}{re.escape(series)}(?![A-Za-z])", re.IGNORECASE
    )
    return own_token.search(text) is None
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
# A batched file-path log cache: `{repo-relative-path: [(sha, subject), …]}`,
|
|
1338
|
+
# newest-first, capped per file at `_FILEPATH_WINDOW`. Built ONCE by
|
|
1339
|
+
# `_build_filepath_log_cache` from a single `git log --name-only` scan over the
|
|
1340
|
+
# UNION of every pair's named files, then handed to `_check_phase_by_filepath`
|
|
1341
|
+
# so the per-pair file-path backstop does ZERO subprocesses (docs/284). The
|
|
1342
|
+
# per-file list is byte-identical to what `git log --oneline -<window> -- <path>`
|
|
1343
|
+
# returns for that path, so an overlap test reading from the cache produces the
|
|
1344
|
+
# SAME verdict as the per-file-subprocess path (the never-under-count pin).
|
|
1345
|
+
# Note: this is a plain string holding the annotation text, not a typing alias object.
_FilepathLogCache = "dict[str, list[tuple[str, str]]]"
|
|
1346
|
+
|
|
1347
|
+
|
|
1348
|
+
def _build_filepath_log_cache(
    files: "set[str] | list[str]",
) -> "dict[str, list[tuple[str, str]]] | None":
    """Build the per-file commit-history cache in ONE `git log --name-only` scan.

    docs/284 — the batch path of the file-path backstop. Instead of one
    ``git log --oneline -<window> -- <file>`` subprocess per named file, a
    single windowed scan over the union of the files (as pathspecs) yields
    `{path: [(sha, subject), …]}`, newest-first, so each per-pair overlap test
    becomes an in-memory lookup.

    Parity with the per-file path (the ⚓ never-under-count invariant): every
    commit in the union scan is charged to whichever requested paths its
    `--name-only` block lists, and each path's list is then cut to its
    `_FILEPATH_WINDOW` most recent entries — what the per-file
    ``-{_FILEPATH_WINDOW} -- <path>`` call returns.

    Args:
        files: the named files to index; falsy entries and duplicates are
            dropped.

    Returns:
        * `{}` when no usable path was supplied;
        * the cache dict, with one (possibly empty) list per requested path;
        * `None` when `_git_log` raises `RuntimeError`, or when the scan saw
          `_FILEPATH_WINDOW * _BATCH_SCAN_CAP_FACTOR` or more commits (the
          union window is saturated, so a per-file window could reach a commit
          this scan dropped). In both cases the caller is expected to fall
          back to the exact per-file path rather than risk an under-counted
          (false-NEGATIVE) verdict.

    Subjects are returned RAW (no bookkeeping filtering); the cache consumer
    filters `is_bookkeeping_subject` itself, so filtering stays in one place.
    """
    pathspecs = sorted({name for name in files if name})
    if not pathspecs:
        return {}

    scan_cap = _FILEPATH_WINDOW * _BATCH_SCAN_CAP_FACTOR
    # `%x00`-delimited headers survive subjects containing format characters.
    # `%h` is the ABBREVIATED sha, matching the per-file `git log --oneline`
    # output (the full `%H` would break the `sha`/`summary` parity pin). The
    # union pathspec limits the scan to commits touching at least one named
    # file; `-<scan_cap>` bounds the worst case.
    #
    # ⚓ `--no-merges` is LOAD-BEARING for byte-identity (docs/284): a union
    # `--name-only` scan cannot reproduce git's per-PATH history
    # simplification through merge commits, so merges are dropped at the
    # source. A merge commit is never a phase's ship of record (the underlying
    # feature commit is, and both paths retain it); the per-file path adds
    # `--no-merges` in lock-step.
    log_args = [
        "--name-only",
        "--no-merges",
        "--format=%x00%h%x00%s",
        f"-{scan_cap}",
        "--",
        *pathspecs,
    ]
    try:
        output = _git_log(log_args)
    except RuntimeError:
        return None

    history: dict[str, list[tuple[str, str]]] = {spec: [] for spec in pathspecs}
    sha_now = ""
    subject_now = ""
    commits_seen = 0
    for raw in output:
        if raw.startswith("\x00"):
            # Commit header: `\x00<sha>\x00<subject>`.
            sha_now, _, subject_now = raw[1:].partition("\x00")
            commits_seen += 1
            continue
        touched = raw.strip()
        # git log is newest-first, so appending keeps each list newest-first.
        if sha_now and touched in history:
            history[touched].append((sha_now, subject_now))

    if commits_seen >= scan_cap:
        # Saturated union window — refuse to answer from a possibly narrower
        # view than the per-file window would have had.
        return None

    # Apply the per-file cap the subprocess path applied via `-{_FILEPATH_WINDOW}`.
    return {spec: entries[:_FILEPATH_WINDOW] for spec, entries in history.items()}
|
|
1442
|
+
|
|
1443
|
+
|
|
1444
|
+
# Headroom multiplier for the batched union scan vs the per-file window. The
|
|
1445
|
+
# per-pair path caps each file at `_FILEPATH_WINDOW` commits-touching-THAT-file;
|
|
1446
|
+
# the batch scan caps the UNION at this multiple of that window. A file's touches
|
|
1447
|
+
# are sparse in the union stream (most union commits touch OTHER files), so this
|
|
1448
|
+
# headroom lets every file reach its own 800-window in the common case; when it
|
|
1449
|
+
# is not enough the cache builder returns None and the caller falls back to the
|
|
1450
|
+
# exact per-file path. Sized generously (the scan is ~50ms regardless of cap on a
|
|
1451
|
+
# union-restricted history) — correctness never rides on it, only the fast path.
|
|
1452
|
+
# `_build_filepath_log_cache` multiplies `_FILEPATH_WINDOW` by this to get its union-scan commit cap.
_BATCH_SCAN_CAP_FACTOR = 12
|
|
1453
|
+
|
|
1454
|
+
|
|
1455
|
+
def _check_phase_by_filepath(
    series: str,
    phase: str,
    plan_doc: str,
    matchers: "_Matchers | None" = None,
    fp_cache: "dict[str, list[tuple[str, str]]] | None" = None,
) -> dict:
    """Re-derive a ship verdict from the file paths a phase's plan row names.

    Subject-token matching keys on the *commit subject string*, which drifts
    (`8b0aec12 DT2: …` ships DT2 with a bare `DT2:` prefix, not the canonical
    `docs/DT: DT2 —`). Six subject-regex widenings have failed to close this
    false-NEGATIVE class (#62/#63/#77/#103/#136/#226). This path matches the
    *artefact* instead: when the commit subject and the file-path-touched set
    disagree, the file-path set wins.

    Overlap rule (the false-POSITIVE guard — see plan AAR-FQ230):
      - Harvest the phase's load-bearing files via `_extract_phase_files`.
      - For each file, take its commit list: from `fp_cache` when one is
        supplied, otherwise from
        `git log --oneline --no-merges -<_FILEPATH_WINDOW> -- <file>`
        (capped window, explicit pathspec).
      - SHIPPED requires a SINGLE commit touching >= 2 of those named files,
        AND at least one of the matched files must be *distinctive* (not a
        shared-infra hub file — see `_is_shared_infra`). A real phase ship
        touches the cluster of files its plan row names together; an
        incidental edit touches one.
      - A multi-file candidate whose subject names a DIFFERENT plan series
        (see `_subject_names_other_series`, finding #394) is refused.
      - Degenerate case: when only ONE named file has commit history, it is
        accepted only if the touching commit's subject carries the
        `<SERIES>` token (followed by anything but a letter). A sole
        shared-infra file is never enough — it yields no verdict.
      - FQ-77 bookkeeping exclusion: commits whose subject
        `matchers.is_bookkeeping_subject` flags (working-dir snapshots,
        run-archive rollups, soft-claims) are dropped before the overlap
        count — a bulk sweep that co-touches a phase's files is not ship
        evidence.

    Args:
        series: Plan series token (e.g. `DT`).
        phase: Exact phase id (e.g. `DT2`).
        plan_doc: Path of the plan doc whose phase row names the files.
        matchers: Ship-stamp grammar; resolved via `_subject_matchers()` when
            omitted.
        fp_cache: Optional `{path: [(sha, subject), …]}` batch cache. When it
            is not None, no git subprocess is run and a path missing from
            the cache is treated as having no commits.

    Returns:
        The standard verdict dict (`shipped`, `sha`, `summary`, `via`). A
        positive verdict carries `via="file-path"`; every miss returns the
        empty not-shipped verdict. `_apply_filepath_backstop` returns a
        positive verdict as-is — it is NOT re-checked against the plan body's
        SHIPPED stamp, so the overlap rule above is the only false-positive
        guard.
    """
    if matchers is None:
        matchers = _subject_matchers()
    files = _extract_phase_files(plan_doc, phase, series, matchers)
    if not files:
        return {"shipped": False, "sha": "", "summary": "", "via": ""}
    # Map each commit sha -> (set of named files it touched, subject line).
    touched: dict[str, set[str]] = {}
    subjects: dict[str, str] = {}
    series_lc = series.lower()
    for path in files:
        # docs/284 — the per-file commit list comes from the shared batch
        # cache when one was built, else from a per-file `git log`
        # subprocess. Both are meant to yield the same newest-first
        # `[(sha, subject), …]` list capped at `_FILEPATH_WINDOW`.
        if fp_cache is not None:
            file_commits = fp_cache.get(path, [])
        else:
            try:
                # `--no-merges` is kept in lock-step with the batch cache
                # builder: a merge commit is never a phase's ship of record,
                # and the underlying feature commit is retained either way.
                lines = _git_log(
                    ["--oneline", "--no-merges", f"-{_FILEPATH_WINDOW}", "--", path]
                )
            except RuntimeError:
                # A git failure for one file only removes that file's
                # evidence; the remaining files are still scanned.
                continue
            file_commits = []
            for line in lines:
                parts = line.split(None, 1)
                if not parts:
                    continue
                file_commits.append((parts[0], parts[1] if len(parts) > 1 else ""))
        for sha, subj in file_commits:
            # FQ-77 — bookkeeping commits never enter `touched`, so they can
            # satisfy neither the >=2-file rule nor the single-file gate
            # (live false-positive 2026-05-19: `1647b0c0` flagged OC4 shipped).
            if matchers.is_bookkeeping_subject(subj):
                continue
            subjects[sha] = subj
            touched.setdefault(sha, set()).add(path)
    if not touched:
        return {"shipped": False, "sha": "", "summary": "", "via": ""}
    # Route the multi-vs-single decision on the files that actually HAVE
    # commit history in the capped window — NOT the raw harvested count. A
    # harvested token with zero commits (a prose URL, a version string) is
    # inert noise; counting it pushed genuine single-file phases into the
    # >=2 branch and lost true ships. `live_files[0]` likewise keeps a noise
    # token from becoming the "sole file" of the single-file gate.
    live_files = [f for f in files if any(f in hit for hit in touched.values())]
    if not live_files:
        return {"shipped": False, "sha": "", "summary": "", "via": ""}
    if len(live_files) >= 2:
        # Multi-file phase: scan for the strongest (most-files) commit that
        # passes every gate. Starting `best_n` at 1 enforces the >= 2 rule;
        # the strict comparison keeps the first-seen commit on ties.
        best_sha = ""
        best_n = 1
        for sha, hit in touched.items():
            if len(hit) <= best_n:
                continue
            # False-positive guard: an overlap made up entirely of shared-
            # infra files is not ship evidence — unrelated commits touch
            # those together routinely.
            if all(_is_shared_infra(f, matchers) for f in hit):
                continue
            # #394 cross-series guard: a commit attributed to a DIFFERENT
            # plan series is a sibling-series ship that merely shares a
            # load-bearing file (RTN0 vs `c7c87566`).
            if _subject_names_other_series(subjects.get(sha, ""), series):
                continue
            best_n, best_sha = len(hit), sha
        if best_sha:
            return {
                "shipped": True,
                "sha": best_sha,
                "summary": (
                    f"{best_sha} {subjects.get(best_sha, '')} "
                    f"(file-path match: touched {best_n}/{len(live_files)} "
                    f"load-bearing files of {phase})"
                ),
                "via": "file-path",
            }
        return {"shipped": False, "sha": "", "summary": "", "via": ""}
    # Single-file phase: accept only when the touching commit's subject
    # carries the series token. The token may stand alone (`docs/DT: ...`) or
    # merge into a phase id (`DT7: ...`, `DT3.1 —`), so a trailing digit/dot
    # is allowed but a trailing LETTER is not (`DTX` is a different series).
    # A sole shared-infra hub file is too weak to be ship evidence even with
    # a series-attributed subject, so it yields no verdict.
    if series_lc and not _is_shared_infra(live_files[0], matchers):
        series_attr_re = re.compile(
            rf"{_BOUNDARY_PRE_NEG}{re.escape(series)}(?![A-Za-z])",
            re.IGNORECASE,
        )
        for sha, hit in touched.items():
            subj = subjects.get(sha, "")
            if live_files[0] in hit and series_attr_re.search(subj):
                return {
                    "shipped": True,
                    "sha": sha,
                    "summary": (
                        f"{sha} {subj} (file-path match: sole load-bearing "
                        f"file of {phase}, series-attributed subject)"
                    ),
                    "via": "file-path",
                }
    return {"shipped": False, "sha": "", "summary": "", "via": ""}
|
|
1629
|
+
|
|
1630
|
+
|
|
1631
|
+
def _apply_filepath_backstop(
|
|
1632
|
+
result: dict, series: str, phase: str, plan_doc: str | None,
|
|
1633
|
+
matchers: "_Matchers | None" = None,
|
|
1634
|
+
fp_cache: "dict[str, list[tuple[str, str]]] | None" = None,
|
|
1635
|
+
) -> dict:
|
|
1636
|
+
"""Run the AAR-FQ230 file-path false-NEGATIVE backstop on a verdict.
|
|
1637
|
+
|
|
1638
|
+
The single shared entry point for the artefact-based backstop. When the
|
|
1639
|
+
subject-token passes (`_check_phase_with_cache` + `_consult_plan_body`)
|
|
1640
|
+
all miss BUT the pick names a plan doc, re-derive the verdict from the
|
|
1641
|
+
file paths the doc's phase row names vs the file paths recent commits
|
|
1642
|
+
touched. Catches DT2-shape ships whose commit subject lacks the phase
|
|
1643
|
+
token (#230, recurring 9×).
|
|
1644
|
+
|
|
1645
|
+
Until 2026-05-18 this ran ONLY in `--check-packet`. `/next-up` and
|
|
1646
|
+
`/replan` pick work through `--batch` and `check_phase_shipped()`, which
|
|
1647
|
+
skipped it — so the fix never ran on the path that picks work, and #230
|
|
1648
|
+
kept recurring. This helper closes that gap: all three entry points now
|
|
1649
|
+
route through it.
|
|
1650
|
+
|
|
1651
|
+
The file-path verdict is NOT routed through `_consult_plan_body`: the
|
|
1652
|
+
file-path set IS an artefact derived from the plan doc itself, so
|
|
1653
|
+
re-consulting that doc for a SHIPPED *stamp* would let a missing stamp (a
|
|
1654
|
+
known operator habit) overturn the artefact. The overlap rule inside
|
|
1655
|
+
`_check_phase_by_filepath` is the false-positive guard, not the stamp.
|
|
1656
|
+
"""
|
|
1657
|
+
if result.get("shipped") or not plan_doc:
|
|
1658
|
+
return result
|
|
1659
|
+
fp = _check_phase_by_filepath(series, phase, plan_doc, matchers, fp_cache)
|
|
1660
|
+
return fp if fp.get("shipped") else result
|
|
1661
|
+
|
|
1662
|
+
|
|
1663
|
+
def build_batch_filepath_cache(
    triples: "list[tuple[str, str, str | None]]",
    matchers: "_Matchers | None" = None,
) -> "dict[str, list[tuple[str, str]]] | None":
    """Build one shared file-path log cache for a whole pick set (docs/284).

    Every pick's load-bearing files are harvested with `_extract_phase_files`
    (the same call `_check_phase_by_filepath` makes, against that pick's OWN
    plan doc), merged into a single set, and handed to
    `_build_filepath_log_cache` for one combined scan. Passing the result to
    `_apply_filepath_backstop(..., fp_cache=…)` lets the backstop serve every
    pick without a per-pick git subprocess.

    Args:
        triples: One `(series, phase, plan_doc | None)` per pick. Picks are
            kept individually (rather than collapsed to a series→doc map) so
            two picks of one series with different docs both contribute.
            Picks without a phase or plan doc are ignored.
        matchers: Ship-stamp grammar; resolved via `_subject_matchers()` when
            omitted.

    Returns:
        * ``{}`` when no pick contributed any file (nothing to scan);
        * otherwise whatever `_build_filepath_log_cache` returns — the cache,
          or ``None`` to signal the caller to fall back to the exact
          per-file path.
    """
    grammar = _subject_matchers() if matchers is None else matchers
    wanted: set[str] = set()
    for series, phase, plan_doc in triples:
        if phase and plan_doc:
            try:
                wanted.update(_extract_phase_files(plan_doc, phase, series, grammar))
            except Exception:  # pragma: no cover — a harvest hiccup just skips this pick's files
                pass
    return _build_filepath_log_cache(wanted) if wanted else {}
|
|
1702
|
+
|
|
1703
|
+
|
|
1704
|
+
def check_phase_shipped(series: str, phase: str, plan_doc: str | None = None) -> dict:
    """Decide whether `phase` of `series` has shipped, as a verdict dict.

    The verdict is assembled in three stages:

    1. `_check_phase_with_cache` scans the `_ONELINE_WINDOW` most recent
       commit subjects (`git log --oneline`) and the bodies of the 50 most
       recent commits. Per the original notes it looks for a direct ship
       (`docs/<SERIES>:? <PHASE>`), a summary-bundle subject
       (`vX.Y.Z:` / `docs/_plans:` / `docs/HYG:` … mentioning the phase), and
       finally a phase token that only appears in a bundle commit's body.
    2. `_consult_plan_body` cross-checks WEAK git verdicts (release-prefix,
       body-mention) against the plan body's SHIPPED stamps when `plan_doc`
       is supplied. Direct-ship verdicts are never overridden — stamp drift
       is a known operator habit and direct attribution is unambiguous.
    3. `_apply_filepath_backstop` re-derives the verdict from the files the
       plan-doc row names when every subject-token pass missed and
       `plan_doc` is supplied (AAR-FQ230).

    The workspace's ship-stamp grammar is resolved ONCE and threaded through
    both the subject-token scan and the file-path backstop, so one call reads
    a single convention end-to-end (design-law 2).

    Returns:
        `{'shipped': bool, 'sha': str, 'summary': str, 'via': str}`. If the
        subject-line `git log` fails, the not-shipped verdict additionally
        carries an `'error'` string. A failure of the body scan is tolerated
        and treated as "no bodies".
    """
    grammar = _subject_matchers()
    try:
        subject_lines = _git_log(["--oneline", f"-{_ONELINE_WINDOW}"])
    except RuntimeError as exc:
        failure = {"shipped": False, "sha": "", "summary": "", "via": ""}
        failure["error"] = str(exc)
        return failure
    try:
        commit_bodies = _git_log(["-50", "--format=%h%n%B%n--END--"])
    except RuntimeError:
        # The body scan is only a fallback pass; proceed without it.
        commit_bodies = []
    verdict = _check_phase_with_cache(series, phase, subject_lines, commit_bodies, grammar)
    verdict = _consult_plan_body(verdict, plan_doc, phase, series)
    return _apply_filepath_backstop(verdict, series, phase, plan_doc, grammar)
|
|
1736
|
+
|
|
1737
|
+
|
|
1738
|
+
def _extract_picks_from_packet(path: str) -> list[tuple[str, str]]:
|
|
1739
|
+
"""Parse a /next-up packet markdown for pick headers.
|
|
1740
|
+
|
|
1741
|
+
Three header shapes are recognised in pick order:
|
|
1742
|
+
1. Standard `### N. <SERIES> <PHASE> — ...` (e.g., `### 1. PLA PLA9 — ...`).
|
|
1743
|
+
2. Hygiene `### N. HYG <hygiene_id> — ...` (e.g., `### 3. HYG worker_id_audit — ...`),
|
|
1744
|
+
with optional backtick-wrap on the id.
|
|
1745
|
+
3. Single-token `### N. <SERIES><N>prime-<n> — ...` (e.g., `### 2. MG3prime-1 — ...`),
|
|
1746
|
+
the Windows-quoting workaround for canonical `MG3'-1` style ids.
|
|
1747
|
+
|
|
1748
|
+
A pick may also carry a **depth-slot table** of chained hops:
|
|
1749
|
+
|
|
1750
|
+
**Depth slot — sequential hops (per-hop subprocess):**
|
|
1751
|
+
| Hop | Phase | Files | Validate |
|
|
1752
|
+
|---|---|---|---|
|
|
1753
|
+
| 1 | TS3.2 — ... | ... | ... |
|
|
1754
|
+
| 2 | TS5 — ... | ... | ... |
|
|
1755
|
+
|
|
1756
|
+
Each hop is its own dispatchable phase, so every hop row is extracted
|
|
1757
|
+
as a `(series, phase)` pair (series inherited from the enclosing
|
|
1758
|
+
`### N.` header). Without this, a chain like `TS3.2 → TS5` only got
|
|
1759
|
+
its primary hop (TS3.2) staleness-checked — the already-shipped TS5
|
|
1760
|
+
slipped through `/fanout`'s Step 1.5 net (dispatch run 20260516T0004Z).
|
|
1761
|
+
|
|
1762
|
+
Returns (series, phase, plan_doc) triples. `plan_doc` is the path from
|
|
1763
|
+
the pick body's ``- **Plan doc:** `<path>` `` line (`""` when missing —
|
|
1764
|
+
plan-body cross-check is skipped for that pick). Phase ids may carry a
|
|
1765
|
+
hyphen-suffix (`SF1.2-port`, `SF7-retry`, `TF-close-out`); preserve them
|
|
1766
|
+
— without `\\-` the picker truncates `SF1.2-port` to `SF1.2`, which
|
|
1767
|
+
would trigger the suffix false-positive because base `SF1.2` is shipped.
|
|
1768
|
+
"""
|
|
1769
|
+
picks: list[tuple[str, str, str]] = []
|
|
1770
|
+
pat_standard = re.compile(r"^###\s+\d+\.\s+([A-Z]+)\s+([A-Z][A-Za-z0-9.\-]+?)\s+[—\-]")
|
|
1771
|
+
pat_hyg = re.compile(r"^###\s+\d+\.\s+(HYG)\s+`?([A-Za-z][\w\-]*)`?\s+[—\-]")
|
|
1772
|
+
pat_prime = re.compile(r"^###\s+\d+\.\s+(([A-Z]+)\d+prime-\d+(?:\.\d+)?)\s+[—\-]")
|
|
1773
|
+
# Depth-slot hop row: `| 2 | TS5 — 7-day soak ... | files | validate |`.
|
|
1774
|
+
pat_hop = re.compile(r"^\|\s*\d+\s*\|\s*`?([A-Z][A-Za-z0-9.\-]+?)`?\s+[—\-]")
|
|
1775
|
+
# Per-pick plan-doc line: `- **Plan doc:** `docs/foo-plan.md``
|
|
1776
|
+
pat_plan_doc = re.compile(r"^\s*[-*]\s*\*\*Plan doc:\*\*\s*`?([^`\n]+?)`?\s*$")
|
|
1777
|
+
cur_series: str | None = None
|
|
1778
|
+
in_depth_table = False
|
|
1779
|
+
with open(path, encoding="utf-8") as f:
|
|
1780
|
+
for line in f:
|
|
1781
|
+
m = pat_standard.match(line)
|
|
1782
|
+
if m:
|
|
1783
|
+
cur_series = m.group(1)
|
|
1784
|
+
in_depth_table = False
|
|
1785
|
+
picks.append((m.group(1), m.group(2), ""))
|
|
1786
|
+
continue
|
|
1787
|
+
m = pat_hyg.match(line)
|
|
1788
|
+
if m:
|
|
1789
|
+
cur_series = m.group(1)
|
|
1790
|
+
in_depth_table = False
|
|
1791
|
+
picks.append((m.group(1), m.group(2), ""))
|
|
1792
|
+
continue
|
|
1793
|
+
m = pat_prime.match(line)
|
|
1794
|
+
if m:
|
|
1795
|
+
cur_series = m.group(2)
|
|
1796
|
+
in_depth_table = False
|
|
1797
|
+
picks.append((m.group(2), m.group(1), ""))
|
|
1798
|
+
continue
|
|
1799
|
+
# Stamp the most recently appended pick's plan-doc field when the
|
|
1800
|
+
# `- **Plan doc:** ...` line appears in its body.
|
|
1801
|
+
m = pat_plan_doc.match(line)
|
|
1802
|
+
if m and picks:
|
|
1803
|
+
series, phase, _ = picks[-1]
|
|
1804
|
+
picks[-1] = (series, phase, m.group(1).strip())
|
|
1805
|
+
continue
|
|
1806
|
+
# Track entry into a pick's depth-slot hop table, then extract
|
|
1807
|
+
# every hop row as its own (series, phase) pair — inheriting the
|
|
1808
|
+
# enclosing pick's plan_doc since hops live in the same plan.
|
|
1809
|
+
if "Depth slot — sequential hops" in line:
|
|
1810
|
+
in_depth_table = True
|
|
1811
|
+
continue
|
|
1812
|
+
if in_depth_table and cur_series:
|
|
1813
|
+
m = pat_hop.match(line)
|
|
1814
|
+
if m:
|
|
1815
|
+
inherited_doc = picks[-1][2] if picks else ""
|
|
1816
|
+
triple = (cur_series, m.group(1), inherited_doc)
|
|
1817
|
+
# Hop 1 is usually the primary phase already captured
|
|
1818
|
+
# by the `### N.` header — dedupe.
|
|
1819
|
+
if not any(t[0] == triple[0] and t[1] == triple[1] for t in picks):
|
|
1820
|
+
picks.append(triple)
|
|
1821
|
+
continue
|
|
1822
|
+
# A non-table, non-blank line ends the depth-slot table.
|
|
1823
|
+
if line.strip() and not line.lstrip().startswith("|"):
|
|
1824
|
+
in_depth_table = False
|
|
1825
|
+
return picks
|
|
1826
|
+
|
|
1827
|
+
|
|
1828
|
+
def main() -> int:
    """Command-line entry point; returns the process exit status.

    Three modes, checked in this order:

    * ``--check-packet PATH`` — parse a /next-up packet and print a per-pick
      DROP/KEEP table. Exit 0 if any pick is shipped (caller should re-run
      /next-up), 1 if all picks are clean, 2 if the file parsed but no pick
      was recognised, 3 if the file could not be read or decoded.
    * ``--batch`` — read `<series> <phase> [<plan_doc>]` lines from stdin
      (blank lines and `#` comments skipped) and print one JSON verdict per
      pair. Exit 0 if any pair is shipped, else 1.
    * single pair — positional `series phase [plan_doc]`. In text mode: exit
      2 on a git error, 3 for an unchecked WEAK verdict (printed as
      UNCERTAIN), otherwise 0 if shipped and 1 if not. With ``--json`` the
      verdict is printed as JSON (flagged `uncertain` when applicable) and
      the exit status is 0 if shipped, else 1.

    In the two multi-pick modes the git log, the ship-stamp grammar and the
    file-path backstop cache (docs/284) are each built once and shared by
    every pick.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("series", nargs="?", help="Plan series prefix, e.g. RS, UP, TF, AO, AT")
    parser.add_argument("phase", nargs="?", help="Exact phase id, e.g. RS4, UP2.1.4, TF11.1")
    plan_doc_help = (
        "Plan doc path. STRONGLY recommended: when supplied, a WEAK git "
        "verdict (release-prefix / body-mention bundle) is cross-checked "
        "against the plan body's SHIPPED stamp and demoted if the stamp is "
        "absent. Omit it and a WEAK verdict prints UNCERTAIN instead of a "
        "(possibly wrong) SHIPPED — see _consult_plan_body."
    )
    parser.add_argument("plan_doc", nargs="?", help=plan_doc_help)
    parser.add_argument("--json", action="store_true", help="Emit JSON instead of text")
    batch_help = (
        'Read "<series> <phase>" pairs from stdin (one per line); emit one JSON line per pair. '
        '/next-up calls this once per packet to amortize the git-log cost across all candidate picks.'
    )
    parser.add_argument("--batch", action="store_true", help=batch_help)
    check_packet_help = (
        "Parse a /next-up packet markdown and check every Pick header against git log. "
        "Prints a per-pick disposition table; exits 0 if any pick is shipped (caller should re-run /next-up), "
        "1 if all picks are clean."
    )
    parser.add_argument("--check-packet", metavar="PATH", help=check_packet_help)
    opts = parser.parse_args()

    # Install the ship-stamp convention BEFORE any scan — from the parent's
    # env hand-off, else the workspace's dos.toml, else the job default (see
    # `_bootstrap_active_config`), so this shelled-out rung honours a
    # declared convention rather than only the default.
    _bootstrap_active_config()

    if opts.check_packet:
        packet_path = opts.check_packet
        try:
            packet_picks = _extract_picks_from_packet(packet_path)
        except (OSError, UnicodeDecodeError) as exc:
            # Exit 3: the packet could not be read at all.
            print(f"ERROR could not read {packet_path}: {exc}", file=sys.stderr)
            return 3
        if not packet_picks:
            # Exit 2: readable, but nothing to check — distinct from "clean".
            print(f"ERROR no picks found in {packet_path}", file=sys.stderr)
            return 2
        # One git-log fetch, one grammar, one file-path cache for all picks.
        subject_lines, commit_bodies = _build_log_cache()
        grammar = _subject_matchers()
        shared_fp_cache = build_batch_filepath_cache(
            [(s, ph, pd or None) for s, ph, pd in packet_picks], grammar
        )
        found_stale = False
        print(f"Pre-flight check on {packet_path} ({len(packet_picks)} picks):")
        for series, phase, plan_doc in packet_picks:
            doc = plan_doc or None
            verdict = _check_phase_with_cache(series, phase, subject_lines, commit_bodies, grammar)
            verdict = _consult_plan_body(verdict, doc, phase, series)
            # AAR-FQ230 — the same artefact backstop every entry point runs.
            verdict = _apply_filepath_backstop(verdict, series, phase, doc, grammar, shared_fp_cache)
            if verdict["shipped"]:
                mark = "DROP"
                extra = f"shipped in {verdict['sha']}"
                found_stale = True
            else:
                mark = "KEEP"
                extra = ""
            print(f" {mark:5s} {series:5s} {phase:12s} {extra}")
        return 0 if found_stale else 1

    if opts.batch:
        subject_lines, commit_bodies = _build_log_cache()
        grammar = _subject_matchers()
        # Drain stdin completely first so the file-path cache can be built
        # over the union of every pick's files; output order is unchanged.
        stdin_picks: list[tuple[str, str, str]] = []
        for raw in sys.stdin:
            entry = raw.rstrip("\n").rstrip("\r")
            if not entry.strip() or entry.lstrip().startswith("#"):
                continue
            series, phase, plan_doc = _parse_batch_line(entry)
            if series and phase:
                stdin_picks.append((series, phase, plan_doc))
        shared_fp_cache = build_batch_filepath_cache(
            [(s, ph, pd or None) for s, ph, pd in stdin_picks], grammar
        )
        found_shipped = False
        for series, phase, plan_doc in stdin_picks:
            verdict = _check_phase_with_cache(series, phase, subject_lines, commit_bodies, grammar)
            verdict = _consult_plan_body(verdict, plan_doc, phase, series)
            # AAR-FQ230 — /next-up and /replan pick work through --batch, so
            # the backstop must run here too.
            verdict = _apply_filepath_backstop(verdict, series, phase, plan_doc, grammar, shared_fp_cache)
            verdict["series"] = series
            verdict["phase"] = phase
            print(json.dumps(verdict))
            if verdict["shipped"]:
                found_shipped = True
        return 0 if found_shipped else 1

    if not (opts.series and opts.phase):
        parser.error("series and phase are required (or use --batch / --check-packet)")

    verdict = check_phase_shipped(opts.series, opts.phase, opts.plan_doc)

    # A WEAK git verdict with NO plan_doc is the footgun case: it was never
    # cross-checked against the plan body's SHIPPED stamp, so it is surfaced
    # as UNCERTAIN rather than a confident — possibly wrong — SHIPPED.
    weak_unchecked = (
        verdict.get("shipped")
        and not opts.plan_doc
        and verdict.get("via") in ("release-prefix", "body-mention")
    )

    if opts.json:
        if weak_unchecked:
            verdict["uncertain"] = True
        print(json.dumps(verdict))
        return 0 if verdict["shipped"] else 1

    if verdict.get("error"):
        print(f"ERROR {verdict['error']}", file=sys.stderr)
        return 2
    if weak_unchecked:
        print(
            f"UNCERTAIN {verdict['sha']} {verdict['summary']} "
            f"— WEAK verdict, no plan_doc given to cross-check; "
            f"re-run with the plan doc path as the 3rd argument"
        )
        return 3
    if verdict["shipped"]:
        print(f"SHIPPED {verdict['sha']} {verdict['summary']}")
        return 0
    print("NOT_SHIPPED")
    return 1
|
|
1982
|
+
|
|
1983
|
+
|
|
1984
|
+
# Script entry: run the CLI and hand its return value to the interpreter as
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|