dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/oracle.py
ADDED
|
@@ -0,0 +1,1661 @@
|
|
|
1
|
+
"""Registry-first ship oracle — single source of truth for "has this phase shipped?"
|
|
2
|
+
|
|
3
|
+
Background — the tail-wagging-the-dog inversion this module starts to correct.
|
|
4
|
+
============================================================================
|
|
5
|
+
|
|
6
|
+
Before this module, every gate that needed to answer "is `(plan, phase)`
|
|
7
|
+
already shipped?" went straight to `scripts/check_phase_shipped.py`, which
|
|
8
|
+
greps git log + plan-doc markdown for a `· SHIPPED <date> <sha>` heading
|
|
9
|
+
stamp. That grep is the *cheap* signal but it lags reality whenever a stamp
|
|
10
|
+
isn't atomically written at ship time (`QWB1` `mark done` partially fixed
|
|
11
|
+
this; `QWB2` /replan backstop catches the rest). When the stamp lags, the
|
|
12
|
+
gates that consume it get the wrong answer:
|
|
13
|
+
|
|
14
|
+
* `fanout_state._check_phase_shipped_prescreen` (register-time gate)
|
|
15
|
+
* `next_up_render._batch_check_shipped` (packet-render pre-screen)
|
|
16
|
+
* `/dispatch` Step 5.6 empty-packet gate
|
|
17
|
+
* `gate_classify.classify_packet` (typed-verdict classifier)
|
|
18
|
+
|
|
19
|
+
The fix is to invert the lookup: the run registry (`recently_completed` in
|
|
20
|
+
`docs/_plans/execution-state.yaml`) is the *load-bearing* source — every
|
|
21
|
+
`fanout_state.py mark <phase> done` writes a row there with the SHA at ship
|
|
22
|
+
time, atomically, in the same call that publishes the work. The markdown
|
|
23
|
+
stamp becomes a *belt-and-suspenders* secondary signal: useful when the
|
|
24
|
+
registry is missing a row (a manual `git commit` that bypassed `mark`), but
|
|
25
|
+
never the primary gate.
|
|
26
|
+
|
|
27
|
+
What this module provides
|
|
28
|
+
=========================
|
|
29
|
+
|
|
30
|
+
`is_shipped(plan, phase, *, state=..., grep_fallback=...) -> ShipVerdict`
|
|
31
|
+
Pure function. Looks up `(plan, phase)` in the supplied registry state;
|
|
32
|
+
if a `recently_completed` row exists with `status: done`, returns a
|
|
33
|
+
`ShipVerdict(shipped=True, sha=<short-sha>, source='registry')`. Otherwise
|
|
34
|
+
calls `grep_fallback(plan, phase)` (defaulting to a thin wrapper around
|
|
35
|
+
`check_phase_shipped.py --batch`) and returns its verdict tagged
|
|
36
|
+
`source='grep'`. The fallback is fully pluggable so tests can pass a stub.
|
|
37
|
+
|
|
38
|
+
`batch_is_shipped(pairs, *, state=..., grep_fallback=...) -> dict[(plan,phase), ShipVerdict]`
|
|
39
|
+
Many-pair variant. Registry hits short-circuit; the residual misses are
|
|
40
|
+
passed to the fallback in one batched call (the existing batch shape of
|
|
41
|
+
`check_phase_shipped.py`), so the worst case is still one subprocess.
|
|
42
|
+
|
|
43
|
+
`load_state()` / `load_state_from(path)` are the I/O wrappers. The pure-fn
|
|
44
|
+
core (`is_shipped` / `batch_is_shipped`) takes the parsed state explicitly so
|
|
45
|
+
callers and tests can inject any shape they want.
|
|
46
|
+
|
|
47
|
+
Status taxonomy considered "shipped"
|
|
48
|
+
====================================
|
|
49
|
+
|
|
50
|
+
A `recently_completed` row counts as a ship iff:
|
|
51
|
+
|
|
52
|
+
* the row's `status` field is `done` (terminal-success), AND
|
|
53
|
+
* the row's `claim_status` is either absent OR `done`.
|
|
54
|
+
|
|
55
|
+
`failed` / `stalled` / `expired` rows DO NOT count (those are terminal-
|
|
56
|
+
not-success). The `abandoned:` bucket is NEVER consulted — QWB3 routes
|
|
57
|
+
TTL-expired soft claims there precisely so no reader of `recently_completed`
|
|
58
|
+
miscounts them as completions.
|
|
59
|
+
|
|
60
|
+
CLI
|
|
61
|
+
===
|
|
62
|
+
|
|
63
|
+
python scripts/ship_oracle.py <PLAN> <PHASE>
|
|
64
|
+
python scripts/ship_oracle.py --batch < pairs.txt # `<plan> <phase>` per line
|
|
65
|
+
|
|
66
|
+
Exit codes mirror `check_phase_shipped.py --batch`:
|
|
67
|
+
|
|
68
|
+
0 — at least one queried pair is shipped
|
|
69
|
+
1 — none shipped
|
|
70
|
+
2 — usage/error
|
|
71
|
+
|
|
72
|
+
Each row is printed as a JSON line:
|
|
73
|
+
|
|
74
|
+
{"plan": "IF", "phase": "IF4.1", "shipped": true, "sha": "53943fc2",
|
|
75
|
+
"source": "registry"}
|
|
76
|
+
|
|
77
|
+
The CLI exists for inspection and tests; the in-repo callers
|
|
78
|
+
(`fanout_state.py`, `next_up_render.py`) import the Python API directly.
|
|
79
|
+
"""
|
|
80
|
+
from __future__ import annotations
|
|
81
|
+
|
|
82
|
+
import argparse
|
|
83
|
+
import dataclasses
|
|
84
|
+
import json
|
|
85
|
+
import os
|
|
86
|
+
import re
|
|
87
|
+
import subprocess
|
|
88
|
+
import sys
|
|
89
|
+
from pathlib import Path
|
|
90
|
+
from typing import Callable, Iterable
|
|
91
|
+
|
|
92
|
+
from dos import config as _config
|
|
93
|
+
|
|
94
|
+
# Path coupling resolves against the ACTIVE WORKSPACE (separation refactor),
|
|
95
|
+
# not the package's own tree. Env overrides preserved for tests. The pure
|
|
96
|
+
# `is_shipped`/`batch_is_shipped` take `state` explicitly, so only the default
|
|
97
|
+
# `load_state()`/`load_soaks()` loaders read these.
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _state_path() -> Path:
    """Resolve the execution-state file the default loader reads.

    A non-empty `JOB_FANOUT_STATE_PATH` wins, then a non-empty
    `DISPATCH_STATE_PATH`; with neither set, the active config's
    `paths.execution_state` is used.
    """
    for var_name in ("JOB_FANOUT_STATE_PATH", "DISPATCH_STATE_PATH"):
        override = os.environ.get(var_name)
        if override:
            return Path(override)
    return _config.active().paths.execution_state
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _soaks_path() -> Path:
    """Resolve the soaks-index file the default loader reads.

    A non-empty `DISPATCH_SOAKS_PATH` environment variable overrides the
    active config's `paths.soaks_index`.
    """
    override = os.environ.get("DISPATCH_SOAKS_PATH")
    if override:
        return Path(override)
    return _config.active().paths.soaks_index
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _workspace_root() -> Path:
    """Return the root of the workspace the oracle inspects.

    The grep rung runs `git` and reads plan-doc files; both have to target
    the SERVED workspace rather than the dos package's own source tree, so
    the root comes from the active config.
    """
    active_paths = _config.active().paths
    return active_paths.root
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
# Verdict shape
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclasses.dataclass(frozen=True)
class ShipVerdict:
    """Immutable verdict for one `(plan, phase)` pair.

    `source` records which signal produced the answer.

    `sha` is a best-effort short SHA. A `registry` hit carries it when the
    `recently_completed` row has a `commit_sha`; a `grep` hit carries it when
    `check_phase_shipped` resolved a commit. It is `None` for a
    `shipped=False` answer or when the source cannot name a SHA.

    `summary` is the human-readable commit summary when one is known. The
    grep fallback fills it in for log-friendly output; the registry often
    cannot, because its rows do not store the commit subject.

    `rung` is the RAW evidence rung the grep fallback stood on (`direct`,
    `file-path`, `release-prefix`, `body-mention`, `hyg-slug`,
    `sub-phase-parent`) — the `via` value `phase_shipped` already emits,
    carried verbatim so a verdict can be graded by FORGEABILITY. `file-path`
    is the artefact/diff rung and is non-forgeable (a commit cannot fake
    which files it touched). `direct` / `release-prefix` / `body-mention`
    rest on the commit SUBJECT/BODY that the agent wrote itself, so they are
    forgeable (`git commit --allow-empty -m 'docs/X: PHASE …'`). `source`
    holds the GRADED label derived from the rung (`grep-artifact` vs
    `grep-subject`), while `rung` keeps the un-graded original for consumers
    that need the exact name. This is the same split
    `resume.NONFORGEABLE_RUNGS` trusts ({file-path, registry}), surfaced here
    rather than dropped at the oracle boundary (docs/118). `rung` is empty
    for a registry hit, a miss, or an injected fallback that does not set it.
    """

    plan: str
    phase: str
    shipped: bool
    sha: str | None = None
    # One of: "registry" | "grep" | "grep-artifact" | "grep-subject" | "none"
    source: str = ""
    summary: str = ""
    # Raw `via` from the grep rung — the un-graded provenance.
    rung: str = ""

    def to_dict(self) -> dict:
        """Serialise to a plain dict, omitting empty optional fields.

        `plan`, `phase`, `shipped` and `source` are always present; `sha`,
        `summary` and `rung` are added (in that order) only when truthy.
        """
        payload: dict = {
            "plan": self.plan,
            "phase": self.phase,
            "shipped": self.shipped,
            "source": self.source,
        }
        for optional_key in ("sha", "summary", "rung"):
            value = getattr(self, optional_key)
            if value:
                payload[optional_key] = value
        return payload
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
# The non-git evidence rung (docs/109/265) — a conjunctive, accountability-only
|
|
179
|
+
# upgrade layered on top of a git ship-verdict. The DATUM is kernel-local: a label
|
|
180
|
+
# + a one-line why + the rung's own verdict word, carrying NO provider type and no
|
|
181
|
+
# host name (the `gh api` subprocess lives in a DRIVER, resolved by name at the
|
|
182
|
+
# `cmd_verify` boundary; this dataclass is the already-gathered result handed in,
|
|
183
|
+
# exactly as `grep_touched_files`/`soaks`/`commit_touches_doc` are). The whole
|
|
184
|
+
# reason it is safe to ship: a non-git rung may make `verify` answer MORE
|
|
185
|
+
# skeptically (mint a richer `source` over a commit git ALREADY found, or withhold
|
|
186
|
+
# that upgrade), NEVER more permissively — it is applied ONLY to a `shipped=True`
|
|
187
|
+
# git verdict, so green CI without a reachable commit manufactures nothing
|
|
188
|
+
# (`docs/265 §1`). The kernel adjudication stays mechanical; only the SIGNAL gets
|
|
189
|
+
# more accountable (the `docs/76` move restated for `verify`).
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
# The non-git states an upgrade may stand on — the rung's own verdict word, mirroring
|
|
193
|
+
# `ci_status.Ci`. Kept as a frozen set of strings (data, not an import) so `oracle`
|
|
194
|
+
# names no driver type: the boundary maps a `CiVerdict` (or any future driver's
|
|
195
|
+
# verdict) onto one of these tokens before handing in a `NonGitRung`.
|
|
196
|
+
_NON_GIT_GREEN, _NON_GIT_RED = "GREEN", "RED"

# Marker appended to the `summary` of a `shipped=True` verdict when the non-git
# rung reports RED on the same commit the git rung matched. The ship itself
# stands (git remains the necessary gate); only the upgrade is withheld, and
# the state is flagged so a host MAY route a decision off it (docs/265 §2b).
# Phase 2 decides whether RED demotes to a WARN-class source — until then the
# state is surfaced, never silently dropped.
_NON_GIT_RED_WITHHOLD_MARKER = (
    "[ship_oracle: non-git rung RED at this commit — git ship stands, "
    "accountability upgrade WITHHELD (docs/265)]"
)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@dataclasses.dataclass(frozen=True)
class NonGitRung:
    """Result of a non-git evidence check for one commit, gathered elsewhere
    and handed in (docs/265 §2b).

    * `source` — the upgraded label a GREEN rung mints onto the verdict
      (`"ci-green"` first; an infra-log/approval driver mints its own, e.g.
      `"approved"`, `"audit-green"`). It is plain DATA carried by the verdict
      (the renderer prints it as `(via <source>)`), not a provider type.
    * `reason` — a one-line why (`"checks green at <sha>"`), copied into the
      verdict `summary` so an operator can see what caused the upgrade.
    * `state` — the rung's own verdict word: `"GREEN"` / `"RED"` /
      `"NO_SIGNAL"` / `"PENDING"`. It is the only field the conjunctive fold
      branches on.

    The fold (`_apply_non_git_rung`) applies ONLY to a `shipped=True` git
    verdict and ONLY conjunctively: GREEN upgrades `source`; RED withholds
    the upgrade (the ship stands and a marker flags it); NO_SIGNAL, PENDING
    and any unknown state return the git verdict byte-identical. It can never
    promote `shipped=False → True` — the structural safety property the seam
    relies on.
    """

    source: str
    reason: str = ""
    state: str = ""

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict (source, reason, state)."""
        return {
            field_name: getattr(self, field_name)
            for field_name in ("source", "reason", "state")
        }
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _apply_non_git_rung(
    verdict: "ShipVerdict",
    non_git_rung: "NonGitRung | None",
) -> "ShipVerdict":
    """Fold an already-gathered non-git rung onto a git ship-verdict.

    The fold is conjunctive and never promoting. `verdict` comes back
    unchanged when:

    * no rung was supplied;
    * the git verdict is not `shipped=True` (the necessary gate did not pass,
      so a non-git rung cannot manufacture a ship); or
    * the rung's `state` is neither GREEN nor RED (NO_SIGNAL / PENDING /
      unknown all degrade to the git answer, byte-identical).

    Active branches:

    * GREEN — replace `source` with the rung's `source` (falling back to the
      git `source` when the rung's is blank) and append the rung's `reason`
      to `summary` so the upgrade is legible.
    * RED — keep `shipped=True` and the git `source`, but append the withhold
      marker plus the rung's reason to `summary` so a host can route a
      decision off the flagged-but-unchanged state.

    PURE: it only combines states decided at the boundary. The `shipped` bit
    is never modified (the §1 invariant: conjunctive, never disjunctive).
    """
    if not (non_git_rung is not None and verdict.shipped):
        return verdict

    rung_state = (non_git_rung.state or "").strip().upper()
    rung_reason = (non_git_rung.reason or "").strip()

    if rung_state == _NON_GIT_GREEN:
        label = (non_git_rung.source or "").strip() or verdict.source
        summary = verdict.summary
        if rung_reason:
            # Separate from any existing summary with a single space.
            summary = (summary + " " if summary else "") + rung_reason
        return dataclasses.replace(verdict, source=label, summary=summary)

    if rung_state == _NON_GIT_RED:
        note = _NON_GIT_RED_WITHHOLD_MARKER
        if rung_reason:
            note = note + f" {rung_reason}"
        lead = verdict.summary + " " if verdict.summary else ""
        return dataclasses.replace(verdict, summary=lead + note)

    # NO_SIGNAL / PENDING / anything unrecognised: pass the git verdict through.
    return verdict
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# ---------------------------------------------------------------------------
|
|
277
|
+
# State loading (the only I/O paths)
|
|
278
|
+
# ---------------------------------------------------------------------------
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def load_state_from(path: Path) -> dict:
    """Parse an execution-state.yaml file. Returns the raw dict; `{}` on miss.

    A missing or unreadable file, malformed YAML, a non-mapping document, or
    a missing PyYAML install all degrade to `{}` — callers then see "no
    registry rows" and fall through to the grep fallback. This is the same
    defensive shape `next_up_context.py` uses; `ship_oracle` is downstream of
    it and must not be more brittle.

    Args:
        path: Location of the YAML file to read.

    Returns:
        The parsed top-level mapping, or `{}` when nothing usable was read.
    """
    try:
        import yaml
    except ImportError:
        return {}
    try:
        # Open directly rather than probing `path.exists()` first. On some
        # Python versions the probe can raise (e.g. PermissionError on an
        # unsearchable parent) outside any handler, and it races with the open.
        with path.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:
        # Deliberately broad best-effort: a missing file, an I/O error, a
        # decode error and a YAML parse error all mean "no registry rows".
        return {}
    return data if isinstance(data, dict) else {}
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def load_state() -> dict:
    """Load the execution state from the path `_state_path()` resolves
    (the active workspace's execution-state.yaml by default)."""
    state_file = _state_path()
    return load_state_from(state_file)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def load_soaks_from(path: Path) -> list[dict]:
    """Parse a `_soaks/index.yaml` file → its `soaks:` list; `[]` on any miss.

    A missing or unreadable file, missing PyYAML, malformed YAML, a
    non-mapping document, or a non-list `soaks` key all degrade to `[]` (the
    #326 soak cross-check then suppresses nothing — permissive, never more
    brittle than the picker). Mirrors the defensive shape of
    `load_state_from` and `next_up_context._load_soak_registry`; kept
    self-contained (no import of `next_up_context`) so the oracle stays a leaf.

    Args:
        path: Location of the soaks index YAML file.

    Returns:
        The value of the top-level `soaks` key when it is a list, else `[]`.
    """
    try:
        import yaml
    except ImportError:
        return []
    try:
        # Open directly rather than probing `path.exists()` first. On some
        # Python versions the probe can raise (e.g. PermissionError on an
        # unsearchable parent) outside any handler, and it races with the open.
        with path.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:
        # Deliberately broad best-effort: any read or parse failure means
        # "no soaks known".
        return []
    if not isinstance(data, dict):
        return []
    soaks = data.get("soaks")
    return soaks if isinstance(soaks, list) else []
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def load_soaks() -> list[dict]:
    """Load the soak list from the path `_soaks_path()` resolves
    (the active workspace's _soaks/index.yaml by default)."""
    soaks_file = _soaks_path()
    return load_soaks_from(soaks_file)
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
# ---------------------------------------------------------------------------
|
|
340
|
+
# Pure-fn core
|
|
341
|
+
# ---------------------------------------------------------------------------
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _norm(s: str) -> str:
    """Build the comparison key for a plan or phase token.

    Registry rows store plan ids upper-case (`IF`, `AAR`) while callers may
    use either case; phase ids keep their case on disk but operator input may
    not. Both sides are therefore trimmed and lower-cased before matching. A
    falsy input (including `None`) yields the empty string.
    """
    token = s if s else ""
    return token.strip().lower()
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _norm_doc(path: str) -> str:
    """Normalize a plan-doc path for comparison.

    Trims surrounding whitespace, converts backslashes to forward slashes and
    lower-cases the result; a falsy input yields the empty string. This only
    produces the normalized string — the "same doc" decision (full path OR
    basename equality) is made by `_doc_paths_match`.
    """
    raw = path if path else ""
    return raw.strip().replace("\\", "/").lower()
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _doc_paths_match(row_doc: str, expected_doc: str) -> bool:
    """Decide whether a registry row's `doc_path` names the SAME plan doc the
    caller expects.

    Both paths are normalized with `_norm_doc`. If either is empty the answer
    is False. Otherwise they match when the normalized full paths are equal
    or, failing that, when their basenames are equal — so a row storing
    `docs/_plans/x-plan.md` matches an expected `docs/x-plan.md`. The
    basename is the load-bearing discriminator: two plans that share a series
    id always have DIFFERENT doc basenames.
    """
    row_norm = _norm_doc(row_doc)
    want_norm = _norm_doc(expected_doc)
    if not (row_norm and want_norm):
        return False
    if row_norm == want_norm:
        return True
    # Compare only the final path component of each side.
    row_base = row_norm.rpartition("/")[2]
    want_base = want_norm.rpartition("/")[2]
    return row_base == want_base
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
# The dispatch *ledger* files: pure bookkeeping that routing / status / soft-claim
|
|
379
|
+
# commits stamp constantly but that no phase "ships". A commit touching ONLY
|
|
380
|
+
# these (no plan doc, no code, no test, no substantive doc) is a routing stamp,
|
|
381
|
+
# never a ship — used by `default_commit_touches_doc`'s Signal C to demote the
|
|
382
|
+
# FQ-388 "routing-stamp row short-circuits as shipped" class. Matched on basename
|
|
383
|
+
# (findings queue, execution-state ledger, the rendered plan index).
|
|
384
|
+
# The kernel's OWN generic-layout bookkeeping basenames come FIRST — `for_dos_dir`
|
|
385
|
+
# ships `dos.state.yaml` (execution_state) + `dos.findings.md` (findings_queue), so a
|
|
386
|
+
# generic-layout workspace gets the same non-substantive-footprint demotion the
|
|
387
|
+
# reference app does (the bug the userland-coupling audit 2026-06-08 found: this set
|
|
388
|
+
# used to carry ONLY the reference app's filenames, so a generic workspace's
|
|
389
|
+
# bookkeeping commits were never demoted). The reference-app dialect
|
|
390
|
+
# (`execution-state.yaml` / `findings-followup-queue.md` / `plans.yaml`) is kept for
|
|
391
|
+
# back-compat; a host with a different layout should ideally have these DERIVED from
|
|
392
|
+
# `cfg.paths.{execution_state,findings_queue}` rather than enumerated here — left as a
|
|
393
|
+
# follow-up because the predicate is a pure module-level helper threaded through the
|
|
394
|
+
# truth syscall (see the audit's MEDIUM note).
|
|
395
|
+
_DISPATCH_LEDGER_BASENAMES = frozenset((
    # Generic kernel layout (dos.config.PathLayout.for_dos_dir).
    "dos.state.yaml",
    "dos.findings.md",
    # Reference-app dialect, kept for back-compat.
    "findings-followup-queue.md",
    "execution-state.yaml",
    "plans.yaml",
))
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _is_dispatch_ledger(path: str) -> bool:
    """Report whether `path` is a pure dispatch-bookkeeping ledger file.

    The path is normalized with `_norm_doc` and only its final component is
    tested against `_DISPATCH_LEDGER_BASENAMES`, so repo-relative and absolute
    spellings of the same file are treated alike.
    """
    basename = _norm_doc(path).rpartition("/")[2]
    return basename in _DISPATCH_LEDGER_BASENAMES
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
# The version-bump *release* files: the small fixed set of paths a `/release`
|
|
415
|
+
# version-cut touches when it bumps the version and writes release notes. A
|
|
416
|
+
# commit touching ONLY these (no plan doc, no code, no test, no substantive
|
|
417
|
+
# doc) is a release-bump, never a phase ship — even though its *subject* /
|
|
418
|
+
# release-notes body routinely names the phases batched into that version
|
|
419
|
+
# (`v0.378.0: GBA6 soak registration + FQ-375 live-API closer + …`). The grep
|
|
420
|
+
# rung in `check_phase_shipped.py` matches such a commit when the queried
|
|
421
|
+
# phase token appears in the subject / release-notes prose, false-flagging an
|
|
422
|
+
# UNSHIPPED phase as shipped (finding #399 — the bookkeeping-only-diff predicate
|
|
423
|
+
# gap; recurred 3× in <2h on 2026-05-30, the `ship_oracle_false_positive`
|
|
424
|
+
# unstick cluster #336). This set mirrors the version files `/release` Step 7
|
|
425
|
+
# writes; `docs/releases/` (plus any host-declared status-rollup path) are the
|
|
426
|
+
# release-notes + status-rollup surfaces it also stamps. Matched basename-wise
|
|
427
|
+
# for `VERSION`/`version.txt`/`pyproject.toml`/`__init__.py`, prefix-wise for
|
|
428
|
+
# the release-notes / status dirs (one-definition, two callers — registry
|
|
429
|
+
# Signal C + the grep-side post-filter).
|
|
430
|
+
_RELEASE_BUMP_BASENAMES = frozenset((
    "version",         # VERSION (root) / go buildinfo VERSION
    "version.txt",     # go/internal/buildinfo/version.txt
    "pyproject.toml",  # version field bump
    "__init__.py",     # job_search/__init__.py __version__
    "changelog.md",    # CHANGELOG.md, if present
))
|
|
437
|
+
# `docs/releases/` is the generic /release notes surface (this repo's own, and the
|
|
438
|
+
# kernel's `/release` skill writes it). A host that re-stamps an extra status-rollup
|
|
439
|
+
# path on a version cut (e.g. the reference app's `docs/06_implementation-status`)
|
|
440
|
+
# declares it in its own config rather than baking the host path into the kernel
|
|
441
|
+
# oracle — the reaped host literal lived here (userland-coupling audit 2026-06-08).
|
|
442
|
+
# Release notes live under docs/releases/ (docs/releases/vX.Y.Z.md).
_RELEASE_BUMP_PREFIXES = ("docs/releases/",)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _is_release_bump_path(path: str) -> bool:
    """Report whether `path` is one of the fixed version-bump / release-notes
    files a `/release` version-cut touches.

    After `_norm_doc` normalization, the path qualifies when its final
    component is in `_RELEASE_BUMP_BASENAMES` or when the whole path starts
    with one of `_RELEASE_BUMP_PREFIXES`. This lets callers recognise a commit
    whose footprint is *only* a release bump — one that names batched phases
    in its subject/notes but ships none of them.
    """
    normalized = _norm_doc(path)
    if normalized.rpartition("/")[2] in _RELEASE_BUMP_BASENAMES:
        return True
    # str.startswith accepts a tuple of candidates and is False for an empty one.
    return normalized.startswith(tuple(_RELEASE_BUMP_PREFIXES))
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _commit_footprint_is_nonsubstantive(touched: Iterable[str]) -> bool:
|
|
461
|
+
"""True iff every path in `touched` is a dispatch-ledger OR release-bump
|
|
462
|
+
file — i.e. the commit shipped no plan doc, no code, no test, and no
|
|
463
|
+
substantive doc. Such a commit is a routing stamp or a version cut, never a
|
|
464
|
+
phase ship, regardless of what phase tokens its subject / release-notes
|
|
465
|
+
prose mentions. Empty input is NOT non-substantive (returns False — an
|
|
466
|
+
unknown/empty footprint is not provably bookkeeping; callers treat it
|
|
467
|
+
permissively).
|
|
468
|
+
|
|
469
|
+
This is the shared predicate behind both the registry-side Signal C demotion
|
|
470
|
+
(`default_commit_touches_doc`) and the grep-side release-bump post-filter
|
|
471
|
+
(`_grep_verdict_is_release_bump_falsepos`). Pure — takes the touched-file
|
|
472
|
+
set so it is unit-testable without git (finding #399 fix; unstick #336)."""
|
|
473
|
+
paths = [p for p in touched if p and p.strip()]
|
|
474
|
+
if not paths:
|
|
475
|
+
return False
|
|
476
|
+
return all(_is_dispatch_ledger(p) or _is_release_bump_path(p) for p in paths)
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
# Per-process memo of a commit's touched-file set, keyed by (root, sha). A git
|
|
480
|
+
# commit is IMMUTABLE — its SHA names a fixed tree, so the files it touched can
|
|
481
|
+
# never change — which makes this the safest possible cache (no mtime/staleness
|
|
482
|
+
# concern). It matters because a single `is_shipped` on a shipped=True grep
|
|
483
|
+
# verdict fetches the SAME sha's footprint twice (the #399 post-filter in
|
|
484
|
+
# `default_grep_fallback_batch`, then again in `_demote_if_false_positive`), and a
|
|
485
|
+
# fan-out over many phases re-hits the same release-bump SHAs repeatedly; each was
|
|
486
|
+
# a `git show` subprocess. Keyed on the workspace root too, so a server fielding
|
|
487
|
+
# several workspaces never crosses their histories. `None` results are NOT cached
|
|
488
|
+
# (a transient git failure should be retried, never frozen into a false miss).
|
|
489
|
+
_TOUCHED_FILES_CACHE: "dict[tuple[str, str], frozenset[str]]" = {}
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _clear_touched_files_cache() -> None:
    """Empty `_TOUCHED_FILES_CACHE` in place (a hook for tests).

    The dict object itself is kept, so every reference to the module-level
    cache observes the reset.
    """
    _TOUCHED_FILES_CACHE.clear()
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def _git_touched_files(sha: str, *, timeout: int = 15) -> set[str] | None:
|
|
498
|
+
"""Return the set of repo-relative paths a commit touched, or None if the
|
|
499
|
+
sha is unknown / git is unavailable / the clone is shallow (caller treats
|
|
500
|
+
None permissively — we never manufacture a false negative from an
|
|
501
|
+
unresolvable sha).
|
|
502
|
+
|
|
503
|
+
Memoized per process on (workspace-root, sha) — a commit's footprint is
|
|
504
|
+
immutable, so the `git show` runs once per distinct SHA for the process's
|
|
505
|
+
life (see `_TOUCHED_FILES_CACHE`). A copy of the cached frozenset is returned
|
|
506
|
+
so a caller that mutates the result can't poison the cache."""
|
|
507
|
+
sha = (sha or "").strip()
|
|
508
|
+
if not sha:
|
|
509
|
+
return None
|
|
510
|
+
root = str(_workspace_root())
|
|
511
|
+
cache_key = (root, sha)
|
|
512
|
+
cached = _TOUCHED_FILES_CACHE.get(cache_key)
|
|
513
|
+
if cached is not None:
|
|
514
|
+
return set(cached)
|
|
515
|
+
try:
|
|
516
|
+
res = subprocess.run(
|
|
517
|
+
["git", "show", "--name-only", "--format=", sha],
|
|
518
|
+
cwd=root,
|
|
519
|
+
capture_output=True,
|
|
520
|
+
text=True,
|
|
521
|
+
encoding="utf-8",
|
|
522
|
+
errors="replace",
|
|
523
|
+
timeout=timeout,
|
|
524
|
+
check=False,
|
|
525
|
+
)
|
|
526
|
+
except (subprocess.TimeoutExpired, OSError):
|
|
527
|
+
return None
|
|
528
|
+
if res.returncode != 0:
|
|
529
|
+
return None
|
|
530
|
+
files = {
|
|
531
|
+
ln.strip().replace("\\", "/")
|
|
532
|
+
for ln in res.stdout.splitlines()
|
|
533
|
+
if ln.strip()
|
|
534
|
+
}
|
|
535
|
+
_TOUCHED_FILES_CACHE[cache_key] = frozenset(files)
|
|
536
|
+
return files
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def _grep_verdict_is_release_bump_falsepos(
    verdict: "ShipVerdict",
    *,
    touched_files: Callable[[str], set[str] | None] = _git_touched_files,
) -> bool:
    """Detect the finding-#399 grep false positive.

    The grep rung in `check_phase_shipped.py` has no footprint guard: it
    matches a phase token anywhere in a commit subject or release-notes body.
    A version cut whose notes batch `... + FQ-375 closer + ...` therefore
    flags an unshipped FQ-375 as shipped. This is the grep-side complement of
    the registry-side Signal C.

    Returns True only when all of the following hold:
      * the verdict claims `shipped`,
      * it carries a non-blank sha,
      * `touched_files(sha)` yields a non-empty footprint, and
      * that footprint is non-substantive (release-bump / dispatch-ledger
        files only, per `_commit_footprint_is_nonsubstantive`).

    A None or empty footprint is treated permissively and returns False: a
    verdict that cannot be proven to be a release bump is not demoted.
    `touched_files` is injectable so tests can run without git.
    """
    # Only read the sha when the verdict actually claims a ship.
    commit = (verdict.sha or "").strip() if verdict.shipped else ""
    if not commit:
        return False
    footprint = touched_files(commit)
    # Unresolvable / empty footprint -> permissive, do not demote.
    return bool(footprint) and _commit_footprint_is_nonsubstantive(footprint)
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
# Markers stamped onto a demoted verdict's `summary` so a downstream log /
|
|
569
|
+
# audit can name *which* false-positive class fired. Kept module-level so the
|
|
570
|
+
# in-fallback site (`default_grep_fallback_batch`) and the oracle-boundary
|
|
571
|
+
# sites (`is_shipped` / `batch_is_shipped`) all stamp the identical string.
|
|
572
|
+
_RELEASE_BUMP_DEMOTE_MARKER = (
|
|
573
|
+
"[ship_oracle: demoted — grep matched a release-bump/ledger-only commit "
|
|
574
|
+
"whose footprint ships no phase content (#399)]"
|
|
575
|
+
)
|
|
576
|
+
# The operator-facing demote markers name the soak index by its ACTUAL configured
|
|
577
|
+
# path (`{soaks}` is interpolated from `cfg.paths.soaks_index` at the call site),
|
|
578
|
+
# never a hardcoded host literal — the generic default is `.dos/soaks/index.yaml`,
|
|
579
|
+
# the reference app's is `docs/_soaks/index.yaml`; the message must match the
|
|
580
|
+
# workspace's real location (userland-coupling audit 2026-06-08).
|
|
581
|
+
_SOAK_DEMOTE_MARKER = (
|
|
582
|
+
"[ship_oracle: demoted — (plan, phase) is a registered in-progress soak in "
|
|
583
|
+
"{soaks}; a commit names the soak token but the pick is the "
|
|
584
|
+
"soak follow-up, not a re-ship (#326)]"
|
|
585
|
+
)
|
|
586
|
+
_SOAK_REGISTRY_DEMOTE_MARKER = (
|
|
587
|
+
"[ship_oracle: demoted — (plan, phase) has a registry status:done row but its "
|
|
588
|
+
"soak is still in_progress in {soaks}; the implementation "
|
|
589
|
+
"shipped, but under soak-as-parallel-lane the pick the lane wants is the soak "
|
|
590
|
+
"FOLLOW-UP, not the shipped impl — so the picker treats it as not-yet-done "
|
|
591
|
+
"until the soak window closes (#326-registry)]"
|
|
592
|
+
)
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def _alnum(s: str) -> str:
|
|
596
|
+
"""Aggressive token key: lower-case, strip every non-alphanumeric char.
|
|
597
|
+
|
|
598
|
+
Soak ids on disk are inconsistently delimited — `dt5-discover-wallclock`
|
|
599
|
+
(hyphen), `cs6_legacy_marker_classify` (underscore), `tf11_4_1` (a dotted
|
|
600
|
+
phase rendered with underscores), `gba6` (bare). Reducing both the phase
|
|
601
|
+
and the candidate id-tokens to alnum makes the comparison delimiter-blind.
|
|
602
|
+
"""
|
|
603
|
+
return re.sub(r"[^a-z0-9]", "", (s or "").strip().lower())
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def _is_registered_inprogress_soak(
    plan: str,
    phase: str,
    *,
    soaks: Iterable[dict],
) -> bool:
    """True iff `(plan, phase)` is a registered, **in-progress** soak (#326).

    A soak whose implementation has shipped but whose soak window is still open
    is, by the soak-as-parallel-lane convention, a *pickable follow-up*, not a
    drained/shipped-done phase. The grep rung nevertheless flags it as shipped
    as soon as any commit (a release note, a soak-ledger stamp) names the phase
    token, which culls the live follow-up pick and wedges the lane. This
    predicate lets `is_shipped` / `batch_is_shipped` suppress that verdict for
    the duration of the in-progress window.

    Args:
        plan: Queried plan id.
        phase: Queried phase id.
        soaks: Parsed soak entries (dicts). Non-dict items are ignored.

    Returns:
        True when some entry satisfies ALL of:
          * its `status` is `in_progress` (case-insensitive, trimmed). This is
            the only status that suppresses; passed / closed_pass / failed /
            aborted make the phase report shipped normally again;
          * its `plan` equals the queried plan after `_alnum` normalization
            (disk casing is inconsistent: `dt`, `PSC`, `gba` all appear);
          * the `_alnum`-normalized phase EQUALS one of the entry-id candidate
            tokens: the run before the first `-`, the run before the first `_`
            of that, or the whole id (each `_alnum`-normalized).
        False otherwise, including when `plan` or `phase` normalizes to "".

    Exact equality against the leading token prevents over-matching: `DT50`
    does not match soak `dt5-discover-wallclock` (tokens `dt5` /
    `dt5discoverwallclock`), and `DT5` does not match a hypothetical `dt55-...`.

    Entry fields are coerced defensively. The soak index is operator-edited
    data, so a field may be missing, null, or a non-string scalar. A null
    field is treated as absent rather than becoming the text "None", and a
    non-string `status` is stringified instead of raising. Pure: the soak list
    is injected, so no file I/O happens here.
    """

    def _text(value: object) -> str:
        # None -> "" (absent); anything else -> its string form.
        return "" if value is None else str(value)

    plan_n = _alnum(plan)
    phase_n = _alnum(phase)
    if not plan_n or not phase_n:
        return False
    for entry in soaks:
        if not isinstance(entry, dict):
            continue
        if _text(entry.get("status")).strip().lower() != "in_progress":
            continue
        if _alnum(_text(entry.get("plan"))) != plan_n:
            continue
        sid = _text(entry.get("id")).strip().lower()
        if not sid:
            continue
        lead = sid.split("-", 1)[0]      # 'dt5' from 'dt5-discover-wallclock'
        lead_us = lead.split("_", 1)[0]  # 'cs6' from 'cs6_legacy_marker_classify'
        candidates = {_alnum(lead), _alnum(lead_us), _alnum(sid)}
        if phase_n in candidates:
            return True
    return False
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def _demote_if_false_positive(
|
|
661
|
+
verdict: "ShipVerdict",
|
|
662
|
+
*,
|
|
663
|
+
grep_touched_files: Callable[[str], set[str] | None] | None,
|
|
664
|
+
soaks: Iterable[dict] | None,
|
|
665
|
+
) -> "ShipVerdict":
|
|
666
|
+
"""Apply the two grep-side false-positive demotions to a `source='grep'`
|
|
667
|
+
verdict, gated on their respective inputs being supplied (the gate-OFF
|
|
668
|
+
convention: a `None` input means "this check is off → byte-identical").
|
|
669
|
+
|
|
670
|
+
Returns the verdict unchanged unless a check fires, in which case it returns
|
|
671
|
+
a `shipped=False` copy with the matching `#399` / `#326` marker appended to
|
|
672
|
+
`summary`. Both checks are monotone `shipped=True → shipped=False`, so they
|
|
673
|
+
compose; the markers are distinct so a log can name which fired. Only the
|
|
674
|
+
grep rung is demoted — a registry `status: done` hit is real ship truth and
|
|
675
|
+
is never routed through this helper.
|
|
676
|
+
"""
|
|
677
|
+
if not verdict.shipped:
|
|
678
|
+
return verdict
|
|
679
|
+
if grep_touched_files is not None and _grep_verdict_is_release_bump_falsepos(
|
|
680
|
+
verdict, touched_files=grep_touched_files
|
|
681
|
+
):
|
|
682
|
+
return dataclasses.replace(
|
|
683
|
+
verdict,
|
|
684
|
+
shipped=False,
|
|
685
|
+
summary=(verdict.summary + " " if verdict.summary else "")
|
|
686
|
+
+ _RELEASE_BUMP_DEMOTE_MARKER,
|
|
687
|
+
)
|
|
688
|
+
if soaks is not None and _is_registered_inprogress_soak(
|
|
689
|
+
verdict.plan, verdict.phase, soaks=soaks
|
|
690
|
+
):
|
|
691
|
+
return dataclasses.replace(
|
|
692
|
+
verdict,
|
|
693
|
+
shipped=False,
|
|
694
|
+
summary=(verdict.summary + " " if verdict.summary else "")
|
|
695
|
+
+ _SOAK_DEMOTE_MARKER.format(soaks=_soaks_path()),
|
|
696
|
+
)
|
|
697
|
+
return verdict
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def _suppress_registry_soak(
|
|
701
|
+
verdict: "ShipVerdict",
|
|
702
|
+
*,
|
|
703
|
+
soaks: Iterable[dict] | None,
|
|
704
|
+
) -> "ShipVerdict":
|
|
705
|
+
"""Demote a REGISTRY `shipped=True` verdict whose phase is a registered
|
|
706
|
+
in-progress soak (#326-registry).
|
|
707
|
+
|
|
708
|
+
The grep-side `_demote_if_false_positive` deliberately does NOT touch a
|
|
709
|
+
registry hit — a `status: done` row means the *implementation* genuinely
|
|
710
|
+
shipped, which is real ship truth the oracle must not erase. But under the
|
|
711
|
+
soak-as-parallel-lane model (`next_up_context.collect_soaks`,
|
|
712
|
+
[[soak-as-parallel-lane]]), a phase whose impl shipped while its soak window
|
|
713
|
+
is still open is a *pickable follow-up*, not a drained/done phase: the work
|
|
714
|
+
the lane wants next is the soak's named follow-up, not a re-ship of the impl.
|
|
715
|
+
Reporting it `shipped=True` makes the picker drop the pick and re-render —
|
|
716
|
+
the exact soak-gated WEDGE the operator hit (PIW2/CS6/AFR6/IF3 all carried a
|
|
717
|
+
`status:done` row while in_progress).
|
|
718
|
+
|
|
719
|
+
So for the picker's purpose we suppress the registry-soak verdict *for the
|
|
720
|
+
duration of the in-progress window only*. This is bounded and self-healing:
|
|
721
|
+
the instant the operator flips the soak to passed/closed_pass/failed/aborted
|
|
722
|
+
the suppression stops and the phase reports shipped from its registry row
|
|
723
|
+
again. Opt-in via `soaks` (None → OFF → byte-identical, the gate-OFF
|
|
724
|
+
convention); the demoted verdict keeps `source='registry'` and carries the
|
|
725
|
+
distinct `#326-registry` marker so a log names the cause. Pure — injected
|
|
726
|
+
soak list, no I/O."""
|
|
727
|
+
if not verdict.shipped or soaks is None:
|
|
728
|
+
return verdict
|
|
729
|
+
if _is_registered_inprogress_soak(verdict.plan, verdict.phase, soaks=soaks):
|
|
730
|
+
return dataclasses.replace(
|
|
731
|
+
verdict,
|
|
732
|
+
shipped=False,
|
|
733
|
+
summary=(verdict.summary + " " if verdict.summary else "")
|
|
734
|
+
+ _SOAK_REGISTRY_DEMOTE_MARKER.format(soaks=_soaks_path()),
|
|
735
|
+
)
|
|
736
|
+
return verdict
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
# The plan-meta series-id + classification fields, read from a plan doc's
|
|
740
|
+
# `<!-- plan-meta … -->` frontmatter block (`id: ISV` / `classification: ACTIVE`).
|
|
741
|
+
# The id is the key the run registry stamps a `recently_completed` row on; the
|
|
742
|
+
# classification is what lets `default_plan_doc_map` prefer the LIVE plan when two
|
|
743
|
+
# docs collide on one id — the join the FQ-390 gate needs to know which doc a
|
|
744
|
+
# queried plan EXPECTS.
|
|
745
|
+
_PLAN_META_ID_RE = re.compile(r"^\s*id:\s*([A-Za-z0-9_\-]+)\s*$", re.MULTILINE)
|
|
746
|
+
_PLAN_META_CLASS_RE = re.compile(r"^\s*classification:\s*([A-Za-z0-9_\-]+)\s*$", re.MULTILINE)
|
|
747
|
+
|
|
748
|
+
# A classification that means the plan is NOT the live one an operator querying its
|
|
749
|
+
# id means — a retired/superseded doc that may still carry a same-id `done` row.
|
|
750
|
+
# Matched case-insensitively against the plan-meta `classification:` field.
|
|
751
|
+
_TOMBED_CLASSIFICATIONS = frozenset({"tombed", "tombstone", "retired", "superseded", "archived"})
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
def default_plan_doc_map(cfg: "_config.SubstrateConfig | None" = None) -> dict[str, str]:
    """Resolve ``{series_id: plan_doc_path}`` from the workspace's plan docs. FAIL-SAFE.

    Globs ``cfg.paths.plans_glob`` under ``cfg.paths.root``, reads each matched
    file, takes the first ``id: <SERIES>`` line found by ``_PLAN_META_ID_RE``
    (the plan-meta frontmatter field) and maps that series id to the doc's path
    relative to the root (the path as globbed if it is not under the root).

    The FQ-390 collision gate consults this map to learn which doc a queried
    plan EXPECTS. A registry `status:done` row whose ship commit touched a
    DIFFERENT plan's doc (a tombed same-id plan) is then rejected instead of
    false-clearing the active plan's unshipped phase.

    **Collision preference: the LIVE doc wins.** When two docs share an id (the
    PLAN_ID_COLLISION_FALSE_SHIPPED scenario: an ACTIVE `dispatch-lane-canon`
    plan and a TOMBED `data-labelling` plan both `id: DL`), a doc whose
    `classification:` is not in `_TOMBED_CLASSIFICATIONS` replaces a tombed
    one regardless of glob order. Among docs of the same liveness the first in
    sorted-glob order wins, which is deterministic.

    **Fail-safe.** A missing or empty glob, a pattern `Path.glob` rejects, or
    an unreadable tree yields an EMPTY map; an unreadable doc or a doc with no
    plan-meta id is skipped. An empty map turns the gate OFF
    (`expected_doc is None`), byte-identical to the pre-wiring behavior. So
    this can only ADD collision rejections where the workspace has plan docs
    with series ids. The only I/O is this glob + read at the call boundary.

    Args:
        cfg: Workspace config; passed through `_config.ensure`, so None is
            accepted.

    Returns:
        A dict from series id (as written in the doc) to plan-doc path string.
    """
    cfg = _config.ensure(cfg)
    out: dict[str, str] = {}
    # id -> whether the doc currently bound to that id is tombed.
    chosen_is_tombed: dict[str, bool] = {}
    try:
        paths = getattr(cfg, "paths", None)
        root = Path(getattr(paths, "root", "."))
        pattern = str(getattr(paths, "plans_glob", "") or "")
        if not pattern:
            return out
        # `sorted` drains the lazy glob inside the try, so filesystem errors
        # raised while walking are caught here too.
        matched = sorted(root.glob(pattern))
    except (OSError, ValueError, NotImplementedError):
        # NotImplementedError: pathlib refuses non-relative (absolute) glob
        # patterns. A misconfigured `plans_glob` must switch the gate off, not
        # crash every verify surface that passes a cfg.
        return out
    for p in matched:
        try:
            if not p.is_file():
                continue
            text = p.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue  # unreadable doc: skip it, keep the rest of the map
        m = _PLAN_META_ID_RE.search(text)
        if not m:
            continue  # no series id: this doc cannot key a registry row
        series_id = m.group(1)
        cm = _PLAN_META_CLASS_RE.search(text)
        is_tombed = bool(cm) and cm.group(1).strip().lower() in _TOMBED_CLASSIFICATIONS
        try:
            rel = str(p.relative_to(root))
        except ValueError:
            rel = str(p)
        # Bind the id to this doc when (a) the id is unseen, or (b) the doc
        # bound so far is TOMBED and this one is LIVE, the plan an operator
        # querying the id means. Two LIVE (or two tombed) docs keep the
        # sorted-glob first. A genuine live-vs-live id collision is a workspace
        # bug that the commit-footprint check still guards downstream.
        if series_id not in out or (chosen_is_tombed.get(series_id) and not is_tombed):
            out[series_id] = rel
            chosen_is_tombed[series_id] = is_tombed
    return out
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
def _registry_ship_row(
    state: dict,
    plan: str,
    phase: str,
    *,
    expected_doc: str | None = None,
    commit_touches_doc: Callable[[str, str, str], bool | None] | None = None,
) -> dict | None:
    """Find the newest terminal-success registry row for `(plan, phase)`.

    Scans `state["recently_completed"]` in order and returns the first dict
    row whose `plan` and `phase` equal the query (both sides passed through
    `_norm`) and whose `status` is `done`. Returns None when there is no such
    row, when `recently_completed` is not a list, or when every candidate is
    rejected as a cross-plan collision.

    "Newest" relies on ordering: `fanout_state` writes `recently_completed`
    newest-first (it `insert(0, entry)`s on every `mark done`), so the first
    hit is the latest ship.

    Only `status: done` counts. `failed` / `stalled` / `expired` rows are
    terminal-not-success and must NOT be reported as ships; the test suite
    locks this contract.

    **Plan-id-collision gate (`expected_doc`, opt-in, FQ-390).** Two different
    plans can share a series id (an ACTIVE `dispatch-lane-canonicalization`
    plan and a TOMBED data-labelling plan both `id: DL`). The registry keys
    only on `(plan, phase)`, so the tombed plan's `DL/DL2 status=done` row used
    to be reported as a ship of the active plan's unshipped DL2, culling the
    live pick and wedging `/dispatch-loop` (`PLAN_ID_COLLISION_FALSE_SHIPPED`).
    With a truthy `expected_doc`, a matching row must also belong to that doc:

      * the row has a `doc_path`: it must satisfy
        `_doc_paths_match(doc_path, expected_doc)`; otherwise the row is a
        collision and scanning continues;
      * the row has no `doc_path` (all legacy rows) and `commit_touches_doc`
        is supplied: for a row with a `commit_sha`,
        `commit_touches_doc(sha, expected_doc, phase)` is consulted. Only a
        definite `False` skips the row. `True`, `None`, a missing sha, or a
        missing callback all keep it, preserving the deliberate direct-ship
        trust (a real unstamped ship touched its OWN plan's files and must
        stay shipped).

    With a falsy `expected_doc` the gate is off and the lookup is
    byte-identical to the pre-FQ-390 behavior.
    """
    plan_n, phase_n = _norm(plan), _norm(phase)
    completed = state.get("recently_completed")
    if not isinstance(completed, list):
        return None
    for candidate in completed:
        if not isinstance(candidate, dict):
            continue
        same_pair = (
            _norm(candidate.get("plan", "")) == plan_n
            and _norm(candidate.get("phase", "")) == phase_n
        )
        if not same_pair:
            continue
        if (candidate.get("status") or "").strip().lower() != "done":
            continue
        if expected_doc:
            recorded_doc = str(candidate.get("doc_path") or "")
            if recorded_doc:
                # The row names its own plan doc, so trust that directly.
                if not _doc_paths_match(recorded_doc, expected_doc):
                    continue  # cross-plan collision: not this plan's ship
            elif commit_touches_doc is not None:
                # Legacy row without doc_path: check the ship commit's
                # footprint against the expected plan. Only a DEFINITE miss
                # (exactly False) rejects the row.
                ship_sha = str(candidate.get("commit_sha") or "")
                if ship_sha and commit_touches_doc(ship_sha, expected_doc, phase) is False:
                    continue  # commit touched none of the expected plan's files
        return candidate
    return None
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def is_shipped(
    plan: str,
    phase: str,
    *,
    cfg: "_config.SubstrateConfig | None" = None,
    state: dict | None = None,
    grep_fallback: Callable[[str, str], ShipVerdict] | None = None,
    expected_doc: str | None = None,
    commit_touches_doc: Callable[[str, str, str], bool | None] | None = None,
    grep_touched_files: Callable[[str], set[str] | None] | None = None,
    soaks: Iterable[dict] | None = None,
    non_git_rung: "NonGitRung | None" = None,
) -> ShipVerdict:
    """Answer "did `(plan, phase)` ship?", registry first, grep second.

    Pure given `state` + `grep_fallback`. Tests inject `state` directly;
    production callers pass `state=load_state()`.

    Resolution order:

    1. A `recently_completed` row with `status: done` gives
       `source='registry'`, with the sha taken from the row's `commit_sha`
       (first 12 characters) when present.
    2. Otherwise `grep_fallback(plan, phase)` is called and its verdict is
       re-wrapped; `source` is re-stamped through `_restamp_grep_source` so
       callers can tell which gate answered.
    3. With no `grep_fallback` and a registry miss, the conservative default
       `shipped=False, source='none'` is returned.

    Opt-in hooks (each defaults to None = OFF = byte-identical):

    * `expected_doc` / `commit_touches_doc` (FQ-390) disambiguate a cross-plan
      series collision in the registry; see `_registry_ship_row`. A row
      rejected as a collision falls through to the grep fallback exactly as a
      registry miss would.
    * `grep_touched_files` (#399) demotes a GREP verdict resting on a
      release-bump / ledger-only commit.
    * `soaks` (#326) demotes BOTH a grep verdict and a registry `status:done`
      hit whose phase is a registered in-progress soak. For a grep hit the
      commit merely *names* the soak token. For a registry hit the impl
      shipped but the pick the lane wants is the soak FOLLOW-UP
      (#326-registry; see `_suppress_registry_soak`). The suppression is
      bounded by `status: in_progress` and ends when the operator closes the
      soak.
    * `non_git_rung` (docs/265) is an already-gathered non-git evidence
      verdict handed in by the `cmd_verify` boundary. It is folded in last via
      `_apply_non_git_rung`, after every demotion has settled the final
      `shipped` bit. It is conjunctive only: it may upgrade or withhold a
      standing ship and never promotes `shipped=False` to True.

    `cfg` (opt-in) is the "verify against THIS workspace" convenience. When
    given, `cfg` is installed as the active config for the duration of the
    call (the grep rung reads `_config.active()` for its root) and restored
    afterwards. Every hook the caller left as None is filled from the
    workspace: `state` from `cfg.state_path()`, `grep_fallback`,
    `expected_doc` from `default_plan_doc_map`, `commit_touches_doc` (only
    when an expected doc resolved), `soaks` from `cfg.paths.soaks_index`, and
    `grep_touched_files`. Explicit values always win. On this surface a
    negative answer is normalized to `source='none'`; the pure path keeps the
    fallback's own source on a miss for back-compat. Omitting `cfg` is
    byte-identical to the pure, host-agnostic core.
    """
    if cfg is not None:
        # Swap `cfg` in as the active config for this lookup only; the
        # `finally` below guarantees the previous one is restored.
        previously_active = _config.active()
        _config.set_active(cfg)
        try:
            if state is None:
                state = load_state_from(cfg.state_path())
            if grep_fallback is None:
                grep_fallback = default_grep_fallback_single
            # FQ-390: the plan-id-collision gate is ON by default for every
            # cfg-passing caller (CLI `dos verify`, MCP `dos_verify`, the
            # `dispatch_top` / `plan_board` fan-outs). Both resolutions are
            # fail-safe: an empty doc map or unresolved plan leaves
            # `expected_doc` None, so the gate stays off and the no-plan
            # answer is unchanged. Without this default the gate existed but
            # every shipped verify surface opted out, so a tombed same-id
            # plan's `done` row false-cleared the active plan's phase.
            if expected_doc is None:
                series_docs = default_plan_doc_map(cfg)
                expected_doc = series_docs.get(plan) or series_docs.get(str(plan).upper())
            if commit_touches_doc is None and expected_doc:
                commit_touches_doc = default_commit_touches_doc
            # #326: the soak demotion is likewise ON by default. The index is
            # loaded fail-safe (missing / no PyYAML / malformed -> [] ->
            # nothing suppressed) and only when the caller left `soaks` open.
            if soaks is None:
                soaks = load_soaks_from(cfg.paths.soaks_index)
            # #399: the release-bump demotion needs a footprint source to
            # fire; default it the same way. `_git_touched_files` returns None
            # on any git failure, which keeps the gate off.
            if grep_touched_files is None:
                grep_touched_files = _git_touched_files
            resolved = is_shipped(
                plan,
                phase,
                state=state,
                grep_fallback=grep_fallback,
                expected_doc=expected_doc,
                commit_touches_doc=commit_touches_doc,
                grep_touched_files=grep_touched_files,
                soaks=soaks,
                non_git_rung=non_git_rung,
            )
            # `source` names the gate that CONFIRMED a ship. A negative answer
            # carries `source='none'` whether or not the grep rung looked: the
            # no-plan contract is "no positive evidence", not "grep said no".
            if resolved.shipped or resolved.source == "none":
                return resolved
            return dataclasses.replace(resolved, source="none")
        finally:
            _config.set_active(previously_active)

    if state is None:
        state = load_state()
    hit = _registry_ship_row(
        state,
        plan,
        phase,
        expected_doc=expected_doc,
        commit_touches_doc=commit_touches_doc,
    )
    if hit is not None:
        recorded_sha = hit.get("commit_sha") or None
        from_registry = ShipVerdict(
            plan=plan,
            phase=phase,
            shipped=True,
            sha=str(recorded_sha)[:12] if recorded_sha else None,
            source="registry",
        )
        from_registry = _suppress_registry_soak(from_registry, soaks=soaks)
        # Fold the non-git rung LAST: a soak-suppressed registry hit is
        # already `shipped=False`, so the conjunctive rung is a no-op on it.
        return _apply_non_git_rung(from_registry, non_git_rung)

    if grep_fallback is None:
        return ShipVerdict(plan=plan, phase=phase, shipped=False, source="none")

    fb = grep_fallback(plan, phase)
    from_grep = ShipVerdict(
        plan=fb.plan,
        phase=fb.phase,
        shipped=fb.shipped,
        sha=fb.sha,
        # Keep a forgeability-graded source set by the default fallback
        # (`grep-artifact` / `grep-subject`, docs/118); anything else,
        # including an injected stub, is stamped bare `grep` so the
        # long-standing `source='grep'` contract holds.
        source=_restamp_grep_source(fb.source),
        summary=fb.summary,
        rung=fb.rung,
    )
    settled = _demote_if_false_positive(
        from_grep, grep_touched_files=grep_touched_files, soaks=soaks
    )
    # As above: the rung comes after the #399 / #326 demotions, so it can only
    # upgrade or withhold a ship that is still standing.
    return _apply_non_git_rung(settled, non_git_rung)
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def batch_is_shipped(
    pairs: Iterable[tuple[str, str]],
    *,
    state: dict | None = None,
    grep_fallback: Callable[[list[tuple[str, str]]], dict[tuple[str, str], ShipVerdict]] | None = None,
    plan_doc_map: dict[str, str] | None = None,
    commit_touches_doc: Callable[[str, str, str], bool | None] | None = None,
    grep_touched_files: Callable[[str], set[str] | None] | None = None,
    soaks: Iterable[dict] | None = None,
    non_git_rungs: "dict[tuple[str, str], NonGitRung] | None" = None,
) -> dict[tuple[str, str], ShipVerdict]:
    """Many-pair variant. Registry hits short-circuit; the registry misses are
    passed to `grep_fallback` in one batched call, so the worst case is still
    one subprocess (matching the existing `check_phase_shipped --batch` shape).

    Keyed by the *original* (plan, phase) tuple the caller passed (case
    preserved). Values are `ShipVerdict`s. Pairs with an empty plan or phase are
    dropped up front; every remaining pair gets an entry — a pair that neither
    the registry nor the fallback resolved gets a `shipped=False, source='none'`
    placeholder.

    `plan_doc_map` (series-id → expected plan-doc path) + `commit_touches_doc`
    (opt-in, FQ-390) disambiguate a cross-plan series collision in the registry
    — see `_registry_ship_row`. A row rejected as a collision is treated as a
    registry MISS for that pair, so it falls through to the grep fallback. When
    `plan_doc_map` is None the gate is OFF (`expected_doc=None` is passed).

    `grep_touched_files` (opt-in, #399) and `soaks` (opt-in, #326) demote
    false-positive `shipped=True` exactly as `is_shipped` does: grep verdicts
    go through `_demote_if_false_positive`, registry hits through
    `_suppress_registry_soak`. Both default `None` → OFF. `soaks` may be any
    iterable; a one-shot iterator is materialized once here so EVERY pair is
    checked against the full soak set, not just the first pair.

    `non_git_rungs` (opt-in, docs/265) is a PER-PAIR map `{(plan,phase): NonGitRung}`
    of already-gathered non-git evidence. Each rung is folded onto its pair's
    FINAL git verdict (after every demotion has settled the `shipped` bit) by
    `_apply_non_git_rung`. A pair with no entry is untouched; `None` → OFF.
    """
    if state is None:
        state = load_state()
    # `soaks` is consulted once per pair below. A generator would be drained by
    # the first pair, leaving later pairs with an empty soak set (no demotion).
    # Lists/tuples are passed through untouched so the common path is unchanged.
    if soaks is not None and not isinstance(soaks, (list, tuple)):
        soaks = list(soaks)
    doc_map = plan_doc_map or {}
    pair_list = [(p, ph) for (p, ph) in pairs if p and ph]
    out: dict[tuple[str, str], ShipVerdict] = {}
    misses: list[tuple[str, str]] = []

    # Pass 1 — registry. A hit is stamped `source='registry'`; a miss (or a row
    # rejected as a cross-plan collision) is queued for the grep fallback.
    for plan, phase in pair_list:
        expected_doc = (doc_map.get(plan) or doc_map.get(plan.upper())) if doc_map else None
        row = _registry_ship_row(
            state, plan, phase,
            expected_doc=expected_doc,
            commit_touches_doc=commit_touches_doc,
        )
        if row is None:
            misses.append((plan, phase))
            continue
        sha = row.get("commit_sha") or None
        reg_verdict = ShipVerdict(
            plan=plan,
            phase=phase,
            shipped=True,
            sha=str(sha)[:12] if sha else None,
            source="registry",
        )
        out[(plan, phase)] = _suppress_registry_soak(reg_verdict, soaks=soaks)

    # Pass 2 — grep fallback, ONE batched call for all misses. With no fallback
    # (or no misses) the lookup table is empty, so every miss gets a placeholder.
    fb_results = grep_fallback(misses) if (misses and grep_fallback is not None) else {}
    for key in misses:
        fb = fb_results.get(key)
        if fb is None:
            out[key] = ShipVerdict(plan=key[0], phase=key[1], shipped=False, source="none")
            continue
        verdict = ShipVerdict(
            plan=fb.plan,
            phase=fb.phase,
            shipped=fb.shipped,
            sha=fb.sha,
            # Preserve the forgeability grade the default fallback set
            # (`grep-artifact`/`grep-subject`, docs/118); bare `grep`
            # otherwise. Carry the raw `rung` through.
            source=_restamp_grep_source(fb.source),
            summary=fb.summary,
            rung=fb.rung,
        )
        out[key] = _demote_if_false_positive(
            verdict, grep_touched_files=grep_touched_files, soaks=soaks
        )

    # Pass 3 — fold the per-pair non-git rungs LAST, over the fully-settled
    # verdicts (one pass covers registry hits and grep results alike).
    if non_git_rungs:
        for key, verdict in list(out.items()):
            rung = non_git_rungs.get(key)
            if rung is not None:
                out[key] = _apply_non_git_rung(verdict, rung)
    return out
|
|
1193
|
+
|
|
1194
|
+
|
|
1195
|
+
# ---------------------------------------------------------------------------
|
|
1196
|
+
# Forgeability grading of the grep rung (docs/118).
|
|
1197
|
+
#
|
|
1198
|
+
# `phase_shipped` answers a ship on one of several rungs, emitted as `via`. They
|
|
1199
|
+
# split cleanly by FORGEABILITY — whether an agent that can write a commit can
|
|
1200
|
+
# fake the evidence the rung stood on:
|
|
1201
|
+
#
|
|
1202
|
+
# * NON-FORGEABLE: `file-path` — the artefact rung re-derives SHIPPED from the
|
|
1203
|
+
# FILES a commit touched (≥2 of the phase's named load-bearing files). A
|
|
1204
|
+
# commit cannot lie about which files it changed; this is the diff, not the
|
|
1205
|
+
# narration. (`registry` is the other non-forgeable source, handled above —
|
|
1206
|
+
# it is a `mark done` write, not a grep verdict.)
|
|
1207
|
+
# * FORGEABLE: `direct` / `release-prefix` / `body-mention` / `hyg-slug` /
|
|
1208
|
+
# `sub-phase-parent` — every one of these matches a phase token in the commit
|
|
1209
|
+
# SUBJECT or BODY, which the agent authored. `git commit --allow-empty -m
|
|
1210
|
+
# 'docs/X: PHASE — done'` satisfies `direct` with zero code shipped.
|
|
1211
|
+
#
|
|
1212
|
+
# This is the SAME split `resume.NONFORGEABLE_RUNGS` already encodes ({file-path,
|
|
1213
|
+
# registry}) for the resume re-entry mint — but `verify` was throwing the `via`
|
|
1214
|
+
# field away at the oracle boundary and flattening every rung to a single
|
|
1215
|
+
# `source='grep'`, so `dos verify` rendered `(via grep)` identically whether the
|
|
1216
|
+
# ship stood on the diff or on a subject line the agent typed. Grading it is the
|
|
1217
|
+
# read-side seed of the mediated-apply-gate (docs/118): the gate's job is to run
|
|
1218
|
+
# the artefact rung over the diff AT WRITE-TIME; this teaches the verdict to at
|
|
1219
|
+
# least REPORT which rung answered, graded by trust. It is advisory — it changes
|
|
1220
|
+
# the GRADE of the report, never the moment of control (no write is mediated).
|
|
1221
|
+
# ---------------------------------------------------------------------------
|
|
1222
|
+
|
|
1223
|
+
# The grep rungs whose evidence is the commit ARTEFACT (files touched), not the
|
|
1224
|
+
# agent-authored subject/body. Mirrors `resume.NONFORGEABLE_RUNGS` (minus
|
|
1225
|
+
# `registry`, which is not a grep rung). Kept as data so a new artefact rung is a
|
|
1226
|
+
# one-line addition, never a scattered edit.
|
|
1227
|
+
_NONFORGEABLE_GREP_RUNGS = frozenset({"file-path"})


def _grade_grep_source(via: str) -> str:
    """Translate a raw `via` rung into the graded `source` label of a grep verdict.

    * blank / missing `via` (a fallback that reported none) → bare ``'grep'``,
      so nothing is mis-graded as one side or the other;
    * a rung listed in `_NONFORGEABLE_GREP_RUNGS` (`file-path`, the
      artefact/diff rung) → ``'grep-artifact'`` (non-forgeable);
    * any other rung (`direct`, `release-prefix`, `body-mention`, `hyg-slug`,
      `sub-phase-parent`, …) → ``'grep-subject'`` (forgeable — the match stood
      on text the agent authored).

    Pure — no I/O, unit-testable.
    """
    rung = (via or "").strip()
    if not rung:
        return "grep"
    if rung in _NONFORGEABLE_GREP_RUNGS:
        return "grep-artifact"
    return "grep-subject"
|
|
1243
|
+
|
|
1244
|
+
|
|
1245
|
+
def _restamp_grep_source(fb_source: str) -> str:
    """Pick the `source` label the oracle boundary stamps on a grep verdict.

    `is_shipped`/`batch_is_shipped` OWN the `source` label: a caller may inject
    any `grep_fallback`, and the oracle decides what `source` the world sees.

    A fallback source that already carries a forgeability grade — anything
    starting with ``'grep-'``, e.g. `grep-artifact` / `grep-subject` from
    `default_grep_fallback_*` — is returned unchanged. Everything else
    (including an empty or missing source) is stamped the bare ``'grep'``,
    which preserves the long-standing contract that an injected stub returning
    `source='grep'` reports `'grep'` (`tests/test_oracle_and_loop.py`). Pure.
    """
    label = fb_source or ""
    if label.startswith("grep-"):
        return fb_source
    return "grep"
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
# ---------------------------------------------------------------------------
|
|
1259
|
+
# Default grep fallback — thin wrapper around check_phase_shipped --batch
|
|
1260
|
+
# ---------------------------------------------------------------------------
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
def _grep_batch_in_process(
    pairs: list[tuple[str, str]],
    doc_map: dict[str, str],
) -> "dict[str, dict] | None":
    """Run the `--batch` rung IN-PROCESS, returning ``{json-line}``-shaped dicts.

    `phase_shipped` is importable in this interpreter, so this path avoids the
    second interpreter startup + ``import dos`` that shelling out to
    ``python -m dos.phase_shipped --batch`` costs on every call.

    It drives the same rung functions in the same order as the child's
    `--batch` branch: `_build_log_cache` ONCE, then per pair
    `_check_phase_with_cache` → `_consult_plan_body` → `_apply_filepath_backstop`.
    The ship-stamp convention is whatever `config.active()` already holds, so
    the `ENV_STAMP_CONVENTION` hand-off the subprocess needs is moot here.

    Returns one dict per pair (pairs with an empty phase are skipped), keyed
    ``"<series>\\t<phase>"`` and shaped like the child's JSON line
    (`{series, phase, shipped, sha, summary, via}`), or ``None`` if the rung
    module cannot be imported or raises — the caller then falls back to the
    subprocess.
    """
    try:
        from dos import phase_shipped as _ps  # noqa: PLC0415 — kernel sibling, always importable
    except Exception:  # pragma: no cover — defensive; the import never fails in practice
        return None

    try:
        oneline_lines, body_lines = _ps._build_log_cache()
        matchers = _ps._subject_matchers()
        # One (series, phase, plan_doc) triple per usable pair; shared by the
        # file-path cache build and the per-pair loop below.
        work = tuple((s, p, doc_map.get(s) or None) for s, p in pairs if p)
        # docs/284 — hoist the file-path backstop's per-pair `git log` into ONE
        # windowed `--name-only` scan shared across every pair. A `None` cache
        # (union window saturated / git error) makes the backstop fall back to
        # the exact per-file subprocess path — never-under-count.
        fp_cache = _ps.build_batch_filepath_cache(list(work), matchers)
        rows: dict[str, dict] = {}
        for series, phase, plan_doc in work:
            result = _ps._check_phase_with_cache(
                series, phase, oneline_lines, body_lines, matchers
            )
            result = _ps._consult_plan_body(result, plan_doc, phase, series)
            result = _ps._apply_filepath_backstop(
                result, series, phase, plan_doc, matchers, fp_cache
            )
            result["series"] = series
            result["phase"] = phase
            rows[f"{series}\t{phase}"] = result
        return rows
    except Exception:  # pragma: no cover — any rung error → let the subprocess try
        return None
|
|
1319
|
+
|
|
1320
|
+
|
|
1321
|
+
def _grep_batch_subprocess(
    pairs: list[tuple[str, str]],
    doc_map: dict[str, str],
    timeout: int,
) -> "dict[str, dict] | None":
    """The legacy out-of-process `--batch` rung — fallback only.

    Kept as a safety net behind `_grep_batch_in_process` (and reachable directly
    by forcing `DOS_ORACLE_GREP_SUBPROCESS=1`) so an in-process rung failure
    degrades to the previously-shipped behavior rather than a wrong answer.

    Feeds one request line per pair to ``python -m dos.phase_shipped --batch``
    on stdin and parses the JSON lines it prints. Returns the parsed dicts keyed
    ``"<series>\\t<phase>"``, or ``None`` on timeout.
    """
    # Build the request. Fields are TAB-delimited so a series OR phase that
    # contains spaces survives the round-trip (`_parse_batch_line` splits on
    # the tab). The old space-join truncated a multi-word phase like `Phase 4`
    # — the child read phase=`Phase`, doc=`4`, the lookup key never matched and
    # a shipped phase resolved `via none` (F7). Real phase ids
    # (`hybrid-cache-type Phase 4`) contain spaces, so this is load-bearing.
    request_lines: list[str] = []
    for series, phase in pairs:
        if not phase:
            continue
        entry = f"{series}\t{phase}"
        plan_doc = doc_map.get(series, "")
        if plan_doc:
            entry = f"{entry}\t{plan_doc}"
        request_lines.append(entry)
    stdin_payload = "\n".join(request_lines) + "\n"

    # Carry the ACTIVE ship-stamp convention into the child. It re-derives
    # `config.active()` from scratch, so without this hand-off a
    # caller-installed (`set_active`) or `dos.toml`-declared convention would be
    # lost the moment the rung shells out. The child's
    # `_bootstrap_active_config` reads this env var back (SCV).
    child_env = dict(os.environ)
    try:
        stamp_json = json.dumps(_config.active().stamp.to_dict())
        child_env[_config.ENV_STAMP_CONVENTION] = stamp_json
    except Exception:
        pass  # never block the rung on a serialization hiccup

    try:
        res = subprocess.run(
            [sys.executable, "-m", "dos.phase_shipped", "--batch"],
            cwd=_workspace_root(),
            capture_output=True,
            text=True,
            input=stdin_payload,
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
            check=False,
            env=child_env,
        )
    except subprocess.TimeoutExpired:
        return None

    # Keep only well-formed JSON-object lines that name both series and phase;
    # anything else the child printed is ignored.
    rows: dict[str, dict] = {}
    for raw in res.stdout.splitlines():
        candidate = raw.strip()
        if not candidate.startswith("{"):
            continue
        try:
            row = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        series = row.get("series", "")
        phase = row.get("phase", "")
        if series and phase:
            rows[f"{series}\t{phase}"] = row
    return rows
|
|
1392
|
+
|
|
1393
|
+
|
|
1394
|
+
def default_grep_fallback_batch(
    pairs: list[tuple[str, str]],
    *,
    plan_doc_map: dict[str, str] | None = None,
    timeout: int = 30,
) -> dict[tuple[str, str], ShipVerdict]:
    """Resolve the residual `(plan, phase)` pairs through the git-log grep rung.

    The rung runs IN-PROCESS by default (`_grep_batch_in_process`). The
    out-of-process path (`_grep_batch_subprocess`) is used when
    `DOS_ORACLE_GREP_SUBPROCESS` is `1`/`true`/`TRUE`, or when the in-process
    rung returns ``None``. Both yield the same JSON-line-shaped dicts, which
    are then graded into `ShipVerdict`s: the forgeability `source` grade
    (docs/118) plus the #399 release-bump demotion.

    Returns `{(plan, phase): ShipVerdict}` for every resolved pair; an
    unresolved pair is simply absent (the caller fills in `source='none'`).
    An empty `pairs`, or a subprocess timeout, returns ``{}``.
    """
    if not pairs:
        return {}
    doc_map = plan_doc_map or {}

    force_subprocess = os.environ.get("DOS_ORACLE_GREP_SUBPROCESS") in ("1", "true", "TRUE")
    rows: "dict[str, dict] | None" = None
    if not force_subprocess:
        rows = _grep_batch_in_process(pairs, doc_map)
    if rows is None:  # forced subprocess, OR in-process rung unavailable/raised
        rows = _grep_batch_subprocess(pairs, doc_map, timeout)
    if rows is None:  # subprocess timed out
        return {}

    graded: dict[tuple[str, str], ShipVerdict] = {}
    for row in rows.values():
        series = row.get("series", "")
        phase = row.get("phase", "")
        if not series or not phase:
            continue
        # Carry the raw `via` rung and grade `source` by forgeability:
        # `file-path` → `grep-artifact`, subject/body rungs → `grep-subject`,
        # a blank `via` → bare `grep` so nothing is mis-graded.
        via = str(row.get("via", "") or "")
        verdict = ShipVerdict(
            plan=series,
            phase=phase,
            shipped=bool(row.get("shipped")),
            sha=row.get("sha") or None,
            source=_grade_grep_source(via),
            summary=row.get("summary", "") or "",
            rung=via,
        )
        # finding #399 — release-bump post-filter. The grep rung matches a phase
        # token anywhere in a commit's subject / release-notes body, so a
        # version cut that lists `… + <PHASE> closer + …` in its notes
        # false-flags an unshipped <PHASE>. Demote a verdict the footprint check
        # identifies as resting on a release-bump/ledger-only commit.
        if _grep_verdict_is_release_bump_falsepos(verdict):
            prefix = verdict.summary + " " if verdict.summary else ""
            verdict = dataclasses.replace(
                verdict,
                shipped=False,
                summary=(
                    prefix
                    + f"[ship_oracle: demoted — grep matched release-bump/ledger-only "
                    f"commit {verdict.sha}; phase token in notes, not a ship (#399)]"
                ),
            )
        graded[(series, phase)] = verdict
    return graded
|
|
1466
|
+
|
|
1467
|
+
|
|
1468
|
+
def default_grep_fallback_single(plan: str, phase: str) -> ShipVerdict:
    """Resolve ONE `(plan, phase)` pair via `default_grep_fallback_batch`.

    Returns the batch's verdict for the pair, or a `shipped=False,
    source='none'` placeholder when the batch did not resolve it.
    """
    key = (plan, phase)
    resolved = default_grep_fallback_batch([key])
    unresolved = ShipVerdict(plan=plan, phase=phase, shipped=False, source="none")
    return resolved.get(key, unresolved)
|
|
1475
|
+
|
|
1476
|
+
|
|
1477
|
+
# ---------------------------------------------------------------------------
|
|
1478
|
+
# FQ-390 — default commit-footprint verifier for the registry collision gate.
|
|
1479
|
+
#
|
|
1480
|
+
# A legacy `recently_completed` row (no `doc_path`) is disambiguated by asking:
|
|
1481
|
+
# did this row's `commit_sha` actually touch any of the EXPECTED plan's phase
|
|
1482
|
+
# files? A genuine ship of the expected plan touched its own files; a same-id
|
|
1483
|
+
# collision (a tombed plan's `DL/DL2` row) touched a DIFFERENT plan's files.
|
|
1484
|
+
# This reuses `check_phase_shipped`'s plan-doc file-extraction + shared-infra
|
|
1485
|
+
# guard so the "distinctive file" definition is identical to the grep-side
|
|
1486
|
+
# file-path backstop (one definition, two callers).
|
|
1487
|
+
# ---------------------------------------------------------------------------
|
|
1488
|
+
|
|
1489
|
+
|
|
1490
|
+
def default_commit_touches_doc(sha: str, expected_doc: str, phase: str) -> bool | None:
    """Answer "did commit `sha` ship work for `expected_doc`?" as True/False/None.

    The commit's touched-file set is tested against three signals, in order:

      A. **Plan-doc signal.** If the commit touched any path ending `-plan.md`:
           - one of them has the same basename as `expected_doc` → True
           - none of them does → False (the commit shipped a DIFFERENT plan
             that shares this series id — the tombed-`DL` collision).
      B. **Distinctive-file signal.** Otherwise compare against the phase's
         non-shared-infra files named in `expected_doc`:
           - commit touched ≥1 (by full path or by basename) → True
           - phase names ≥1 distinctive file but commit touched NONE → False
      C. **Non-substantive footprint.** When the phase names no distinctive
         file, a commit whose footprint is ledger-only / release-bump-only
         (`_commit_footprint_is_nonsubstantive`) → False.

      None — no signal fires, or the sha is blank, the rung helpers cannot be
             imported, or the footprint is unknown/empty. PERMISSIVE: the
             caller keeps the row, so a real unstamped ship is not turned into
             a false negative.

    Only a *definite* miss (False) demotes a registry row.
    """
    commit = (sha or "").strip()
    if not commit:
        return None
    try:
        from dos.phase_shipped import (  # type: ignore
            _extract_phase_files,
            _is_shared_infra,
        )
    except Exception:
        return None

    # Resolve a relative plan-doc path against the workspace root.
    if Path(expected_doc).is_absolute():
        doc_path = expected_doc
    else:
        doc_path = str(_workspace_root() / expected_doc)

    # Footprint comes from the shared `_git_touched_files` lookup rather than a
    # private `git show`, so this rung and the grep-side #399 demotion read the
    # same data. An unknown sha and an empty footprint are both permissive.
    touched = _git_touched_files(commit)
    if not touched:
        return None

    expected_base = _norm_doc(expected_doc).rsplit("/", 1)[-1]

    # Signal A — plan docs the commit touched. Match ANY path ending `-plan.md`
    # (optionally under a `tombstones/` segment), not just a `docs/`-rooted one:
    # the plan location is a per-workspace choice, and the verdict is decided on
    # the plan BASENAME, so no directory anchor is needed.
    plan_doc_re = re.compile(r"(?:^|/)(?:tombstones/)?[\w.-]+-plan\.md$", re.IGNORECASE)
    plan_doc_bases = {
        path.lower().rsplit("/", 1)[-1]
        for path in touched
        if plan_doc_re.search(path.lower())
    }
    if plan_doc_bases:
        # Touched plan doc(s): the expected one → ship; only others → collision.
        return expected_base in plan_doc_bases

    # Signal B — distinctive (non-shared-infra) files named for this phase.
    try:
        series = re.sub(r"[\d.].*$", "", str(phase)) or str(phase)
        named = _extract_phase_files(doc_path, phase, series)
    except Exception:
        named = []
    distinctive = [name for name in named if not _is_shared_infra(name)]

    if not distinctive:
        # Signal C — routing-only / bookkeeping / release-bump commit (the
        # FQ-388 ANC3 shape + the finding #399 release-bump shape). A commit
        # that touched ONLY the dispatch ledger or ONLY version-bump /
        # release-notes files is a routing stamp or version cut, never a ship.
        # Signal A already returned for plan-doc commits, so `touched` here is
        # plan-doc-free.
        if _commit_footprint_is_nonsubstantive(touched):
            return False  # touched only ledger / release-bump files → not a ship
        return None  # nothing distinctive to verify against — permissive

    touched_bases = {path.rsplit("/", 1)[-1] for path in touched}
    for name in distinctive:
        normalized = name.replace("\\", "/")
        if normalized in touched:
            return True
        if normalized.rsplit("/", 1)[-1] in touched_bases:
            return True
    return False
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
# ---------------------------------------------------------------------------
|
|
1603
|
+
# CLI
|
|
1604
|
+
# ---------------------------------------------------------------------------
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def _cli_single(plan: str, phase: str) -> int:
    """Resolve one `(plan, phase)` pair, print its verdict as a JSON line.

    Returns the process exit code: 0 when the verdict is shipped, 1 otherwise.
    """
    state = load_state()
    soaks = load_soaks()
    # Live CLI gets BOTH grep-side false-positive demotions wired in (#399 +
    # #326) — the boundary backstop + the soak cross-check. The pure core stays
    # injection-only; this is the in-process opt-in.
    verdict = is_shipped(
        plan,
        phase,
        state=state,
        grep_fallback=default_grep_fallback_single,
        grep_touched_files=_git_touched_files,
        soaks=soaks,
    )
    print(json.dumps(verdict.to_dict(), ensure_ascii=False))
    if verdict.shipped:
        return 0
    return 1
|
|
1618
|
+
|
|
1619
|
+
|
|
1620
|
+
def _split_batch_pair(line: str) -> "tuple[str, str] | None":
    """Parse one `--batch` stdin line into `(plan, phase)`, or ``None`` to skip it.

    A line containing a TAB is treated as TAB-delimited (`plan<TAB>phase[<TAB>doc]`)
    so a plan or phase that contains spaces (`Phase 4`) survives intact — the
    same delimiter `_grep_batch_subprocess` emits. A line without a TAB keeps
    the historical whitespace split (`plan phase [rest]`). Any field after the
    phase is ignored. Lines with fewer than two non-empty fields yield ``None``.
    """
    stripped = line.strip()
    if "\t" in stripped:
        fields = [field.strip() for field in stripped.split("\t")]
    else:
        fields = stripped.split(None, 2)
    if len(fields) >= 2 and fields[0] and fields[1]:
        return fields[0], fields[1]
    return None


def _cli_batch() -> int:
    """Resolve every `(plan, phase)` pair read from stdin; print one JSON line each.

    Verdicts are printed in input order. Returns the process exit code: 0 when
    at least one pair is shipped, 1 otherwise.
    """
    state = load_state()
    pairs: list[tuple[str, str]] = []
    for line in sys.stdin:
        # Whitespace-splitting alone truncated a multi-word phase (`Phase 4` →
        # `Phase`); `_split_batch_pair` honors TAB-delimited input first.
        pair = _split_batch_pair(line)
        if pair is not None:
            pairs.append(pair)
    results = batch_is_shipped(
        pairs, state=state, grep_fallback=default_grep_fallback_batch,
        grep_touched_files=_git_touched_files, soaks=load_soaks(),
    )
    any_shipped = False
    for pair in pairs:
        # Defensive placeholder for a pair the batch returned no entry for.
        v = results.get(pair) or ShipVerdict(plan=pair[0], phase=pair[1], shipped=False, source="none")
        print(json.dumps(v.to_dict(), ensure_ascii=False))
        if v.shipped:
            any_shipped = True
    return 0 if any_shipped else 1
|
|
1638
|
+
|
|
1639
|
+
|
|
1640
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: resolve one `<plan> <phase>` pair, or a batch from stdin.

    `argv` is the argument list to parse (``None`` → `sys.argv[1:]`, per
    argparse). Returns the exit code of `_cli_batch` / `_cli_single`. Missing
    `plan`/`phase` without `--batch` is a usage error: argparse prints the
    message and raises ``SystemExit(2)``.
    """
    # The module docstring is absent under `python -OO`; fall back to an empty
    # description instead of crashing on `None.split`.
    description = (__doc__ or "").split("\n\n")[0]
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("plan", nargs="?", help="Plan series (e.g. IF, AAR, TF)")
    parser.add_argument("phase", nargs="?", help="Phase id (e.g. IF4.1)")
    parser.add_argument(
        "--batch",
        action="store_true",
        help="Read `<plan> <phase>` pairs from stdin; emit one JSON line per result.",
    )
    args = parser.parse_args(argv)
    # Verdict JSON is printed with `ensure_ascii=False`; force UTF-8 so
    # non-ASCII summaries do not depend on the console's default encoding.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")
    if args.batch:
        return _cli_batch()
    if not (args.plan and args.phase):
        # `parser.error` prints usage and raises SystemExit(2); it never returns.
        parser.error("plan and phase are required unless --batch is used")
    return _cli_single(args.plan, args.phase)
|
|
1658
|
+
|
|
1659
|
+
|
|
1660
|
+
if __name__ == "__main__":
    # Script entry: exit the process with `main()`'s return code.
    raise SystemExit(main())
|