dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/gate_classify.py
ADDED
|
@@ -0,0 +1,827 @@
|
|
|
1
|
+
"""Typed gate verdict for a /next-up packet (QWB6).
|
|
2
|
+
|
|
3
|
+
Today the empty-packet gate in /dispatch-loop is a single binary fork — a
|
|
4
|
+
packet either has live picks or it does not, and "no live picks" is treated as
|
|
5
|
+
one thing: *drain*. But a 0-pick packet has at least three distinct root
|
|
6
|
+
causes, and each warrants a different response. Collapsing them is what lets
|
|
7
|
+
/dispatch-loop false-stop on stale stamps (queue #240, observed live in run
|
|
8
|
+
`20260517T1626Z` iter 3 — the loop "drained" while the backlog was full,
|
|
9
|
+
only the plan-doc SHIPPED stamps were stale).
|
|
10
|
+
|
|
11
|
+
`classify_packet()` is the keystone fix: a **pure** function that turns the
|
|
12
|
+
packet's picks + their dispositions into one typed verdict —
|
|
13
|
+
|
|
14
|
+
LIVE packet has >= 1 soft-claimable pick
|
|
15
|
+
DRAIN genuine empty backlog — nothing left to dispatch
|
|
16
|
+
STALE-STAMP phases shipped in git but plan-doc rows unstamped (false drain)
|
|
17
|
+
BLOCKED picks exist but soft-claimed by a sibling, or quota-blocked
|
|
18
|
+
(was WEDGE — renamed; WEDGE survives as a permanent alias)
|
|
19
|
+
|
|
20
|
+
QWB7 (the `--gate hard|soft|drive` policy) and QWB8 (`/dispatch` emits the
|
|
21
|
+
verdict token) are thin consumers of this function — they are NOT wired here.
|
|
22
|
+
|
|
23
|
+
⚓ Data-driven decisions (evidence-over-narrative): the verdict is derived from
|
|
24
|
+
already-loaded portfolio state — the packet's picks and their per-pick
|
|
25
|
+
dispositions (each carrying `check_phase_shipped`'s `via` field and the
|
|
26
|
+
plan-doc stamp boolean) — never from /dispatch's prose reply. Run
|
|
27
|
+
`20260517T1626Z` iter 3 *said* "stamp drift" in prose, but the loop could not
|
|
28
|
+
branch on prose, so it false-stopped. The verdict type is the fix.
|
|
29
|
+
|
|
30
|
+
⚓ Typed verdict over binary gate: a control-flow gate whose one signal
|
|
31
|
+
(drained backlog) has multiple root causes needs a typed verdict, not a binary
|
|
32
|
+
fork. `classify_packet` is pure (no subprocess, no file I/O — the caller passes
|
|
33
|
+
already-loaded state) precisely so it can be tested in isolation, away from
|
|
34
|
+
everything that makes a live /dispatch run expensive.
|
|
35
|
+
|
|
36
|
+
OC3 (2026-05-18): `classify_packet_file` is the validated I/O wrapper around
|
|
37
|
+
the pure `classify_packet`. Pre-OC3, /dispatch Step 5.6.1 *resolved* the
|
|
38
|
+
disposition list by hand-parsing the packet's `## Course corrections` prose —
|
|
39
|
+
the OC-P3 weakness: a well-formed-but-wrong dict produced a plausible-but-wrong
|
|
40
|
+
verdict (findings #240). OC3 moved disposition resolution into /next-up's
|
|
41
|
+
renderer, which emits the structured list to `.dispositions-<tag>.json`;
|
|
42
|
+
`classify_packet_file` reads that sidecar, rejects a stale/wrong-schema
|
|
43
|
+
contract (`StaleDispositionContract`), and delegates to `classify_packet`.
|
|
44
|
+
The classifier stays pure and is still the unit-test surface.
|
|
45
|
+
"""
|
|
46
|
+
from __future__ import annotations
|
|
47
|
+
|
|
48
|
+
import enum
|
|
49
|
+
import json
|
|
50
|
+
import re
|
|
51
|
+
from dataclasses import dataclass, field
|
|
52
|
+
from pathlib import Path
|
|
53
|
+
from typing import Any
|
|
54
|
+
|
|
55
|
+
# The gate-side verdict enum is now defined centrally in scripts/dispatch_tokens.py
|
|
56
|
+
# (the single source of truth for every dispatch verdict/outcome/reason token).
|
|
57
|
+
# Re-export it here as `Verdict` so every existing `from gate_classify import
|
|
58
|
+
# Verdict` / `gate_classify.Verdict` reference keeps working unchanged (a
|
|
59
|
+
# byte-compatible re-export shim — the same pattern apply_core uses per CLAUDE.md).
|
|
60
|
+
# `Verdict.WEDGE` survives as a permanent Enum alias of `Verdict.BLOCKED` defined
|
|
61
|
+
# on GateVerdict, so any un-migrated `verdict is Verdict.WEDGE` keeps working.
|
|
62
|
+
# In DOS the verdict vocabulary lives in `dos.tokens` (the ported
|
|
63
|
+
# `dispatch_tokens`). One canonical package import — the dual-mode bare-sibling
|
|
64
|
+
# fallback the origin repo needed (scripts run as bare files) is gone now that
|
|
65
|
+
# everything is a proper package module.
|
|
66
|
+
from dos.tokens import GateVerdict as Verdict # noqa: F401
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Drop-reason tokens a caller stamps on a dropped pick's disposition. These
|
|
70
|
+
# are the artefact the classifier keys on — not the packet's prose.
|
|
71
|
+
# Compared by equality against `PickDisposition.drop_reason` in
# `is_stale_stamp()` (DROP_SHIPPED) and `is_blocked()` (the other two).
DROP_SHIPPED = "shipped"  # check_phase_shipped proved the phase shipped
DROP_SOFT_CLAIMED = "soft_claimed"  # a sibling fanout holds a live soft-claim
DROP_QUOTA_BLOCKED = "quota_blocked"  # quota / credential saturation
|
|
74
|
+
|
|
75
|
+
# `via` value from check_phase_shipped that counts as an unambiguous direct
|
|
76
|
+
# ship. STALE-STAMP is deliberately scoped to direct-ship evidence only — a
|
|
77
|
+
# weak verdict (release-prefix / body-mention / file-path) is exactly the
|
|
78
|
+
# #230 false-positive surface, and treating it as a confirmed ship would let
|
|
79
|
+
# the loop auto-clear drift that was never real.
|
|
80
|
+
SHIP_VIA_DIRECT = "direct"  # matched against PickDisposition.ship_via in is_stale_stamp()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass(frozen=True)
class PickDisposition:
    """Evidence record for one pick, as consumed by `classify_packet`.

    A pick the packet *kept* (rendered as soft-claimable) sets `live=True`;
    none of the other evidence fields are consulted for it. A pick the packet
    *dropped* (auto-dropped to Course corrections) sets `live=False` and
    carries the fields that explain *why* it was dropped — the artefact the
    verdict is built on.
    """

    series: str
    phase: str
    live: bool
    # Everything below describes a dropped pick and is ignored when `live`.
    drop_reason: str = ""  # one of the DROP_* tokens
    ship_via: str = ""  # check_phase_shipped `via` field, when drop_reason==shipped
    ship_sha: str = ""  # the ship commit, for the reason string
    plan_doc_stamped: bool = True  # plan-doc heading carries a SHIPPED token?
    claim_tag: str = ""  # fanout tag holding a live soft-claim, when soft_claimed

    def is_stale_stamp(self) -> bool:
        """Report whether this pick is a shipped-but-unstamped phase.

        Requires all of: the pick was dropped, it was dropped as shipped, the
        ship evidence is the direct kind (see SHIP_VIA_DIRECT — weak ship
        verdicts do not qualify), and the plan-doc heading has no SHIPPED
        token. This is the false-drain shape behind queue #240.
        """
        # A kept pick, or one whose plan-doc is already stamped, can never
        # be a stale stamp.
        if self.live or self.plan_doc_stamped:
            return False
        return self.drop_reason == DROP_SHIPPED and self.ship_via == SHIP_VIA_DIRECT

    def is_blocked(self) -> bool:
        """Report whether this pick was dropped because it is blocked.

        Blocked means a sibling tag holds a live soft-claim on it, or it hit
        a quota/credential block: the work exists but cannot be dispatched
        right now. (Formerly `is_wedge`; renamed with the WEDGE→BLOCKED
        verdict rename.)
        """
        if self.live:
            return False
        return self.drop_reason in (DROP_SOFT_CLAIMED, DROP_QUOTA_BLOCKED)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass(frozen=True)
class ClassifyResult:
    """A typed gate verdict together with the evidence behind it.

    `verdict` is the field /dispatch-loop branches on. `reason` is a one-line
    summary for the operator (drained-twice stop messages, the QWB8
    archive-commit subject). `evidence` holds the dispositions that drove the
    verdict, so QWB7/QWB8 can show *which* phases are stale or blocked
    without re-deriving them.
    """

    verdict: Verdict
    reason: str
    evidence: list[PickDisposition] = field(default_factory=list)

    @property
    def is_false_drain(self) -> bool:
        """Report whether this is a 0-live-pick gate that is not a `DRAIN`.

        STALE-STAMP, BLOCKED, and RACE all looked like "0 live picks" to the
        old binary gate, which is why it false-stopped. QWB7's drained-twice
        rule counts `DRAIN` only; this property names the class it must
        exclude. RACE joined the set in NRT2: a lost candidates-cache lock
        race must not arm drained-twice either.
        """
        not_a_real_drain = (Verdict.STALE_STAMP, Verdict.BLOCKED, Verdict.RACE)
        return self.verdict in not_a_real_drain
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class MalformedDisposition(ValueError):
    """Raised for a disposition the classifier cannot turn into a pick.

    Used in place of a bare ``KeyError`` so that the caller (the /dispatch
    skill building dispositions from prose) receives a named, actionable
    error identifying the missing field, rather than a stack trace the loop
    swallows into a conservative DRAIN. The schema is described in the
    dispatch SKILL, Step 5.6.1.
    """
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _coerce(obj: Any) -> PickDisposition:
    """Normalise a disposition (object or plain dict) to a PickDisposition.

    A `PickDisposition` is returned unchanged. A dict (the fixture / JSON
    shape) is read tolerantly, because /dispatch assembles these by hand from
    prose:

    - ``phase`` may be supplied as ``phase_id`` instead.
    - ``series`` may be omitted; it is then derived from ``phase`` by removing
      the trailing run of digits/dots (``FB2`` -> ``FB``), falling back to the
      whole phase id when nothing is left.
    - ``live`` defaults to ``False`` (the dropped-pick case, the only one
      that carries evidence).

    Raises:
        MalformedDisposition: if `obj` is neither a PickDisposition nor a
            dict, or if the dict has no usable ``phase``/``phase_id``.
    """
    if isinstance(obj, PickDisposition):
        return obj
    if not isinstance(obj, dict):
        raise MalformedDisposition(
            f"disposition must be a PickDisposition or dict, got {type(obj).__name__}"
        )

    phase_id = obj.get("phase") or obj.get("phase_id")
    if not phase_id:
        raise MalformedDisposition(
            f"disposition is missing 'phase' (or 'phase_id'): {obj!r}"
        )
    phase_text = str(phase_id)

    series_name = obj.get("series")
    if not series_name:
        # No explicit series: take the phase id minus its trailing number.
        without_number = re.sub(r"[\d.]+$", "", phase_text)
        series_name = without_number if without_number else phase_text

    # The free-form evidence fields all share the same "" default.
    evidence_fields = {
        key: obj.get(key, "")
        for key in ("drop_reason", "ship_via", "ship_sha", "claim_tag")
    }
    return PickDisposition(
        series=str(series_name),
        phase=phase_text,
        live=bool(obj.get("live", False)),
        plan_doc_stamped=bool(obj.get("plan_doc_stamped", True)),
        **evidence_fields,
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def classify_packet(dispositions: list[Any]) -> ClassifyResult:
    """Reduce a /next-up packet's picks to a single typed gate verdict.

    PURE — performs no subprocess, file, or git I/O. The caller must already
    have resolved each pick's disposition (the `check_phase_shipped` `via`
    field, the plan-doc stamp boolean, any sibling soft-claim) and passes
    that evidence in.

    Args:
        dispositions: one `PickDisposition` (or equivalent dict, the
            fixture/JSON shape) per pick the packet rendered, whether kept
            or dropped.

    Returns:
        A `ClassifyResult`. Rules are tried most-specific-first so a mixed
        packet resolves deterministically:

        1. LIVE        — at least one pick is `live`.
        2. STALE-STAMP — no live pick, and at least one dropped pick is a
                         direct-ship phase with an unstamped plan-doc heading
                         (the #240 false drain).
        3. BLOCKED     — none of the above, and at least one dropped pick is
                         soft-claimed by a sibling or quota-blocked.
        4. DRAIN       — nothing recoverable; a genuinely empty backlog. An
                         empty `dispositions` list lands here too.

    Raises:
        MalformedDisposition: propagated from `_coerce` for an unusable entry.
    """
    # Coerce everything up front so a malformed entry fails before any verdict.
    picks = [_coerce(entry) for entry in dispositions]

    def _describe(group):
        """Render picks as a comma-separated "<series> <phase>" list."""
        return ", ".join(f"{pick.series} {pick.phase}" for pick in group)

    kept = [pick for pick in picks if pick.live]
    if kept:
        return ClassifyResult(
            verdict=Verdict.LIVE,
            reason=f"{len(kept)} live pick(s) — packet has dispatchable work",
            evidence=kept,
        )

    unstamped = [pick for pick in picks if pick.is_stale_stamp()]
    if unstamped:
        summary = (
            f"{len(unstamped)} pick(s) shipped in git but plan-doc unstamped "
            f"({_describe(unstamped)}) — false drain, not an empty backlog"
        )
        return ClassifyResult(
            verdict=Verdict.STALE_STAMP, reason=summary, evidence=unstamped
        )

    held = [pick for pick in picks if pick.is_blocked()]
    if held:
        summary = (
            f"{len(held)} pick(s) blocked by a sibling soft-claim or "
            f"quota ({_describe(held)}) — work exists but is not dispatchable now"
        )
        return ClassifyResult(verdict=Verdict.BLOCKED, reason=summary, evidence=held)

    return ClassifyResult(
        verdict=Verdict.DRAIN,
        reason="no live picks and no recoverable signal — backlog genuinely drained",
        evidence=[],
    )
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
# ---------------------------------------------------------------------------
|
|
287
|
+
# OC3 — classify directly from the renderer's disposition sidecar.
|
|
288
|
+
#
|
|
289
|
+
# `classify_packet` is pure: the caller resolves every pick's disposition and
|
|
290
|
+
# passes the list in. Pre-OC3, /dispatch Step 5.6.1 *resolved* that list by
|
|
291
|
+
# hand-parsing the packet's `## Course corrections` prose — the OC-P3 weakness
|
|
292
|
+
# (a well-formed-but-wrong dict → a plausible-but-wrong verdict, findings
|
|
293
|
+
# #240). OC3 moved the resolution to the renderer, which emits the structured
|
|
294
|
+
# list to `.dispositions-<tag>.json`. `classify_packet_file` reads that file,
|
|
295
|
+
# validates the envelope, and delegates to `classify_packet`. /dispatch now
|
|
296
|
+
# makes one call against a derived artefact — there is no hand-assembly step.
|
|
297
|
+
#
|
|
298
|
+
# `classify_packet` stays pure and is still the unit-test surface; this
|
|
299
|
+
# function is the thin, validated I/O wrapper around it.
|
|
300
|
+
# ---------------------------------------------------------------------------
|
|
301
|
+
|
|
302
|
+
# The schema tag the renderer (`next_up_render._build_dispositions` →
|
|
303
|
+
# `cmd_render`) stamps on the sidecar. A mismatch fails loudly: a /dispatch
|
|
304
|
+
# reading a contract its /next-up did not write is exactly the OC-P4 silent
|
|
305
|
+
# drift this guard refuses to let through.
|
|
306
|
+
DISPOSITIONS_SCHEMA = "oc3-dispositions-v1"  # compared to the envelope's `schema` key in classify_packet_file
|
|
307
|
+
|
|
308
|
+
# NRT2 (docs/53): the schema tag the renderer's `_emit_race_envelope` stamps
|
|
309
|
+
# on the per-tag race sidecar (`output/next-up/.race-<tag>.json`). A wrong
|
|
310
|
+
# `schema` value silently degrades to the existing classification (DRAIN /
|
|
311
|
+
# STALE-STAMP / BLOCKED) — a malformed race envelope must NOT promote an
|
|
312
|
+
# otherwise-LIVE packet to RACE.
|
|
313
|
+
RACE_SCHEMA = "next-up-race-v1"  # compared to the race envelope's `schema` key in _race_envelope_for
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
class StaleDispositionContract(ValueError):
    """Raised when the disposition sidecar cannot be trusted.

    Covers a sidecar that is missing, malformed, or stamped with a different
    schema. It is raised instead of quietly falling back to a conservative
    ``DRAIN`` so that a /dispatch reading a stale or wrong-shaped contract
    fails specifically and visibly. The caller still chooses the fallback
    (Step 5.6.1 documents ``DRAIN`` as the default), but does so *knowing*
    the sidecar was unusable rather than by accident.
    """
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _race_envelope_for(dispositions_path: Path) -> ClassifyResult | None:
    """Return a RACE result if a valid race envelope sits next to the sidecar.

    The race envelope is looked up as `.race-<tag>.json` in the same
    directory as `dispositions_path`, where `<tag>` is taken from the
    sidecar's own `.dispositions-<tag>.json` file name.

    NRT2 (docs/53): the race envelope is the artefact `next_up_render` writes
    when `_acquire_candidates_lock_or_race` times out. Its presence means this
    /next-up shell lost a lock race against a sibling, so the packet on disk
    (if any) is wrong-scope and must not be classified as DRAIN /
    STALE-STAMP / BLOCKED.

    Args:
        dispositions_path: path of the OC3 disposition sidecar. It does not
            need to exist; only its name and parent directory are used.

    Returns:
        A `ClassifyResult` with `Verdict.RACE` when the envelope exists, is a
        JSON object, and carries `schema == RACE_SCHEMA`. `None` in every
        other case: the sidecar name does not match the expected pattern, the
        envelope is absent, unreadable, not valid UTF-8, not valid JSON, not
        an object, or has the wrong schema. RACE is *precedence-only* — an
        unusable race file must fall through to the normal verdicts so a
        corrupt sidecar cannot produce a spurious RACE.
    """
    prefix, suffix = ".dispositions-", ".json"
    name = dispositions_path.name
    if not (name.startswith(prefix) and name.endswith(suffix)):
        return None
    tag = name[len(prefix):-len(suffix)]

    race_path = dispositions_path.parent / f".race-{tag}.json"
    if not race_path.exists():
        return None
    try:
        env = json.loads(race_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError that read_text raises on non-UTF-8 bytes; either
        # one means "malformed envelope", which must degrade to None.
        return None
    if not isinstance(env, dict) or env.get("schema") != RACE_SCHEMA:
        return None

    blocked_by = env.get("blocked_by_pid")
    attempted_at = env.get("attempted_at") or "(unknown)"
    lock_path = env.get("lock_path") or "(unknown)"
    # Prefer the envelope's own reason; otherwise synthesise one from the
    # holder details that are available.
    reason = env.get("reason") or (
        f"/next-up shell lost the candidates-cache lock race for tag={tag!r} "
        f"(blocked_by_pid={blocked_by}, attempted_at={attempted_at}, "
        f"lock_path={lock_path})"
    )
    return ClassifyResult(verdict=Verdict.RACE, reason=reason, evidence=[])
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def classify_packet_file(path: str | Path) -> ClassifyResult:
    """Classify a /next-up packet from its OC3 disposition sidecar file.

    Args:
        path: the `.dispositions-<tag>.json` the renderer wrote next to the
            packet. Its envelope is `{"tag", "schema", "dispositions": [...]}`.

    Returns:
        The `ClassifyResult` for the packet. NRT2 (docs/53): if a sibling
        `.race-<tag>.json` envelope with schema `next-up-race-v1` exists in
        the same directory, a RACE result is returned before the sidecar is
        even read — the packet came from a /next-up shell that lost a
        candidates-cache lock race, so any DRAIN / STALE-STAMP / BLOCKED
        verdict would be derived from a wrong-scope packet. A malformed race
        envelope falls through to the normal classification. A well-formed
        sidecar delegates straight to the pure `classify_packet`.

    Raises:
        StaleDispositionContract: the sidecar is absent, unreadable, not
            valid UTF-8, not valid JSON, not a JSON object, carries a
            `schema` other than `DISPOSITIONS_SCHEMA`, or has no
            `dispositions` list. A wrong-shaped contract fails loudly here
            instead of producing a plausible-but-wrong verdict downstream.
        MalformedDisposition: propagated unchanged from `classify_packet`
            when an individual entry is unusable.
    """
    p = Path(path)
    race = _race_envelope_for(p)
    if race is not None:
        return race
    if not p.exists():
        raise StaleDispositionContract(
            f"disposition sidecar not found: {p} — "
            f"run `next_up_render.py render` to emit it"
        )
    try:
        envelope = json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised for non-UTF-8 bytes, so both surface as the documented
        # contract error rather than leaking a raw decode exception.
        raise StaleDispositionContract(
            f"disposition sidecar {p} is not readable JSON: {e}"
        ) from e
    if not isinstance(envelope, dict):
        raise StaleDispositionContract(
            f"disposition sidecar {p} is not a JSON object: {type(envelope).__name__}"
        )
    schema = envelope.get("schema")
    if schema != DISPOSITIONS_SCHEMA:
        raise StaleDispositionContract(
            f"disposition sidecar {p} has schema {schema!r}, "
            f"expected {DISPOSITIONS_SCHEMA!r} — the /next-up that wrote it is "
            f"out of contract with this /dispatch"
        )
    dispositions = envelope.get("dispositions")
    if not isinstance(dispositions, list):
        raise StaleDispositionContract(
            f"disposition sidecar {p} has no `dispositions` list "
            f"(got {type(dispositions).__name__})"
        )
    # Per-entry problems are not wrapped: `classify_packet` raises
    # `MalformedDisposition` for an unusable entry and that surfaces as-is.
    return classify_packet(dispositions)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
# ---------------------------------------------------------------------------
|
|
429
|
+
# QWB7 — gate policy modes for /dispatch-loop.
|
|
430
|
+
#
|
|
431
|
+
# classify_packet() above turns a packet into one typed Verdict. QWB7 adds the
|
|
432
|
+
# *policy* layer: given that verdict and a `--gate hard|soft|drive` mode chosen
|
|
433
|
+
# at /dispatch-loop invocation, what should the loop actually DO with this
|
|
434
|
+
# iteration? One classifier, three callers, an explicit policy — exactly the
|
|
435
|
+
# rebalance the ⚓ typed-verdict-over-binary-gate anchor names ("give the loop a
|
|
436
|
+
# policy when one gate must serve different intents").
|
|
437
|
+
#
|
|
438
|
+
# `gate_policy()` is pure for the same reason `classify_packet()` is: the
|
|
439
|
+
# Tier-3 replay harness must exercise the hard-vs-drive divergence without a
|
|
440
|
+
# live /dispatch run.
|
|
441
|
+
# ---------------------------------------------------------------------------
|
|
442
|
+
|
|
443
|
+
# The three gate policy modes. `hard` is the default — bare /dispatch-loop is
|
|
444
|
+
# byte-unchanged.
|
|
445
|
+
GATE_HARD = "hard"  # default value of gate_policy()'s `mode` parameter
GATE_SOFT = "soft"
GATE_DRIVE = "drive"
# All three mode strings, in hard / soft / drive order.
GATE_MODES = (GATE_HARD, GATE_SOFT, GATE_DRIVE)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
@dataclass(frozen=True)
class GateAction:
    """The action /dispatch-loop's Step 3 takes for one iteration's verdict.

    An immutable value the loop branches on. The policy decision lives here,
    rather than in SKILL.md prose, so the Tier-3 replay can assert on it
    without a live run.

    Attributes:
        next_mode: `dispatch`, `replan`, or `stop`. `stop` ends the loop; the
            other two name the next iteration's mode.
        counts_toward_drain: True iff this iteration increments the
            drained-twice counter. QWB7's load-bearing rule is that **only a
            true DRAIN counts**; STALE-STAMP and BLOCKED never do, which
            removes the #240 false-stop class structurally.
        reconcile: True iff the loop must run an inline stamp-reconcile pass
            (QWB2's reconcile_plan_doc_stamps) before the next iteration. Set
            for `drive`/`soft` on STALE-STAMP, so the loop self-heals stamp
            drift instead of stopping on it.
        surface: True iff the verdict must be shown to the operator (a stop
            that needs a human, or a BLOCKED the loop will not wait on).
        reason: one-line operator-facing summary.
    """

    next_mode: str
    counts_toward_drain: bool
    reconcile: bool
    surface: bool
    reason: str
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def gate_policy(verdict: Verdict, mode: str = GATE_HARD) -> GateAction:
    """Translate a typed gate verdict plus a `--gate` mode into a loop action.

    PURE — no I/O. The verdict is expected to have been produced by the
    caller already; this function only looks it up in the policy matrix.

    Policy matrix as implemented below (``soft`` and ``drive`` take the same
    branches in this function):

    | verdict     | hard                          | soft / drive                        |
    |-------------|-------------------------------|-------------------------------------|
    | LIVE        | dispatch                      | dispatch                            |
    | DRAIN       | replan, counts toward drain   | stop + surface, counts toward drain |
    | RACE        | dispatch + surface, no count  | dispatch + surface, no count        |
    | STALE-STAMP | replan, no count              | reconcile inline + dispatch         |
    | BLOCKED     | replan, no count              | stop + surface, no count            |

    Notes:
      * DRAIN is the only verdict that ever sets ``counts_toward_drain``.
        Under ``hard`` a STALE-STAMP or BLOCKED still spends a /replan
        iteration, but it can no longer arm a false drained-twice stop.
      * RACE is handled identically in every mode: keep dispatching (the
        retry-once policy lives in the caller) and never count the iteration.
      * LIVE is accepted so callers can route every verdict through one
        function; it simply continues dispatching.
      * Any verdict that is not LIVE, DRAIN, RACE or STALE_STAMP (by
        identity) is treated as BLOCKED.

    Args:
        verdict: The typed gate verdict for this iteration.
        mode: One of ``GATE_MODES``; defaults to ``GATE_HARD``.

    Returns:
        The ``GateAction`` the loop should take.

    Raises:
        ValueError: If ``mode`` is not one of ``GATE_MODES``.
    """
    if mode not in GATE_MODES:
        raise ValueError(
            f"unknown --gate mode {mode!r} — expected one of {GATE_MODES}"
        )

    def act(next_mode, reason, *, counts=False, reconcile=False, surface=False):
        # Flags default to False so each branch names only what it switches on.
        return GateAction(
            next_mode=next_mode,
            counts_toward_drain=counts,
            reconcile=reconcile,
            surface=surface,
            reason=reason,
        )

    # soft and drive share every branch here, so one flag is enough.
    hard = mode == GATE_HARD

    if verdict is Verdict.LIVE:
        return act("dispatch", "LIVE — picks shipped, continue dispatch")

    if verdict is Verdict.DRAIN:
        # The only verdict allowed to count toward an early stop. hard goes
        # through /replan first (drained-twice is the second DRAIN around a
        # /replan); soft/drive stop straight away.
        if hard:
            return act(
                "replan",
                "DRAIN — backlog drained, /replan to refill (drained-twice on 2nd)",
                counts=True,
            )
        return act(
            "stop",
            "DRAIN — backlog genuinely drained, stopping",
            counts=True,
            surface=True,
        )

    if verdict is Verdict.RACE:
        # Candidates-cache lock race: the packet on disk is wrong-scope, so
        # neither /replan nor a stop is appropriate. Keep dispatching and
        # never count the iteration toward drained-twice.
        return act(
            "dispatch",
            "RACE — candidates cache race; rerun on lock-clear (sleep + retry once, no drain count)",
            surface=True,
        )

    if verdict is Verdict.STALE_STAMP:
        # The #240 false-drain shape: never counts, in any mode.
        if hard:
            return act(
                "replan",
                "STALE-STAMP — /replan to stamp the drift (does NOT count toward drained-twice)",
            )
        # soft / drive self-heal: reconcile stamps inline, then re-dispatch.
        return act(
            "dispatch",
            "STALE-STAMP — auto-clear via inline stamp-reconcile, re-dispatch (no drain count)",
            reconcile=True,
        )

    # Fall-through is BLOCKED: picks exist but a sibling claim / quota blocks
    # them. Never counts toward drained-twice. hard spends a /replan
    # iteration; soft/drive stop and surface rather than wait unattended.
    if hard:
        return act(
            "replan",
            "BLOCKED — /replan (does NOT count toward drained-twice)",
        )
    return act(
        "stop",
        "BLOCKED — picks blocked by sibling-claim/quota, stopping + surfacing",
        surface=True,
    )
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
# ---------------------------------------------------------------------------
|
|
618
|
+
# FQ-240 — /replan productivity verdict (the second half of the drained-twice fix).
|
|
619
|
+
#
|
|
620
|
+
# QWB6/QWB7 fixed the *input-gate* half of finding #240: a 0-pick /dispatch now
|
|
621
|
+
# carries a typed verdict, and the drained-twice counter increments on DRAIN
|
|
622
|
+
# only — so a STALE-STAMP gate can no longer arm a false stop. But finding #240
|
|
623
|
+
# named a SECOND, distinct shape that QWB7 did not close: the drained-twice rule
|
|
624
|
+
# treats *any* completed /replan as a valid refill attempt. A /replan can
|
|
625
|
+
# complete having done **0 gardening and 0 refill** — most cleanly via /replan's
|
|
626
|
+
# §1.5 no-op skip gate ("no new evidence since <ts>"), which prints one line and
|
|
627
|
+
# writes nothing. When the next /dispatch DRAINs, the loop calls it
|
|
628
|
+
# DRAINED_TWICE and stops — declaring the portfolio drained even though /replan
|
|
629
|
+
# never actually *tried* to refill. The honest stop is: drained-twice fires only
|
|
630
|
+
# when a **productive** /replan (one that refilled / gardened) was still followed
|
|
631
|
+
# by a DRAIN.
|
|
632
|
+
#
|
|
633
|
+
# `classify_replan_productivity()` is the typed verdict that distinguishes the
|
|
634
|
+
# two. It is PURE (no I/O) for the same reason `classify_packet` / `gate_policy`
|
|
635
|
+
# are: the loop's stop condition can be replay-tested without a live /replan run.
|
|
636
|
+
#
|
|
637
|
+
# ⚓ Typed verdict over binary gate ([[feedback_typed_verdict_over_binary_gate]]):
|
|
638
|
+
# the drained-twice trigger must read a typed /replan-productivity verdict, not
|
|
639
|
+
# "a /replan ran". A /replan that ran-but-did-nothing is not a refill attempt.
|
|
640
|
+
#
|
|
641
|
+
# ⚓ Data-driven decisions (evidence-over-narrative): the verdict is derived from
|
|
642
|
+
# the /replan iteration's own terminal `result` text — the structural no-op skip
|
|
643
|
+
# marker /replan emits, and its gardening-count summary — never from a prose
|
|
644
|
+
# guess about whether the sweep "felt productive".
|
|
645
|
+
# ---------------------------------------------------------------------------
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
class ReplanProductivity(str, enum.Enum):
    """Typed answer to "did this completed /replan iteration do any work?".

    Members are `str`-valued, so each one compares equal to (and prints as)
    its own token text and needs no lookup table to round-trip through text.

    Members:
        PRODUCTIVE: The sweep ran and acted on something (a promotion, a
            reconcile, a swept claim, a rerank, ...). A DRAIN that still
            follows such a /replan is an honest drained-twice signal.
        UNPRODUCTIVE: The /replan finished without refilling the backlog —
            either it hit the no-op skip gate, or it ran the sweep and every
            gardening counter came back 0. A DRAIN after such a /replan is
            NOT drained-twice.
    """

    PRODUCTIVE = "PRODUCTIVE"
    UNPRODUCTIVE = "UNPRODUCTIVE"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Print the bare token rather than "ReplanProductivity.<NAME>".
        return self.value
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
# The structural marker /replan's §1.5 no-op skip gate prints (replan/SKILL.md
|
|
674
|
+
# §1.5). When this appears in the iteration's terminal result text, the sweep
|
|
675
|
+
# did not run at all — 0 gardening, 0 refill, no replan-state.yaml write, no
|
|
676
|
+
# archive commit. The single most-decisive unproductive signal.
|
|
677
|
+
# Exact text /replan's §1.5 no-op skip gate prints. Its presence in an
# iteration's terminal result text is the most decisive unproductive signal.
REPLAN_NOOP_SKIP_MARKER = "/replan skipped: no new evidence"

# Count tokens from /replan's §7 summary (header line plus the "Gardening:"
# line). Every entry is a regex whose group 1 captures the "this many were
# acted on" number; a non-zero value in any one of them marks the sweep as
# productive. When every reported count is 0 the sweep ran but did no work.
_REPLAN_WORK_PATTERNS = (
    # --- §7 header -------------------------------------------------------
    # "<N>/<X> promoted": group 1 is the numerator N (actually promoted),
    # so "0/4 promoted" reads as 0. The "/<X>" part is optional.
    r"(\d+)\s*(?:/\s*\d+)?\s+promoted",
    r"(\d+)\s+auto-closed",  # queue rows auto-closed
    r"(\d+)\s+added",  # new queue rows added
    # --- gardening counters ---------------------------------------------
    r"(\d+)\s+anchors?\s+reconciled",
    r"(\d+)\s+percent-refreshes",
    r"(\d+)\s+stale\s+claims?\s+swept",
    r"(\d+)\s+gitignore\s+patterns?\s+added",
    r"(\d+)\s+tomb-stamps?\s+applied",
    r"(\d+)\s+stale\s+fanouts?\s+flagged",
    r"(\d+)\s+queue\s+rows?\s+reranked",
    r"(\d+)\s+next-hits\s+reranked",
    r"(\d+)\s+escalated",
)
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def classify_replan_productivity(replan_result_text: str) -> ReplanProductivity:
    """Classify one completed /replan iteration's productivity. PURE — no I/O.

    Args:
        replan_result_text: The /replan iteration's terminal `result` text,
            already loaded by the caller. A falsy value (``None`` / ``""``)
            is treated as empty text.

    Returns:
        ``ReplanProductivity.UNPRODUCTIVE`` or ``ReplanProductivity.PRODUCTIVE``.

    Decision order (most decisive first):

      1. The no-op skip marker is present → UNPRODUCTIVE. The sweep never ran.
      2. The sweep ran — inspect its work counters. If ANY occurrence of ANY
         recognised counter is non-zero → PRODUCTIVE. If at least one counter
         was recognised and every occurrence is 0 → UNPRODUCTIVE.
      3. No recognised counter at all (older format, truncated envelope,
         unexpected text) → PRODUCTIVE, the conservative default: it keeps
         the drained-twice rule firing as before, so an unreadable /replan
         can never make the loop run longer.
    """
    text = replan_result_text or ""

    # 1. The no-op skip gate — the cleanest unproductive signal.
    if REPLAN_NOOP_SKIP_MARKER in text:
        return ReplanProductivity.UNPRODUCTIVE

    # 2. The sweep ran — did any counter report work? Scan EVERY occurrence
    #    of each token, not just the first: a summary that repeats a token
    #    (e.g. "0 escalated" followed later by "2 escalated") must still
    #    count as work, per the "any non-zero counter" rule.
    saw_a_count = False
    for pattern in _REPLAN_WORK_PATTERNS:
        for match in re.finditer(pattern, text):
            saw_a_count = True
            if int(match.group(1)) > 0:
                return ReplanProductivity.PRODUCTIVE

    if saw_a_count:
        # Every recognised counter was 0 — the sweep ran but did nothing.
        return ReplanProductivity.UNPRODUCTIVE

    # 3. No recognised summary at all — conservative PRODUCTIVE.
    return ReplanProductivity.PRODUCTIVE
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
# ---------------------------------------------------------------------------
|
|
754
|
+
# /replan §1.5 no-op-skip decision — the PRODUCER-side twin of the consumer-side
|
|
755
|
+
# `classify_replan_productivity` above.
|
|
756
|
+
#
|
|
757
|
+
# `classify_replan_productivity` reads a *completed* /replan's terminal text
|
|
758
|
+
# (the consumer: the dispatch-loop driver deciding drained-twice). This function
|
|
759
|
+
# is the *producer* decision /replan's own §1.5 gate makes BEFORE it sweeps:
|
|
760
|
+
# given the two evidence counters its context bundler computes, should the sweep
|
|
761
|
+
# run at all, or skip cheap?
|
|
762
|
+
#
|
|
763
|
+
# replan_skip_decision(new_findings, substantive_ships) -> SKIP | PROCEED
|
|
764
|
+
#
|
|
765
|
+
# Before this lift the same boolean lived in THREE hand-synced copies that only
|
|
766
|
+
# agreed by accident: (a) the LLM following /replan SKILL.md §1.5 prose, (b) the
|
|
767
|
+
# kernel re-deriving "the sweep did nothing" downstream by string-matching
|
|
768
|
+
# REPLAN_NOOP_SKIP_MARKER, (c) `replan_context.py`'s BOOKKEEPING_PREFIXES list
|
|
769
|
+
# (comment: "keep in sync if /replan SKILL.md adds new classes"). Lifting the
|
|
770
|
+
# predicate here lets the producer print the SKIP marker FROM the kernel-owned
|
|
771
|
+
# constant and the consumer key on that SAME constant — they agree by
|
|
772
|
+
# construction, not by coincidence.
|
|
773
|
+
#
|
|
774
|
+
# ⚓ Typed verdict over binary gate ([[feedback_typed_verdict_over_binary_gate]]):
|
|
775
|
+
# the §1.5 gate is a fork on "is there new evidence"; emit a typed SKIP/PROCEED,
|
|
776
|
+
# not a bare bool, so the marker the producer prints and the verdict the consumer
|
|
777
|
+
# reads are the one shared vocabulary.
|
|
778
|
+
#
|
|
779
|
+
# PURE — no I/O. The two counters are reduced at /replan's I/O edge
|
|
780
|
+
# (`replan_context.py`, which already greps git + the findings window); this
|
|
781
|
+
# decision is replay-testable on frozen (new_findings, substantive_ships) inputs,
|
|
782
|
+
# exactly like `classify_replan_productivity` / `classify_packet`.
|
|
783
|
+
# ---------------------------------------------------------------------------
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
class ReplanSkip(str, enum.Enum):
    """Typed outcome of /replan's §1.5 gate: run the sweep, or skip it cheaply.

    Members are `str`-valued, so each one compares equal to (and prints as)
    its own token text and serialises without a lookup table.

    Members:
        SKIP: No new evidence since the last run (0 new findings AND 0
            substantive ships). The sweep cannot produce a non-trivial
            result, so /replan prints REPLAN_NOOP_SKIP_MARKER and exits
            without sweeping.
        PROCEED: At least one new finding OR one substantive ship since the
            last run — there is evidence to garden, so run the full sweep.
    """

    SKIP = "SKIP"
    PROCEED = "PROCEED"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Print the bare token rather than "ReplanSkip.<NAME>".
        return self.value
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
def replan_skip_decision(new_findings: int, substantive_ships: int) -> ReplanSkip:
    """Decide /replan's §1.5 no-op-skip gate from two evidence counters. PURE.

    Args:
        new_findings: Count of findings entries newer than the last /replan
            run. Coerced with ``int()``.
        substantive_ships: Count of non-bookkeeping commits since the last
            run. Coerced with ``int()``.

    Returns:
        ``ReplanSkip.PROCEED`` when either counter is positive, otherwise
        ``ReplanSkip.SKIP``. Zero and negative values both read as "no
        evidence", so two non-positive counters yield SKIP.

    Both counters are computed by the caller; this function makes no file,
    git or clock call, which keeps the decision replay-testable.
    """
    # Coerce both inputs up front so a bad value fails before any decision.
    findings = int(new_findings)
    ships = int(substantive_ships)

    has_new_evidence = findings > 0 or ships > 0
    return ReplanSkip.PROCEED if has_new_evidence else ReplanSkip.SKIP
|