dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/loop_decide.py
ADDED
|
@@ -0,0 +1,1746 @@
|
|
|
1
|
+
"""OC2 — the /dispatch-loop control-flow decision surface (the "one small thing").
|
|
2
|
+
|
|
3
|
+
`/dispatch-loop`'s SKILL.md is ~1400 lines and ~80 steps; the load-bearing
|
|
4
|
+
question — *under what exact conditions does this loop stop?* — was answerable
|
|
5
|
+
only by reading Step 3, Step 3.5, and Step 4 together (~210 lines of prose state
|
|
6
|
+
transitions). OC2 pulls that loop-level decision into one pure, testable
|
|
7
|
+
function so a reader can hold the control flow in their head and verify the stop
|
|
8
|
+
conditions without the whole file.
|
|
9
|
+
|
|
10
|
+
This module is the loop-level layer **above** `gate_classify.gate_policy`:
|
|
11
|
+
|
|
12
|
+
gate_classify.classify_packet → one packet → one typed Verdict
|
|
13
|
+
gate_classify.gate_policy → (Verdict, --gate mode) → one GateAction
|
|
14
|
+
loop_decide.decide → (LoopState, IterationOutcome) → one LoopDecision
|
|
15
|
+
^ THIS module — composes gate_policy, adds
|
|
16
|
+
the counters/streaks/cap the loop carries
|
|
17
|
+
|
|
18
|
+
`decide()` consumes one iteration's typed outcome plus the carried working-
|
|
19
|
+
context counters and returns exactly one decision: continue (with the next mode)
|
|
20
|
+
or stop (with a named reason). It is **pure** — no subprocess, no file or git
|
|
21
|
+
I/O — for the same reason `gate_policy` is: the loop's stop conditions can be
|
|
22
|
+
replay-tested in isolation, away from everything that makes a live /dispatch
|
|
23
|
+
iteration cost $10-40.
|
|
24
|
+
|
|
25
|
+
The five stop conditions, in one place (the whole point of this module):
|
|
26
|
+
|
|
27
|
+
1. ITERATION_CAP — iteration count reached `max_iterations` (default 10; raised from 5).
|
|
28
|
+
2. DRAINED_TWICE — a DRAIN verdict on the /dispatch immediately after a
|
|
29
|
+
**productive** /replan that itself followed a DRAIN.
|
|
30
|
+
/replan tried to refill and could not; the
|
|
31
|
+
lane/portfolio is genuinely exhausted. (hard gate only —
|
|
32
|
+
soft/drive stop on the first DRAIN.) FQ-240: an
|
|
33
|
+
*unproductive* /replan (0 gardening / 0 refill, e.g. the
|
|
34
|
+
§1.5 no-op skip) does NOT arm this trigger — it never
|
|
35
|
+
actually attempted a refill, so a DRAIN after it is not
|
|
36
|
+
"drained twice".
|
|
37
|
+
3. CONSECUTIVE_UNCLEAR — `consecutive_unclear` reached `max_unclear` (default
|
|
38
|
+
3). The iteration subprocess is failing systematically,
|
|
39
|
+
not draining a backlog.
|
|
40
|
+
4. RATE_LIMITED — a usage/rate-limit rejection. Every retry would fail the
|
|
41
|
+
same way until the window resets; do not burn launches.
|
|
42
|
+
5. LAUNCH_FAILED — the iteration subprocess never produced a valid init
|
|
43
|
+
envelope. A repeating launch failure would burn all
|
|
44
|
+
remaining slots.
|
|
45
|
+
|
|
46
|
+
Plus the soft/drive gate-policy stops (a true DRAIN or a BLOCKED under
|
|
47
|
+
soft/drive), which `decide()` reads straight off `gate_policy`'s GateAction
|
|
48
|
+
rather than re-encoding.
|
|
49
|
+
|
|
50
|
+
⚓ Mechanical contract over prose ([[feedback_mechanical_contract_over_prose]]):
|
|
51
|
+
the loop's stop/continue/replan decision is now a mechanism (this function),
|
|
52
|
+
not ~80 steps of prose a downstream model is trusted to apply consistently.
|
|
53
|
+
|
|
54
|
+
⚓ Typed verdict over binary gate ([[feedback_typed_verdict_over_binary_gate]]):
|
|
55
|
+
`decide()` composes the existing typed `gate_policy` rather than re-classifying;
|
|
56
|
+
the loop-level counters (drained-twice, unclear streak) are the part this layer
|
|
57
|
+
adds on top.
|
|
58
|
+
|
|
59
|
+
The wait-marker budget (`wait_marker_budget`) is the OC2 billing addendum: every
|
|
60
|
+
`claude -p` keep-alive marker is its own assistant turn that replays the full
|
|
61
|
+
context out of cache (~$0.03-0.10 each; session 4b4ff97c burned 252 markers /
|
|
62
|
+
~$7.80 in one run). The post-hoc `keepalive_poll` flag in
|
|
63
|
+
`scripts/headless_telemetry.py` *names* the spend at >=5 markers; this function
|
|
64
|
+
is the *runtime* lever — the loop can refuse a marker that won't earn its
|
|
65
|
+
cache-read cost before it is emitted.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
from __future__ import annotations
|
|
69
|
+
|
|
70
|
+
import enum
|
|
71
|
+
from dataclasses import dataclass, replace
|
|
72
|
+
from typing import Optional
|
|
73
|
+
|
|
74
|
+
from dos import breaker
|
|
75
|
+
from dos.gate_classify import (
|
|
76
|
+
GATE_HARD,
|
|
77
|
+
GATE_MODES,
|
|
78
|
+
GateAction,
|
|
79
|
+
ReplanProductivity,
|
|
80
|
+
Verdict,
|
|
81
|
+
gate_policy,
|
|
82
|
+
)
|
|
83
|
+
from dos.liveness import Liveness
|
|
84
|
+
from dos.completion import (
|
|
85
|
+
Completion,
|
|
86
|
+
CompletionVerdict,
|
|
87
|
+
ConvergenceVerdict,
|
|
88
|
+
)
|
|
89
|
+
from dos.pickable import Pickability
|
|
90
|
+
from dos.cooldown import Cooldown
|
|
91
|
+
from dos.tokens import blocked_reason_for_key
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
# Iteration outcome — the typed result of ONE /dispatch-loop iteration.
|
|
96
|
+
#
|
|
97
|
+
# This is exactly what Step 3's gate-detection grep already prints:
|
|
98
|
+
# `SHIPPED verdict=LIVE`, `GATE verdict=<DRAIN|STALE-STAMP|BLOCKED>`, `INTERIM`,
|
|
99
|
+
# `UNCLEAR`, `RATE_LIMITED`. `OutcomeKind` names those, and `IterationOutcome`
|
|
100
|
+
# carries the GATE verdict alongside the kind so `decide()` can route a GATE
|
|
101
|
+
# through `gate_policy` without re-parsing prose.
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class OutcomeKind(str, enum.Enum):
    """Closed set of exit classifications for ONE /dispatch-loop iteration.

    Every member names a token that Step 3's grep assigns to an iteration's
    exit. The enum mixes in `str`, so a member compares equal to — and can be
    constructed from — the raw stdout token with no lookup table in between
    (the same convention `gate_classify.Verdict` follows).
    """

    # /dispatch shipped picks (child2 ran).
    SHIPPED = "SHIPPED"
    # /dispatch reached Step 9 with child2 skipped.
    GATE = "GATE"
    # A /replan iteration completed (any outcome).
    REPLAN_DONE = "REPLAN_DONE"
    # Crashed/killed before Step 9, or INTERIM.
    UNCLEAR = "UNCLEAR"
    # Usage/rate-limit rejection — not a fault.
    RATE_LIMITED = "RATE_LIMITED"
    # Transient 529 server overload — retryable with backoff.
    OVERLOADED = "OVERLOADED"
    # No valid init envelope — the iteration never started.
    LAUNCH_FAILED = "LAUNCH_FAILED"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Render as the bare token (e.g. "GATE"), not "OutcomeKind.GATE".
        return self.value
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class DescendantProgress(str, enum.Enum):
    """FQ-509 — forward-progress verdict for a parked parent's own descendant.

    Subject. This is the loop-level companion to `liveness`, but it asks a
    different question. `liveness` / `Liveness` asks "is THIS run advancing?".
    This enum asks: "the headless `-p` orchestrator this iteration launched has
    PARKED — is a descendant it spawned still committing the registered picks?"

    Motivation. A parent `/dispatch` may end its turn early (the self-park
    invariant) while its descendants keep working in their own detached trees
    and land their commits seconds-to-minutes LATER. The driver's ancestry
    check, however, fires the instant the parent `-p` exits, sees 0 committed
    picks, and the iteration collapses to UNCLEAR. Today that UNCLEAR charges
    the `consecutive_unclear` breaker, so a parent that merely parked over a
    HEALTHY committing descendant is scored as a systematic FAULT: the loop
    self-stops with CONSECUTIVE_UNCLEAR after `max_unclear` such iterations,
    and it re-dispatches a fresh child each time instead of waiting for the
    live one.

    Naming. The word is **PROGRESS, not "liveness", on purpose.** A child-stall
    probe reports a child ALIVE whenever its log was touched inside the quiet
    window (~10 min), so a grandchild REAPED seconds ago at parent-exit still
    reads "alive" for ten minutes — a corpse. "Liveness" invites that
    conflation; this enum's contract is FORWARD DELTA only.

    Host contract. The host maps the child-stall facts onto this enum. It MUST
    collapse a log-touched-but-no-commit "alive" to `NONE_OBSERVED`, and may
    report `ADVANCING` ONLY on a real forward delta: HEAD advanced since the
    iteration's start SHA (`new_commit`), OR the ancestry-backed CHURNING
    verdict (all registered picks already ancestors of HEAD). That corpse-guard
    is what keeps the adopt-wait from waiting on a dead child.

    Members:
        ADVANCING: the descendant landed a forward delta (a new commit since
            start, or all picks already shipped/churning). A parked-but-
            PRODUCTIVE child; the UNCLEAR is not a fault, so adopt-wait.
        DEAD: the descendant is genuinely dead (no log growth AND no new
            commit). Exactly today's behavior — the honest UNCLEAR stop.
        NONE_OBSERVED: no forward-progress signal (no own descendant, no
            ancestry window, or a log-touched-but-not-committing "alive"
            corpse). Treated identically to `None`, the un-migrated default.
    """

    ADVANCING = "advancing"
    DEAD = "dead"
    NONE_OBSERVED = "none-observed"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Render as the bare value (e.g. "dead"), not "DescendantProgress.DEAD".
        return self.value
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass(frozen=True)
class IterationOutcome:
    """The typed result of one iteration, in the shape Step 3 produces it.

    Construction validates the field combinations (see ``Raises``), so an
    instance that exists is internally consistent for its `kind`.

    Attributes:
        kind: the Step-3 grep token.
        verdict: set ONLY for `kind=GATE`. It is the typed `Verdict` taken
            from the structural `verdict=<X>` token in /dispatch's Step 9
            archive subject (QWB8). For every other kind it is None — a
            SHIPPED iteration has no gate verdict, and an UNCLEAR one never
            reached the gate.
        replan_productivity: the FQ-240 signal, set ONLY for
            `kind=REPLAN_DONE`. It is the typed `ReplanProductivity` verdict
            from `gate_classify.classify_replan_productivity` over the /replan
            iteration's terminal result text, and it is what keeps the
            drained-twice rule honest: a DRAIN only counts as "drained twice"
            when the /replan between the two DRAINs was PRODUCTIVE (a genuine
            refill attempt). If a caller leaves it None on a REPLAN_DONE
            outcome (it did not classify), `decide()` defaults it to
            PRODUCTIVE — the conservative pre-FQ-240 behavior.
        packet_judge: the PJ2 stage-3 verdict from
            `scripts/packet_verdict.py classify` (`SHIPPED-CLEAN` /
            `SHIPPED-DIRTY` / `STALLED` / `BLOCKED-OUTCOME`), set ONLY for
            `kind=SHIPPED`.
        ship_count: the measured ship-count from that same classify evidence;
            required whenever `packet_judge` is set. Together the pair drives
            the SHIPPED-DIRTY-0 breaker: a SHIPPED iteration with
            packet_judge=`SHIPPED-DIRTY` AND ship_count==0 is the
            degraded-shipping signal the breaker counts, and any other SHIPPED
            outcome resets the streak.
        measurement_expected: the FQ-420 distrust flag, allowed ONLY for
            `kind=SHIPPED` (must be False otherwise). A SHIPPED token is a
            *self-report* ("/dispatch says it shipped picks"); the PJ2
            packet-judge is the kernel's independent measurement of that claim
            against the post-fanout commit set. When the driver INTENDED to
            measure but could not resolve the fanout run-ts (`packet_judge`
            came back None on a head==SHIPPED iteration), the measurement is
            MISSING rather than absent-by-design — and a missing measurement
            on a claimed ship is exactly the lie the kernel exists to refuse.
            `True` asserts "a measurement was owed here"; `decide()` then
            STALLs the loop with `UNMEASURED_SHIPPED` instead of taking the
            conservative healthy path, so a null-on-SHIPPED can never silently
            pass `continue`. The default `False` preserves un-migrated-caller
            behavior: a caller with no PJ2 stage at all still gets the
            pre-FQ-420 conservative healthy path when it omits `packet_judge`
            — the kernel only distrusts a SHIPPED whose owner SAID it would
            measure it. Supplying `packet_judge` whenever
            `measurement_expected=True` AND the iteration is healthy is the
            caller's contract; the kernel reads the *absence* of the judge
            under an expectation as the STALL signal.
        blocked_cause: the classified `dos.tokens.BlockedReason` key for a
            GATE BLOCKED — the canonical cause the driver mined from the
            Outcome cell (via `unstick_audit.classify_cause`). Must be None
            unless `kind=GATE` with `verdict=BLOCKED`. It lets `decide()` tell
            a *re-dispatch-curable* BLOCKED (a stale-stamp / refill drift a
            `/replan` clears — counts toward the FQ-452 spin-breaker and
            routes to /replan as before) from a *re-dispatch-INVARIANT*
            BLOCKED (an operator-decision, a false-ship oracle conflation — a
            reason whose `BLOCKED_REASONS[cause].self_heals_via` is NOT
            `/replan`). An invariant BLOCKED re-blocks identically on every
            re-dispatch, so spinning it through /replan up to the FQ-452 cap
            (3 iters) is pure churn; `decide()` honest-STOPs on the FIRST such
            BLOCKED instead (the post-run analogue of the pre-launch
            `PICK_HELD_INVARIANT` rung). None (an un-migrated caller, or a
            BLOCKED whose cause the driver could not classify) preserves
            today's behavior exactly — the FQ-452 spin-breaker still bounds
            the churn at 3.

    Raises:
        ValueError: on any inconsistent field combination — a GATE without a
            verdict; a verdict on a non-GATE; a replan_productivity on a
            non-REPLAN_DONE; packet_judge/ship_count on a non-SHIPPED;
            packet_judge and ship_count not set together; measurement_expected
            on a non-SHIPPED; or blocked_cause on anything other than a GATE
            whose verdict is BLOCKED.
    """

    kind: OutcomeKind
    verdict: Optional[Verdict] = None
    replan_productivity: Optional[ReplanProductivity] = None
    packet_judge: Optional[str] = None
    ship_count: Optional[int] = None
    measurement_expected: bool = False
    blocked_cause: Optional[str] = None

    def __post_init__(self) -> None:
        # Name the facts once; the checks below run in a fixed order so an
        # input with several violations always reports the same first error.
        is_gate = self.kind is OutcomeKind.GATE
        is_shipped = self.kind is OutcomeKind.SHIPPED
        is_replan = self.kind is OutcomeKind.REPLAN_DONE
        has_verdict = self.verdict is not None
        has_judge = self.packet_judge is not None
        has_count = self.ship_count is not None

        # verdict <=> GATE, in both directions.
        if is_gate and not has_verdict:
            raise ValueError(
                "a GATE outcome must carry a typed verdict "
                "(the verdict=<X> token from /dispatch's Step 9 archive subject)"
            )
        if has_verdict and not is_gate:
            raise ValueError(
                f"a {self.kind} outcome must not carry a verdict "
                f"(only a GATE iteration has a gate verdict)"
            )

        # replan_productivity is a REPLAN_DONE-only field.
        if self.replan_productivity is not None and not is_replan:
            raise ValueError(
                f"a {self.kind} outcome must not carry a replan_productivity "
                f"verdict (only a REPLAN_DONE iteration is a /replan)"
            )

        # The packet-judge pair is SHIPPED-only, and all-or-nothing.
        if (has_judge or has_count) and not is_shipped:
            raise ValueError(
                f"a {self.kind} outcome must not carry packet_judge/ship_count "
                f"(only a SHIPPED iteration has a packet-outcome verdict)"
            )
        if has_judge != has_count:
            raise ValueError(
                "packet_judge and ship_count must be set together "
                "(both required when present on a SHIPPED outcome)"
            )

        # Only a SHIPPED iteration can owe a measurement.
        if self.measurement_expected and not is_shipped:
            raise ValueError(
                f"a {self.kind} outcome must not set measurement_expected "
                f"(only a SHIPPED iteration owes a packet-judge measurement)"
            )

        # blocked_cause is meaningful only on a GATE whose verdict is BLOCKED.
        if self.blocked_cause is not None:
            if not (is_gate and self.verdict is Verdict.BLOCKED):
                raise ValueError(
                    f"a {self.kind} outcome (verdict={self.verdict}) must not carry "
                    f"blocked_cause (only a GATE BLOCKED iteration has a blocked cause)"
                )
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# ---------------------------------------------------------------------------
|
|
288
|
+
# Loop state — the carried working context.
|
|
289
|
+
#
|
|
290
|
+
# These are the per-loop counters Step 0/Step 3/Step 4 thread through working
|
|
291
|
+
# context. Holding them in one frozen dataclass — and transitioning them in one
|
|
292
|
+
# function — is what makes the loop's control flow inspectable: a reader checks
|
|
293
|
+
# the five stop conditions against these fields, not against scattered prose.
|
|
294
|
+
# ---------------------------------------------------------------------------
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
@dataclass(frozen=True)
|
|
298
|
+
class LoopState:
|
|
299
|
+
"""The /dispatch-loop working-context counters `decide()` transitions.
|
|
300
|
+
|
|
301
|
+
Fields (the loop-level carry-over — `SCOPE`/`LANE` are not here because they
|
|
302
|
+
are constant for the whole loop and never drive a stop decision):
|
|
303
|
+
|
|
304
|
+
iteration — 1-based count of the iteration that just ran.
|
|
305
|
+
last_replan_drained — True iff the immediately-prior iteration was a
|
|
306
|
+
**productive** /replan that followed a DRAIN. The
|
|
307
|
+
drained-twice trigger: a DRAIN on the /dispatch
|
|
308
|
+
*after* such a /replan means /replan tried to refill
|
|
309
|
+
and could not. FQ-240: an unproductive /replan (0
|
|
310
|
+
gardening / 0 refill) leaves this False — it was not
|
|
311
|
+
a refill attempt.
|
|
312
|
+
consecutive_unclear — back-to-back UNCLEAR streak; the circuit breaker.
|
|
313
|
+
consecutive_dirty_zero — back-to-back SHIPPED-DIRTY iters where the
|
|
314
|
+
measured ship-count was 0. The breaker that pairs
|
|
315
|
+
with the cap-10 raise: catches a /dispatch
|
|
316
|
+
regression that ships apparently-successful but
|
|
317
|
+
actually-empty iters indefinitely (input gate
|
|
318
|
+
says LIVE, packet-judge says SHIPPED-DIRTY, 0
|
|
319
|
+
commits land). Reset on any SHIPPED-CLEAN /
|
|
320
|
+
GATE / REPLAN_DONE outcome.
|
|
321
|
+
gate_mode — the --gate policy (hard|soft|drive), constant for
|
|
322
|
+
the loop; passed straight to `gate_policy`.
|
|
323
|
+
max_iterations — the hard cap (10; no override flag in the SKILL).
|
|
324
|
+
Raised from 5 in the 2026-05-22 cap raise — see
|
|
325
|
+
the SKILL's Contract section for the named
|
|
326
|
+
damage-bound rationale (the degraded-shipping
|
|
327
|
+
scenario the SHIPPED-DIRTY-0 breaker now kills).
|
|
328
|
+
max_unclear — the circuit-breaker threshold (3).
|
|
329
|
+
max_dirty_zero — the SHIPPED-DIRTY-0 breaker threshold (3).
|
|
330
|
+
Sized to detect a sustained degraded-shipping
|
|
331
|
+
regression while tolerating one-off DIRTY-0 iters
|
|
332
|
+
that may recover on the next /dispatch.
|
|
333
|
+
consecutive_stale_stamp — FQ-452: back-to-back GATE iterations whose
|
|
334
|
+
verdict was STALE-STAMP or BLOCKED and routed to
|
|
335
|
+
/replan WITHOUT the lane recovering. The
|
|
336
|
+
non-converging-spin breaker: a plan-meta `remaining:`
|
|
337
|
+
list naming already-shipped phases makes the picker
|
|
338
|
+
re-derive 0-live → GATE BLOCKED → /replan → (the
|
|
339
|
+
§1.5 skip-gate keys on new_findings/substantive_ships,
|
|
340
|
+
not stale-stamp drift, so /replan exits UNPRODUCTIVE
|
|
341
|
+
without reconciling the list) → BLOCKED again, forever.
|
|
342
|
+
The streak SURVIVES the intervening REPLAN_DONE on
|
|
343
|
+
purpose (the /replan is the *response* to the
|
|
344
|
+
stale-stamp; if it didn't fix it, the next /dispatch
|
|
345
|
+
stale-stamps again and the streak must continue) — it
|
|
346
|
+
resets only on a SHIPPED iteration or a DIFFERENT gate
|
|
347
|
+
verdict (LIVE/DRAIN/RACE). On the Kth consecutive
|
|
348
|
+
instance `decide()` STOPs with
|
|
349
|
+
STALE_STAMP_UNRECONCILED + surface so the loop refuses
|
|
350
|
+
to spin a (K+1)th /replan into the same unreconciled
|
|
351
|
+
list; the caller (driver) names the
|
|
352
|
+
`plan-meta-gardening:<series>` actuation the operator
|
|
353
|
+
/replan must run (the kernel is pure + series-blind).
|
|
354
|
+
max_stale_stamp — the FQ-452 spin-breaker threshold (3). One
|
|
355
|
+
stale-stamp gate routes to /replan normally (the
|
|
356
|
+
gardening sweep usually clears it); three in a row
|
|
357
|
+
without recovery means /replan is structurally NOT
|
|
358
|
+
reconciling the list and another iteration would
|
|
359
|
+
just re-spin.
|
|
360
|
+
liveness — the OPTIONAL in-flight `Liveness` verdict
|
|
361
|
+
(ADVANCING/SPINNING/STALLED) the caller gathered via
|
|
362
|
+
`dos liveness` for THIS run over the interval since it
|
|
363
|
+
started (docs/99 / docs/82 Phase-3a). It lives here,
|
|
364
|
+
not on `IterationOutcome`, because liveness is a
|
|
365
|
+
property of the run *across the interval* — carried
|
|
366
|
+
context, like `gate_mode` — not of one iteration's
|
|
367
|
+
exit token. `decide()` STOPs the loop with
|
|
368
|
+
`StopReason.SPINNING` when this is `SPINNING`: a
|
|
369
|
+
ground-truth anti-spin breaker that complements the
|
|
370
|
+
self-report breakers (`consecutive_dirty_zero` et al.)
|
|
371
|
+
by reading git/journal, not the caller's outcome
|
|
372
|
+
token. **Opt-in**: `None` (the default) means the
|
|
373
|
+
caller did not gather a verdict, and `decide()` is
|
|
374
|
+
then BYTE-IDENTICAL to the pre-3a behavior — the same
|
|
375
|
+
conservative-default discipline as
|
|
376
|
+
`IterationOutcome.measurement_expected=False`. ADVANCING
|
|
377
|
+
and STALLED never stop the loop here: ADVANCING is the
|
|
378
|
+
benign verdict, and STALLED ("dead/hung") is the
|
|
379
|
+
SUPERVISOR's reap input (`supervise.py`), not a live
|
|
380
|
+
loop's self-stop — a loop making decisions is by
|
|
381
|
+
construction alive, so STALLED reaching `decide()` is
|
|
382
|
+
degenerate and mapping it would duplicate the
|
|
383
|
+
supervisor's job and blur the alive-vs-dead line.
|
|
384
|
+
(`Liveness` is a SIBLING kernel import — `liveness` is
|
|
385
|
+
`loop_decide`'s sibling per CLAUDE.md; the litmus is
|
|
386
|
+
"no host, no I/O", not "no sibling import", and
|
|
387
|
+
`loop_decide` stays pure: it READS a verdict value,
|
|
388
|
+
never computes one.)
|
|
389
|
+
completion — the OPTIONAL in-flight `CompletionVerdict` (docs/117
|
|
390
|
+
§5.4 / Phase 3) the caller gathered for THIS run after
|
|
391
|
+
the iteration: it ran `completion.classify` over the
|
|
392
|
+
run's `LedgerState` + freshly-read `AncestryFacts` (the
|
|
393
|
+
same git read `resume`'s evidence-gather does) and
|
|
394
|
+
handed the result in. Like `liveness` it is in-flight
|
|
395
|
+
EVIDENCE, not carried counter state — it lives here
|
|
396
|
+
because `decide()` is pure and may not read git itself.
|
|
397
|
+
`decide()` STOPs with `StopReason.COMPLETE` when this is
|
|
398
|
+
`COMPLETE` (every declared unit verified on the
|
|
399
|
+
non-forgeable rung → the work is *finished*, the first
|
|
400
|
+
non-give-up terminal) and with `StopReason.THRASHING`
|
|
401
|
+
when it is `UNDERDECLARED` (done-but-under-declared; a
|
|
402
|
+
human must reconcile → surface). INCOMPLETE and
|
|
403
|
+
INDETERMINATE never stop here: INCOMPLETE means the loop
|
|
404
|
+
should *continue* re-dispatching the residual (the
|
|
405
|
+
caller owns that), and INDETERMINATE means "can't tell"
|
|
406
|
+
— we never *assert* done on an unsound fold, so it falls
|
|
407
|
+
through to the existing logic. **Opt-in**: `None` (the
|
|
408
|
+
default) means the caller gathered no verdict and
|
|
409
|
+
`decide()` is BYTE-IDENTICAL to the pre-Phase-3 loop —
|
|
410
|
+
the same conservative default as `liveness`.
|
|
411
|
+
pickability — the OPTIONAL pre-dispatch `Pickability` verdict
|
|
412
|
+
(docs/168 §5) the caller gathered for the lane it would
|
|
413
|
+
dispatch NEXT: it ran `pickable.classify` over the
|
|
414
|
+
host-gathered unit state and handed the result in. Like
|
|
415
|
+
`liveness`/`completion` it is in-flight EVIDENCE, not
|
|
416
|
+
carried counter state — `decide()` is pure and may not
|
|
417
|
+
read the plan class / soak index / live claims itself.
|
|
418
|
+
`decide()` STOPs with `StopReason.PICK_HELD_INVARIANT`
|
|
419
|
+
when this verdict `is_redispatch_invariant` (the lane is
|
|
420
|
+
held ONLY by a reason a re-dispatch cannot change —
|
|
421
|
+
DRAFT_CLASS / OPERATOR_GATED / SOAK_OPEN /
|
|
422
|
+
DEPENDENCY_UNMET): re-dispatching it would re-block
|
|
423
|
+
identically, so the loop honest-STOPs and surfaces the
|
|
424
|
+
typed hold for routing (DRAFT→/promote, OPERATOR→
|
|
425
|
+
escalate a decision, SOAK→wait) instead of spinning. This
|
|
426
|
+
converts the per-run human "honest STOP" override
|
|
427
|
+
(documented across a dozen drain-trap run READMEs — ASI
|
|
428
|
+
#475, RTN soak, FMP #493) into a kernel rule. An
|
|
429
|
+
OFFERABLE verdict, or a HELD verdict whose reason is
|
|
430
|
+
re-dispatch-CURABLE (IN_FLIGHT / SOFT_CLAIMED_ELSEWHERE /
|
|
431
|
+
STALE_CLAIM / COOLDOWN / SHIPPED / UNPARSEABLE), never
|
|
432
|
+
stops here — those CAN clear, so the loop keeps its
|
|
433
|
+
existing behavior. **Opt-in**: `None` (the default)
|
|
434
|
+
skips the rung entirely → BYTE-IDENTICAL to the
|
|
435
|
+
pre-docs/168 loop, the same conservative default as
|
|
436
|
+
`liveness` / `completion`.
|
|
437
|
+
cooldown — the OPTIONAL anti-churn `Cooldown` verdict (docs/207 §3)
|
|
438
|
+
the caller gathered for the unit it would dispatch NEXT,
|
|
439
|
+
AFTER it already skipped every fresher candidate: it ran
|
|
440
|
+
`cooldown.cooldown_verdict` over the unit's `OP_ATTEMPT`
|
|
441
|
+
history and handed the result in. `decide()` STOPs with
|
|
442
|
+
`StopReason.PICK_COOLDOWN` when this verdict is
|
|
443
|
+
`RECENTLY_ATTEMPTED` — the unit was attempted-and-didn't-
|
|
444
|
+
move inside the window AND (by the host's pick-selection
|
|
445
|
+
contract) nothing fresher is offerable, so re-dispatching
|
|
446
|
+
it would re-storm (the ~5%-shipping re-pick storm the bare
|
|
447
|
+
loop hit). This is the cross-run memory `liveness` (a
|
|
448
|
+
single-run verdict) cannot provide. A `CLEAR` verdict
|
|
449
|
+
never stops — the window elapsed or nothing held it. Like
|
|
450
|
+
`pickability` it is in-flight EVIDENCE, not carried state
|
|
451
|
+
— `decide()` is pure and may not read the journal. The
|
|
452
|
+
host's contract: only hand a `RECENTLY_ATTEMPTED` here
|
|
453
|
+
once it has ALREADY skipped the offerable-and-not-cooled
|
|
454
|
+
units (the skip-to-next is pick-selection's job; the STOP
|
|
455
|
+
is the all-cooled terminal). **Opt-in**: `None` skips it
|
|
456
|
+
→ byte-identical to the pre-docs/207 loop.
|
|
457
|
+
convergence — the OPTIONAL in-flight `ConvergenceVerdict` (docs/117
|
|
458
|
+
§5.2 / Phase 3) over the residual-size history: the
|
|
459
|
+
DYNAMIC companion to `completion`. `COMPLETE` is a
|
|
460
|
+
static fixpoint (residual empty *now*); this catches the
|
|
461
|
+
*won't-ever-get-there* loop (the residual churns but
|
|
462
|
+
never empties — the reviewer-finds-new-findings case).
|
|
463
|
+
`decide()` STOPs with `StopReason.THRASHING` (surface)
|
|
464
|
+
when this verdict `should_surface` (THRASHING or
|
|
465
|
+
STARVED). CONVERGING / INSUFFICIENT never stop — the
|
|
466
|
+
loop keeps going (no fixpoint reached *yet* is not a
|
|
467
|
+
stop). Checked only when `completion` did not already
|
|
468
|
+
stop the loop COMPLETE — a converged run is done, not
|
|
469
|
+
thrashing. **Opt-in**: `None` skips the rung entirely.
|
|
470
|
+
descendant_progress — FQ-509: the OPTIONAL `DescendantProgress` verdict for
|
|
471
|
+
THIS iteration's own parked descendant (the headless
|
|
472
|
+
`-p` child the iteration launched that PARKED while a
|
|
473
|
+
grandchild it spawned is still committing). In-flight
|
|
474
|
+
EVIDENCE the caller re-gathers each iteration from the
|
|
475
|
+
child-stall probe (NOT carried state — cleared up-front
|
|
476
|
+
like `liveness`). `decide()` reads it ONLY inside the
|
|
477
|
+
UNCLEAR rung: when it is `ADVANCING` (the descendant
|
|
478
|
+
landed a forward delta — new commit since start, or all
|
|
479
|
+
picks already ancestors), the UNCLEAR is a parked-but-
|
|
480
|
+
PRODUCTIVE child, NOT a /dispatch fault — so `decide()`
|
|
481
|
+
CONTINUEs (re-dispatch / adopt-wait for the live child
|
|
482
|
+
to land its picks) WITHOUT charging the
|
|
483
|
+
`consecutive_unclear` breaker, bounded by
|
|
484
|
+
`consecutive_adopt_wait`. `DEAD` / `NONE_OBSERVED` /
|
|
485
|
+
`None` all take today's exact UNCLEAR path — the host's
|
|
486
|
+
corpse-guard (a log-touched-but-not-committing "alive"
|
|
487
|
+
must map to `NONE_OBSERVED`, never `ADVANCING`) is what
|
|
488
|
+
keeps a reaped descendant from ever adopt-waiting.
|
|
489
|
+
**Opt-in**: `None` (the default) skips the pre-check →
|
|
490
|
+
BYTE-IDENTICAL to the pre-FQ-509 loop, the same
|
|
491
|
+
conservative default as `liveness`/`pickability`.
|
|
492
|
+
consecutive_adopt_wait — the carried bound for the `descendant_progress`
|
|
493
|
+
adopt-wait. Back-to-back UNCLEAR iters where the
|
|
494
|
+
descendant read `ADVANCING` but STILL had not landed
|
|
495
|
+
the registered picks. Bumped on each ADVANCING adopt-
|
|
496
|
+
wait continue; reset to 0 on any non-ADVANCING UNCLEAR
|
|
497
|
+
iter (so a flapping child cannot accrue it) AND
|
|
498
|
+
implicitly on any non-UNCLEAR outcome. On the Kth
|
|
499
|
+
(`max_adopt_wait`) the adopt-wait rung FALLS THROUGH to
|
|
500
|
+
today's UNCLEAR breaker path (which itself caps at
|
|
501
|
+
`max_unclear`) — a clock-free bound that degrades to
|
|
502
|
+
current behavior rather than a new terminal. UNLIKE
|
|
503
|
+
`consecutive_unclear`, this IS reset on the non-
|
|
504
|
+
advancing branch, but `consecutive_unclear` is NOT
|
|
505
|
+
reset there — so a flapping ALIVE/quiet child still
|
|
506
|
+
reaches `max_unclear` and stops. CARRIED state (it must
|
|
507
|
+
round-trip through the driver's next_state).
|
|
508
|
+
max_adopt_wait — the adopt-wait bound (default 2). Two consecutive
|
|
509
|
+
ADVANCING-but-uncommitted iters is enough evidence the
|
|
510
|
+
descendant is not actually about to land its picks
|
|
511
|
+
(or its "advance" is unrelated drift); fall through to
|
|
512
|
+
the UNCLEAR breaker rather than wait a 3rd.
|
|
513
|
+
consecutive_unproductive_replan_drains — FQ-509-sibling (QWD benign-drain).
|
|
514
|
+
Back-to-back UNPRODUCTIVE /replans, each the response
|
|
515
|
+
to a DRAIN, on the same lane. The drained-twice rung
|
|
516
|
+
(`last_replan_drained`) only arms off a PRODUCTIVE
|
|
517
|
+
/replan (FQ-240) — but a BENIGN genuinely-drained lane
|
|
518
|
+
(every phase already shipped/in-flight, nothing left to
|
|
519
|
+
refill) returns UNPRODUCTIVE from every /replan, so
|
|
520
|
+
drained-twice never arms and the loop spins
|
|
521
|
+
DRAIN→/replan→DRAIN→/replan to the iteration cap. This
|
|
522
|
+
counter catches that: incremented in 5b when a
|
|
523
|
+
REPLAN_DONE is UNPRODUCTIVE *and* the immediately-prior
|
|
524
|
+
gate was a DRAIN (`last_gate_was_drain`); reset to 0 on
|
|
525
|
+
any SHIPPED, any PRODUCTIVE /replan, or any non-DRAIN
|
|
526
|
+
gate verdict (the lane moved off the benign-drain
|
|
527
|
+
pattern). On the Kth, the DRAIN that would route the
|
|
528
|
+
(K+1)th /replan instead STOPs with
|
|
529
|
+
`StopReason.BENIGN_DRAIN` — the kernel reaches the
|
|
530
|
+
honest-STOP from typed verdicts the operator otherwise
|
|
531
|
+
has to eyeball (the QWD run-README override). Default 0
|
|
532
|
+
keeps the loop BYTE-IDENTICAL for any lane that ever
|
|
533
|
+
ships or has a productive /replan.
|
|
534
|
+
max_unproductive_replan_drains — the benign-drain breaker threshold (2). Two
|
|
535
|
+
UNPRODUCTIVE /replans around DRAINs without recovery
|
|
536
|
+
means /replan is structurally unable to refill the lane
|
|
537
|
+
(it is benignly drained) and a third would just re-spin.
|
|
538
|
+
Sized to the QWD memory's measured "2 consecutive
|
|
539
|
+
UNPRODUCTIVE replans around DRAINs → honest-STOP".
|
|
540
|
+
consecutive_unproductive_replan — #506 / docs/258: back-to-back UNPRODUCTIVE
|
|
541
|
+
/replans REGARDLESS of the prior gate. The BROADER
|
|
542
|
+
sibling of `consecutive_unproductive_replan_drains`:
|
|
543
|
+
that one counts only unproductive replans BRACKETED by a
|
|
544
|
+
DRAIN (a benignly-drained lane); this one counts EVERY
|
|
545
|
+
unproductive replan, because the measured pathology (#506:
|
|
546
|
+
/replan = 45% of loop wall-clock, 43% of replan iters
|
|
547
|
+
refill nothing) includes a 53-turn replan that produced 0
|
|
548
|
+
refill even though commits had landed — so the gate was
|
|
549
|
+
NOT a DRAIN and the benign-drain bracket deliberately
|
|
550
|
+
skips it (pinned by
|
|
551
|
+
`test_benign_drain_unproductive_replan_without_prior_drain_no_count`).
|
|
552
|
+
Bumped in 5b on an UNPRODUCTIVE REPLAN_DONE (via the
|
|
553
|
+
`dos.breaker` primitive — the FIRST loop_decide counter
|
|
554
|
+
so expressed); reset to 0 on any PRODUCTIVE replan, any
|
|
555
|
+
SHIPPED, or a non-stale gate (the lane moved off the
|
|
556
|
+
stall). On the Kth, `decide()` STOPs with
|
|
557
|
+
`StopReason.REPLAN_STALLED` + surface. **Opt-in**: only an
|
|
558
|
+
UNPRODUCTIVE `REPLAN_DONE` (`outcome.replan_productivity is
|
|
559
|
+
UNPRODUCTIVE`) ever bumps it, and the FQ-240 default treats
|
|
560
|
+
an unclassified replan as PRODUCTIVE — so a caller that
|
|
561
|
+
never classifies replan productivity never feeds this and
|
|
562
|
+
is BYTE-IDENTICAL to the pre-#506 loop, the same
|
|
563
|
+
conservative default as the benign-drain rung.
|
|
564
|
+
max_unproductive_replan — the REPLAN_STALLED threshold (2). #506: "trip on the
|
|
565
|
+
2nd unproductive `REPLAN_DONE` — a sweep that refilled
|
|
566
|
+
nothing twice won't on a 3rd identical pass." Two
|
|
567
|
+
expensive (16-22min / ~$5) 0-refill replans in a row is
|
|
568
|
+
enough evidence /replan is structurally unproductive on
|
|
569
|
+
this lane right now.
|
|
570
|
+
last_gate_was_drain — internal one-iteration carry: True iff the gate of the
|
|
571
|
+
immediately-prior iteration was a DRAIN that routed to
|
|
572
|
+
/replan. Read+reset in 5b to know a following
|
|
573
|
+
REPLAN_DONE is the response to a DRAIN (the bracket that
|
|
574
|
+
makes an UNPRODUCTIVE /replan count toward the
|
|
575
|
+
benign-drain breaker). Set in 5c on a DRAIN that routes
|
|
576
|
+
to /replan; cleared on any non-DRAIN outcome. Not a
|
|
577
|
+
stop signal on its own.
|
|
578
|
+
"""
|
|
579
|
+
|
|
580
|
+
iteration: int = 1
|
|
581
|
+
last_replan_drained: bool = False
|
|
582
|
+
consecutive_unclear: int = 0
|
|
583
|
+
consecutive_dirty_zero: int = 0
|
|
584
|
+
consecutive_overloaded: int = 0
|
|
585
|
+
consecutive_stale_stamp: int = 0
|
|
586
|
+
gate_mode: str = GATE_HARD
|
|
587
|
+
max_iterations: int = 10
|
|
588
|
+
max_unclear: int = 3
|
|
589
|
+
max_dirty_zero: int = 3
|
|
590
|
+
max_overloaded: int = 3
|
|
591
|
+
max_stale_stamp: int = 3
|
|
592
|
+
consecutive_unproductive_replan_drains: int = 0
|
|
593
|
+
max_unproductive_replan_drains: int = 2
|
|
594
|
+
consecutive_unproductive_replan: int = 0
|
|
595
|
+
max_unproductive_replan: int = 2
|
|
596
|
+
last_gate_was_drain: bool = False
|
|
597
|
+
liveness: Optional[Liveness] = None
|
|
598
|
+
completion: Optional[CompletionVerdict] = None
|
|
599
|
+
convergence: Optional[ConvergenceVerdict] = None
|
|
600
|
+
pickability: Optional[Pickability] = None
|
|
601
|
+
cooldown: Optional[Cooldown] = None
|
|
602
|
+
descendant_progress: Optional[DescendantProgress] = None
|
|
603
|
+
consecutive_adopt_wait: int = 0
|
|
604
|
+
max_adopt_wait: int = 2
|
|
605
|
+
|
|
606
|
+
def __post_init__(self) -> None:
    """Validate `gate_mode` once the dataclass fields have been assigned.

    Raises:
        ValueError: if `self.gate_mode` is not a member of `GATE_MODES`.
    """
    # Guard clause: a recognised mode needs no further work.
    if self.gate_mode in GATE_MODES:
        return
    raise ValueError(
        f"unknown gate_mode {self.gate_mode!r} — expected one of {GATE_MODES}"
    )
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
class StopReason(str, enum.Enum):
    """The closed set of named conditions under which the loop stops.

    This enum is the answer to "under what exact conditions does this loop
    stop?" — every terminal path produces one of these members. The class
    mixes in `str`, so a member compares equal to its string token, and
    `str(member)` yields that token (see `__str__`).
    """

    # Reached `max_iterations`.
    ITERATION_CAP = "iteration-cap"

    # A DRAIN after a PRODUCTIVE /replan that still couldn't refill.
    DRAINED_TWICE = "drained-twice"

    # soft/drive: a single true DRAIN.
    DRAIN = "drain"

    # soft/drive: picks blocked (was WEDGE).
    BLOCKED = "blocked"

    # Circuit breaker on consecutive UNCLEAR outcomes.
    CONSECUTIVE_UNCLEAR = "consecutive-unclear"

    # K back-to-back SHIPPED-DIRTY+0 iterations.
    CONSECUTIVE_DIRTY_ZERO = "consecutive-dirty-zero"

    # K back-to-back 529s — an outage, not a transient.
    CONSECUTIVE_OVERLOADED = "consecutive-overloaded"

    # Usage/rate-limit window exhausted.
    RATE_LIMITED = "rate-limited"

    # The subprocess never started.
    LAUNCH_FAILED = "launch-failed"

    # FQ-420: SHIPPED was claimed, but the PJ2 measurement that is owed is
    # missing.
    UNMEASURED_SHIPPED = "unmeasured-shipped"

    # docs/99: liveness() says SPINNING — alive, but 0 forward delta (the
    # ground-truth anti-spin stop).
    SPINNING = "spinning"

    # FQ-452: K consecutive STALE-STAMP/BLOCKED gates that /replan never
    # reconciled — refuse to spin another.
    STALE_STAMP_UNRECONCILED = "stale-stamp-unreconciled"

    # FQ-510: a GATE BLOCKED whose classified cause is re-dispatch-INVARIANT
    # (operator_decision / a false-ship oracle conflation — any reason whose
    # BLOCKED_REASONS[cause].self_heals_via is NOT /replan). A /replan provably
    # cannot clear it, so it re-blocks identically every iteration; honest-STOP
    # on the FIRST such BLOCKED (the post-run analogue of PICK_HELD_INVARIANT)
    # rather than spinning /replan to the FQ-452 cap (~$15-25/1.5h of churn).
    # The operator-decision sub-case is also auto-filed once by the driver's
    # emit-decision-needed actuation.
    BLOCKED_REDISPATCH_INVARIANT = "blocked-redispatch-invariant"

    # docs/117: completion.classify() says COMPLETE — every declared unit is
    # verified. The FIRST stop reason that means "finished," not "gave up"
    # (the anti-ITERATION_CAP).
    COMPLETE = "complete"

    # docs/117: completion.convergence() says THRASHING/STARVED — the residual
    # won't reach a fixpoint; surface, don't burn the cap silently.
    THRASHING = "thrashing"

    # docs/168 §5: the next lane is HELD only by a re-dispatch-invariant reason
    # (DRAFT_CLASS/OPERATOR_GATED/SOAK_OPEN/DEPENDENCY_UNMET) — re-dispatch
    # re-blocks identically; honest-STOP + surface the typed hold for routing.
    PICK_HELD_INVARIANT = "pick-held-invariant"

    # docs/207 §3: the next unit was attempted-and-didn't-move inside the
    # cooldown window AND nothing fresher is offerable — re-dispatching it
    # would re-storm; honest-STOP + surface the cooled unit (the anti-churn
    # breaker; the ~5%-shipping re-pick storm).
    PICK_COOLDOWN = "pick-cooldown"

    # FQ-509-sibling (QWD): K consecutive UNPRODUCTIVE /replans, each bracketed
    # by a DRAIN, on the same lane — the lane is genuinely drained but BENIGN
    # (every phase already shipped/in-flight, nothing to refill). The
    # drained-twice rung never arms (an UNPRODUCTIVE /replan is not a refill
    # attempt, FQ-240), so without this rung the loop spins
    # DRAIN→/replan→DRAIN→/replan to the iteration cap (~$11+/55min for 0
    # refill). Stop instead + surface (re-scope or wait for the in-flight
    # phases to settle). The benign-drain analogue of DRAINED_TWICE: that one
    # is "a PRODUCTIVE /replan still couldn't refill"; this is "the /replans
    # are all UNPRODUCTIVE because there is nothing left to refill."
    BENIGN_DRAIN = "benign-drain"

    # #506 / docs/258: K consecutive UNPRODUCTIVE /replans regardless of WHY
    # (the broader sibling of BENIGN_DRAIN). MEASURED: /replan is 45% of all
    # loop wall-clock and 43% of replan iters STALL (0 refill) — a 53-turn
    # replan that refilled nothing even though commits landed (so the gate was
    # NOT a DRAIN, which is exactly the case BENIGN_DRAIN's
    # `last_gate_was_drain` bracket deliberately ignores). BENIGN_DRAIN means
    # "lane empty"; REPLAN_STALLED means "/replan keeps doing costly nothing."
    # Trips on the Kth unproductive REPLAN_DONE ITSELF (default K=2). The FIRST
    # loop_decide rung expressed through the `dos.breaker` primitive rather
    # than a hand-written inline counter.
    REPLAN_STALLED = "replan-stalled"

    # PERMANENT legacy alias. It repeats BLOCKED's value, so the enum machinery
    # makes it the same object as BLOCKED and any un-migrated
    # `is StopReason.WEDGE` check keeps working (mirrors GateVerdict.WEDGE).
    WEDGE = "blocked"

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Render the member as its bare string token."""
        return self.value
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
@dataclass(frozen=True)
class LoopDecision:
    """The single decision `decide()` returns for one iteration.

    `action` — `"continue"`, `"retry-same-iter"`, or `"stop"`. The loop
    branches on this; everything below is detail for the path it picks.

    Continue fields (action == "continue"):
      next_mode  — `"dispatch"` | `"replan"`: the next iteration's mode.
      reconcile  — True iff the loop must run an inline stamp-reconcile pass
                   before the next iteration (a soft/drive STALE-STAMP). Read
                   straight off `gate_policy`'s GateAction.

    Retry fields (action == "retry-same-iter"):
      backoff_seconds — seconds the caller should sleep before relaunching
                   the SAME iteration number (a transient 529 OVERLOADED).

    Stop fields (action == "stop"):
      stop_reason — the named StopReason.
      surface    — True iff the stop needs operator attention (a BLOCKED, a
                   soft/drive DRAIN). Read off `gate_policy` for gate stops.

    Always set:
      next_state — the transitioned `LoopState`. It is what a "continue" or
                   "retry-same-iter" carries into the next launch (the retry
                   path stores the bumped `consecutive_overloaded` streak in
                   it), and it is returned on "stop" as well so the caller
                   never re-derives counters.
      reason     — a one-line operator-facing summary for the tally row.
    """

    action: str  # "continue" | "retry-same-iter" | "stop"
    next_state: LoopState
    reason: str
    next_mode: str = ""
    reconcile: bool = False
    stop_reason: Optional[StopReason] = None
    surface: bool = False
    # Set on action == "retry-same-iter" (transient 529 OVERLOADED): seconds the
    # caller should sleep before relaunching the SAME iteration number. The
    # values come from the 60s → 270s → 1200s ladder: the first step is well
    # inside the prompt-cache TTL, the later steps are past it. Once
    # `consecutive_overloaded` reaches `max_overloaded` the loop STOPs instead
    # of retrying (an outage, not transient), so no backoff is issued for that
    # hit. Stays 0 on every other action.
    backoff_seconds: int = 0
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
# The three values `LoopDecision.action` can take.
_CONTINUE = "continue"
_STOP = "stop"
_RETRY_SAME_ITER = "retry-same-iter"

# Backoff schedule for OVERLOADED retries — 60s, 270s, 1200s, indexed by the
# current OVERLOADED streak (streak 1 → 60s, streak 2 → 270s, streak 3 and
# beyond → 1200s). The first step stays inside the prompt-cache TTL (cheap);
# the later steps pay the cache miss but are still cheaper than burning a real
# /dispatch iter under server overload.
#
# `decide()` checks the breaker BEFORE it picks a backoff: once
# `consecutive_overloaded` reaches `max_overloaded` the loop STOPs with
# CONSECUTIVE_OVERLOADED rather than sleeping — that's not a transient capacity
# blip, it is a sustained outage and an operator should look. With the default
# `max_overloaded` of 3 (and the `>=` trip that `_breaker_fail` documents) the
# third consecutive hit is the one that stops, so only the 60s and 270s steps
# are actually slept; the 1200s step is reached only when `max_overloaded` is
# raised above 3.
_OVERLOADED_BACKOFF = (60, 270, 1200)
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
# ---------------------------------------------------------------------------
|
|
702
|
+
# The breaker bridge (docs/258 — the loop_decide → breaker migration).
|
|
703
|
+
#
|
|
704
|
+
# Every consecutive-streak rung below — UNCLEAR / OVERLOADED / DIRTY-ZERO /
|
|
705
|
+
# STALE-STAMP / benign-drain / the new REPLAN_STALLED — is the SAME mechanism:
|
|
706
|
+
# bump a count, compare it to a max, trip if reached, reset on a clean outcome.
|
|
707
|
+
# `breaker.py` IS that mechanism, lifted into one pure leaf (docs/223). These two
|
|
708
|
+
# helpers are the only bridge `decide()` needs: they turn one of `LoopState`'s
|
|
709
|
+
# int counter fields + its max field into a `breaker.BreakerCounts` /
|
|
710
|
+
# `BreakerPolicy`, run the primitive's fold, and hand back the new count + the
|
|
711
|
+
# trip bit. The int fields STAY the public surface (callers construct/read them);
|
|
712
|
+
# the bump/compare ARITHMETIC is what moves into `breaker`. Mechanism lifted,
|
|
713
|
+
# policy (which field, which threshold, which outcome resets it) stays at the call
|
|
714
|
+
# site — exactly the split `breaker.py`'s docstring argues for.
|
|
715
|
+
#
|
|
716
|
+
# Each loop_decide rung is consecutive-only (no cumulative/flapping rung), so the
|
|
717
|
+
# policy is always `max_consecutive=<max>, max_total=0`.
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _breaker_fail(consecutive: int, max_consecutive: int) -> tuple[int, bool]:
    """Register one more failure on a consecutive-only streak.

    Args:
        consecutive: the streak length before this failure.
        max_consecutive: the threshold at which the streak trips.

    Returns:
        `(new_count, is_open)` — the streak after this failure and whether the
        breaker is now open.

    For a positive threshold the work is delegated to `breaker.record_failure`
    with `BreakerCounts(consecutive=…)` and
    `BreakerPolicy(max_consecutive=…, max_total=0)`. This is intended to match
    the inline `streak = consecutive + 1; is_open = streak >= max` it replaced,
    on the understanding that `record_failure` bumps first and its classifier
    trips on `>=`.

    A threshold of zero or less is handled locally instead. Inline, `max=0`
    meant "trip on the first failure", but `BreakerPolicy` is documented to
    refuse a policy whose limits are both zero, so that boundary cannot go
    through the primitive. The local arithmetic below keeps it: for any
    non-negative streak, `new >= max_consecutive` holds immediately. Real
    thresholds are expected to be ≥ 2, so the delegated path is the live one
    and this branch only preserves the degenerate case.
    """
    if max_consecutive <= 0:
        # Degenerate threshold: reproduce the old inline bump-and-compare.
        bumped = consecutive + 1
        tripped = bumped >= max_consecutive
        return bumped, tripped

    counts = breaker.BreakerCounts(consecutive=consecutive)
    policy = breaker.BreakerPolicy(max_consecutive=max_consecutive, max_total=0)
    transition = breaker.record_failure(counts, policy)
    return transition.counts.consecutive, transition.verdict.is_open
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
def _replan_stall_policy(state: LoopState) -> breaker.BreakerPolicy:
    """Build the `BreakerPolicy` used by the #506 REPLAN_STALLED rung (docs/258).

    Args:
        state: the loop state whose `max_unproductive_replan` supplies the
            threshold.

    Returns:
        A consecutive-only policy (`max_total=0`) whose `max_consecutive` is
        `state.max_unproductive_replan`, raised to 1 when it is lower.

    The floor of 1 exists so that a caller passing the degenerate
    `max_unproductive_replan == 0` can still obtain a policy for
    `breaker.record_success`. Per the design notes for this rung, the success
    path only reads the healed count (always 0), and the FAILURE path goes
    through `_breaker_fail`, which keeps the trip-on-first degenerate itself —
    so the clamp is meant to affect only the success-side classification.
    """
    threshold = state.max_unproductive_replan
    if threshold < 1:
        # Never hand the primitive a zero/negative consecutive limit.
        threshold = 1
    return breaker.BreakerPolicy(max_consecutive=threshold, max_total=0)
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def decide(state: LoopState, outcome: IterationOutcome) -> LoopDecision:
|
|
763
|
+
"""Decide continue/stop for one /dispatch-loop iteration. PURE — no I/O.
|
|
764
|
+
|
|
765
|
+
`state` is the working-context carry-over (the iteration that just produced
|
|
766
|
+
`outcome`). `outcome` is that iteration's typed result (Step 3's grep token,
|
|
767
|
+
plus the GATE verdict when applicable).
|
|
768
|
+
|
|
769
|
+
Returns one `LoopDecision`. The decision order is the loop's actual control
|
|
770
|
+
flow, top to bottom — read this function to know exactly when the loop
|
|
771
|
+
stops:
|
|
772
|
+
|
|
773
|
+
1. LAUNCH_FAILED → stop (a repeating launch failure burns all slots).
|
|
774
|
+
2. RATE_LIMITED / OVERLOADED → stop / retry-with-backoff; NOT a fault, so
|
|
775
|
+
neither counts toward the UNCLEAR breaker.
|
|
776
|
+
3. COMPLETE / THRASHING → stop (docs/117 Phase 3): if `state.completion`
|
|
777
|
+
is COMPLETE the work is verifiably DONE — stop, no
|
|
778
|
+
surface (the anti-`ITERATION_CAP`). UNDERDECLARED, or a
|
|
779
|
+
`state.convergence` that `should_surface`
|
|
780
|
+
(THRASHING/STARVED), stops AND surfaces (no fixpoint /
|
|
781
|
+
scope in doubt). Checked AFTER the not-a-fault stops and
|
|
782
|
+
BEFORE SPINNING (a provably-finished run beats a
|
|
783
|
+
zero-delta SPINNING read — the resumed-already-done
|
|
784
|
+
case). Opt-in: `None` skips these rungs → byte-identical.
|
|
785
|
+
4. SPINNING → stop (docs/99): if `state.liveness` is `SPINNING`, the
|
|
786
|
+
run is alive but landing zero forward delta — a
|
|
787
|
+
ground-truth anti-spin breaker. Checked AFTER the
|
|
788
|
+
upstream/transient breakers (an outage-induced idle is
|
|
789
|
+
not a spin) and BEFORE the outcome block (ground truth
|
|
790
|
+
overrides the SHIPPED self-report). Opt-in: `None`
|
|
791
|
+
liveness skips this rung entirely → byte-identical.
|
|
792
|
+
4b. PICK_HELD_INVARIANT → stop (docs/168 §5): if `state.pickability` is HELD
|
|
793
|
+
by a re-dispatch-invariant reason (DRAFT_CLASS /
|
|
794
|
+
OPERATOR_GATED / SOAK_OPEN / DEPENDENCY_UNMET), the next
|
|
795
|
+
lane would re-block identically — honest-STOP + surface the
|
|
796
|
+
typed hold for routing rather than spin. Checked AFTER the
|
|
797
|
+
not-a-fault/COMPLETE/SPINNING stops and BEFORE the outcome
|
|
798
|
+
block (the gate beats the self-report). Opt-in: `None`
|
|
799
|
+
skips it → byte-identical.
|
|
800
|
+
4c. PICK_COOLDOWN → stop (docs/207 §3): if `state.cooldown` is
|
|
801
|
+
RECENTLY_ATTEMPTED (the next unit was attempted-and-didn't-
|
|
802
|
+
move inside the window AND the host already skipped every
|
|
803
|
+
fresher candidate), re-dispatching it would re-storm —
|
|
804
|
+
honest-STOP + surface the cooled unit. The anti-churn
|
|
805
|
+
breaker; checked AFTER PICK_HELD_INVARIANT (an invariant
|
|
806
|
+
hold is more terminal than a time-bounded cooldown). Opt-in:
|
|
807
|
+
`None` skips it → byte-identical.
|
|
808
|
+
5. UNCLEAR → increment the streak; stop if it hit max_unclear,
|
|
809
|
+
else retry `dispatch`.
|
|
810
|
+
6. SHIPPED / REPLAN_DONE / GATE → route via the next-mode + drained-twice
|
|
811
|
+
+ gate-policy logic, then apply the iteration cap. Within
|
|
812
|
+
the GATE sub-block, a BLOCKED whose `outcome.blocked_cause`
|
|
813
|
+
is re-dispatch-INVARIANT (FQ-510: a cause whose
|
|
814
|
+
`BLOCKED_REASONS[cause].self_heals_via` is NOT `/replan` —
|
|
815
|
+
operator_decision, a false-ship oracle conflation, …) STOPs
|
|
816
|
+
on the FIRST occurrence (`BLOCKED_REDISPATCH_INVARIANT`),
|
|
817
|
+
checked BEFORE the FQ-452 stale-stamp spin-counter so an
|
|
818
|
+
invariant cause never spins /replan to the cap. The
|
|
819
|
+
post-run analogue of rung 4b. A `/replan`-curable BLOCKED,
|
|
820
|
+
or one with no classified cause, falls through unchanged.
|
|
821
|
+
|
|
822
|
+
The iteration cap is applied LAST, after a continue decision is otherwise
|
|
823
|
+
reached, so a stop *reason* (drained-twice, breaker, rate-limit, spinning)
|
|
824
|
+
always wins over the bare cap — the operator wants the specific reason, not
|
|
825
|
+
"reached 5".
|
|
826
|
+
"""
|
|
827
|
+
# The in-flight liveness verdict (docs/99) is per-iteration EVIDENCE the
|
|
828
|
+
# caller re-gathers each turn (via `dos liveness`), never carried state like
|
|
829
|
+
# `consecutive_unclear`. Read it into a local for the SPINNING rung below and
|
|
830
|
+
# CLEAR it from `state` up front, so it never survives into ANY returned
|
|
831
|
+
# `next_state` (terminal or continuing) — a stale verdict can't linger and
|
|
832
|
+
# fire spuriously next iteration; the caller must supply a fresh one. This is
|
|
833
|
+
# the evidence-in-not-state-carried discipline (the same reason `now_ms` is an
|
|
834
|
+
# input to `liveness.classify`, never stored), and it is also what makes the
|
|
835
|
+
# ADVANCING / STALLED / no-verdict paths byte-identical to the pre-3a loop:
|
|
836
|
+
# with the field cleared everywhere, their decisions differ in no field at all.
|
|
837
|
+
# The SPINNING `reason` string + `surface=True` carry the *why* for the
|
|
838
|
+
# operator, so dropping the verdict from `next_state` costs no legibility.
|
|
839
|
+
#
|
|
840
|
+
# The completion + convergence verdicts (docs/117 Phase 3) are gathered and
|
|
841
|
+
# cleared the SAME way and for the SAME reason: they are in-flight evidence the
|
|
842
|
+
# caller re-derives each turn (it owns the intent ledger and re-reads git
|
|
843
|
+
# ancestry), never carried state, so a stale verdict must not survive into the
|
|
844
|
+
# next iteration's `state`. With all three cleared up front, every path that
|
|
845
|
+
# does NOT stop on them is byte-identical to the pre-Phase-3 loop.
|
|
846
|
+
live = state.liveness
|
|
847
|
+
comp = state.completion
|
|
848
|
+
conv = state.convergence
|
|
849
|
+
pick = state.pickability
|
|
850
|
+
cool = state.cooldown
|
|
851
|
+
dprog = state.descendant_progress
|
|
852
|
+
state = replace(
|
|
853
|
+
state, liveness=None, completion=None, convergence=None, pickability=None,
|
|
854
|
+
cooldown=None, descendant_progress=None,
|
|
855
|
+
)
|
|
856
|
+
|
|
857
|
+
# 1. LAUNCH_FAILED — the subprocess never produced a valid init envelope.
|
|
858
|
+
# A repeating launch failure would burn every remaining slot, so stop on
|
|
859
|
+
# the first one (the SKILL's Step 2 init-line guard).
|
|
860
|
+
if outcome.kind is OutcomeKind.LAUNCH_FAILED:
|
|
861
|
+
return LoopDecision(
|
|
862
|
+
action=_STOP,
|
|
863
|
+
next_state=state,
|
|
864
|
+
stop_reason=StopReason.LAUNCH_FAILED,
|
|
865
|
+
surface=True,
|
|
866
|
+
reason="iteration subprocess failed to start (no valid init envelope)",
|
|
867
|
+
)
|
|
868
|
+
|
|
869
|
+
# 2. RATE_LIMITED — a hard usage/rate-limit rejection. Every retry fails the
|
|
870
|
+
# same way until the window resets; it is NOT a /dispatch fault, so it
|
|
871
|
+
# must not increment the consecutive-UNCLEAR breaker. Stop and let the
|
|
872
|
+
# operator re-invoke once the window resets.
|
|
873
|
+
if outcome.kind is OutcomeKind.RATE_LIMITED:
|
|
874
|
+
return LoopDecision(
|
|
875
|
+
action=_STOP,
|
|
876
|
+
next_state=state,
|
|
877
|
+
stop_reason=StopReason.RATE_LIMITED,
|
|
878
|
+
surface=True,
|
|
879
|
+
reason="usage/rate-limit window exhausted — not a fault; re-invoke after reset",
|
|
880
|
+
)
|
|
881
|
+
|
|
882
|
+
# 2b. OVERLOADED — a transient 529 / overloaded_error. Unlike a quota window,
|
|
883
|
+
# this clears in seconds to a couple minutes. Retry the SAME iteration
|
|
884
|
+
# with exponential backoff (60s → 270s → 1200s). After
|
|
885
|
+
# `max_overloaded` (3) consecutive OVERLOADED hits, escalate to STOP —
|
|
886
|
+
# that's an outage, not a capacity blip, and the operator should look.
|
|
887
|
+
# The breaker does NOT increment the consecutive-UNCLEAR streak (an
|
|
888
|
+
# OVERLOADED is upstream, not a /dispatch fault), same precedent as
|
|
889
|
+
# RATE_LIMITED.
|
|
890
|
+
if outcome.kind is OutcomeKind.OVERLOADED:
|
|
891
|
+
streak, tripped = _breaker_fail(
|
|
892
|
+
state.consecutive_overloaded, state.max_overloaded
|
|
893
|
+
)
|
|
894
|
+
bumped = replace(state, consecutive_overloaded=streak)
|
|
895
|
+
if tripped:
|
|
896
|
+
return LoopDecision(
|
|
897
|
+
action=_STOP,
|
|
898
|
+
next_state=bumped,
|
|
899
|
+
stop_reason=StopReason.CONSECUTIVE_OVERLOADED,
|
|
900
|
+
surface=True,
|
|
901
|
+
reason=(
|
|
902
|
+
f"{streak} consecutive OVERLOADED (529) hits — sustained "
|
|
903
|
+
f"server-side overload, not a transient blip; stop and "
|
|
904
|
+
f"re-invoke after the upstream incident clears"
|
|
905
|
+
),
|
|
906
|
+
)
|
|
907
|
+
backoff = _OVERLOADED_BACKOFF[min(streak - 1, len(_OVERLOADED_BACKOFF) - 1)]
|
|
908
|
+
return LoopDecision(
|
|
909
|
+
action=_RETRY_SAME_ITER,
|
|
910
|
+
next_state=bumped,
|
|
911
|
+
backoff_seconds=backoff,
|
|
912
|
+
reason=(
|
|
913
|
+
f"OVERLOADED (streak {streak}/{state.max_overloaded}) — "
|
|
914
|
+
f"transient 529, sleep {backoff}s then retry same iter"
|
|
915
|
+
),
|
|
916
|
+
)
|
|
917
|
+
|
|
918
|
+
# A non-OVERLOADED outcome resets the OVERLOADED streak — a clean run means
|
|
919
|
+
# the upstream incident cleared.
|
|
920
|
+
state = replace(state, consecutive_overloaded=0)
|
|
921
|
+
|
|
922
|
+
# 3. COMPLETE (docs/117 Phase 3) — the stop-on-DONE gate, the first terminal
|
|
923
|
+
# that means "finished," not "gave up." If the caller gathered a
|
|
924
|
+
# `CompletionVerdict` and it is COMPLETE, every declared unit is verified on
|
|
925
|
+
# the non-forgeable ancestry rung (the residual is empty): the work is done,
|
|
926
|
+
# so stop — cleanly, NO surface (a clean finish is not an operator decision).
|
|
927
|
+
# This is the anti-`ITERATION_CAP`: a healthy loop now terminates HERE, and
|
|
928
|
+
# the cap demotes to a backstop for genuinely pathological runs (docs/117
|
|
929
|
+
# §5.4 — "the critical inversion").
|
|
930
|
+
#
|
|
931
|
+
# Placement is load-bearing and was an explicit operator decision: COMPLETE is
|
|
932
|
+
# checked BEFORE the SPINNING rung. The two can BOTH fire for one legitimate
|
|
933
|
+
# case — a run resumed with nothing left to do has zero git delta since start
|
|
934
|
+
# (SPINNING) AND every declared unit already verified (COMPLETE). When the
|
|
935
|
+
# work is provably finished on the non-forgeable rung, "done" is the honest
|
|
936
|
+
# reason even with zero recent delta, so COMPLETE wins. (It stays AFTER the
|
|
937
|
+
# not-a-fault stops — LAUNCH_FAILED / RATE_LIMITED / OVERLOADED — for the same
|
|
938
|
+
# reason SPINNING does: a run that failed to launch or 529'd on its last turn
|
|
939
|
+
# has not "finished," and the specific outage is the reason the operator
|
|
940
|
+
# wants.)
|
|
941
|
+
#
|
|
942
|
+
# UNDERDECLARED (Phase 4, not emitted yet) → stop AND surface: the run thinks
|
|
943
|
+
# it is done but an external `ScopeSource` says it under-declared its extent;
|
|
944
|
+
# a human must reconcile. We route it through `StopReason.THRASHING` (the
|
|
945
|
+
# "no clean finish, look at this" terminal) with surface=True — the residual
|
|
946
|
+
# is empty but the *scope* is in doubt, which is exactly a surface-for-review.
|
|
947
|
+
# INCOMPLETE / INDETERMINATE never stop here: INCOMPLETE means "continue,
|
|
948
|
+
# re-dispatch the residual" (the caller owns that actuation), and INDETERMINATE
|
|
949
|
+
# means "can't tell from an unsound fold" — we never ASSERT done on it, so it
|
|
950
|
+
# falls through to the existing logic untouched.
|
|
951
|
+
#
|
|
952
|
+
# Opt-in / byte-identical: `comp is None` (the default) skips this rung
|
|
953
|
+
# entirely, so an un-migrated caller is unaffected.
|
|
954
|
+
if comp is not None:
|
|
955
|
+
if comp.state is Completion.COMPLETE:
|
|
956
|
+
return LoopDecision(
|
|
957
|
+
action=_STOP,
|
|
958
|
+
next_state=state,
|
|
959
|
+
stop_reason=StopReason.COMPLETE,
|
|
960
|
+
surface=False,
|
|
961
|
+
reason=(
|
|
962
|
+
"completion() reports COMPLETE — every declared unit is verified "
|
|
963
|
+
"against git ancestry; the residual is empty, so the loop stops "
|
|
964
|
+
"because the work is DONE (stop-on-done, not out-of-budget). "
|
|
965
|
+
+ comp.reason
|
|
966
|
+
),
|
|
967
|
+
)
|
|
968
|
+
if comp.state is Completion.UNDERDECLARED:
|
|
969
|
+
return LoopDecision(
|
|
970
|
+
action=_STOP,
|
|
971
|
+
next_state=state,
|
|
972
|
+
stop_reason=StopReason.THRASHING,
|
|
973
|
+
surface=True,
|
|
974
|
+
reason=(
|
|
975
|
+
"completion() reports UNDERDECLARED — the declared residual is "
|
|
976
|
+
"empty but an external scope check says the extent was "
|
|
977
|
+
"under-declared; stopping and surfacing for a human to reconcile. "
|
|
978
|
+
+ comp.reason
|
|
979
|
+
),
|
|
980
|
+
)
|
|
981
|
+
|
|
982
|
+
# 3b. THRASHING / STARVED (docs/117 Phase 3, §5.2) — the dynamic no-fixpoint
|
|
983
|
+
# gate. COMPLETE above is the STATIC fixpoint (residual empty now); this is
|
|
984
|
+
# its dynamic companion: the residual keeps churning but never empties (each
|
|
985
|
+
# pass closes some work and opens as much — the reviewer-finds-new-findings
|
|
986
|
+
# loop). If the caller gathered a `ConvergenceVerdict` over the residual-size
|
|
987
|
+
# history and it `should_surface` (THRASHING or STARVED), the loop will not
|
|
988
|
+
# reach a fixpoint — stop and surface rather than burn the iteration cap
|
|
989
|
+
# silently. Checked AFTER the COMPLETE gate (a run whose residual just reached
|
|
990
|
+
# 0 is CONVERGING/done, never thrashing) and, like it, before the
|
|
991
|
+
# UNCLEAR/SHIPPED/GATE block. CONVERGING / INSUFFICIENT never stop — "no
|
|
992
|
+
# fixpoint *yet*" is not a stop signal. Opt-in: `conv is None` skips it.
|
|
993
|
+
if conv is not None and conv.state.should_surface:
|
|
994
|
+
return LoopDecision(
|
|
995
|
+
action=_STOP,
|
|
996
|
+
next_state=state,
|
|
997
|
+
stop_reason=StopReason.THRASHING,
|
|
998
|
+
surface=True,
|
|
999
|
+
reason=(
|
|
1000
|
+
"convergence() reports "
|
|
1001
|
+
f"{conv.state.value} — the residual is not trending to empty over "
|
|
1002
|
+
"the recent window; the loop is productive but has no fixpoint, so "
|
|
1003
|
+
"stopping and surfacing rather than spending the cap. " + conv.reason
|
|
1004
|
+
),
|
|
1005
|
+
)
|
|
1006
|
+
|
|
1007
|
+
# 4. SPINNING (docs/99 / docs/82 Phase-3a) — the ground-truth anti-spin
|
|
1008
|
+
# breaker. If the caller gathered an in-flight `Liveness` verdict for this
|
|
1009
|
+
# run and it is SPINNING (alive — fresh heartbeat — but zero commits and
|
|
1010
|
+
# zero state-mutating lane events since start), the loop is burning tokens
|
|
1011
|
+
# narrating motion it is not making. Stop on the hard evidence rather than
|
|
1012
|
+
# waiting for the iteration cap or a self-report streak.
|
|
1013
|
+
#
|
|
1014
|
+
# Placement is load-bearing: AFTER LAUNCH_FAILED / RATE_LIMITED / OVERLOADED
|
|
1015
|
+
# (a run idle only because it is backing off a 529 / quota window is NOT
|
|
1016
|
+
# spinning — those not-a-fault stops must win, the same precedence they get
|
|
1017
|
+
# over the UNCLEAR breaker), and BEFORE the UNCLEAR / SHIPPED / GATE block
|
|
1018
|
+
# (liveness reads ground truth, and the whole docs/82 thesis is that ground
|
|
1019
|
+
# truth overrides the self-report — a loop reporting SHIPPED every iteration
|
|
1020
|
+
# while landing 0 commits is the canonical spin, and SHIPPED's healthy path
|
|
1021
|
+
# must not pre-empt the verdict). This mirrors UNMEASURED_SHIPPED being
|
|
1022
|
+
# checked FIRST inside the SHIPPED branch: a ground-truth distrust signal
|
|
1023
|
+
# pre-empts the conservative continue.
|
|
1024
|
+
#
|
|
1025
|
+
# Opt-in / byte-identical: `live is None` (the default) skips this rung
|
|
1026
|
+
# entirely, so an un-migrated caller gets the pre-3a behavior exactly.
|
|
1027
|
+
# Only SPINNING stops here — ADVANCING is benign; STALLED ("dead/hung") is
|
|
1028
|
+
# the supervisor's reap input (`supervise.py`), not a live loop's self-stop.
|
|
1029
|
+
if live is Liveness.SPINNING:
|
|
1030
|
+
return LoopDecision(
|
|
1031
|
+
action=_STOP,
|
|
1032
|
+
next_state=state,
|
|
1033
|
+
stop_reason=StopReason.SPINNING,
|
|
1034
|
+
surface=True,
|
|
1035
|
+
reason=(
|
|
1036
|
+
"liveness() reports SPINNING — the run is alive but has landed 0 "
|
|
1037
|
+
"commits and 0 lane events since it started; stopping on "
|
|
1038
|
+
"ground-truth evidence rather than burning the iteration budget "
|
|
1039
|
+
"narrating motion it is not making"
|
|
1040
|
+
),
|
|
1041
|
+
)
|
|
1042
|
+
|
|
1043
|
+
# 4b. PICK_HELD_INVARIANT (docs/168 §5) — the honest-STOP rung. If the caller
|
|
1044
|
+
# gathered a pre-dispatch `Pickability` verdict for the lane it would
|
|
1045
|
+
# dispatch next and that verdict is HELD by a reason a re-dispatch CANNOT
|
|
1046
|
+
# change (DRAFT_CLASS / OPERATOR_GATED / SOAK_OPEN / DEPENDENCY_UNMET), the
|
|
1047
|
+
# next iteration would re-block on the identical deterministic gate. This is
|
|
1048
|
+
# the drain-trap the host hit on three distinct lanes in 36h (ASI #475
|
|
1049
|
+
# operator-gated, RTN soak, FMP #493 DRAFT): the loop's `decide()` modeled
|
|
1050
|
+
# continue→dispatch on a DRAIN, so the operator had to OVERRIDE with an
|
|
1051
|
+
# "honest STOP" every time. With the hold reason typed, that override
|
|
1052
|
+
# becomes a kernel rule — STOP and surface the typed hold so the host can
|
|
1053
|
+
# route it (DRAFT→/promote, OPERATOR_GATED→escalate a decision, SOAK_OPEN→
|
|
1054
|
+
# wait, never /replan; DEPENDENCY_UNMET→ship the prerequisite).
|
|
1055
|
+
#
|
|
1056
|
+
# EVIDENCE-GATED: it fires ONLY when the verdict is present AND
|
|
1057
|
+
# `is_redispatch_invariant`. An OFFERABLE verdict, or a HELD verdict whose
|
|
1058
|
+
# reason is re-dispatch-CURABLE (IN_FLIGHT / SOFT_CLAIMED_ELSEWHERE /
|
|
1059
|
+
# STALE_CLAIM / COOLDOWN / SHIPPED / UNPARSEABLE — all CAN clear), never
|
|
1060
|
+
# stops here.
|
|
1061
|
+
#
|
|
1062
|
+
# Placement is load-bearing: AFTER the not-a-fault stops (LAUNCH_FAILED /
|
|
1063
|
+
# RATE_LIMITED / OVERLOADED — an outage is not a reason to declare the lane
|
|
1064
|
+
# un-pickable) and AFTER COMPLETE / SPINNING (a provably-finished or
|
|
1065
|
+
# ground-truth-spinning run names a more specific terminal), and BEFORE the
|
|
1066
|
+
# UNCLEAR / SHIPPED / GATE outcome block (an invariant hold on the next lane
|
|
1067
|
+
# pre-empts whatever this iteration's outcome token says — the same "the
|
|
1068
|
+
# gate beats the self-report" precedence the SPINNING rung has).
|
|
1069
|
+
#
|
|
1070
|
+
# Opt-in / byte-identical: `pick is None` (the default) skips this rung
|
|
1071
|
+
# entirely, so an un-migrated caller is unaffected.
|
|
1072
|
+
if pick is not None and pick.is_redispatch_invariant:
|
|
1073
|
+
reason = pick.reason # guaranteed non-None by is_redispatch_invariant
|
|
1074
|
+
return LoopDecision(
|
|
1075
|
+
action=_STOP,
|
|
1076
|
+
next_state=state,
|
|
1077
|
+
stop_reason=StopReason.PICK_HELD_INVARIANT,
|
|
1078
|
+
surface=True,
|
|
1079
|
+
reason=(
|
|
1080
|
+
f"next lane is HELD by {reason.value} — a re-dispatch-invariant "
|
|
1081
|
+
f"hold a re-dispatch cannot change; honest-STOP rather than "
|
|
1082
|
+
f"re-block on the identical gate next iteration. "
|
|
1083
|
+
+ (pick.evidence or "")
|
|
1084
|
+
).strip(),
|
|
1085
|
+
)
|
|
1086
|
+
|
|
1087
|
+
# 4c. PICK_COOLDOWN (docs/207 §3) — the anti-churn breaker. If the caller
|
|
1088
|
+
# gathered a `Cooldown` verdict for the unit it would dispatch NEXT (after
|
|
1089
|
+
# it ALREADY skipped every fresher offerable-and-not-cooled candidate — the
|
|
1090
|
+
# host's pick-selection contract) and that verdict is RECENTLY_ATTEMPTED,
|
|
1091
|
+
# the unit was attempted-and-didn't-move inside the window and nothing
|
|
1092
|
+
# fresher is left. Re-dispatching it would re-storm (the ~5%-shipping
|
|
1093
|
+
# re-pick loop the bare loop hit), so honest-STOP + surface the cooled unit
|
|
1094
|
+
# rather than burn the iteration re-confirming a known drain. This is the
|
|
1095
|
+
# CROSS-RUN memory `liveness` (a single-run verdict) cannot provide.
|
|
1096
|
+
#
|
|
1097
|
+
# EVIDENCE-GATED: fires ONLY when the verdict is present AND `held`
|
|
1098
|
+
# (RECENTLY_ATTEMPTED). A CLEAR verdict — the window elapsed, or nothing
|
|
1099
|
+
# held the unit — never stops; the loop keeps its existing behavior.
|
|
1100
|
+
#
|
|
1101
|
+
# Placement: AFTER the not-a-fault stops + COMPLETE/SPINNING/PICK_HELD
|
|
1102
|
+
# (an invariant hold names a more specific terminal than a cooldown — a
|
|
1103
|
+
# DRAFT lane is held forever, a cooled one only until the wall), and BEFORE
|
|
1104
|
+
# the outcome block (the cooldown pre-empts the iteration's self-report, the
|
|
1105
|
+
# same "the gate beats the self-report" precedence the sibling rungs have).
|
|
1106
|
+
#
|
|
1107
|
+
# Opt-in / byte-identical: `cool is None` (the default) skips this rung.
|
|
1108
|
+
if cool is not None and cool.held:
|
|
1109
|
+
return LoopDecision(
|
|
1110
|
+
action=_STOP,
|
|
1111
|
+
next_state=state,
|
|
1112
|
+
stop_reason=StopReason.PICK_COOLDOWN,
|
|
1113
|
+
surface=True,
|
|
1114
|
+
reason=(
|
|
1115
|
+
f"next unit {cool.unit_id!r} is in a cooldown window — "
|
|
1116
|
+
+ (cool.reason or "attempted recently and did not move")
|
|
1117
|
+
+ "; nothing fresher is offerable, so honest-STOP rather than "
|
|
1118
|
+
"re-storm a known drain (the anti-churn breaker)"
|
|
1119
|
+
),
|
|
1120
|
+
)
|
|
1121
|
+
|
|
1122
|
+
# 4. UNCLEAR — crashed/killed before Step 9, or an INTERIM envelope. Retry
|
|
1123
|
+
# as `dispatch`, but increment the streak; three in a row means the
|
|
1124
|
+
# subprocess is failing systematically (the circuit breaker).
|
|
1125
|
+
if outcome.kind is OutcomeKind.UNCLEAR:
|
|
1126
|
+
# 4d. DESCENDANT-PROGRESS adopt-wait (FQ-509) — the pre-check that
|
|
1127
|
+
# distinguishes a *parked-but-PRODUCTIVE* parent from a systematic
|
|
1128
|
+
# failure. A headless `-p` child that PARKED its own turn while a
|
|
1129
|
+
# grandchild it spawned is still committing the registered picks lands
|
|
1130
|
+
# here as UNCLEAR (the parent's ancestry check ran the instant it
|
|
1131
|
+
# exited, saw 0 committed picks, and the token collapsed to UNCLEAR).
|
|
1132
|
+
# Charging that to the UNCLEAR breaker is WRONG: the descendant is
|
|
1133
|
+
# healthy and about to land its commits — counting it as a fault makes
|
|
1134
|
+
# the loop self-stop with CONSECUTIVE_UNCLEAR over live work AND
|
|
1135
|
+
# re-dispatch a fresh child each time instead of waiting for the live
|
|
1136
|
+
# one. When the host supplied `descendant_progress == ADVANCING` (the
|
|
1137
|
+
# descendant landed a forward delta — a real new commit since start, or
|
|
1138
|
+
# the ancestry-backed CHURNING verdict; the host's corpse-guard ensures
|
|
1139
|
+
# a log-touched-but-not-committing "alive" maps to NONE_OBSERVED, never
|
|
1140
|
+
# here), CONTINUE the loop (adopt-wait: re-dispatch so the live child
|
|
1141
|
+
# gets the chance to land its picks → the NEXT iteration's ancestry
|
|
1142
|
+
# check lifts it to SHIPPED) WITHOUT charging the UNCLEAR breaker, and
|
|
1143
|
+
# RESET consecutive_unclear to 0 (a live committing child means the
|
|
1144
|
+
# prior UNCLEARs were not a systematic fault).
|
|
1145
|
+
#
|
|
1146
|
+
# BOUNDED, clock-free: the adopt-wait is itself counted by
|
|
1147
|
+
# `consecutive_adopt_wait`; after `max_adopt_wait` consecutive
|
|
1148
|
+
# ADVANCING-but-the-picks-still-uncommitted iters it FALLS THROUGH to
|
|
1149
|
+
# today's UNCLEAR breaker path (which caps at max_unclear) rather than
|
|
1150
|
+
# a new terminal — so a descendant that keeps "advancing" but never
|
|
1151
|
+
# lands its registered picks can never adopt-wait forever. The continue
|
|
1152
|
+
# also cannot persist past death: descendant_progress is re-gathered
|
|
1153
|
+
# every iteration (cleared up-front), so a child that DIES flips to
|
|
1154
|
+
# DEAD next iter and takes the normal UNCLEAR path.
|
|
1155
|
+
#
|
|
1156
|
+
# Opt-in / byte-identical: `dprog` defaults None (cleared up-front),
|
|
1157
|
+
# and the guard is `dprog is DescendantProgress.ADVANCING` — DEAD,
|
|
1158
|
+
# NONE_OBSERVED, and None all skip it → the rung below is byte-identical
|
|
1159
|
+
# to the pre-FQ-509 loop.
|
|
1160
|
+
if dprog is DescendantProgress.ADVANCING:
|
|
1161
|
+
aw_streak, aw_tripped = _breaker_fail(
|
|
1162
|
+
state.consecutive_adopt_wait, state.max_adopt_wait
|
|
1163
|
+
)
|
|
1164
|
+
if not aw_tripped:
|
|
1165
|
+
# Live committing descendant — adopt-wait. Do NOT charge the
|
|
1166
|
+
# UNCLEAR breaker; reset it (this iter is not a fault).
|
|
1167
|
+
bumped = replace(
|
|
1168
|
+
state, consecutive_adopt_wait=aw_streak, consecutive_unclear=0
|
|
1169
|
+
)
|
|
1170
|
+
return _continue_or_cap(
|
|
1171
|
+
bumped,
|
|
1172
|
+
next_mode="dispatch",
|
|
1173
|
+
reason=(
|
|
1174
|
+
f"descendant FORWARD-PROGRESSING (adopt-wait "
|
|
1175
|
+
f"{aw_streak}/{state.max_adopt_wait}) — the parent parked "
|
|
1176
|
+
f"but a descendant it spawned is committing the registered "
|
|
1177
|
+
f"picks; wait for it to land them, not a /dispatch fault"
|
|
1178
|
+
),
|
|
1179
|
+
)
|
|
1180
|
+
# aw_tripped: the descendant kept "advancing" but never landed its
|
|
1181
|
+
# picks within the bound → fall through to the normal UNCLEAR breaker
|
|
1182
|
+
# path below (degrade to today's behavior; not a new terminal). The
|
|
1183
|
+
# adopt-wait streak is NOT carried into next_state: it is reset to 0 below.
|
|
1184
|
+
# Non-advancing UNCLEAR (DEAD / NONE_OBSERVED / None, or a tripped
|
|
1185
|
+
# adopt-wait): today's exact path. Reset consecutive_adopt_wait (a
|
|
1186
|
+
# non-advancing iter breaks the adopt streak) but NOT consecutive_unclear
|
|
1187
|
+
# (it accrues — so a flapping ALIVE/quiet child still reaches max_unclear).
|
|
1188
|
+
streak, tripped = _breaker_fail(state.consecutive_unclear, state.max_unclear)
|
|
1189
|
+
bumped = replace(state, consecutive_unclear=streak, consecutive_adopt_wait=0)
|
|
1190
|
+
if tripped:
|
|
1191
|
+
return LoopDecision(
|
|
1192
|
+
action=_STOP,
|
|
1193
|
+
next_state=bumped,
|
|
1194
|
+
stop_reason=StopReason.CONSECUTIVE_UNCLEAR,
|
|
1195
|
+
surface=True,
|
|
1196
|
+
reason=(
|
|
1197
|
+
f"{streak} consecutive UNCLEAR iterations — the /dispatch "
|
|
1198
|
+
f"subprocess is failing systematically, not draining a backlog"
|
|
1199
|
+
),
|
|
1200
|
+
)
|
|
1201
|
+
return _continue_or_cap(
|
|
1202
|
+
bumped,
|
|
1203
|
+
next_mode="dispatch",
|
|
1204
|
+
reason=f"UNCLEAR (streak {streak}/{state.max_unclear}) — retrying dispatch",
|
|
1205
|
+
)
|
|
1206
|
+
|
|
1207
|
+
# A non-UNCLEAR, non-fault iteration completed → reset the UNCLEAR breaker.
|
|
1208
|
+
# The SHIPPED-DIRTY-0 breaker is reset only inside the SHIPPED branch on a
|
|
1209
|
+
# *healthy* SHIPPED outcome (or on a REPLAN_DONE / GATE outcome that
|
|
1210
|
+
# naturally interrupts a back-to-back-SHIPPED streak — handled below).
|
|
1211
|
+
base = replace(state, consecutive_unclear=0)
|
|
1212
|
+
if outcome.kind in (OutcomeKind.REPLAN_DONE, OutcomeKind.GATE):
|
|
1213
|
+
# A non-SHIPPED outcome breaks the back-to-back-SHIPPED-DIRTY-0 streak.
|
|
1214
|
+
base = replace(base, consecutive_dirty_zero=0)
|
|
1215
|
+
|
|
1216
|
+
# 5a. SHIPPED — picks landed. Backlog still has work; clear the drained flag.
|
|
1217
|
+
#
|
|
1218
|
+
# FQ-420 unmeasured-ship STALL (checked FIRST): a SHIPPED token is the
|
|
1219
|
+
# /dispatch child's *self-report*. The PJ2 packet-judge is the kernel's
|
|
1220
|
+
# independent measurement of that claim against the post-fanout commit set.
|
|
1221
|
+
# If the driver asserted a measurement was owed (`measurement_expected`) but
|
|
1222
|
+
# the judge came back None — the FQ-420 shape: head==SHIPPED yet the fanout
|
|
1223
|
+
# run-ts could not be resolved, so PJ2 classify never ran — the kernel has a
|
|
1224
|
+
# claimed ship it could NOT verify. It must not fall through to the healthy
|
|
1225
|
+
# path on the strength of an unverified self-report (that is the exact lie
|
|
1226
|
+
# the substrate exists to refuse — a manual git-log check should never be
|
|
1227
|
+
# what catches it). STALL and surface so the operator re-measures: resolve
|
|
1228
|
+
# the fanout ts from the archive, or treat the ship as unproven. This guard
|
|
1229
|
+
# precedes the dirty-zero / healthy classification because a missing
|
|
1230
|
+
# measurement makes ALL of that sub-classification untrustworthy.
|
|
1231
|
+
#
|
|
1232
|
+
# SHIPPED-DIRTY-0 breaker: a SHIPPED iter that the packet-judge classified
|
|
1233
|
+
# as SHIPPED-DIRTY AND measured 0 commits is the degraded-shipping signal
|
|
1234
|
+
# the breaker counts (input gate says LIVE, packet-judge says DIRTY, no
|
|
1235
|
+
# commits actually landed). K back-to-back instances → stop; this is the
|
|
1236
|
+
# structural defense that justifies the iteration cap raise from 5 to 10
|
|
1237
|
+
# — it kills the degraded-shipping damage path at iter K regardless of cap.
|
|
1238
|
+
# Every other SHIPPED outcome (SHIPPED-CLEAN, SHIPPED-DIRTY with ship_count>0,
|
|
1239
|
+
# or no packet-judge supplied AND none expected) resets the streak. Callers
|
|
1240
|
+
# that do not pass packet_judge/ship_count AND do not set
|
|
1241
|
+
# measurement_expected get pre-breaker behavior — the streak is held
|
|
1242
|
+
# constant rather than incremented; this matches the "treat as PRODUCTIVE
|
|
1243
|
+
# when unclassified" conservative-default precedent (an un-migrated caller
|
|
1244
|
+
# that never measures is trusted; one that SAID it would measure is not).
|
|
1245
|
+
if outcome.kind is OutcomeKind.SHIPPED:
|
|
1246
|
+
# FQ-452: a SHIPPED iteration is genuine forward progress — the lane is
|
|
1247
|
+
# no longer stuck on a stale-stamp gate. Reset the spin-breaker streak.
|
|
1248
|
+
# (Reset here, NOT in the shared `base` block above, because a
|
|
1249
|
+
# REPLAN_DONE must NOT reset it — the /replan is the *response* to the
|
|
1250
|
+
# stale-stamp and the streak has to survive it to ever reach the cap.)
|
|
1251
|
+
base = replace(base, consecutive_stale_stamp=0)
|
|
1252
|
+
# QWD benign-drain: a ship means the lane was NOT benignly drained — clear
|
|
1253
|
+
# the unproductive-replan-drain streak + the prior-DRAIN carry. (Same
|
|
1254
|
+
# reasoning as the stale-stamp reset: reset on a real ship, not in the
|
|
1255
|
+
# shared block, because rung 5b consumes `last_gate_was_drain`.)
|
|
1256
|
+
# #506: a ship also clears the REPLAN_STALLED streak — the lane produced
|
|
1257
|
+
# work, so /replan is not in the 0-refill stall. Like the two resets above,
|
|
1258
|
+
# done here (not in the shared `base` block) so a REPLAN_DONE does NOT reset
|
|
1259
|
+
# it: the stall streak must SURVIVE the dispatch→GATE→/replan cycle between
|
|
1260
|
+
# two unproductive replans to ever reach the threshold (a GATE always sits
|
|
1261
|
+
# between two REPLAN_DONE outcomes, exactly as `consecutive_stale_stamp`
|
|
1262
|
+
# survives the intervening REPLAN_DONE for the mirror reason).
|
|
1263
|
+
base = replace(
|
|
1264
|
+
base,
|
|
1265
|
+
consecutive_unproductive_replan_drains=0,
|
|
1266
|
+
consecutive_unproductive_replan=0,
|
|
1267
|
+
last_gate_was_drain=False,
|
|
1268
|
+
)
|
|
1269
|
+
if outcome.measurement_expected and outcome.packet_judge is None:
|
|
1270
|
+
return LoopDecision(
|
|
1271
|
+
action=_STOP,
|
|
1272
|
+
next_state=replace(base, last_replan_drained=False),
|
|
1273
|
+
stop_reason=StopReason.UNMEASURED_SHIPPED,
|
|
1274
|
+
surface=True,
|
|
1275
|
+
reason=(
|
|
1276
|
+
"SHIPPED claimed but the PJ2 packet-judge measurement is "
|
|
1277
|
+
"missing (fanout run-ts unresolved) — the ship is "
|
|
1278
|
+
"self-reported and unverified; STALL and re-measure rather "
|
|
1279
|
+
"than trust an unmeasured ship"
|
|
1280
|
+
),
|
|
1281
|
+
)
|
|
1282
|
+
is_dirty_zero = (
|
|
1283
|
+
outcome.packet_judge == "SHIPPED-DIRTY"
|
|
1284
|
+
and outcome.ship_count == 0
|
|
1285
|
+
)
|
|
1286
|
+
if is_dirty_zero:
|
|
1287
|
+
streak, tripped = _breaker_fail(
|
|
1288
|
+
base.consecutive_dirty_zero, base.max_dirty_zero
|
|
1289
|
+
)
|
|
1290
|
+
bumped = replace(
|
|
1291
|
+
base, last_replan_drained=False, consecutive_dirty_zero=streak
|
|
1292
|
+
)
|
|
1293
|
+
if tripped:
|
|
1294
|
+
return LoopDecision(
|
|
1295
|
+
action=_STOP,
|
|
1296
|
+
next_state=bumped,
|
|
1297
|
+
stop_reason=StopReason.CONSECUTIVE_DIRTY_ZERO,
|
|
1298
|
+
surface=True,
|
|
1299
|
+
reason=(
|
|
1300
|
+
f"{streak} consecutive SHIPPED-DIRTY iters with 0 commits "
|
|
1301
|
+
f"— /dispatch is shipping apparently-successful but "
|
|
1302
|
+
f"actually-empty iters (degraded-shipping regression)"
|
|
1303
|
+
),
|
|
1304
|
+
)
|
|
1305
|
+
return _continue_or_cap(
|
|
1306
|
+
bumped,
|
|
1307
|
+
next_mode="dispatch",
|
|
1308
|
+
reason=(
|
|
1309
|
+
f"SHIPPED-DIRTY-0 (streak {streak}/{base.max_dirty_zero}) "
|
|
1310
|
+
f"— continue dispatch, but watch the streak"
|
|
1311
|
+
),
|
|
1312
|
+
)
|
|
1313
|
+
# Healthy SHIPPED outcome (SHIPPED-CLEAN, or SHIPPED-DIRTY with ≥1 commit,
|
|
1314
|
+
# or no packet-judge supplied) — reset the dirty-zero streak.
|
|
1315
|
+
nxt = replace(
|
|
1316
|
+
base, last_replan_drained=False, consecutive_dirty_zero=0
|
|
1317
|
+
)
|
|
1318
|
+
return _continue_or_cap(
|
|
1319
|
+
nxt, next_mode="dispatch", reason="SHIPPED — picks shipped, continue dispatch"
|
|
1320
|
+
)
|
|
1321
|
+
|
|
1322
|
+
# 5b. REPLAN_DONE — a /replan iteration completed. Next is `dispatch`. The
|
|
1323
|
+
# FQ-240 fix: arm `last_replan_drained` (the drained-twice trigger) ONLY
|
|
1324
|
+
# when the /replan was PRODUCTIVE — i.e. it actually refilled / gardened.
|
|
1325
|
+
# An UNPRODUCTIVE /replan (the §1.5 no-op skip, or a 0/0/0 sweep) is NOT
|
|
1326
|
+
# a refill attempt; arming the trigger off it would let a DRAIN that
|
|
1327
|
+
# follows a /replan-that-did-nothing false-stop the loop as DRAINED_TWICE
|
|
1328
|
+
# (finding #240's second shape, distinct from the QWB7 STALE-STAMP half).
|
|
1329
|
+
# Default to PRODUCTIVE when unclassified — the conservative pre-FQ-240
|
|
1330
|
+
# behavior, so this change can never make the loop run *longer*.
|
|
1331
|
+
if outcome.kind is OutcomeKind.REPLAN_DONE:
|
|
1332
|
+
productivity = outcome.replan_productivity or ReplanProductivity.PRODUCTIVE
|
|
1333
|
+
productive = productivity is ReplanProductivity.PRODUCTIVE
|
|
1334
|
+
# QWD benign-drain breaker (FQ-509-sibling). An UNPRODUCTIVE /replan whose
|
|
1335
|
+
# immediately-prior gate was a DRAIN (`last_gate_was_drain`) is the
|
|
1336
|
+
# benign-drain signal: /replan was asked to refill a drained lane and
|
|
1337
|
+
# produced nothing because there is nothing left. Count it. A PRODUCTIVE
|
|
1338
|
+
# /replan, or one not preceded by a DRAIN, resets the streak (the lane is
|
|
1339
|
+
# not in the benign-drain spin). The prior-DRAIN carry is consumed either
|
|
1340
|
+
# way (it describes only the one transition into this /replan). The count
|
|
1341
|
+
# is bumped via the `dos.breaker` fold (docs/258); the trip is NOT checked
|
|
1342
|
+
# here — the benign-drain stop fires on the NEXT DRAIN (rung 5c), so we keep
|
|
1343
|
+
# only the new count and discard the trip bit at this point.
|
|
1344
|
+
if productive:
|
|
1345
|
+
benign_streak = 0
|
|
1346
|
+
elif base.last_gate_was_drain:
|
|
1347
|
+
benign_streak, _ = _breaker_fail(
|
|
1348
|
+
base.consecutive_unproductive_replan_drains,
|
|
1349
|
+
base.max_unproductive_replan_drains,
|
|
1350
|
+
)
|
|
1351
|
+
else:
|
|
1352
|
+
benign_streak = base.consecutive_unproductive_replan_drains
|
|
1353
|
+
|
|
1354
|
+
# #506 / docs/258 — the REPLAN_STALLED breaker, the BROADER sibling, and the
|
|
1355
|
+
# FIRST loop_decide rung whose trip is taken straight off `dos.breaker`. An
|
|
1356
|
+
# UNPRODUCTIVE /replan is a failure of this class REGARDLESS of the prior
|
|
1357
|
+
# gate (a costly 0-refill sweep is the pathology whether or not a DRAIN
|
|
1358
|
+
# preceded it — the gap the benign-drain bracket leaves). A PRODUCTIVE
|
|
1359
|
+
# /replan is a success (the sweep refilled → the stall cleared) and heals the
|
|
1360
|
+
# streak. On the Kth (default 2) consecutive unproductive /replan, STOP +
|
|
1361
|
+
# surface rather than spend another 16-22min/~$5 sweep that the measurement
|
|
1362
|
+
# says will refill nothing. Opt-in/byte-identical: an unclassified /replan
|
|
1363
|
+
# defaults to PRODUCTIVE (FQ-240), so a caller that never classifies
|
|
1364
|
+
# productivity records only successes here and never trips this.
|
|
1365
|
+
if productive:
|
|
1366
|
+
stall_t = breaker.record_success(
|
|
1367
|
+
breaker.BreakerCounts(consecutive=base.consecutive_unproductive_replan),
|
|
1368
|
+
_replan_stall_policy(base),
|
|
1369
|
+
)
|
|
1370
|
+
stall_streak = stall_t.counts.consecutive # 0 — healed
|
|
1371
|
+
else:
|
|
1372
|
+
stall_streak, stall_open = _breaker_fail(
|
|
1373
|
+
base.consecutive_unproductive_replan, base.max_unproductive_replan
|
|
1374
|
+
)
|
|
1375
|
+
if stall_open:
|
|
1376
|
+
return LoopDecision(
|
|
1377
|
+
action=_STOP,
|
|
1378
|
+
next_state=replace(
|
|
1379
|
+
base,
|
|
1380
|
+
last_replan_drained=False,
|
|
1381
|
+
consecutive_unproductive_replan_drains=benign_streak,
|
|
1382
|
+
consecutive_unproductive_replan=stall_streak,
|
|
1383
|
+
last_gate_was_drain=False,
|
|
1384
|
+
),
|
|
1385
|
+
stop_reason=StopReason.REPLAN_STALLED,
|
|
1386
|
+
surface=True,
|
|
1387
|
+
reason=(
|
|
1388
|
+
f"{stall_streak} consecutive UNPRODUCTIVE /replans — /replan "
|
|
1389
|
+
f"keeps refilling nothing (the measured 0-refill stall, ~45% "
|
|
1390
|
+
f"of loop wall-clock); stop and surface rather than spend "
|
|
1391
|
+
f"another ~16-22min/~$5 sweep that won't refill on a "
|
|
1392
|
+
f"{stall_streak + 1}th identical pass"
|
|
1393
|
+
),
|
|
1394
|
+
)
|
|
1395
|
+
|
|
1396
|
+
nxt = replace(
|
|
1397
|
+
base,
|
|
1398
|
+
last_replan_drained=productive,
|
|
1399
|
+
consecutive_unproductive_replan_drains=benign_streak,
|
|
1400
|
+
consecutive_unproductive_replan=stall_streak,
|
|
1401
|
+
last_gate_was_drain=False,
|
|
1402
|
+
)
|
|
1403
|
+
if productive:
|
|
1404
|
+
reason = "REPLAN_DONE (productive) — backlog refilled, dispatch next (drained-twice armed)"
|
|
1405
|
+
else:
|
|
1406
|
+
reason = (
|
|
1407
|
+
"REPLAN_DONE (unproductive) — /replan did 0 gardening / 0 refill; "
|
|
1408
|
+
"drained-twice NOT armed (a DRAIN next is not drained-twice)"
|
|
1409
|
+
)
|
|
1410
|
+
if benign_streak:
|
|
1411
|
+
reason += (
|
|
1412
|
+
f"; benign-drain streak {benign_streak}/"
|
|
1413
|
+
f"{base.max_unproductive_replan_drains}"
|
|
1414
|
+
)
|
|
1415
|
+
if stall_streak:
|
|
1416
|
+
reason += (
|
|
1417
|
+
f"; replan-stall streak {stall_streak}/"
|
|
1418
|
+
f"{base.max_unproductive_replan}"
|
|
1419
|
+
)
|
|
1420
|
+
return _continue_or_cap(nxt, next_mode="dispatch", reason=reason)
|
|
1421
|
+
|
|
1422
|
+
# 5c. GATE — /dispatch reached Step 9 with child2 skipped. The typed verdict
|
|
1423
|
+
# + the --gate policy decide what to do (the pure `gate_policy`). The
|
|
1424
|
+
# loop-level part this layer adds is the drained-twice counter.
|
|
1425
|
+
assert outcome.kind is OutcomeKind.GATE and outcome.verdict is not None
|
|
1426
|
+
action: GateAction = gate_policy(outcome.verdict, base.gate_mode)
|
|
1427
|
+
|
|
1428
|
+
# FQ-510 — re-dispatch-INVARIANT BLOCKED stop (the post-run analogue of the
|
|
1429
|
+
# pre-launch PICK_HELD_INVARIANT rung 4b). A BLOCKED gate carries a classified
|
|
1430
|
+
# cause (`outcome.blocked_cause`, the dos.tokens.BlockedReason key the driver
|
|
1431
|
+
# mined from the Outcome cell). When that cause's catalog entry self-heals via
|
|
1432
|
+
# something OTHER than /replan — an `operator_decision` (`self_heals_via=""`),
|
|
1433
|
+
# a false-ship oracle conflation / stale-claim / lying-verdict (`/unstick`),
|
|
1434
|
+
# any non-`/replan` remedy — routing it to /replan is structurally wrong: the
|
|
1435
|
+
# next /dispatch re-derives the identical BLOCKED and the loop spins
|
|
1436
|
+
# BLOCKED→/replan→BLOCKED to the FQ-452 cap (3 iters, ~$15-25/1.5h) before the
|
|
1437
|
+
# spin-breaker catches it. So STOP on the FIRST such BLOCKED + surface, instead
|
|
1438
|
+
# of spinning. Checked BEFORE the FQ-452 counter so the invariant cause never
|
|
1439
|
+
# even increments the stale-stamp streak (it is a different, terminal class).
|
|
1440
|
+
# A BLOCKED whose cause IS /replan-curable (lane_soak_gated /
|
|
1441
|
+
# lane_all_inflight_or_deferred / data_gated_closeout — a genuine refill/stamp
|
|
1442
|
+
# drift) falls through to the FQ-452 path unchanged, as does a BLOCKED with no
|
|
1443
|
+
# classified cause (the driver could not name one) — both preserve today's
|
|
1444
|
+
# behavior exactly. The operator-decision sub-case is ALSO auto-filed once by
|
|
1445
|
+
# the driver's `emit-decision-needed` actuation, so the operator sees it in
|
|
1446
|
+
# the findings queue regardless of this stop.
|
|
1447
|
+
if outcome.verdict is Verdict.BLOCKED and outcome.blocked_cause:
|
|
1448
|
+
_info = blocked_reason_for_key(outcome.blocked_cause)
|
|
1449
|
+
if _info is not None and _info.self_heals_via != "/replan":
|
|
1450
|
+
_route = (
|
|
1451
|
+
"file the operator decision (auto-filed) and resolve it"
|
|
1452
|
+
if _info.operator_action_required
|
|
1453
|
+
else f"run {_info.self_heals_via or '/unstick'} for the structural fix"
|
|
1454
|
+
)
|
|
1455
|
+
return LoopDecision(
|
|
1456
|
+
action=_STOP,
|
|
1457
|
+
next_state=base,
|
|
1458
|
+
stop_reason=StopReason.BLOCKED_REDISPATCH_INVARIANT,
|
|
1459
|
+
surface=True,
|
|
1460
|
+
reason=(
|
|
1461
|
+
f"BLOCKED on {outcome.blocked_cause} ({_info.label}) — a "
|
|
1462
|
+
f"re-dispatch-invariant cause a /replan cannot clear; stop on "
|
|
1463
|
+
f"the first occurrence rather than spinning /replan to the "
|
|
1464
|
+
f"FQ-452 cap. Route: {_route}."
|
|
1465
|
+
),
|
|
1466
|
+
)
|
|
1467
|
+
|
|
1468
|
+
# FQ-452 — the non-converging-spin breaker. A STALE-STAMP or BLOCKED gate
|
|
1469
|
+
# routes to /replan (under hard) or an inline reconcile (under soft/drive),
|
|
1470
|
+
# but when the root cause is plan-meta `remaining:`-list drift the §1.5
|
|
1471
|
+
# skip-gate never reconciles, /replan exits UNPRODUCTIVE and the very next
|
|
1472
|
+
# /dispatch re-derives the same 0-live gate — forever. Count consecutive
|
|
1473
|
+
# STALE-STAMP/BLOCKED gates that DON'T recover; on the Kth, refuse to spin
|
|
1474
|
+
# another /replan into the same unreconciled list and STOP so the operator's
|
|
1475
|
+
# /replan (now carrying the FQ-452 unconditional remaining-reconcile) runs
|
|
1476
|
+
# once and clears it. A LIVE/DRAIN/RACE verdict means the lane moved off the
|
|
1477
|
+
# stale-stamp cause → reset the streak. The streak deliberately SURVIVES the
|
|
1478
|
+
# intervening REPLAN_DONE (handled in 5b, which never touches it) — that is
|
|
1479
|
+
# what lets three dispatch→/replan→dispatch cycles accumulate to the cap.
|
|
1480
|
+
is_stale_stamp_class = outcome.verdict in (Verdict.STALE_STAMP, Verdict.BLOCKED)
|
|
1481
|
+
if is_stale_stamp_class:
|
|
1482
|
+
stale_streak, tripped = _breaker_fail(
|
|
1483
|
+
base.consecutive_stale_stamp, base.max_stale_stamp
|
|
1484
|
+
)
|
|
1485
|
+
base = replace(base, consecutive_stale_stamp=stale_streak)
|
|
1486
|
+
if tripped:
|
|
1487
|
+
return LoopDecision(
|
|
1488
|
+
action=_STOP,
|
|
1489
|
+
next_state=base,
|
|
1490
|
+
stop_reason=StopReason.STALE_STAMP_UNRECONCILED,
|
|
1491
|
+
surface=True,
|
|
1492
|
+
reason=(
|
|
1493
|
+
f"{stale_streak} consecutive {outcome.verdict.value} gates "
|
|
1494
|
+
f"that /replan did not reconcile — the picker keeps deriving "
|
|
1495
|
+
f"0-live from a stale plan-meta `remaining:` list; stop and "
|
|
1496
|
+
f"run a /replan that reconciles the list (plan-meta-gardening) "
|
|
1497
|
+
f"rather than spinning another /replan into the same drift"
|
|
1498
|
+
),
|
|
1499
|
+
)
|
|
1500
|
+
else:
|
|
1501
|
+
# LIVE / DRAIN / RACE — the lane moved off the stale-stamp cause.
|
|
1502
|
+
base = replace(base, consecutive_stale_stamp=0)
|
|
1503
|
+
|
|
1504
|
+
# QWD benign-drain — a non-DRAIN gate verdict (LIVE / STALE_STAMP / BLOCKED /
|
|
1505
|
+
# RACE) means the lane is NOT in the benign genuinely-drained spin: clear the
|
|
1506
|
+
# unproductive-replan-drain streak + the prior-DRAIN carry. A DRAIN verdict is
|
|
1507
|
+
# the spin's own signal, so it must NOT reset here — its handling (count-check
|
|
1508
|
+
# + arm the carry) lives in the `counts_toward_drain` branch below.
|
|
1509
|
+
if outcome.verdict is not Verdict.DRAIN:
|
|
1510
|
+
base = replace(
|
|
1511
|
+
base,
|
|
1512
|
+
consecutive_unproductive_replan_drains=0,
|
|
1513
|
+
last_gate_was_drain=False,
|
|
1514
|
+
)
|
|
1515
|
+
|
|
1516
|
+
# soft/drive can return next_mode="stop" (a true DRAIN or a BLOCKED) — the
|
|
1517
|
+
# gate policy already decided the loop stops; name the StopReason from the
|
|
1518
|
+
# verdict and pass `surface` through.
|
|
1519
|
+
if action.next_mode == "stop":
|
|
1520
|
+
stop_reason = (
|
|
1521
|
+
StopReason.DRAIN if outcome.verdict is Verdict.DRAIN else StopReason.BLOCKED
|
|
1522
|
+
)
|
|
1523
|
+
return LoopDecision(
|
|
1524
|
+
action=_STOP,
|
|
1525
|
+
next_state=base,
|
|
1526
|
+
stop_reason=stop_reason,
|
|
1527
|
+
surface=action.surface,
|
|
1528
|
+
reason=action.reason,
|
|
1529
|
+
)
|
|
1530
|
+
|
|
1531
|
+
# reconcile=True (a soft/drive STALE-STAMP) → re-dispatch after an inline
|
|
1532
|
+
# stamp-reconcile pass; never counts toward drained-twice.
|
|
1533
|
+
if action.reconcile:
|
|
1534
|
+
return _continue_or_cap(
|
|
1535
|
+
base,
|
|
1536
|
+
next_mode=action.next_mode, # "dispatch"
|
|
1537
|
+
reconcile=True,
|
|
1538
|
+
reason=action.reason,
|
|
1539
|
+
)
|
|
1540
|
+
|
|
1541
|
+
# next_mode == "replan" (hard on any non-LIVE verdict). Now apply the
|
|
1542
|
+
# drained-twice rule, keyed on `action.counts_toward_drain` — QWB7's rule
|
|
1543
|
+
# is DRAIN-only, so STALE-STAMP/BLOCKED route to /replan but never arm a stop.
|
|
1544
|
+
if action.counts_toward_drain:
|
|
1545
|
+
# verdict was DRAIN. QWD benign-drain breaker FIRST (FQ-509-sibling): if
|
|
1546
|
+
# `max_unproductive_replan_drains` UNPRODUCTIVE /replans have already
|
|
1547
|
+
# bracketed DRAINs on this lane, /replan is structurally unable to refill
|
|
1548
|
+
# it — the lane is benignly drained (every phase shipped/in-flight). This
|
|
1549
|
+
# DRAIN is the one that would route the (K+1)th /replan; STOP instead so
|
|
1550
|
+
# the loop does not spin DRAIN→/replan to the iteration cap. This precedes
|
|
1551
|
+
# the drained-twice check because a benign-drain streak only accumulates
|
|
1552
|
+
# when every intervening /replan was UNPRODUCTIVE — which means
|
|
1553
|
+
# `last_replan_drained` is False (it arms only on a PRODUCTIVE /replan), so
|
|
1554
|
+
# the two stops are mutually exclusive and the benign one is the correct
|
|
1555
|
+
# name for the all-unproductive spin.
|
|
1556
|
+
if (
|
|
1557
|
+
base.consecutive_unproductive_replan_drains
|
|
1558
|
+
>= base.max_unproductive_replan_drains
|
|
1559
|
+
):
|
|
1560
|
+
return LoopDecision(
|
|
1561
|
+
action=_STOP,
|
|
1562
|
+
next_state=base,
|
|
1563
|
+
stop_reason=StopReason.BENIGN_DRAIN,
|
|
1564
|
+
surface=True,
|
|
1565
|
+
reason=(
|
|
1566
|
+
f"DRAIN after {base.consecutive_unproductive_replan_drains} "
|
|
1567
|
+
f"consecutive UNPRODUCTIVE /replans — the lane is genuinely "
|
|
1568
|
+
f"drained but BENIGN (every phase already shipped/in-flight, "
|
|
1569
|
+
f"nothing to refill); /replan cannot refill it, so stop and "
|
|
1570
|
+
f"re-scope (or wait for the in-flight phases to settle) rather "
|
|
1571
|
+
f"than spinning another /replan to the iteration cap"
|
|
1572
|
+
),
|
|
1573
|
+
)
|
|
1574
|
+
# If the prior iteration was a PRODUCTIVE /replan that followed a DRAIN
|
|
1575
|
+
# (last_replan_drained — armed by 4b only when the /replan actually
|
|
1576
|
+
# refilled/gardened), /replan tried and could not refill → stop early. An
|
|
1577
|
+
# UNPRODUCTIVE /replan never armed the flag (FQ-240), so a DRAIN after a
|
|
1578
|
+
# /replan-that-did-nothing falls through to a fresh /replan route below
|
|
1579
|
+
# rather than a false drained-twice stop.
|
|
1580
|
+
if base.last_replan_drained:
|
|
1581
|
+
return LoopDecision(
|
|
1582
|
+
action=_STOP,
|
|
1583
|
+
next_state=base,
|
|
1584
|
+
stop_reason=StopReason.DRAINED_TWICE,
|
|
1585
|
+
surface=False,
|
|
1586
|
+
reason=(
|
|
1587
|
+
"DRAIN again after a productive /replan — /replan tried but "
|
|
1588
|
+
"could not refill, lane/portfolio genuinely drained"
|
|
1589
|
+
),
|
|
1590
|
+
)
|
|
1591
|
+
# The normal first drain: route to /replan, disarm the drained-twice flag
|
|
1592
|
+
# (it only becomes meaningful *after* the /replan completes — REPLAN_DONE
|
|
1593
|
+
# re-arms it) and ARM the benign-drain prior-DRAIN carry so an UNPRODUCTIVE
|
|
1594
|
+
# /replan that follows counts toward the benign-drain breaker.
|
|
1595
|
+
nxt = replace(base, last_replan_drained=False, last_gate_was_drain=True)
|
|
1596
|
+
return _continue_or_cap(
|
|
1597
|
+
nxt, next_mode="replan", reason=action.reason
|
|
1598
|
+
)
|
|
1599
|
+
|
|
1600
|
+
# STALE-STAMP / BLOCKED under `hard` — route to /replan, do NOT touch the
|
|
1601
|
+
# drained-twice flag. A stale-stamp/blocked gate can never arm a false stop.
|
|
1602
|
+
return _continue_or_cap(
|
|
1603
|
+
base, next_mode="replan", reason=action.reason
|
|
1604
|
+
)
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def _continue_or_cap(
    next_state: LoopState,
    *,
    next_mode: str,
    reason: str,
    reconcile: bool = False,
) -> LoopDecision:
    """Finalize a would-be continue, enforcing the iteration cap as the last gate.

    The caller has already concluded that the loop should keep going. If the
    iteration that just ran was the `max_iterations`th, there is no slot left
    for `next_mode`, so an ITERATION_CAP stop is returned with the state
    untouched. Otherwise a continue is returned whose state has `iteration`
    advanced by one, carrying `next_mode`, `reconcile` and `reason` through.

    The cap is applied here and only here, so a more specific stop reason
    (drained-twice, breaker, rate-limit, launch-fail) always wins over the bare
    cap: those paths return a `stop` directly and never reach this helper.
    """
    limit = next_state.max_iterations
    cap_reached = next_state.iteration >= limit

    if not cap_reached:
        # Room for another iteration: bump the counter on a copy of the state.
        return LoopDecision(
            action=_CONTINUE,
            next_state=replace(next_state, iteration=next_state.iteration + 1),
            next_mode=next_mode,
            reconcile=reconcile,
            reason=reason,
        )

    # The iteration that just ran was the last one allowed; stop without surfacing.
    return LoopDecision(
        action=_STOP,
        next_state=next_state,
        stop_reason=StopReason.ITERATION_CAP,
        surface=False,
        reason=f"reached max_iterations ({limit})",
    )
|
|
1638
|
+
|
|
1639
|
+
|
|
1640
|
+
# ---------------------------------------------------------------------------
|
|
1641
|
+
# Wait-marker budget (OC2 billing addendum, 2026-05-19).
|
|
1642
|
+
#
|
|
1643
|
+
# Every `claude -p` keep-alive marker is its own assistant turn that replays the
|
|
1644
|
+
# full system+skill+context out of cache. Session 4b4ff97c burned 252 markers /
|
|
1645
|
+
# ~26M cache-read tokens / ~$7.80 in one run (91% of the run's cache_read). The
|
|
1646
|
+
# SKILL-level prose caps (/dispatch 2-per-child, /dispatch-loop 4-per-run) are
|
|
1647
|
+
# prose the model must remember; this is the runtime lever — a pure decision the
|
|
1648
|
+
# loop can consult before emitting a marker, so a marker that won't earn its
|
|
1649
|
+
# cache-read cost is refused, not emitted.
|
|
1650
|
+
#
|
|
1651
|
+
# `headless_telemetry.py`'s `keepalive_poll` flag (fires at >=5 markers) is the
|
|
1652
|
+
# POST-HOC surface; this is its PRE-HOC decision-surface sibling. The default
|
|
1653
|
+
# `max_markers` here (4) matches the /dispatch-loop SKILL's 4-per-run prose cap,
|
|
1654
|
+
# so the runtime refusal lands one marker before the telemetry flag would fire.
|
|
1655
|
+
# ---------------------------------------------------------------------------
|
|
1656
|
+
|
|
1657
|
+
|
|
1658
|
+
@dataclass(frozen=True)
class WaitMarkerDecision:
    """Outcome of asking whether one more keep-alive wait-marker may be emitted.

    An immutable record of the allow/refuse bit, the marker count to carry
    into the *next* decision, and an operator-facing explanation.
    """

    # True to emit the marker, False to refuse it.
    allow: bool
    # Running count to pass to the next decision; incremented iff allowed.
    markers_emitted: int
    # Operator-facing text explaining the decision.
    reason: str
|
|
1670
|
+
|
|
1671
|
+
|
|
1672
|
+
def wait_marker_budget(
    markers_emitted: int,
    max_markers: int = 4,
) -> WaitMarkerDecision:
    """Decide whether one more keep-alive wait-marker fits inside the budget.

    PURE: the caller supplies the running marker count and receives back the
    allow/refuse bit together with the count to carry forward (incremented
    only when the marker is allowed).

    On refusal the loop is expected to stop holding the turn open with no-op
    Bash calls and wait on the existing Bash `<task-notification>` instead;
    OC1's deterministic orphan sweep is the safety net, so a refused marker
    cannot silently lose a child.

    `max_markers` defaults to 4, the /dispatch-loop SKILL's per-run prose cap,
    so the refusal fires one marker before `headless_telemetry.py`'s
    `keepalive_poll` flag (>=5) would. Markers past the budget are pure
    cache-replay cost (~$0.03-0.10 each) for no work.
    """
    over_budget = markers_emitted >= max_markers

    if not over_budget:
        # Allowed: report the count *including* this marker.
        emitted_after = markers_emitted + 1
        return WaitMarkerDecision(
            allow=True,
            markers_emitted=emitted_after,
            reason=f"wait-marker {emitted_after}/{max_markers} — turn held open",
        )

    # Refused: the carried count stays exactly as passed in.
    refusal = (
        f"wait-marker budget exhausted ({markers_emitted}/{max_markers}) "
        "— each further marker replays full context out of cache for no "
        "work; wait on the Bash task-notification, OC1's orphan sweep "
        "is the safety net"
    )
    return WaitMarkerDecision(
        allow=False,
        markers_emitted=markers_emitted,
        reason=refusal,
    )
|
|
1707
|
+
|
|
1708
|
+
|
|
1709
|
+
# The generalized verdict over this same count — `noop_streak.classify` — re-aims the
|
|
1710
|
+
# arithmetic above off "markers emitted" onto "no-op turns since the last forward
|
|
1711
|
+
# delta" (docs/259 §Follow-up 1). It is a SIBLING module, not a call from here (no new
|
|
1712
|
+
# import edge into loop_decide); a test pins that the two agree on the allow/refuse bit.
|
|
1713
|
+
|
|
1714
|
+
|
|
1715
|
+
def propose_tighter_budget(observed_markers: int, current_max: int = 4) -> int:
    """Suggest a tighter wait-marker cap from an OBSERVED keep-alive burst. PURE.

    This closes the audit→budget loop (docs/259 §Follow-up 3): a
    `keepalive_poll` finding from `trajectory-audit` saw `observed_markers`
    markers in one session under a cap of `current_max`, and this computes a
    cap under which `wait_marker_budget` would have refused sooner. The result
    is ADVISORY: nothing here feeds it back into `wait_marker_budget`; a human
    or host consumes the number.

    The value is `max(1, min(current_max, observed_markers - 1))`:

    * `observed_markers - 1` refuses one marker before the burst's length, so
      a repeat of the same wait would land under budget.
    * Clamping to `current_max` keeps the proposal monotone-down: it never
      suggests a LOOSER cap than the one in force. A huge burst (252 markers
      under a 4-cap → 4) therefore yields no tightening, because 252 ≫ 4
      means the cap was not ENFORCED, not that 4 is too loose; the caller
      surfaces the `observed > current_max` alarm separately. The clamp only
      produces a genuinely tighter number when the burst sat within the
      current cap (e.g. observed 5 under a current 8 → 4).
    * Flooring at 1 avoids a 0 budget, which would refuse the FIRST
      legitimate wait-marker outright.
    """
    one_before_burst = observed_markers - 1

    # Never looser than the cap already in force.
    if one_before_burst < current_max:
        proposal = one_before_burst
    else:
        proposal = current_max

    # Never below one marker.
    if proposal > 1:
        return proposal
    return 1
|