dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/decisions.py
ADDED
|
@@ -0,0 +1,1274 @@
|
|
|
1
|
+
"""The operator-decision queue — *what is waiting on a human right now*, as a projection.
|
|
2
|
+
|
|
3
|
+
DOS's single most common dispatch outcome is a no-pick that needs a decision:
|
|
4
|
+
a WEDGE verdict, an arbiter refusal, a preflight refusal, an open soak gate.
|
|
5
|
+
But those decisions are **ephemeral and scattered** — each is emitted once, as
|
|
6
|
+
prose, into one of several on-disk surfaces, then lost. The `dos` CLI is
|
|
7
|
+
one-shot, so an operator has no "what needs me right now" view and no way to act
|
|
8
|
+
on one. (The DOM plan names the same pain: "the most common dispatch outcome is
|
|
9
|
+
the least observable" — DOM defines what the *tokens* mean; this module is the
|
|
10
|
+
queue of live *instances* and the way into each one.)
|
|
11
|
+
|
|
12
|
+
This module is a **read-only projection**, never a store (DOM Design-rules 1 & 4,
|
|
13
|
+
and the `dos.reasons` thesis): it stores nothing of its own. `collect_decisions`
|
|
14
|
+
joins four sources that already persist their decisions —
|
|
15
|
+
|
|
16
|
+
arbiter refusals <- lane_journal.jsonl `OP_REFUSE` entries (already journaled)
|
|
17
|
+
WEDGE / gate surfaces <- output/next-up/.verdict-<tag>.json envelopes
|
|
18
|
+
preflight refusals <- a verdict envelope's refusal shape (FQ-410)
|
|
19
|
+
soak / time gates <- docs/_soaks/index.yaml open windows
|
|
20
|
+
|
|
21
|
+
— normalizes each into one `Decision`, and renders. The detail/action text is a
|
|
22
|
+
projection of the active `ReasonRegistry` (`config.reasons`), exactly as
|
|
23
|
+
`dos man` projects it. Delete this module and you lose the reader, not any data.
|
|
24
|
+
|
|
25
|
+
**Resolver kinds — the LLM-as-judge intersection.** A decision is not always
|
|
26
|
+
"waiting on a human." DOS already has judges: `picker_oracle` is a deterministic
|
|
27
|
+
judge (it cross-checks a WEDGE's self-reported cause against on-disk truth and
|
|
28
|
+
emits `oracle_disagrees`), and `loop_decide` carries a `packet_judge` LLM
|
|
29
|
+
verdict. So each `Decision` carries a `resolver_kind`:
|
|
30
|
+
|
|
31
|
+
ORACLE — a deterministic oracle can cross-check / may auto-clear it
|
|
32
|
+
(a STALE_CLAIM / INFLIGHT reason picker_oracle verifies).
|
|
33
|
+
JUDGE — an LLM adjudicator could rule before a human spends attention.
|
|
34
|
+
HUMAN — a genuine operator call (answer the open decision, `--force`).
|
|
35
|
+
|
|
36
|
+
`collect_decisions(..., resolver="HUMAN")` (the default) returns only the rows
|
|
37
|
+
that need *you*; `resolver=None` returns everything so the operator can see what
|
|
38
|
+
a judge/oracle already handled or could handle.
|
|
39
|
+
|
|
40
|
+
Pure-stdlib, read-only — never writes, never mutates a registry. The readers are
|
|
41
|
+
the only I/O; the normalization + ranking + `next_steps` mapping are pure and are
|
|
42
|
+
the unit-test surface (mirrors `picker_oracle.classify` / `timeline.build_timeline`).
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
from __future__ import annotations
|
|
46
|
+
|
|
47
|
+
import datetime as dt
|
|
48
|
+
import enum
|
|
49
|
+
import io
|
|
50
|
+
import json
|
|
51
|
+
import re
|
|
52
|
+
import sys
|
|
53
|
+
from dataclasses import dataclass, field, replace
|
|
54
|
+
from pathlib import Path
|
|
55
|
+
from typing import Any
|
|
56
|
+
|
|
57
|
+
# Import-time side effect: make `sys.stdout` emit UTF-8.
# Presumably this keeps non-ASCII glyphs in rendered output from raising on a
# console whose default encoding cannot represent them — TODO confirm.
if hasattr(sys.stdout, "reconfigure"):
    try:
        # Switch the existing stream to UTF-8 in place; unencodable characters
        # are substituted (errors="replace") rather than raising.
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[union-attr]
    except Exception:  # pragma: no cover
        # Best-effort: a stream that refuses reconfiguration is left as it is.
        pass
elif not isinstance(sys.stdout, io.TextIOWrapper):  # pragma: no cover
    # No `reconfigure` and not already a TextIOWrapper: wrap the underlying
    # binary buffer in a UTF-8 text layer and install it as stdout.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
|
|
64
|
+
|
|
65
|
+
from dos import config as _config
|
|
66
|
+
from dos import lane_journal
|
|
67
|
+
from dos import wedge_reason
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
# The closed vocabularies — kind (where the decision came from) and resolver
|
|
72
|
+
# (who can clear it). Both `str`-valued so they round-trip through `--json`
|
|
73
|
+
# without a lookup table (mirrors `gate_classify.Verdict` / `OutcomeKind`).
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class DecisionKind(str, enum.Enum):
    """Closed vocabulary naming the kernel surface a decision came from.

    Members are `str`-valued, and `str(member)` yields the bare value rather
    than the qualified enum name.
    """

    # arbitrate() declined to grant a lane lease.
    ARBITER_REFUSE = "ARBITER_REFUSE"
    # A /next-up verdict envelope that recorded a no-pick.
    WEDGE = "WEDGE"
    # build_context() declined to launch a packet.
    PREFLIGHT_REFUSE = "PREFLIGHT_REFUSE"
    # A time-triggered OPERATOR_GATE soak window.
    SOAK_GATE = "SOAK_GATE"
    # An OP_HALT: a watchdog proposed stopping a SPINNING/hung run
    # (docs/82 3b, docs/101 §4).
    LIVENESS = "LIVENESS"

    def __str__(self) -> str:  # pragma: no cover - trivial
        return str(self.value)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ResolverKind(str, enum.Enum):
    """Closed vocabulary for who can clear a decision (the LLM-as-judge axis).

    The value is derived by `_resolver_for` from the decision kind and the
    reason's category. The queue filters to HUMAN by default ("what needs
    me"); `--all` additionally surfaces the ORACLE/JUDGE-resolvable rows.
    `str(member)` yields the bare value.
    """

    # A deterministic oracle (picker_oracle) can cross-check / auto-clear it.
    ORACLE = "ORACLE"
    # An LLM adjudicator could rule before a human has to look.
    JUDGE = "JUDGE"
    # A genuine operator call (answer the decision, --force).
    HUMAN = "HUMAN"
    # Nobody: the refusal self-resolves. It is a lane refusal whose lever is
    # "wait / re-pick a disjoint lane", which the dispatch loop already does
    # automatically (route-replan-nolivepicks / backoff-capacity /
    # reroute-sibling): "already held", a class/loop budget, a soft-ratio or
    # empty-tree overlap. That is healthy mutex contention, not a decision, so
    # the default queue hides these rows and `--all` shows them.
    BACKPRESSURE = "BACKPRESSURE"

    def __str__(self) -> str:  # pragma: no cover - trivial
        return str(self.value)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Categories whose decisions a deterministic oracle can cross-check. These mirror
|
|
116
|
+
# `picker_oracle.NoPickCause` values: STALE_CLAIM / INFLIGHT-shaped reasons are
|
|
117
|
+
# exactly what `picker_oracle._check_stale_claim_real` adjudicates, so they are
|
|
118
|
+
# ORACLE-resolvable (the oracle may confirm the claim is fresh and clear it). A
|
|
119
|
+
# TRUE_DRAIN is also oracle-checkable (is the backlog really empty?). An
|
|
120
|
+
# OPERATOR_GATE is, by name, the human's call.
|
|
121
|
+
# Consulted by `_resolver_for`: a reason whose registry category is a member of
# this set is classified as ResolverKind.ORACLE.
_ORACLE_CATEGORIES = frozenset({"STALE_CLAIM", "TRUE_DRAIN"})
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass(frozen=True)
class Decision:
    """A single row of the operator-decision queue.

    An immutable value normalized from one of the decision sources.
    `reason_token` holds the closed `WedgeReason`/registry token when the
    source supplied one (letting the detail pane project its `ReasonSpec`) and
    is "" otherwise (e.g. a bare arbiter refusal). `run_id` is the CID
    correlation key when the source recorded one, which makes "this decision
    and everything it touched" a join.
    """

    # Which kernel surface emitted the decision.
    kind: DecisionKind
    # Who can clear it (oracle / judge / human / nobody).
    resolver_kind: ResolverKind
    lane: str
    # A WedgeReason/registry token, or "" when the source had none.
    reason_token: str
    # One-line, operator-facing prose reason.
    reason_text: str
    # CID correlation id, or "" when the source had none.
    run_id: str
    # Age of the decision in seconds; None when it carried no timestamp.
    age_seconds: int | None
    # Where the decision was read from (used for drill-in).
    source_path: str
    evidence: tuple[str, ...] = field(default_factory=tuple)
    # Chained-run directory name (the `dos judge` key), when known.
    run_ts: str = ""
    # Host-supplied stop command of a LIVENESS halt proposal, surfaced as the
    # paste-to-stop emit-and-exit action.
    proposed_command: str = ""
    # Opaque stop handle of a LIVENESS halt proposal, if any.
    handle: str = ""
    # Number of identical rows this one stands in for (see `_dedup`): a journal
    # that recorded the SAME refusal N times collapses to one row with
    # dup_count=N.
    dup_count: int = 1

    def to_dict(self) -> dict:
        """Return a plain-dict form of this row.

        The two enum fields are emitted as their `.value`, and `evidence` is
        emitted as a list.
        """
        payload: dict = {
            "kind": self.kind.value,
            "resolver_kind": self.resolver_kind.value,
        }
        # Scalar fields are copied through unchanged, in the emitted key order.
        for name in ("lane", "reason_token", "reason_text", "run_id", "run_ts",
                     "age_seconds", "source_path"):
            payload[name] = getattr(self, name)
        payload["evidence"] = list(self.evidence)
        for name in ("proposed_command", "handle", "dup_count"):
            payload[name] = getattr(self, name)
        return payload
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Resolver derivation — the LLM-as-judge classification.
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _resolver_for(kind: DecisionKind, reason_token: str, config,
                  reason_text: str = "") -> ResolverKind:
    """Classify who can resolve a decision from its kind, token and prose.

    The checks run in this order; the first that applies wins:

      1. A SOAK_GATE is HUMAN — it waits on the operator's
         come-back-when-the-window-closes call (an OPERATOR_GATE category).
      2. An ARBITER_REFUSE / PREFLIGHT_REFUSE whose PROSE has a backpressure
         shape (per `_is_backpressure_refusal`: "already held", a class/loop
         budget, a soft-ratio or empty-tree overlap) is BACKPRESSURE. The
         prose is inspected because these refusals carry no closed token; an
         exact-glob hard collision is rejected by `_is_backpressure_refusal`
         and therefore continues to the rules below.
      3. Any decision with an empty `reason_token` is HUMAN.
      4. Otherwise the token's category is looked up via
         `config.reasons.category_for`: a category in `_ORACLE_CATEGORIES`
         (STALE_CLAIM / TRUE_DRAIN) is ORACLE, "OPERATOR_GATE" is HUMAN, and
         every other category (MISROUTE, UNCLASSIFIED, ...) is JUDGE — an LLM
         adjudicator is the next reader when no deterministic oracle owns it.

    Args:
        kind: The surface that emitted the decision.
        reason_token: The closed reason token, or "" when there is none.
        config: Object exposing `reasons.category_for(token)`.
        reason_text: The operator-facing prose; only read for rule 2.

    Returns:
        The `ResolverKind` selected by the rules above.
    """
    if kind is DecisionKind.SOAK_GATE:
        return ResolverKind.HUMAN

    # Backpressure is decided before the "no token => HUMAN" default so that a
    # routine "already held" refusal does not surface as a phantom operator
    # decision (the junk-drawer fix, Layer 1). Only the lane/packet refusal
    # kinds qualify; a WEDGE is never backpressure.
    lane_refusal_kinds = (DecisionKind.ARBITER_REFUSE, DecisionKind.PREFLIGHT_REFUSE)
    if kind in lane_refusal_kinds and _is_backpressure_refusal(reason_text):
        return ResolverKind.BACKPRESSURE

    if reason_token:
        category = config.reasons.category_for(reason_token)
        if category in _ORACLE_CATEGORIES:
            return ResolverKind.ORACLE
        if category != "OPERATOR_GATE":
            # No deterministic owner for this category: an LLM judge is the
            # cheapest adjudicator to try before pulling in a human.
            return ResolverKind.JUDGE

    # Either there was no closed token, or the category is OPERATOR_GATE: both
    # are the operator's call.
    return ResolverKind.HUMAN
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# A closed `reason_class` token is short, single-word, UPPER_SNAKE — never a
|
|
221
|
+
# prose sentence. The lane journal's `reason_class` field, however, is sometimes
|
|
222
|
+
# written with a whole human sentence (a host emitting an OP_REFUSE with the
|
|
223
|
+
# prose reason where the closed token belongs), and a verdict envelope can do the
|
|
224
|
+
# same. If we lifted that prose into `reason_token`, `next_steps` would emit
|
|
225
|
+
# nonsense like `dos man wedge EVERY CONCURRENCY-FREE LANE …` or
|
|
226
|
+
# `python -m dos.drivers.llm_judge EVERY CONCURRENCY-FREE LANE …`. So we admit a
|
|
227
|
+
# value as a token only when it LOOKS like one: a member of the active registry
|
|
228
|
+
# (any case — the registry normalizes), or a clean UPPER_SNAKE shape (forward-
|
|
229
|
+
# compatible with a not-yet-declared token, the same posture as
|
|
230
|
+
# `ReasonRegistry.category_for`). Prose falls through to `""` and stays in
|
|
231
|
+
# `reason_text` — the prose is still shown, it just doesn't pretend to be a token.
|
|
232
|
+
_TOKEN_SHAPE = re.compile(r"^[A-Z][A-Z0-9_]{1,63}$")
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _clean_token(raw: str | None, config) -> str:
|
|
236
|
+
"""Return `raw` as an UPPER-cased closed token, or `""` if it is not one.
|
|
237
|
+
|
|
238
|
+
Pure. Admits a value iff it is a registry member (case-insensitive) or has the
|
|
239
|
+
clean UPPER_SNAKE token shape; otherwise (prose, whitespace, empty) → `""`.
|
|
240
|
+
"""
|
|
241
|
+
if not raw:
|
|
242
|
+
return ""
|
|
243
|
+
candidate = raw.strip().upper()
|
|
244
|
+
if not candidate:
|
|
245
|
+
return ""
|
|
246
|
+
if config.reasons.get(candidate) is not None:
|
|
247
|
+
return candidate
|
|
248
|
+
if _TOKEN_SHAPE.match(candidate):
|
|
249
|
+
return candidate
|
|
250
|
+
return ""
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# A curated-cluster scope string is a pre-dos/119 relic. The de-clustering
|
|
254
|
+
# (2026-06-02, operator directive "delete the cluster concept, it's bad" — see
|
|
255
|
+
# `_job_policy` `concurrent`/`autopick` are empty) made a lane a single DYNAMIC
|
|
256
|
+
# handle whose concurrency is gated by tree-disjointness, never a curated set. But
|
|
257
|
+
# an OLD verdict envelope (and a host that still writes one) can carry a scope like
|
|
258
|
+
# "apply cluster (AFR, ALO, ANC, APC, CHR, LF, MLP, TFO)"
|
|
259
|
+
# or a slash-pathy "a/b/apply". Lifting that verbatim into `Decision.lane` makes
|
|
260
|
+
# `next_steps` emit an UNRESOLVABLE action — `/replan --scope apply cluster (AFR,
|
|
261
|
+
# ALO, …)` — that the host can only degrade to auto-pick (job finding 2026-06-08:
|
|
262
|
+
# the 8-member apply-cluster row with a broken [r]replan). The fix is to normalize
|
|
263
|
+
# the lane to its dynamic handle at read time: strip a `cluster (…)` / `(…)`
|
|
264
|
+
# decoration and any slash path down to the bare leading handle, so the surfaced
|
|
265
|
+
# action is a resolvable `--scope <handle>`. Pure; mirrors `_clean_token`'s posture
|
|
266
|
+
# (admit a clean shape, fall back conservatively). A scope that is ALREADY a bare
|
|
267
|
+
# handle round-trips unchanged.
|
|
268
|
+
_CLUSTER_DECORATION_RE = re.compile(r"\s*\bcluster\b.*$", re.IGNORECASE)
|
|
269
|
+
_PAREN_TAIL_RE = re.compile(r"\s*\(.*$")
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _dynamic_lane_handle(raw: str) -> str:
|
|
273
|
+
"""Normalize a scope/lane string to its bare dynamic lane handle (dos/119).
|
|
274
|
+
|
|
275
|
+
Pure. Strips a curated-cluster relic shape to the leading handle so the
|
|
276
|
+
decision's `/replan --scope <lane>` action is resolvable:
|
|
277
|
+
|
|
278
|
+
"apply cluster (AFR, ALO, ANC, …)" -> "apply"
|
|
279
|
+
"apply (AFR, ALO)" -> "apply"
|
|
280
|
+
"a/b/apply" -> "apply"
|
|
281
|
+
"apply" -> "apply" (already a handle)
|
|
282
|
+
|
|
283
|
+
A string with no decoration round-trips unchanged. An empty/whitespace input
|
|
284
|
+
returns "". The result is the last path segment after de-clustering, so a host
|
|
285
|
+
that namespaces a scope with `/` still resolves to the lane name.
|
|
286
|
+
"""
|
|
287
|
+
s = (raw or "").strip()
|
|
288
|
+
if not s:
|
|
289
|
+
return ""
|
|
290
|
+
# Drop a `cluster (...)` tail first (the named relic), then any bare `(...)`
|
|
291
|
+
# tail (a parenthesized member list with no "cluster" word).
|
|
292
|
+
s = _CLUSTER_DECORATION_RE.sub("", s)
|
|
293
|
+
s = _PAREN_TAIL_RE.sub("", s)
|
|
294
|
+
# Reduce a slash path to its final segment (mirrors dispatch_top `_lane_from_env`).
|
|
295
|
+
s = s.split("/")[-1].strip()
|
|
296
|
+
return s
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
# A reason-string a refusing writer leaves on a *mislabeled* op (docs/139). The
|
|
300
|
+
# kernel's own arbiter records a denial as `OP_REFUSE`, but an out-of-tree writer
|
|
301
|
+
# (a host dispatch loop, a benchmark fixture) sometimes records the refused lease
|
|
302
|
+
# as a plain `OP_ACQUIRE` carrying a `REFUSED: …` reason instead. The reader
|
|
303
|
+
# cannot trust the self-labeled op, so it reads the *reason* — the more honest
|
|
304
|
+
# signal (the docs/103 "distrust the self-report, read the effect" move, applied
|
|
305
|
+
# to the op field itself). Matched at the START of the reason, case-insensitive,
|
|
306
|
+
# tolerant of leading whitespace.
|
|
307
|
+
_ACQUIRE_REFUSED_RE = re.compile(r"^\s*REFUSED\b", re.IGNORECASE)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _acquire_refusal_reason(entry: dict) -> str:
|
|
311
|
+
"""Return the refusal reason on an ACQUIRE row that is *really* a refusal, else "".
|
|
312
|
+
|
|
313
|
+
Pure. An ACQUIRE whose `reason` (or nested `lease.reason`) begins with
|
|
314
|
+
`REFUSED` is a denial mislabeled as an acquire (docs/139). We return the
|
|
315
|
+
reason prose so the caller can lift it into a degraded ARBITER_REFUSE row; a
|
|
316
|
+
genuine successful acquire (any other reason, or none) returns "" and is NOT
|
|
317
|
+
surfaced as a decision — the queue must not fill with every granted lease.
|
|
318
|
+
"""
|
|
319
|
+
reason = entry.get("reason")
|
|
320
|
+
if not reason:
|
|
321
|
+
lease = entry.get("lease")
|
|
322
|
+
if isinstance(lease, dict):
|
|
323
|
+
reason = lease.get("reason")
|
|
324
|
+
reason = str(reason or "")
|
|
325
|
+
return reason if _ACQUIRE_REFUSED_RE.match(reason) else ""
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# ---------------------------------------------------------------------------
|
|
329
|
+
# Backpressure — a refusal whose lever is "wait / re-pick", not "decide".
|
|
330
|
+
# ---------------------------------------------------------------------------
|
|
331
|
+
#
|
|
332
|
+
# Most arbiter refusals are NOT operator decisions: they are healthy mutex
|
|
333
|
+
# contention the dispatch loop already resolves on its own (it waits, backs off,
|
|
334
|
+
# or re-picks a disjoint lane — the route-replan-nolivepicks / backoff-capacity /
|
|
335
|
+
# reroute-sibling branches). Surfacing each as a HUMAN decision is the junk-drawer
|
|
336
|
+
# bug (job finding 2026-06-07): a ceiling bump multiplied these and 7 of 8 "pending
|
|
337
|
+
# decisions" were backpressure. The kernel's OWN refuse prose says so — e.g. the
|
|
338
|
+
# CLASS_BUDGET refuse comment: "the lever is 'wait for a slot to free' — NOT /replan
|
|
339
|
+
# ... The work exists and the regions are fine; the class is just full."
|
|
340
|
+
#
|
|
341
|
+
# We classify by matching the kernel/host refuse strings VERBATIM (grounded in
|
|
342
|
+
# arbiter.py / admission.py / the host fanout_state.py, not guessed — same posture
|
|
343
|
+
# as `_ACQUIRE_REFUSED_RE`). A backpressure refusal is given `resolver_kind =
|
|
344
|
+
# BACKPRESSURE`, so the default `resolver="HUMAN"` queue hides it and `--all` shows
|
|
345
|
+
# it. NB: an exact-glob HARD collision is deliberately NOT backpressure — two lanes
|
|
346
|
+
# claiming the SAME file is a structural lane-definition fact worth attention when it
|
|
347
|
+
# RECURS (the Layer-2 gardener feedback loop, docs/_design/arbiter-refuse-root-
|
|
348
|
+
# issues-concept.md); a single instance still self-resolves via supersession, but it
|
|
349
|
+
# stays a HUMAN row so a persistent collision is visible rather than silently hidden.
|
|
350
|
+
_BACKPRESSURE_REFUSE_RES = (
|
|
351
|
+
# "lane 'X' is already held by a live loop" — wait for the holder to release.
|
|
352
|
+
re.compile(r"already held by a live loop", re.IGNORECASE),
|
|
353
|
+
# GLOBAL_LOOP_CEILING — wait for a loop slot (host fanout_state.py).
|
|
354
|
+
re.compile(r"\bGLOBAL_LOOP_CEILING\b"),
|
|
355
|
+
# CLASS_BUDGET_EXHAUSTED — wait for a class slot (arbiter.py).
|
|
356
|
+
re.compile(r"\bCLASS_BUDGET", re.IGNORECASE),
|
|
357
|
+
# A soft RATIO overlap ("overlap too large (N/M = ..% ... threshold ..%)") —
|
|
358
|
+
# re-pick a disjoint lane; NOT a same-file hard collision. (admission.py via
|
|
359
|
+
# lane_overlap.) Excludes the exact-glob string by construction.
|
|
360
|
+
re.compile(r"overlap too large", re.IGNORECASE),
|
|
361
|
+
# An EMPTY-tree refusal ("unknown blast radius") — transient; the lane gets a
|
|
362
|
+
# tree on the next pick. Re-pick, don't decide. (admission.py)
|
|
363
|
+
re.compile(r"unknown blast radius", re.IGNORECASE),
|
|
364
|
+
# Measured-capacity saturation (proposed Layer-3 token) — wait for a lane.
|
|
365
|
+
re.compile(r"\bLANE_CAPACITY_SATURATED\b"),
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
# An exact-glob HARD collision is the one refusal shape that is NOT backpressure
|
|
369
|
+
# (it is a structural lane-definition signal). Matched so it can VETO a
|
|
370
|
+
# backpressure classification even if another pattern loosely matched.
|
|
371
|
+
_EXACT_GLOB_COLLISION_RE = re.compile(r"exact-glob overlap", re.IGNORECASE)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _is_backpressure_refusal(reason_text: str) -> bool:
|
|
375
|
+
"""True when a refusal self-resolves (wait / re-pick), i.e. is NOT a decision.
|
|
376
|
+
|
|
377
|
+
Pure. Matches the kernel/host refuse strings verbatim. An exact-glob hard
|
|
378
|
+
collision is explicitly excluded — it stays a HUMAN row so a recurring
|
|
379
|
+
same-file lane overlap is visible (the actionable signal), not hidden.
|
|
380
|
+
"""
|
|
381
|
+
text = reason_text or ""
|
|
382
|
+
if _EXACT_GLOB_COLLISION_RE.search(text):
|
|
383
|
+
return False
|
|
384
|
+
return any(rx.search(text) for rx in _BACKPRESSURE_REFUSE_RES)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# ---------------------------------------------------------------------------
|
|
388
|
+
# Supersession — an arbiter refusal is RESOLVED the moment its contention clears.
|
|
389
|
+
# ---------------------------------------------------------------------------
|
|
390
|
+
#
|
|
391
|
+
# An ARBITER_REFUSE is not a point-in-time artifact whose staleness is its age
|
|
392
|
+
# (the recency filter, finding #476, handles a verdict envelope's age). A refusal
|
|
393
|
+
# is a refusal *relative to a live contended lane*: "lane L cannot run because
|
|
394
|
+
# lane B is live" / "lane L is already held". The MOMENT B (or the prior holder of
|
|
395
|
+
# L) releases or is scavenged, that refusal is **resolved** — re-requesting L now
|
|
396
|
+
# would be admitted. The journal RECORDS those resolution events (RELEASE /
|
|
397
|
+
# SCAVENGE), but `_from_lane_journal` historically lifted every REFUSE into a
|
|
398
|
+
# standing decision and consulted only age — so a 40-min-old refusal whose blocker
|
|
399
|
+
# died 39 min ago still showed as "pending operator decision" forever, even with
|
|
400
|
+
# zero live leases (job finding: the 8-row arbiter-refuse junk-drawer, 2026-06-07).
|
|
401
|
+
#
|
|
402
|
+
# The fix is structural, not age-based: a REFUSE at seq S for lane L (blocked by
|
|
403
|
+
# the lanes in its `blocking_trees`) is superseded by any LATER journal entry
|
|
404
|
+
# (seq > S) that frees the contention — a RELEASE/SCAVENGE of L or of any blocking
|
|
405
|
+
# lane, or a genuine (non-REFUSED) ACQUIRE of L (it got in later, so the refusal
|
|
406
|
+
# is moot). This reads the SAME journal the refusals come from; it adds no new
|
|
407
|
+
# source and no new config knob (mirrors the `_is_stale` posture — drop a decision
|
|
408
|
+
# that is no longer live, keep everything when the signal is absent).
|
|
409
|
+
|
|
410
|
+
# Ops that FREE a lane's contention when they appear AFTER a refusal. RELEASE and
|
|
411
|
+
# SCAVENGE both vacate a held lane; ADOPT/RECONCILE re-seat ownership cleanly. A
|
|
412
|
+
# later ACQUIRE is handled separately (it must be a GENUINE acquire, not a refusal
|
|
413
|
+
# mislabeled as one — docs/139).
|
|
414
|
+
# NOTE(review): only RELEASE and SCAVENGE are members of this set; the comment
# above also mentions ADOPT/RECONCILE — confirm whether those ops are meant to
# count as freeing a lane.
_LANE_FREEING_OPS = frozenset({
    lane_journal.OP_RELEASE,
    lane_journal.OP_SCAVENGE,
})
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _refuse_blocking_lanes(entry: dict) -> set[str]:
|
|
421
|
+
"""The set of lanes a REFUSE entry is blocked by — the contention to watch.
|
|
422
|
+
|
|
423
|
+
Pure. Prefers the structured `blocking_trees` dict (keyed by colliding lane
|
|
424
|
+
name), the authoritative signal the kernel writes on an OP_REFUSE. Always
|
|
425
|
+
includes the refused lane itself: an "already held by a live loop" refusal
|
|
426
|
+
names no other lane in `blocking_trees`, and is resolved when the prior holder
|
|
427
|
+
of *that same lane* frees it — so a later RELEASE/SCAVENGE of L clears it too.
|
|
428
|
+
"""
|
|
429
|
+
lanes: set[str] = set()
|
|
430
|
+
bt = entry.get("blocking_trees")
|
|
431
|
+
if isinstance(bt, dict):
|
|
432
|
+
lanes.update(str(k) for k in bt.keys() if k)
|
|
433
|
+
own = entry.get("lane")
|
|
434
|
+
if own:
|
|
435
|
+
lanes.add(str(own))
|
|
436
|
+
return lanes
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def _superseded_refuse_seqs(entries: list[dict]) -> set[int]:
    """Seqs of REFUSE entries whose contention a LATER journal event resolved.

    Pure. A refusal at seq S for lane L, blocked by lanes B, is superseded iff
    some entry with seq > S is either

      * a RELEASE/SCAVENGE (`_LANE_FREEING_OPS`) of a lane in B ∪ {L}, or
      * a genuine ACQUIRE of L itself (an ACQUIRE for which
        `_acquire_refusal_reason` returns a falsy value).

    A later ACQUIRE of a *blocking* lane does NOT supersede the refusal: the
    blocker being leased again does not free the refused lane. (The previous
    implementation scanned the acquire table for every watched lane, which
    contradicted this contract and could hide a still-live refusal.)

    Ordering uses the integer `seq` field of each entry, not list position.
    Entries without a usable integer `seq` (missing, non-int, or bool) are
    ignored on both sides: they neither supersede anything nor get reported as
    superseded, so a refusal that cannot be ordered is left for the caller's
    other filters.

    Both a first-class OP_REFUSE and an ACQUIRE whose reason marks it as a
    refusal are treated as refusals here.
    """

    def _seq_of(e: dict) -> int | None:
        s = e.get("seq")
        if isinstance(s, bool):  # bool is an int subclass — exclude it
            return None
        return s if isinstance(s, int) else None

    # Only the LATEST freeing / acquiring seq per lane matters: "some event
    # with seq > S exists" is equivalent to "the max event seq is > S".
    last_freed: dict[str, int] = {}
    last_acquired: dict[str, int] = {}

    def _note(table: dict[str, int], lane: str, seq: int) -> None:
        prev = table.get(lane)
        if prev is None or seq > prev:
            table[lane] = seq

    for e in entries:
        s = _seq_of(e)
        if s is None:
            continue
        op = e.get("op")
        lane = str(e.get("lane") or "")
        if not lane:
            continue
        if op in _LANE_FREEING_OPS:
            _note(last_freed, lane, s)
        elif op == lane_journal.OP_ACQUIRE and not _acquire_refusal_reason(e):
            # A genuine acquire: the lane got leased, so an earlier refusal of
            # that same lane is moot.
            _note(last_acquired, lane, s)

    superseded: set[int] = set()
    for e in entries:
        op = e.get("op")
        is_refuse = op == lane_journal.OP_REFUSE
        is_mislabeled = op == lane_journal.OP_ACQUIRE and bool(_acquire_refusal_reason(e))
        if not (is_refuse or is_mislabeled):
            continue
        s = _seq_of(e)
        if s is None:
            continue
        # Defaulting the lookup to `s` makes "no event for this lane" compare
        # as not-later without a separate membership test.
        freed_later = any(
            last_freed.get(lane, s) > s for lane in _refuse_blocking_lanes(e)
        )
        own = str(e.get("lane") or "")
        # A mislabeled-ACQUIRE refusal never lands in `last_acquired` (the
        # genuine-acquire guard above excluded it), so it cannot supersede itself.
        acquired_later = bool(own) and last_acquired.get(own, s) > s
        if freed_later or acquired_later:
            superseded.add(s)
    return superseded
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
# ---------------------------------------------------------------------------
|
|
510
|
+
# Time helpers.
|
|
511
|
+
# ---------------------------------------------------------------------------
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _now() -> dt.datetime:
    """Return the current wall-clock time as a timezone-aware UTC datetime."""
    utc = dt.timezone.utc
    return dt.datetime.now(tz=utc)
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _parse_iso(ts: str | None) -> dt.datetime | None:
    """Parse an ISO-8601 stamp, returning `None` instead of raising.

    A falsy `ts` yields `None`. Otherwise the value is stringified, every `Z`
    is rewritten to `+00:00` (so a trailing-Z UTC stamp is accepted), and the
    result is handed to `datetime.fromisoformat`. Any `ValueError`/`TypeError`
    from that attempt yields `None`. The returned datetime is aware or naive
    exactly as the input dictates.
    """
    if ts:
        try:
            normalized = str(ts).replace("Z", "+00:00")
            return dt.datetime.fromisoformat(normalized)
        except (ValueError, TypeError):
            pass
    return None
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _age_seconds(ts: str | None, *, now: dt.datetime | None = None) -> int | None:
    """Whole seconds elapsed between the stamp `ts` and `now`, floored at 0.

    Args:
        ts: An ISO-8601 stamp as accepted by `_parse_iso`.
        now: The reference clock; defaults to `_now()`.

    Returns:
        `None` when `ts` cannot be parsed; otherwise a non-negative int (a
        stamp in the future reports 0 rather than a negative age).

    A naive stamp is interpreted as UTC. A naive `now` is interpreted as UTC
    as well: subtracting an aware datetime from a naive one raises
    `TypeError`, so without this a caller supplying a naive clock would crash
    the reader instead of getting an age.
    """
    stamp = _parse_iso(ts)
    if stamp is None:
        return None
    if stamp.tzinfo is None:
        stamp = stamp.replace(tzinfo=dt.timezone.utc)
    clock = now if now is not None else _now()
    if clock.tzinfo is None:
        clock = clock.replace(tzinfo=dt.timezone.utc)
    elapsed = clock - stamp
    return max(0, int(elapsed.total_seconds()))
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
# ---------------------------------------------------------------------------
|
|
539
|
+
# Source readers — each returns a list[Decision]. Read-only; the only I/O here.
|
|
540
|
+
# Every reader degrades to [] on a missing/malformed source so the queue never
|
|
541
|
+
# crashes on a torn file (the same defensive posture as picker_oracle's loaders).
|
|
542
|
+
# ---------------------------------------------------------------------------
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def _from_lane_journal(config, *, now: dt.datetime | None = None) -> list[Decision]:
    """Lift lane-journal entries into Decision rows.

    Reads `config.paths.lane_journal` via `lane_journal.read_all`; any
    exception from that read yields `[]`. Three ops produce rows, emitted in
    journal order:

      * `OP_REFUSE` -> an ARBITER_REFUSE row carrying the (normalized) lane,
        the prose `reason` (default "lane refused"), `run_id`/`root_id`, the
        age derived from `ts`, and a cleaned `reason_class` token.
      * `OP_HALT` -> a LIVENESS row with resolver ORACLE, carrying the
        `handle`, the proposed `command`, `lane`, `run_id` and the halt
        `reason` (default "watchdog proposed stop").
      * `OP_ACQUIRE` whose `_acquire_refusal_reason` is non-empty -> an
        ARBITER_REFUSE row recovered from a mislabeled op (docs/139); its
        evidence is marked so it can be told apart from a first-class refusal.
        A genuine acquire produces no row.

    Both refusal shapes are skipped when their integer `seq` is in
    `_superseded_refuse_seqs(entries)`, i.e. a later journal event already
    resolved the contention.
    """
    journal_path = config.paths.lane_journal
    try:
        entries = lane_journal.read_all(journal_path)
    except Exception:
        return []
    resolved_seqs = _superseded_refuse_seqs(entries)
    source = str(journal_path)

    def _already_resolved(entry: dict) -> bool:
        # Same "usable integer seq" rule as the supersession pass: bools are
        # ints in Python and must not be mistaken for a sequence number.
        seq = entry.get("seq")
        if isinstance(seq, bool) or not isinstance(seq, int):
            return False
        return seq in resolved_seqs

    def _seq_evidence(entry: dict) -> str:
        return f"journal seq #{entry.get('seq', '?')}"

    def _refusal_row(entry: dict, reason_text, extra_evidence: tuple = ()) -> Decision:
        # `reason_class` is admitted as a token only if `_clean_token` accepts
        # it; prose stays in `reason_text`.
        token = _clean_token(entry.get("reason_class"), config)
        kind = DecisionKind.ARBITER_REFUSE
        return Decision(
            kind=kind,
            resolver_kind=_resolver_for(kind, token, config, reason_text),
            lane=_dynamic_lane_handle(str(entry.get("lane") or "")),
            reason_token=token,
            reason_text=reason_text,
            run_id=str(entry.get("run_id") or entry.get("root_id") or ""),
            age_seconds=_age_seconds(entry.get("ts"), now=now),
            source_path=source,
            evidence=(_seq_evidence(entry), *extra_evidence),
        )

    def _halt_row(entry: dict) -> Decision:
        handle = str(entry.get("handle") or "")
        command = str(entry.get("command") or "")
        evidence = [f"handle={handle or '?'}", _seq_evidence(entry)]
        if command:
            evidence.append(f"proposed: {command}")
        return Decision(
            kind=DecisionKind.LIVENESS,
            resolver_kind=ResolverKind.ORACLE,
            lane=str(entry.get("lane") or ""),
            reason_token="",  # liveness carries no WedgeReason token
            reason_text=str(entry.get("reason") or "watchdog proposed stop"),
            run_id=str(entry.get("run_id") or ""),
            age_seconds=_age_seconds(entry.get("ts"), now=now),
            source_path=source,
            evidence=tuple(evidence),
            proposed_command=command,
            handle=handle,
        )

    rows: list[Decision] = []
    for entry in entries:
        op = entry.get("op")
        if op == lane_journal.OP_REFUSE:
            if not _already_resolved(entry):
                rows.append(_refusal_row(entry, str(entry.get("reason") or "lane refused")))
        elif op == lane_journal.OP_HALT:
            rows.append(_halt_row(entry))
        elif op == lane_journal.OP_ACQUIRE:
            # Distrust the op and read the reason: a refusal logged under
            # ACQUIRE must not be invisible to the operator.
            recovered = _acquire_refusal_reason(entry)
            if recovered and not _already_resolved(entry):
                rows.append(_refusal_row(
                    entry,
                    recovered,
                    ("recovered: refusal logged under op=ACQUIRE (docs/139)",),
                ))
    return rows
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
# A verdict envelope is a refusal when it is a no-pick / blocked shape. Mirrors
|
|
672
|
+
# `preflight._envelope_refusal` exactly (one definition, two readers would drift)
|
|
673
|
+
# — but we re-implement the read here against the SAME keys rather than importing
|
|
674
|
+
# preflight (which pulls a heavier dependency chain). Launchable = LIVE/ACCEPT/absent.
|
|
675
|
+
# Verdict strings (already upper-cased) that do NOT make an envelope a refusal.
_LAUNCHABLE_VERDICTS = frozenset(("", "LIVE", "ACCEPT"))
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def _envelope_is_refusal(env: dict) -> tuple[bool, str]:
    """Classify a verdict envelope as `(is_refusal, short_reason)`.

    Checks, in order, and returns on the first that matches:

      1. `do_not_render` truthy.
      2. `blocked` truthy while `all_clear` is falsy.
      3. a non-empty `verdict` (stripped, upper-cased) outside
         `_LAUNCHABLE_VERDICTS`.
      4. a `reason_class` that `wedge_reason.is_refusal` accepts.

    Anything else is `(False, "")`. The short reason names the matching signal;
    an empty verdict is shown as `?` in the first two cases.
    """
    verdict = str(env.get("verdict") or "").strip().upper()
    shown = verdict or "?"

    if env.get("do_not_render"):
        return True, f"do_not_render verdict={shown}"

    if env.get("blocked") and not env.get("all_clear"):
        return True, f"blocked verdict={shown}"

    if verdict and verdict not in _LAUNCHABLE_VERDICTS:
        return True, f"verdict={verdict}"

    reason_class = env.get("reason_class")
    if reason_class is not None and wedge_reason.is_refusal(str(reason_class)):
        return True, f"reason_class={reason_class}"

    return False, ""
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _from_verdict_envelopes(config, *, now: dt.datetime | None = None) -> list[Decision]:
    """Lift refusal-shaped `.verdict-*.json` envelopes into Decision rows.

    Scans `config.paths.next_packets` (returns `[]` if it does not exist) for
    `.verdict-*.json` files in sorted order. A file that cannot be read or
    decoded, or whose top level is not a JSON object, is skipped. Each envelope
    that `_envelope_is_refusal` flags becomes one row:

      * kind is PREFLIGHT_REFUSE when `do_not_render` is set, or `blocked` is
        set without `all_clear`; otherwise WEDGE.
      * lane comes from `scope` (its `lane` key when a dict, the string itself
        when a str), else from the envelope's `lane`, normalized through
        `_dynamic_lane_handle`.
      * age is derived from `generated_at`, falling back to `ts`.
      * `reason_text` is truncated to 300 characters on the row; the resolver
        is chosen from the untruncated text.

    Args:
        config: Object exposing `paths.next_packets`; also passed to
            `_clean_token` and `_resolver_for`.
        now: Reference clock for the age computation.
    """
    next_dir = config.paths.next_packets
    if not next_dir.exists():
        return []
    out: list[Decision] = []
    for p in sorted(next_dir.glob(".verdict-*.json")):
        try:
            env = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError AND UnicodeDecodeError: a
            # torn envelope containing invalid UTF-8 must be skipped like any
            # other malformed file, not crash the whole queue.
            continue
        if not isinstance(env, dict):
            continue
        is_refusal, short = _envelope_is_refusal(env)
        if not is_refusal:
            continue
        # Same token hygiene as the journal reader: a `reason_class` that
        # `_clean_token` rejects yields an empty token.
        token = _clean_token(env.get("reason_class"), config)
        verdict = str(env.get("verdict") or "").strip().upper()
        reason_text = str(env.get("reason") or short or verdict or "no-pick")
        # Prefer an explicit scope label for the lane, normalized to the bare
        # dynamic handle.
        scope = env.get("scope")
        if isinstance(scope, dict):
            lane = _dynamic_lane_handle(str(scope.get("lane") or ""))
        elif isinstance(scope, str):
            lane = _dynamic_lane_handle(scope)
        else:
            lane = _dynamic_lane_handle(str(env.get("lane") or ""))
        run_id = str(env.get("run_id") or env.get("root_id") or "")
        # The chained-run dir name, if the envelope recorded it; often absent
        # on a raw verdict envelope.
        run_ts = str(env.get("run_ts") or "")
        age = _age_seconds(env.get("generated_at") or env.get("ts"), now=now)
        # do_not_render / blocked => refused at preflight; anything else that
        # reached here is the no-pick gate surface.
        if env.get("do_not_render") or (env.get("blocked") and not env.get("all_clear")):
            kind = DecisionKind.PREFLIGHT_REFUSE
        else:
            kind = DecisionKind.WEDGE
        out.append(Decision(
            kind=kind,
            resolver_kind=_resolver_for(kind, token, config, reason_text),
            lane=lane,
            reason_token=token,
            reason_text=reason_text[:300],
            run_id=run_id,
            run_ts=run_ts,
            age_seconds=age,
            source_path=str(p),
            evidence=(f"envelope {p.name}", short) if short else (f"envelope {p.name}",),
        ))
    return out
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
def _from_soaks(config, *, now: dt.datetime | None = None) -> list[Decision]:
    """Surface still-open soak windows from the soaks index as SOAK_GATE rows.

    Reads `config.paths.soaks_index` as YAML. Returns `[]` when the file is
    missing, `yaml` is not importable, or the file cannot be read/parsed. The
    index shape is read defensively: a top-level list of entry dicts, a dict
    wrapping such a list under `soaks` or `entries`, or a dict keyed by id.
    Non-dict entries are ignored.

    For each entry the first truthy of `soak_until` / `deadline` / `until` /
    `soak_deadline` is taken, stringified and cut to its first 10 characters
    (the `YYYY-MM-DD` part). Entries with no deadline, or whose deadline string
    sorts before today's ISO date, are dropped as not pending.

    Args:
        config: Object exposing `paths.soaks_index`.
        now: Clock used for "today". Defaults to `_now()`. An aware value is
            converted to UTC first so the date matches the default's time zone;
            a naive value is used as given. Accepting it lets a caller with a
            frozen clock get a deterministic open/closed split, as the other
            readers already allow.
    """
    path = config.paths.soaks_index
    if not path.exists():
        return []
    try:
        import yaml  # type: ignore
    except ImportError:
        return []
    try:
        data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    except Exception:
        return []

    def _from_list(seq: list) -> list[tuple[str, dict]]:
        # Key each entry by its id, else its phase, else its position.
        return [
            (str(e.get("id") or e.get("phase") or i), e)
            for i, e in enumerate(seq)
            if isinstance(e, dict)
        ]

    # Normalize to a list of (key, entry-dict).
    items: list[tuple[str, dict]] = []
    if isinstance(data, dict):
        # Either {id: {...}} or {"soaks": [...]} / {"entries": [...]}.
        wrapped = data.get("soaks") or data.get("entries")
        if isinstance(wrapped, list):
            items = _from_list(wrapped)
        else:
            items = [(str(k), e) for k, e in data.items() if isinstance(e, dict)]
    elif isinstance(data, list):
        items = _from_list(data)

    clock = now if now is not None else _now()
    if clock.tzinfo is not None:
        clock = clock.astimezone(dt.timezone.utc)
    today = clock.date().isoformat()

    out: list[Decision] = []
    for key, e in items:
        deadline = (
            e.get("soak_until") or e.get("deadline") or e.get("until")
            or e.get("soak_deadline")
        )
        deadline_s = str(deadline)[:10] if deadline else ""
        if not deadline_s or deadline_s < today:
            continue  # no deadline, or already closed — not a pending decision
        lane = str(e.get("lane") or e.get("series") or e.get("plan") or "")
        phase = str(e.get("phase") or key)
        reason_text = f"soak open until {deadline_s} ({phase})"
        out.append(Decision(
            kind=DecisionKind.SOAK_GATE,
            # Soak gates always carry the fixed soak-gated reason token and are
            # resolved by a human.
            resolver_kind=ResolverKind.HUMAN,
            lane=lane,
            reason_token="LANE_BLOCKED_ON_SOAK_GATED_PHASES",
            reason_text=reason_text,
            run_id="",
            age_seconds=None,  # a soak is forward-dated; age is not meaningful
            source_path=str(path),
            evidence=(f"soak_until={deadline_s}", f"phase={phase}"),
        ))
    return out
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
# ---------------------------------------------------------------------------
|
|
832
|
+
# Collection + ranking.
|
|
833
|
+
# ---------------------------------------------------------------------------
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
# Sort key precedence by kind: a LIVENESS halt proposal (a run hung/spinning RIGHT
|
|
837
|
+
# NOW, burning budget) is the most urgent — it outranks even a refusal, because a
|
|
838
|
+
# refusal blocks future work while a hung run is actively wasting it. Then the
|
|
839
|
+
# failure-refusals (a loop stopped / couldn't launch) outrank a forward-dated soak
|
|
840
|
+
# gate. Within a kind, oldest first (an aged decision is the most likely to be
|
|
841
|
+
# silently costing).
|
|
842
|
+
# Kinds listed most-urgent first; a kind's rank is its position in this tuple.
_KIND_RANK = {
    kind: rank
    for rank, kind in enumerate((
        DecisionKind.LIVENESS,
        DecisionKind.ARBITER_REFUSE,
        DecisionKind.PREFLIGHT_REFUSE,
        DecisionKind.WEDGE,
        DecisionKind.SOAK_GATE,
    ))
}
|
|
849
|
+
|
|
850
|
+
|
|
851
|
+
def _dedup(decisions: list[Decision]) -> list[Decision]:
    """Collapse rows describing the same pending decision, stamping a count.

    Pure. Two rows are "the same decision" when they agree on
    `(kind, lane, reason_token, reason_text)`. For each such group the
    first-seen row is kept, and groups are returned in first-seen order. A
    group of one is returned untouched (the same object); a larger group
    returns a copy of its representative with `dup_count` set to the group
    size. Fields outside the identity tuple (e.g. a LIVENESS row's handle) do
    not keep rows apart.
    """
    first_seen: dict[tuple, Decision] = {}
    tally: dict[tuple, int] = {}
    for decision in decisions:
        identity = (
            decision.kind,
            decision.lane,
            decision.reason_token,
            decision.reason_text,
        )
        # dicts preserve insertion order, so `first_seen` doubles as the
        # first-appearance ordering of the groups.
        first_seen.setdefault(identity, decision)
        tally[identity] = tally.get(identity, 0) + 1
    return [
        rep if tally[identity] == 1 else replace(rep, dup_count=tally[identity])
        for identity, rep in first_seen.items()
    ]
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
# Decision kinds whose recency is the time-axis of the decision ITSELF, not the
|
|
887
|
+
# age of the artifact that recorded it — these are never aged out by the recency
|
|
888
|
+
# filter. A SOAK_GATE is forward-dated (it's pending precisely because its window
|
|
889
|
+
# is still open; `_from_soaks` already drops closed ones). A LIVENESS halt names a
|
|
890
|
+
# run hung RIGHT NOW; an old OP_HALT for a run that is no longer spinning is still
|
|
891
|
+
# the operator's call to enact-or-decline, and the journal compaction (not this
|
|
892
|
+
# filter) is what bounds it. Everything else — a WEDGE/refusal envelope — is a
|
|
893
|
+
# point-in-time no-pick whose staleness IS its age.
|
|
894
|
+
# Kinds the recency filter never ages out.
_RECENCY_EXEMPT_KINDS = frozenset((DecisionKind.SOAK_GATE, DecisionKind.LIVENESS))
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
def _is_stale(decision: Decision, *, max_age_seconds: float | None) -> bool:
    """Decide whether a decision is too old (or undatable) to count as pending.

    Rules, in order:

      * `max_age_seconds is None` disables the filter: never stale.
      * a kind in `_RECENCY_EXEMPT_KINDS` is never stale.
      * a row with no `age_seconds` is stale (a point-in-time refusal that
        cannot be dated is not treated as live).
      * otherwise stale iff `age_seconds` is strictly greater than the cutoff.
    """
    filter_disabled = max_age_seconds is None
    if filter_disabled or decision.kind in _RECENCY_EXEMPT_KINDS:
        return False
    age = decision.age_seconds
    return age is None or age > max_age_seconds
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
def collect_decisions(
    config=None,
    *,
    resolver: str | None = "HUMAN",
    now: dt.datetime | None = None,
) -> list[Decision]:
    """Gather, filter, de-duplicate and rank every pending operator decision.

    Read-only. Rows come from three readers, in this order: the lane journal
    (arbiter refusals + watchdog halts), the verdict envelopes, and the soaks
    index.

    Args:
        config: Configuration to read; defaults to `_config.active()`.
        resolver: Keep only rows whose `resolver_kind.value` equals this value
            (stripped, upper-cased). The default `"HUMAN"` keeps the rows that
            need a person; `None` keeps everything.
        now: Clock handed to the journal and envelope readers for age
            computation; defaults to `_now()`.

    Pipeline:
        1. Recency filter — when the config has a `retention` object with a
           non-None `journal_max_age_days`, rows for which `_is_stale` is true
           at that cutoff (converted to seconds) are dropped. With no retention
           policy or no cutoff the filter is skipped.
        2. Resolver filter (see `resolver`).
        3. `_dedup`, so each surviving row carries its duplicate count.
        4. Sort by `_KIND_RANK` (unknown kinds last), then oldest first within
           a kind, with rows of unknown age after all rows of known age.
    """
    cfg = _config.active() if config is None else config
    clock = _now() if now is None else now

    rows: list[Decision] = [
        *_from_lane_journal(cfg, now=clock),
        *_from_verdict_envelopes(cfg, now=clock),
        *_from_soaks(cfg),
    ]

    # The retention policy's `journal_max_age_days` is the single staleness
    # number; a config without one leaves the filter off rather than crashing.
    retention = getattr(cfg, "retention", None)
    max_age_days = getattr(retention, "journal_max_age_days", None) if retention else None
    if max_age_days is not None:
        cutoff_seconds = max_age_days * 86400
        rows = [row for row in rows if not _is_stale(row, max_age_seconds=cutoff_seconds)]

    if resolver is not None:
        wanted = resolver.strip().upper()
        rows = [row for row in rows if row.resolver_kind.value == wanted]

    # De-duplicate BEFORE sorting so the representative of each group is the
    # first one in source order.
    rows = _dedup(rows)

    def _rank(row: Decision) -> tuple[int, float]:
        # Known ages map to <= 0 (older => more negative => earlier); an
        # unknown age maps to 1 so it lands after every known age.
        age_key = 1 if row.age_seconds is None else -row.age_seconds
        return (_KIND_RANK.get(row.kind, 99), age_key)

    rows.sort(key=_rank)
    return rows
|
|
978
|
+
|
|
979
|
+
|
|
980
|
+
# ---------------------------------------------------------------------------
|
|
981
|
+
# next_steps — the action bar. Maps a decision to its (key, shell-command) list.
|
|
982
|
+
# The TUI's emit-and-exit keys print exactly these; the plain list shows them as
|
|
983
|
+
# a hint. Sourced from the matched ReasonSpec.fix + the decision kind + resolver.
|
|
984
|
+
# ---------------------------------------------------------------------------
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
def next_steps(decision: Decision, config=None) -> list[tuple[str, str]]:
    """The ordered `(key_label, shell_command)` actions for one decision.

    Pure: builds strings from the decision's own fields and performs no I/O.
    The TUI binds each key to "print this command and exit"; nothing here
    mutates state.

    Args:
        decision: The row to offer actions for.
        config: Accepted for call compatibility; not consulted. (The previous
            body resolved `_config.active()` into a local that was never read,
            which forced a config lookup for no effect.)

    Returns:
        For a LIVENESS row: `k` (the proposed stop command, or a comment
        naming the handle when no command was recorded; omitted when neither
        exists), then `l` (let it ride) and `c` (copy).

        For every other kind, in order:
          * `r` — `/replan`, scoped to the lane when one is known.
          * `f` — `dos arbitrate ... --force`, only for ARBITER_REFUSE and
            PREFLIGHT_REFUSE.
          * `j` — only when the resolver is JUDGE: `dos judge wedge <run_ts>`
            when a `run_ts` is known, else the LLM judge driver keyed by
            `run_id`, then `reason_token`, then `?`.
          * `m` — `dos man wedge <token>`, only when a reason token is set.
          * `c` — copy.
    """
    steps: list[tuple[str, str]] = []

    # A LIVENESS halt proposal has its own action set. The queue never signals
    # a process itself; it only surfaces the command for the operator to paste.
    if decision.kind is DecisionKind.LIVENESS:
        if decision.proposed_command:
            steps.append(("k", decision.proposed_command))
        elif decision.handle:
            # No command was recorded; echo the opaque handle so the operator
            # can stop the run by hand. No kill mechanism is named.
            steps.append(("k", f"# stop the run with handle: {decision.handle}"))
        steps.append(("l", "# let it ride (take no action)"))
        steps.append(("c", "<copy selected command>"))
        return steps

    lane = decision.lane or ""
    scope_arg = f" --scope {lane}" if lane else ""

    # /replan is the universal "re-shape this lane" action.
    steps.append(("r", f"/replan{scope_arg}".strip()))

    # Force the lane lease — only meaningful for a lane-level refusal.
    if decision.kind in (DecisionKind.ARBITER_REFUSE, DecisionKind.PREFLIGHT_REFUSE):
        lane_arg = f" --lane {lane}" if lane else ""
        steps.append(("f", f"dos arbitrate{lane_arg} --force".strip()))

    # Adjudicate: the deterministic judge keys on a chained-run `run_ts`;
    # without one, point at the LLM driver instead.
    if decision.resolver_kind is ResolverKind.JUDGE:
        if decision.run_ts:
            steps.append(("j", f"dos judge wedge {decision.run_ts}"))
        else:
            ref = decision.run_id or decision.reason_token or "?"
            steps.append(("j", f"python -m dos.drivers.llm_judge {ref}"))

    # Any row with a reason token gets its man page.
    if decision.reason_token:
        steps.append(("m", f"dos man wedge {decision.reason_token}"))

    steps.append(("c", "<copy selected command>"))
    return steps
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
# ---------------------------------------------------------------------------
|
|
1054
|
+
# Rendering — the plain list (the curses-unavailable floor + --no-tui).
|
|
1055
|
+
# ---------------------------------------------------------------------------
|
|
1056
|
+
|
|
1057
|
+
|
|
1058
|
+
def _fmt_age(age: int | None) -> str:
|
|
1059
|
+
"""Compact age: 45s / 18m / 2h / 3d / '-' when unknown."""
|
|
1060
|
+
if age is None:
|
|
1061
|
+
return "-"
|
|
1062
|
+
if age < 60:
|
|
1063
|
+
return f"{age}s"
|
|
1064
|
+
if age < 3600:
|
|
1065
|
+
return f"{age // 60}m"
|
|
1066
|
+
if age < 86400:
|
|
1067
|
+
return f"{age // 3600}h"
|
|
1068
|
+
return f"{age // 86400}d"
|
|
1069
|
+
|
|
1070
|
+
|
|
1071
|
+
# ---------------------------------------------------------------------------
|
|
1072
|
+
# Urgency tiers + inline action hints — the presentation helpers the TUI reads
|
|
1073
|
+
# to make "what needs me RIGHT NOW" separable from "what can wait" at a glance.
|
|
1074
|
+
#
|
|
1075
|
+
# These are PURE (no curses, no I/O) and live here, beside the floor renderers,
|
|
1076
|
+
# precisely so they are unit-testable and the curses `_draw` stays a thin skin
|
|
1077
|
+
# over them — the same discipline that keeps the plain list and the TUI in
|
|
1078
|
+
# lockstep. The tier is anchored on `_KIND_RANK` (the existing sort precedence)
|
|
1079
|
+
# so the colour the eye reads and the order the list sorts can never disagree:
|
|
1080
|
+
# a LIVENESS halt (a run burning budget now) is the most urgent thing on screen
|
|
1081
|
+
# AND the reddest, by construction. docs/211 (the operator's attention is the
|
|
1082
|
+
# scarce resource) + the n=12 finding that the surface must front the actionable
|
|
1083
|
+
# fact, not the agent's narration.
|
|
1084
|
+
# ---------------------------------------------------------------------------
|
|
1085
|
+
|
|
1086
|
+
|
|
1087
|
+
class Urgency(str, enum.Enum):
    """Three-level loudness tier for a pending decision.

    Members are also plain strings (the class mixes in ``str``), so a member
    compares equal to its own name.

    * ``NOW``   - costing something right now (a hung run) or blocking a loop
      (a refusal): the operator should act.
    * ``SOON``  - a no-pick / wedge surface: worth a look, not on fire.
    * ``LATER`` - a forward-dated gate that ripens on its own clock.
    """

    # Declared from most to least urgent.
    NOW = "NOW"
    SOON = "SOON"
    LATER = "LATER"
|
|
1098
|
+
|
|
1099
|
+
|
|
1100
|
+
# rank 0–1 (LIVENESS halt, ARBITER refuse) = NOW; 2–3 (preflight, wedge) = SOON;
|
|
1101
|
+
# 4+ (soak gate) = LATER. Driven off `_KIND_RANK` so the two never drift.
|
|
1102
|
+
def urgency_of(decision: Decision) -> Urgency:
    """Classify one decision into an urgency tier from its sort rank.

    The rank is looked up in `_KIND_RANK` by `decision.kind`; a kind missing
    from that table is ranked `len(_KIND_RANK)`, i.e. after every listed kind.
    Ranks 0-1 map to NOW, 2-3 to SOON, anything higher to LATER. No I/O.
    """
    unknown_rank = len(_KIND_RANK)
    position = _KIND_RANK.get(decision.kind, unknown_rank)
    if position > 3:
        return Urgency.LATER
    return Urgency.NOW if position <= 1 else Urgency.SOON
|
|
1110
|
+
|
|
1111
|
+
|
|
1112
|
+
# The glyph that prefixes a row by tier: a filled dot shouts, a mid ring is
|
|
1113
|
+
# neutral, a low dot recedes. ASCII-safe fallbacks for a terminal that cannot
|
|
1114
|
+
# render the unicode (the floor never assumes a font).
|
|
1115
|
+
# Unicode row markers, one per tier.
_URGENCY_GLYPH = {
    Urgency.NOW: "●",
    Urgency.SOON: "○",
    Urgency.LATER: "·",
}
# ASCII-only stand-ins for the same three tiers.
_URGENCY_GLYPH_ASCII = {
    Urgency.NOW: "!",
    Urgency.SOON: "+",
    Urgency.LATER: ".",
}
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
def urgency_glyph(decision: Decision, *, ascii_only: bool = False) -> str:
    """Return the single-character severity marker for a decision's row.

    The marker is chosen by the decision's `urgency_of` tier: from the unicode
    table (`●`/`○`/`·`) by default, or from the ASCII table (`!`/`+`/`.`) when
    `ascii_only` is true.
    """
    level = urgency_of(decision)
    if ascii_only:
        return _URGENCY_GLYPH_ASCII[level]
    return _URGENCY_GLYPH[level]
|
|
1124
|
+
|
|
1125
|
+
|
|
1126
|
+
def urgency_tally(decisions: list[Decision]) -> str:
    """Summarise a queue as a two-bucket split such as ``3 now · 5 later``.

    Only the NOW tier is counted on its own; SOON and LATER are folded
    together into the ``later`` bucket. A bucket with a zero count is left
    out, and an empty queue yields the empty string so the caller can print
    its own "none" line.
    """
    if not decisions:
        return ""
    urgent = 0
    for item in decisions:
        if urgency_of(item) is Urgency.NOW:
            urgent += 1
    deferred = len(decisions) - urgent
    buckets = ((urgent, "now"), (deferred, "later"))
    return " · ".join(f"{count} {word}" for count, word in buckets if count)
|
|
1143
|
+
|
|
1144
|
+
|
|
1145
|
+
# Action keys we never surface as an inline hint: the always-present `c` (copy)
|
|
1146
|
+
# and the `l` (let-it-ride no-op) carry no triage signal — the operator already
|
|
1147
|
+
# knows they can copy or ignore. We front the keys that DO something.
|
|
1148
|
+
_HINT_SKIP_KEYS = frozenset({"c", "l"})
|
|
1149
|
+
|
|
1150
|
+
# A short human label per action key, so the inline hint reads `[k]stop` not the
|
|
1151
|
+
# whole shell command (which lives in the detail pane + is what gets emitted).
|
|
1152
|
+
_ACTION_LABEL = {
|
|
1153
|
+
"k": "stop",
|
|
1154
|
+
"r": "replan",
|
|
1155
|
+
"f": "force",
|
|
1156
|
+
"j": "judge",
|
|
1157
|
+
"m": "man",
|
|
1158
|
+
}
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
def action_hints(decision: Decision, config=None, *, limit: int = 2) -> list[tuple[str, str]]:
    """Return the first `(key, label)` actions to show inline on a list row.

    This is a projection over `next_steps(decision, config)`: keys listed in
    `_HINT_SKIP_KEYS` are dropped, each remaining key is paired with its
    `_ACTION_LABEL` entry (falling back to the key itself when it has no
    label), and the result is cut to at most `limit` entries in `next_steps`
    order.

    Args:
        decision: The decision whose actions are summarised.
        config: Passed through unchanged to `next_steps`.
        limit: Maximum number of hints to return. Zero or a negative value
            yields an empty list.

    Returns:
        Up to `limit` `(key, label)` pairs.
    """
    # Clamp first: the cap used to be checked only after an append, so a
    # limit of 0 still leaked one hint, and a raw negative slice bound would
    # trim from the wrong end.
    cap = max(limit, 0)
    visible = [
        (key, _ACTION_LABEL.get(key, key))
        for key, _cmd in next_steps(decision, config)
        if key not in _HINT_SKIP_KEYS
    ]
    return visible[:cap]
|
|
1177
|
+
|
|
1178
|
+
|
|
1179
|
+
def fmt_action_hints(decision: Decision, config=None, *, limit: int = 2) -> str:
    """Format `action_hints` as a space-separated string like `[k]stop [r]replan`.

    Returns the empty string when there are no hints to show. `config` and
    `limit` are forwarded to `action_hints` unchanged.
    """
    pairs = action_hints(decision, config, limit=limit)
    chunks = [f"[{key}]{text}" for key, text in pairs]
    return " ".join(chunks)
|
|
1182
|
+
|
|
1183
|
+
|
|
1184
|
+
# Labels for the keys the dense row-hint hides (copy, let-it-ride) — the footer
|
|
1185
|
+
# IS allowed to show them, because it describes the ONE focused row, not 20.
|
|
1186
|
+
# A separate dict (not an alias), so `_ACTION_LABEL` itself never gains c / l.
_FOOTER_LABEL = {**_ACTION_LABEL, "c": "copy", "l": "ride"}
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
def footer_keys(decision: Decision, config=None) -> str:
    """Build the footer text listing every action key of the focused decision.

    Each `(key, command)` pair from `next_steps(decision, config)` becomes a
    `[key]label` token, with the label taken from `_FOOTER_LABEL` (or the key
    itself when it has no label there). Nothing is filtered out, so copy and
    let-it-ride appear too. Tokens are joined with `` · ``.
    """
    tokens: list[str] = []
    for key, _cmd in next_steps(decision, config):
        caption = _FOOTER_LABEL.get(key, key)
        tokens.append(f"[{key}]{caption}")
    return " · ".join(tokens)
|
|
1199
|
+
|
|
1200
|
+
|
|
1201
|
+
def render_list_plain(decisions: list[Decision]) -> str:
    """Render the decision queue as a column-aligned plain-text table.

    This is the `dos decisions --no-tui` output. The text has a title line,
    a pending count with the urgency split, a header and rule, one row per
    decision (ASCII urgency glyph, 1-based index, age, kind, lane, the first
    40 characters of the reason, a duplicate count when above 1, and the
    inline action hints), and a closing summary tallied by resolver kind.

    Args:
        decisions: The queue to render, already in display order.

    Returns:
        The table as one newline-joined string. An empty queue yields just
        the title and a "none pending" line.
    """
    lines: list[str] = ["# operator decisions"]
    if not decisions:
        # ASCII hyphen, not an em-dash: this is the plain floor that prints to a
        # raw console (incl. a cp1252 Windows terminal), and a test pins it
        # byte-equal to the CLI's subprocess stdout. Keep the floor ASCII-clean.
        lines.append(" (none pending - nothing is waiting on you)")
        return "\n".join(lines)

    split = urgency_tally(decisions)
    lines.append(f" {len(decisions)} pending" + (f" ({split})" if split else ""))
    header = f" {' ':1} {'#':>2} {'age':>4} {'kind':<16} {'lane':<10} waiting on / do"
    lines.append(header)
    lines.append(" " + "-" * (len(header) - 2))

    for index, d in enumerate(decisions, 1):
        # Reason TEXT first (human-readable), not the raw enum token. The
        # trailing "" keeps the slice below safe when a decision carries
        # neither a text nor a token.
        waiting = d.reason_text or d.reason_token or ""
        dup = f" ×{d.dup_count}" if d.dup_count > 1 else ""
        hint = fmt_action_hints(d)
        hint_s = f" {hint}" if hint else ""
        lines.append(
            f" {urgency_glyph(d, ascii_only=True):1} {index:>2} {_fmt_age(d.age_seconds):>4} "
            f"{d.kind.value:<16} {(d.lane or '-'):<10} {waiting[:40]}{dup}{hint_s}"
        )

    lines.append("")
    # Count decisions per resolver kind for the closing summary.
    by_resolver: dict = {}
    for d in decisions:
        resolver = d.resolver_kind.value
        by_resolver[resolver] = by_resolver.get(resolver, 0) + 1
    resolver_split = " · ".join(f"{k}:{v}" for k, v in sorted(by_resolver.items()))
    # "->" rather than U+2192: the arrow has no cp1252 encoding, so it cannot
    # be written to the raw Windows console this floor targets. The "×" and
    # "·" separators are left alone because cp1252 encodes both.
    lines.append(
        f" -> {len(decisions)} pending ({resolver_split}) · `dos decisions show <#>` to drill in"
    )
    return "\n".join(lines)
|
|
1237
|
+
|
|
1238
|
+
|
|
1239
|
+
def render_detail_plain(decision: Decision, config=None) -> str:
    """Render one decision as static drill-in text (`dos decisions show <#>`).

    The output is a block of labelled fields (kind, resolver, lane, reason,
    detail, run, age, duplicate count, evidence, source), then an ACTIONS
    section with one line per `next_steps` entry. Optional fields are emitted
    only when they have a value. When the decision's reason token resolves to
    an entry in `config.reasons`, that entry's summary, typical fix and
    see-also list are included as well.

    `config` defaults to `_config.active()` when not supplied.
    """
    if config is None:
        settings = _config.active()
    else:
        settings = config

    # The reasons registry is only consulted when there is a token to look up.
    spec = None
    if decision.reason_token:
        spec = settings.reasons.get(decision.reason_token)
    has_spec = spec is not None

    lines: list[str] = [
        f"KIND {decision.kind.value}",
        f"RESOLVER {decision.resolver_kind.value}",
        f"LANE {decision.lane or '-'}",
    ]
    if decision.reason_token:
        lines.append(f"REASON {decision.reason_token}")
    if has_spec and spec.summary:
        lines.append(f"MEANS {spec.summary}")
    lines.append(f"DETAIL {decision.reason_text}")
    if decision.run_id:
        lines.append(f"RUN {decision.run_id}")
    lines.append(f"AGE {_fmt_age(decision.age_seconds)}")
    if decision.dup_count > 1:
        lines.append(f"SEEN {decision.dup_count}× (identical rows collapsed)")
    if decision.evidence:
        lines.append("EVIDENCE " + "\n ".join(decision.evidence))
    if has_spec and spec.fix:
        lines.append(f"TYPICAL FIX {spec.fix}")
    lines.append(f"SOURCE {decision.source_path}")

    # Action bar: a blank separator, the heading, then one line per command.
    lines += ["", "ACTIONS"]
    lines.extend(f" [{key}] {cmd}" for key, cmd in next_steps(decision, settings))

    if has_spec and spec.see_also:
        lines += ["", "SEE ALSO " + " · ".join(spec.see_also)]
    return "\n".join(lines)
|