dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/productivity.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""PRD — the productivity verdict: *is this run still doing work, or just spending?*
|
|
2
|
+
|
|
3
|
+
docs/218 — the **loop-economics completion of `liveness()`**. `liveness` asks a
|
|
4
|
+
binary, lifetime question off ground truth: did git/journal state advance *at all*
|
|
5
|
+
since the run started (ADVANCING), is the run alive-but-not-moving (SPINNING), or
|
|
6
|
+
dead (STALLED)? PRD asks a different, *continuous* question off a **trend**: is the
|
|
7
|
+
amount of work landed *per step* collapsing toward nothing? A run can be ADVANCING
|
|
8
|
+
(it committed) and still be DIMINISHING (each successive step does less and less
|
|
9
|
+
until it is burning budget to refine the same thing). That gap — productive vs.
|
|
10
|
+
*productive-but-fading* — has no home in `liveness` (a single since-start count
|
|
11
|
+
cannot see a trend) and no home in `loop_decide` (whose every stop is a hard count
|
|
12
|
+
cap or a discrete verdict, never a velocity).
|
|
13
|
+
|
|
14
|
+
This is `liveness`'s **lateral sibling** — the same pure-verdict shape, re-aimed
|
|
15
|
+
from "did state move?" to "is the work-per-step rate fading?":
|
|
16
|
+
|
|
17
|
+
arbiter.arbitrate (request, live_leases, config) -> decision
|
|
18
|
+
loop_decide.decide (LoopState, IterationOutcome) -> LoopDecision
|
|
19
|
+
liveness.classify (ProgressEvidence, policy) -> LivenessVerdict
|
|
20
|
+
productivity.classify (WorkHistory, policy) -> ProductivityVerdict
|
|
21
|
+
^ THIS module
|
|
22
|
+
|
|
23
|
+
It is lifted faithfully from the diminishing-returns gate Claude Code ships in its
|
|
24
|
+
own session loop (`query/tokenBudget.ts` `checkTokenBudget` — the
|
|
25
|
+
`isDiminishing = continuationCount>=3 AND lastDelta<T AND priorDelta<T` rule, the
|
|
26
|
+
docs/189 audit's "cleanest loop-economics lift"). DOS owns the *mechanism* — a pure
|
|
27
|
+
trend verdict — and pushes the *policy* (which unit the deltas count, how many
|
|
28
|
+
steps before judging, what floor counts as "fading") out to data, the
|
|
29
|
+
mechanism/policy split that lets a small thing be a universal cog: the kernel does
|
|
30
|
+
not know whether a "work unit" is a token, a commit, a changed byte, or a passed
|
|
31
|
+
test. The host names the unit in `dos.toml [productivity]`; the kernel only knows
|
|
32
|
+
*the rate is falling*.
|
|
33
|
+
|
|
34
|
+
**Byte-clean by construction.** A per-step work delta is a count the *runtime/env*
|
|
35
|
+
authors (tokens spent this turn, commits this step, bytes diffed) — never the
|
|
36
|
+
judged agent's narration. PRD reads the same kind of agent-external counter
|
|
37
|
+
`liveness` reads off git, and `tool_stream` reads off env-authored result digests
|
|
38
|
+
(the docs/138 invariant). So DIMINISHING is "the work rate the environment
|
|
39
|
+
recorded is fading," never "the agent says it's almost done" — a quantity, not a
|
|
40
|
+
self-report. PRD says the *rate* fell; it never says the work was *wrong* (quality
|
|
41
|
+
is an advisory judge's call — `llm_judge` — never this deterministic verb, the
|
|
42
|
+
distrust-state / distrust-judgment line `liveness` draws).
|
|
43
|
+
|
|
44
|
+
**Multi-signal, so one slow step can't false-trip.** The whole reason CC ANDs
|
|
45
|
+
three signals (enough steps AND this delta small AND the prior delta small) is that
|
|
46
|
+
a single quiet turn is not a fading run — a run legitimately pauses to read, to
|
|
47
|
+
plan, to wait on eventual consistency. DIMINISHING requires a *sustained* low rate
|
|
48
|
+
(the two most recent deltas both under the floor) past a minimum step count, so the
|
|
49
|
+
verdict fires on a trend, not a blip. This is the productivity analogue of
|
|
50
|
+
`liveness`'s `grace_ms` young-and-alive guard: withhold the accusation until there
|
|
51
|
+
is enough evidence to make it.
|
|
52
|
+
|
|
53
|
+
**Advisory.** Like `liveness.SPINNING`, DIMINISHING REPORTS; it never kills a
|
|
54
|
+
process or refuses a lease. A loop may consult PRD and choose to stop (the natural
|
|
55
|
+
first consumer — a `loop_decide` DIMINISHING_RETURNS rung that converts
|
|
56
|
+
stop-after-N into stop-when-unproductive), the enforce ladder may attach a
|
|
57
|
+
WARN-before-BLOCK nudge, and `dos top` may surface a fading run — but the
|
|
58
|
+
productivity verdict and the admission decision stay different syscalls.
|
|
59
|
+
|
|
60
|
+
**No-telemetry / no-plan discipline** (the `test_verify_no_plan` sibling): PRD needs
|
|
61
|
+
*nothing* but a list of per-step work deltas the caller already has. No plan, no
|
|
62
|
+
registry, no journal, no clock — `classify()` performs no I/O at all (there is no clock
|
|
63
|
+
rung here; unlike `liveness`, productivity is timeless — it reads a sequence, not
|
|
64
|
+
ages). A caller with two deltas gets a verdict; a caller with none gets the honest
|
|
65
|
+
"not enough history to judge" (PRODUCTIVE-benign, the withhold-the-accusation
|
|
66
|
+
floor).
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
from __future__ import annotations
|
|
70
|
+
|
|
71
|
+
import enum
|
|
72
|
+
from dataclasses import dataclass
|
|
73
|
+
from typing import Sequence
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class Productivity(str, enum.Enum):
    """Closed set of productivity verdicts; exactly one applies to a history.

    Members subclass ``str`` and every value is spelled the same as its member
    name, so a member compares equal to the bare token and can be written to a
    CLI stream (or keyed into an exit-code map) without a translation table.
    """

    # Work is still landing per step, or there is too little history to judge.
    PRODUCTIVE = "PRODUCTIVE"
    # Past the minimum step count with a sustained low per-step rate — fading.
    DIMINISHING = "DIMINISHING"
    # The newest step landed zero work — flat-lined.
    STALLED = "STALLED"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Print the raw token instead of the default "Productivity.NAME" form.
        return self._value_
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass(frozen=True)
class ProductivityPolicy:
    """Tunable thresholds for the productivity verdict — policy, not mechanism.

    The kernel owns the comparison logic; these two numbers are data a
    workspace may override (the original notes name ``dos.toml
    [productivity]`` as the place). The defaults mirror the values the module
    attributes to Claude Code's ``tokenBudget.ts``: three continuations and a
    500-unit diminishing threshold.

    min_steps — the fewest work steps a history must contain before a run may
                be called DIMINISHING. Shorter histories are a blip, not a
                trend, so the accusation is withheld.
    floor     — the per-step work-unit level below which a step counts as
                "did little". The unit (tokens, commits, changed bytes, ...)
                is the host's choice; only magnitudes are compared here.

    Raises:
        ValueError: if either threshold is negative.
    """

    min_steps: int = 3  # minimum trend length before judging
    floor: int = 500  # per-step "did little" work-unit floor

    def __post_init__(self) -> None:
        # Checked in declaration order so min_steps is reported first when
        # both thresholds are invalid.
        checks = (
            (self.min_steps, "min_steps must be non-negative"),
            (self.floor, "the work-unit floor must be non-negative"),
        )
        for threshold, complaint in checks:
            if threshold < 0:
                raise ValueError(complaint)


# Shared default thresholds (3 steps, 500-unit floor).
DEFAULT_POLICY = ProductivityPolicy()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass(frozen=True)
class WorkHistory:
    """Frozen, ordered record of per-step work deltas, assembled by the caller.

    Nothing here touches a clock or performs I/O: the caller measures each
    step's work (in whatever unit the host chose) and hands over the finished
    sequence.

    deltas — one non-negative count per step, OLDEST first. Any iterable is
             accepted at construction and stored as a tuple. A negative
             entry is rejected: the host passes the magnitude of work done,
             never a signed regression.

    Raises:
        ValueError: if any delta is negative.
    """

    deltas: tuple[int, ...] = ()

    def __post_init__(self) -> None:
        # Freeze non-tuple input so the instance stays hashable and never
        # shares a mutable list with its caller. The dataclass is frozen, so
        # the field is rebound through object.__setattr__.
        recorded = self.deltas
        if not isinstance(recorded, tuple):
            recorded = tuple(recorded)
            object.__setattr__(self, "deltas", recorded)
        for amount in recorded:
            if amount < 0:
                raise ValueError("work deltas must be non-negative (a count of work done)")

    @property
    def step_count(self) -> int:
        """Number of work steps recorded in the trend."""
        return len(self.deltas)

    @classmethod
    def of(cls, deltas: Sequence[int]) -> "WorkHistory":
        """Alternate constructor: build from any oldest-first sequence of deltas."""
        frozen = tuple(deltas)
        return cls(frozen)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@dataclass(frozen=True)
class ProductivityVerdict:
    """Result object pairing a verdict with the evidence that produced it.

    verdict — the typed ``Productivity`` outcome.
    reason  — a one-line, operator-facing explanation.
    history — the ``WorkHistory`` the verdict was computed from, kept so a
              JSON rendering can show the numbers behind the call and not
              just the label.
    """

    verdict: Productivity
    reason: str
    history: WorkHistory

    def to_dict(self) -> dict:
        """Return the JSON-ready shape: verdict token, reason, and the trend.

        ``last_delta`` / ``prior_delta`` are ``None`` when the history is too
        short to have a newest / second-newest step.
        """
        payload = {
            "verdict": self.verdict.value,
            "reason": self.reason,
        }
        trend = self.history.deltas
        payload["history"] = {
            "deltas": list(trend),
            "step_count": self.history.step_count,
            "last_delta": trend[-1] if trend else None,
            "prior_delta": trend[-2] if len(trend) >= 2 else None,
        }
        return payload
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def classify(
    history: WorkHistory, policy: ProductivityPolicy = DEFAULT_POLICY
) -> ProductivityVerdict:
    """Classify a run's productivity from its per-step work trend. PURE — no I/O.

    The rungs are evaluated top to bottom; the first one that matches wins:

    1. PRODUCTIVE (too little history) — fewer than ``policy.min_steps``
       steps, or no steps at all. There is not enough of a trend to accuse
       the run of fading, so the benign verdict is reported.
    2. STALLED — the newest delta is exactly 0. Checked before DIMINISHING so
       a flat-line is named precisely instead of being reported as a
       merely-fading rate.
    3. DIMINISHING — a prior step exists and the two newest deltas are BOTH
       under ``policy.floor``. Rung 1 has already guaranteed
       ``step_count >= min_steps``. Requiring both deltas keeps one quiet
       step from tripping the verdict.
    4. PRODUCTIVE — anything else: a recent step cleared the floor, or the
       low rate is not sustained across the last two steps.

    A degenerate ``min_steps < 2`` policy can let a one-step history past
    rung 1. Such a history has no prior delta, so it can only come out
    PRODUCTIVE or STALLED, and its reason says the prior is "n/a" rather than
    quoting a number that was never measured.

    Args:
        history: the ordered (oldest-first) per-step work deltas.
        policy: the ``min_steps`` / ``floor`` thresholds; defaults to
            ``DEFAULT_POLICY``.

    Returns:
        A ``ProductivityVerdict`` carrying the verdict, a one-line reason and
        the ``history`` it was computed from.
    """
    n = history.step_count

    # 1. PRODUCTIVE (too little history). An empty history also lands here
    # even under min_steps == 0; only claim "< min" when that is actually
    # true, so the reason never asserts "0 < 0".
    if n < policy.min_steps or n == 0:
        shortfall = f" (< min {policy.min_steps})" if n < policy.min_steps else ""
        return ProductivityVerdict(
            verdict=Productivity.PRODUCTIVE,
            reason=(
                f"{n} work step(s) so far{shortfall} — not enough "
                f"history to judge a fading rate; no productivity problem yet"
            ),
            history=history,
        )

    last = history.deltas[-1]

    # 2. STALLED — the most recent step did zero work.
    if last == 0:
        return ProductivityVerdict(
            verdict=Productivity.STALLED,
            reason=(
                f"the most recent of {n} steps landed 0 work units — flat-lined "
                f"(zero forward work this step)"
            ),
            history=history,
        )

    # The prior delta is the second of the two ANDed signals. A one-step
    # history has none; keep that explicit as None instead of substituting a
    # stand-in number that would later leak into the reason text.
    prior = history.deltas[-2] if n >= 2 else None

    # 3. DIMINISHING — a SUSTAINED low rate: both recent deltas under the floor.
    if prior is not None and last < policy.floor and prior < policy.floor:
        return ProductivityVerdict(
            verdict=Productivity.DIMINISHING,
            reason=(
                f"the last two of {n} steps landed {prior} then {last} work units, "
                f"both under the {policy.floor}-unit floor — a sustained fading rate "
                f"(diminishing returns)"
            ),
            history=history,
        )

    # 4. PRODUCTIVE — a recent step cleared the floor, or the low rate is not
    # sustained across the last two steps.
    prior_text = "n/a" if prior is None else prior
    return ProductivityVerdict(
        verdict=Productivity.PRODUCTIVE,
        reason=(
            f"last step landed {last} work units over {n} steps "
            f"(prior {prior_text}; floor {policy.floor}) — still productive"
        ),
        history=history,
    )
|
dos/provider_limit.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Provider-limit category — the one canonical vocabulary the dispatch family
|
|
2
|
+
collapses every rate-limit / quota / overload signal into (the PI5 collapse
|
|
3
|
+
target promised in the job repo's ``agents/quota/base.py``).
|
|
4
|
+
|
|
5
|
+
Three independent taxonomies exist upstream, each correct for its own input:
|
|
6
|
+
|
|
7
|
+
* ``rate_limit_classify.Kind`` (job) — string markers on a ``claude -p``
|
|
8
|
+
terminal envelope ({RATE_LIMITED, OVERLOADED, CREDIT_LOW, NONE}).
|
|
9
|
+
* ``agents.quota.QuotaErrorClass`` (job) — provider exceptions
|
|
10
|
+
({RPM_THROTTLED, DAILY_QUOTA_EXHAUSTED, SUBSCRIPTION_BLACKOUT, TRANSIENT_429}).
|
|
11
|
+
* apply-next-loop outcome tokens (job) — exit-code + log regex
|
|
12
|
+
({LLM-QUOTA-EXHAUSTED, LLM-QUOTA-EXHAUSTED-DURABLE, CORRELATED-OUTAGE, …}).
|
|
13
|
+
|
|
14
|
+
They overlap but share no OUTPUT type, so every loop re-decided "transient vs
|
|
15
|
+
usage vs hard-quota" on its own and drifted. This module is **not** a fourth
|
|
16
|
+
classifier — it is the shared category + the canonical backoff policy that all
|
|
17
|
+
three map *into* via the thin pure ``from_*`` translators below.
|
|
18
|
+
|
|
19
|
+
⚓ Provider-invariance (job CLAUDE.md "Bulkhead"): provider distinctions stay
|
|
20
|
+
infrastructure inside the adapter. The mapper takes the upstream enum's VALUE
|
|
21
|
+
(a plain ``str``), never the upstream class — so ``dos`` imports nothing from
|
|
22
|
+
``agents.quota`` / ``rate_limit_classify``; the dependency arrow points the
|
|
23
|
+
right way (job → dos), never back.
|
|
24
|
+
|
|
25
|
+
The kernel decision logic that ACTS on a category already lives in
|
|
26
|
+
``dos.loop_decide.decide`` (``OutcomeKind.OVERLOADED`` → ``retry-same-iter``
|
|
27
|
+
with the same backoff ladder; ``RATE_LIMITED`` → stop). This module does not
|
|
28
|
+
change that — it standardizes the *word*, and ``policy_for`` makes the backoff
|
|
29
|
+
ladder a single source of truth both sides can read.
|
|
30
|
+
|
|
31
|
+
PURE — no I/O, no clock. py.typed.
|
|
32
|
+
"""
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import enum
|
|
36
|
+
from dataclasses import dataclass
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ProviderLimit(str, enum.Enum):
    """Canonical provider-limit category that dispatch logic reasons about.

    Members subclass ``str`` so each one compares equal to its lowercase token
    (``ProviderLimit.USAGE_WINDOW == "usage_window"``) and round-trips as text.

    TRANSIENT_OVERLOAD — a server-side overload (529 / ``overloaded_error`` /
                         "temporarily limiting requests (not your usage
                         limit)"). Clears in seconds to minutes. Policy:
                         retry the SAME unit of work with backoff, and stop
                         only after repeated consecutive hits.
    USAGE_WINDOW       — a 429 / quota / rolling-window cap. Retrying fails
                         identically until the window resets on a timer.
                         Policy: stop or defer, then re-invoke after reset.
    HARD_QUOTA         — a billing block such as "credit balance too low".
                         No timer fixes it; an operator must act. Policy:
                         stop and surface.
                         NOTE(review): the original notes also file an
                         "opaque subscription blackout" here, while
                         ``_QUOTA_ERROR_CLASS_TO_CATEGORY`` maps
                         ``subscription_blackout`` to USAGE_WINDOW — confirm
                         which is intended.
    NONE               — no provider-limit signal.

    The split that matters is TRANSIENT_OVERLOAD (retry) versus everything
    else (stop/defer). Both an overload and a quota window can arrive as a
    ``rejected`` rate-limit event, so the error TYPE and the "(not your usage
    limit)" wording disambiguate them — not the ``rejected`` status alone.
    """

    TRANSIENT_OVERLOAD = "transient_overload"
    USAGE_WINDOW = "usage_window"
    HARD_QUOTA = "hard_quota"
    NONE = "none"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Print the bare token instead of the default "ProviderLimit.NAME".
        return self._value_
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Shared backoff ladder (seconds) for retrying after a transient overload.
# ``loop_decide._OVERLOADED_BACKOFF`` deliberately keeps its own copy for the
# hot decide() path; the two MUST stay equal (the original notes say a
# cross-module test in both repos asserts this).
_OVERLOAD_BACKOFF: tuple[int, ...] = (60, 270, 1200)
# Number of consecutive TRANSIENT_OVERLOAD hits after which the policy is stop.
_OVERLOAD_ESCALATE_AFTER = 3


@dataclass(frozen=True)
class LimitPolicy:
    """Immutable handling policy for a single :class:`ProviderLimit` category.

    Obtained through :func:`policy_for`. It answers, in one place, whether a
    category is retryable, with what backoff, when to escalate, whether an
    operator must step in, and whether the limit clears by itself — so that
    individual loops do not re-derive (and drift on) those answers.
    """

    category: ProviderLimit
    retryable_same_iter: bool
    """Whether the same unit of work should be retried (TRANSIENT_OVERLOAD only)."""

    backoff_seconds: tuple[int, ...]
    """Retry backoff ladder; the empty tuple for categories that are not retried."""

    escalate_after: int
    """How many consecutive hits of this category precede a hard stop.

    ``_OVERLOAD_ESCALATE_AFTER`` (3) for TRANSIENT_OVERLOAD; ``1`` for the
    stop-now categories, where the first hit already stops.
    """

    operator_action_required: bool
    """Set for HARD_QUOTA: neither backoff nor waiting resolves it."""

    resets_on_timer: bool
    """Set when the limit clears without intervention (TRANSIENT_OVERLOAD,
    USAGE_WINDOW); unset for HARD_QUOTA (operator-gated) and NONE."""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# One policy per category, keyed by the category each policy declares. The
# tuple order fixes the table's iteration order.
_POLICIES: dict[ProviderLimit, LimitPolicy] = {
    entry.category: entry
    for entry in (
        # Overload: retry the same work on the shared ladder, stop after
        # repeated consecutive hits.
        LimitPolicy(
            category=ProviderLimit.TRANSIENT_OVERLOAD,
            retryable_same_iter=True,
            backoff_seconds=_OVERLOAD_BACKOFF,
            escalate_after=_OVERLOAD_ESCALATE_AFTER,
            operator_action_required=False,
            resets_on_timer=True,
        ),
        # Usage window: stop on the first hit; it clears on a timer.
        LimitPolicy(
            category=ProviderLimit.USAGE_WINDOW,
            retryable_same_iter=False,
            backoff_seconds=(),
            escalate_after=1,
            operator_action_required=False,
            resets_on_timer=True,
        ),
        # Hard quota: stop on the first hit; only an operator can clear it.
        LimitPolicy(
            category=ProviderLimit.HARD_QUOTA,
            retryable_same_iter=False,
            backoff_seconds=(),
            escalate_after=1,
            operator_action_required=True,
            resets_on_timer=False,
        ),
        # No limit signal: nothing to retry, nothing to wait for.
        LimitPolicy(
            category=ProviderLimit.NONE,
            retryable_same_iter=False,
            backoff_seconds=(),
            escalate_after=1,
            operator_action_required=False,
            resets_on_timer=False,
        ),
    )
}


def policy_for(category: ProviderLimit) -> LimitPolicy:
    """Look up the canonical :class:`LimitPolicy` for ``category``.

    The table above holds an entry for each of the four
    :class:`ProviderLimit` members, so the lookup is total over the enum.

    Raises:
        KeyError: if ``category`` has no entry in the table.
    """
    policy = _POLICIES[category]
    return policy
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
# Mappers — pure translators FROM each upstream taxonomy INTO the canonical
|
|
164
|
+
# category. They do NOT classify (the upstream classifier already did); they
|
|
165
|
+
# translate. Each takes the upstream token's str VALUE, so this module never
|
|
166
|
+
# imports the upstream class (keeps the job→dos dependency arrow one-way).
|
|
167
|
+
# ---------------------------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
# rate_limit_classify.Kind values (job/scripts/rate_limit_classify.py).
_RATE_LIMIT_KIND_TO_CATEGORY: dict[str, ProviderLimit] = {
    "OVERLOADED": ProviderLimit.TRANSIENT_OVERLOAD,
    "RATE_LIMITED": ProviderLimit.USAGE_WINDOW,
    "CREDIT_LOW": ProviderLimit.HARD_QUOTA,
    "NONE": ProviderLimit.NONE,
}


def from_rate_limit_kind(kind: str) -> ProviderLimit:
    """Map a ``rate_limit_classify.Kind`` value → canonical category.

    Accepts either the plain ``str`` value or an enum member. A member is
    unwrapped to its ``.value`` first, because ``str()`` on a plain
    ``(str, Enum)`` member that does not override ``__str__`` yields
    ``"Kind.OVERLOADED"`` rather than ``"OVERLOADED"``. That would miss the
    table and silently report a real limit as NONE.

    Args:
        kind: the upstream token (``"OVERLOADED"``, ``"RATE_LIMITED"``,
            ``"CREDIT_LOW"``, ``"NONE"``) or an enum member carrying it.

    Returns:
        The matching :class:`ProviderLimit`; NONE for an unknown token
        (defensive: an unrecognized token must not masquerade as a real
        limit).
    """
    token = kind.value if isinstance(kind, enum.Enum) else kind
    return _RATE_LIMIT_KIND_TO_CATEGORY.get(str(token), ProviderLimit.NONE)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# agents.quota.QuotaErrorClass values (job/agents/quota/base.py).
# NOTE(review): the ProviderLimit docstring lists an "opaque subscription
# blackout" under HARD_QUOTA, but "subscription_blackout" maps to USAGE_WINDOW
# here. The mapping is left as-is — confirm which category is intended.
_QUOTA_ERROR_CLASS_TO_CATEGORY: dict[str, ProviderLimit] = {
    "rpm_throttled": ProviderLimit.TRANSIENT_OVERLOAD,
    "transient_429": ProviderLimit.TRANSIENT_OVERLOAD,
    "daily_quota_exhausted": ProviderLimit.USAGE_WINDOW,
    "subscription_blackout": ProviderLimit.USAGE_WINDOW,
}


def from_quota_error_class(qec: str) -> ProviderLimit:
    """Map an ``agents.quota.QuotaErrorClass`` value → canonical category.

    This is the Bulkhead seam: the apply adapter keeps ``QuotaErrorClass``
    internally for its own backoff and maps UP into the canonical category at
    the dispatch boundary. ``rpm_throttled`` / ``transient_429`` are
    short-timer server-side throttles → TRANSIENT_OVERLOAD; the
    daily/subscription caps are treated as timer-reset windows →
    USAGE_WINDOW. A genuine billing block surfaces as HARD_QUOTA via the
    ``rate_limit_classify`` CREDIT_LOW path, not here.

    An enum member is unwrapped to its ``.value`` before the lookup. The
    table keys are lowercase values, so ``str()`` on a member that does not
    override ``__str__`` (``"QuotaErrorClass.RPM_THROTTLED"``) would never
    match and would silently come back as NONE.

    Args:
        qec: the upstream value (e.g. ``"rpm_throttled"``) or an enum member
            carrying it.

    Returns:
        The matching :class:`ProviderLimit`; NONE for an unknown value.
    """
    token = qec.value if isinstance(qec, enum.Enum) else qec
    return _QUOTA_ERROR_CLASS_TO_CATEGORY.get(str(token), ProviderLimit.NONE)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# apply-next-loop Step-3 outcome tokens (job/.claude/skills/apply-next-loop).
_APPLY_OUTCOME_TOKEN_TO_CATEGORY: dict[str, ProviderLimit] = {
    "LLM-QUOTA-EXHAUSTED": ProviderLimit.USAGE_WINDOW,
    "LLM-QUOTA-EXHAUSTED-DURABLE": ProviderLimit.USAGE_WINDOW,
    # The two outage tokens below are infra outages with their own stop
    # policy, NOT provider limits. They are listed explicitly as NONE so a
    # caller asking "is this a provider limit?" gets a truthful no.
    "CORRELATED-OUTAGE": ProviderLimit.NONE,
    "BROWSER-SERVICE-UNAVAILABLE": ProviderLimit.NONE,
}


def from_apply_outcome_token(token: str) -> ProviderLimit:
    """Translate an apply-next-loop Step-3 outcome token into a category.

    Both quota tokens — the transient ``LLM-QUOTA-EXHAUSTED`` and the durable
    ``LLM-QUOTA-EXHAUSTED-DURABLE`` — are USAGE_WINDOW. Durability is a
    policy nuance (``resets_on_timer`` plus a measured ``window_end``), not a
    separate category. Tokens that are unknown or are not limits come back
    as NONE.
    """
    key = str(token)
    try:
        return _APPLY_OUTCOME_TOKEN_TO_CATEGORY[key]
    except KeyError:
        return ProviderLimit.NONE


# Public API of this module.
__all__ = [
    "ProviderLimit",
    "LimitPolicy",
    "policy_for",
    "from_rate_limit_kind",
    "from_quota_error_class",
    "from_apply_outcome_token",
]
|
dos/py.typed
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
# PEP 561 marker: the `dos` package ships inline type hints. Its presence tells
|
|
2
|
+
# type checkers (mypy/pyright) to read this package's annotations for downstream
|
|
3
|
+
# consumers (the userland app, dos_mcp). Matches the `Typing :: Typed` classifier
|
|
4
|
+
# in pyproject.toml. See https://peps.python.org/pep-0561/.
|