dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/productivity.py ADDED
@@ -0,0 +1,296 @@
1
+ """PRD — the productivity verdict: *is this run still doing work, or just spending?*
2
+
3
+ docs/218 — the **loop-economics completion of `liveness()`**. `liveness` asks a
4
+ binary, lifetime question off ground truth: did git/journal state advance *at all*
5
+ since the run started (ADVANCING), is the run alive-but-not-moving (SPINNING), or
6
+ dead (STALLED)? PRD asks a different, *continuous* question off a **trend**: is the
7
+ amount of work landed *per step* collapsing toward nothing? A run can be ADVANCING
8
+ (it committed) and still be DIMINISHING (each successive step does less and less
9
+ until it is burning budget to refine the same thing). That gap — productive vs.
10
+ *productive-but-fading* — has no home in `liveness` (a single since-start count
11
+ cannot see a trend) and no home in `loop_decide` (whose every stop is a hard count
12
+ cap or a discrete verdict, never a velocity).
13
+
14
+ This is `liveness`'s **lateral sibling** — the same pure-verdict shape, re-aimed
15
+ from "did state move?" to "is the work-per-step rate fading?":
16
+
17
+ arbiter.arbitrate (request, live_leases, config) -> decision
18
+ loop_decide.decide (LoopState, IterationOutcome) -> LoopDecision
19
+ liveness.classify (ProgressEvidence, policy) -> LivenessVerdict
20
+ productivity.classify (WorkHistory, policy) -> ProductivityVerdict
21
+ ^ THIS module
22
+
23
+ It is lifted faithfully from the diminishing-returns gate Claude Code ships in its
24
+ own session loop (`query/tokenBudget.ts` `checkTokenBudget` — the
25
+ `isDiminishing = continuationCount>=3 AND lastDelta<T AND priorDelta<T` rule, the
26
+ docs/189 audit's "cleanest loop-economics lift"). DOS owns the *mechanism* — a pure
27
+ trend verdict — and pushes the *policy* (which unit the deltas count, how many
28
+ steps before judging, what floor counts as "fading") out to data, the
29
+ mechanism/policy split that lets a small thing be a universal cog: the kernel does
30
+ not know whether a "work unit" is a token, a commit, a changed byte, or a passed
31
+ test. The host names the unit in `dos.toml [productivity]`; the kernel only knows
32
+ *the rate is falling*.
33
+
34
+ **Byte-clean by construction.** A per-step work delta is a count the *runtime/env*
35
+ authors (tokens spent this turn, commits this step, bytes diffed) — never the
36
+ judged agent's narration. PRD reads the same kind of agent-external counter
37
+ `liveness` reads off git, and `tool_stream` reads off env-authored result digests
38
+ (the docs/138 invariant). So DIMINISHING is "the work rate the environment
39
+ recorded is fading," never "the agent says it's almost done" — a quantity, not a
40
+ self-report. PRD says the *rate* fell; it never says the work was *wrong* (quality
41
+ is an advisory judge's call — `llm_judge` — never this deterministic verb, the
42
+ distrust-state / distrust-judgment line `liveness` draws).
43
+
44
+ **Multi-signal, so one slow step can't false-trip.** The whole reason CC ANDs
45
+ three signals (enough steps AND this delta small AND the prior delta small) is that
46
+ a single quiet turn is not a fading run — a run legitimately pauses to read, to
47
+ plan, to wait on eventual consistency. DIMINISHING requires a *sustained* low rate
48
+ (the two most recent deltas both under the floor) past a minimum step count, so the
49
+ verdict fires on a trend, not a blip. This is the productivity analogue of
50
+ `liveness`'s `grace_ms` young-and-alive guard: withhold the accusation until there
51
+ is enough evidence to make it.
52
+
53
+ **Advisory.** Like `liveness.SPINNING`, DIMINISHING REPORTS; it never kills a
54
+ process or refuses a lease. A loop may consult PRD and choose to stop (the natural
55
+ first consumer — a `loop_decide` DIMINISHING_RETURNS rung that converts
56
+ stop-after-N into stop-when-unproductive), the enforce ladder may attach a
57
+ WARN-before-BLOCK nudge, and `dos top` may surface a fading run — but the
58
+ productivity verdict and the admission decision stay different syscalls.
59
+
60
+ **No-telemetry / no-plan discipline** (the `test_verify_no_plan` sibling): PRD needs
61
+ *nothing* but a list of per-step work deltas the caller already has. No plan, no
62
+ registry, no journal, no clock — `classify()` makes no I/O at all (there is no clock
63
+ rung here; unlike `liveness`, productivity is timeless — it reads a sequence, not
64
+ ages). A caller with two deltas gets a verdict; a caller with none gets the honest
65
+ "not enough history to judge" (PRODUCTIVE-benign, the withhold-the-accusation
66
+ floor).
67
+ """
68
+
69
+ from __future__ import annotations
70
+
71
+ import enum
72
+ from dataclasses import dataclass
73
+ from typing import Sequence
74
+
75
+
76
class Productivity(str, enum.Enum):
    """Three-way, mutually exclusive productivity verdict.

    The ``str`` mix-in makes every member equal to its own token, so a verdict
    can be written to CLI stdout or keyed into an exit-code map without a lookup
    table (the same convention as `liveness.Liveness` and
    `gate_classify.Verdict`).
    """

    # Still landing work per step, or too little history to judge.
    PRODUCTIVE = "PRODUCTIVE"
    # A sustained low work-rate past the min-step count: the run is fading.
    DIMINISHING = "DIMINISHING"
    # The most recent step landed ZERO work: the run flat-lined.
    STALLED = "STALLED"

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Return the bare token instead of ``Productivity.<NAME>``."""
        return str.__str__(self)
89
+
90
+
91
@dataclass(frozen=True)
class ProductivityPolicy:
    """Thresholds separating PRODUCTIVE / DIMINISHING / STALLED — policy, not mechanism.

    The classifier is the mechanism; these numbers are configuration. The
    defaults are generic and lifted from Claude Code's own loop
    (`tokenBudget.ts`: 3 continuations, a 500-unit diminishing threshold); a
    workspace may declare its own in `dos.toml [productivity]`.

    min_steps — minimum number of work steps before a run may be called
                DIMINISHING. Below it, one or two small deltas are treated as a
                blip rather than a decline and the accusation is withheld
                (CC's `continuationCount >= 3`).
    floor     — per-step work-unit floor under which a step counts as "did
                little". A run is DIMINISHING only when the two most recent
                deltas are BOTH under this floor (CC's `DIMINISHING_THRESHOLD`,
                500 tokens). The unit is the host's choice; only magnitudes are
                compared.

    With the defaults, a run of at least 3 steps whose last two steps each
    landed fewer than 500 units is fading; anything else is still PRODUCTIVE.
    """

    min_steps: int = 3  # CC continuationCount>=3 — min trend length before judging
    floor: int = 500  # CC DIMINISHING_THRESHOLD — per-step "did little" work-unit floor

    def __post_init__(self) -> None:
        """Reject negative thresholds.

        Raises:
            ValueError: if ``min_steps`` or ``floor`` is below zero
                (``min_steps`` is checked first).
        """
        checks = (
            (self.min_steps, "min_steps must be non-negative"),
            (self.floor, "the work-unit floor must be non-negative"),
        )
        for threshold, complaint in checks:
            if threshold < 0:
                raise ValueError(complaint)


# The policy used when the caller does not supply one.
DEFAULT_POLICY = ProductivityPolicy()
130
+
131
+
132
@dataclass(frozen=True)
class WorkHistory:
    """Ordered per-step work deltas that `classify()` reads — gathered by the CALLER.

    The verdict itself performs no I/O and reads no clock; whoever builds this
    object measures each step's work (tokens spent, commits, bytes diffed — the
    host's chosen unit) and freezes the ordered list here.

    deltas — per-step work deltas, OLDEST first, one number per step. An empty
             or one-element history is "not enough history to judge a trend".
             Negative values are rejected: a delta is a magnitude of work done,
             never a signed regression.

    `deltas[-1]` (this step) and `deltas[-2]` (the prior step) are the two reads
    the verdict depends on; the whole sequence is kept so the JSON output can
    echo the full trend and so `step_count` reflects every step.
    """

    deltas: tuple[int, ...] = ()

    def __post_init__(self) -> None:
        """Freeze ``deltas`` to a tuple and validate it.

        Any iterable is accepted at the boundary; it is converted to a tuple so
        the frozen dataclass stays hashable and never shares a mutable list with
        the caller.

        Raises:
            ValueError: if any delta is negative.
        """
        frozen = self.deltas
        if not isinstance(frozen, tuple):
            frozen = tuple(frozen)
            # Frozen dataclass: assignment has to bypass the generated __setattr__.
            object.__setattr__(self, "deltas", frozen)
        for delta in frozen:
            if delta < 0:
                raise ValueError("work deltas must be non-negative (a count of work done)")

    @property
    def step_count(self) -> int:
        """Number of work steps the trend covers."""
        return len(self.deltas)

    @classmethod
    def of(cls, deltas: Sequence[int]) -> "WorkHistory":
        """Build a history from any ordered (oldest-first) sequence of deltas."""
        return cls(deltas=tuple(deltas))
177
+
178
+
179
@dataclass(frozen=True)
class ProductivityVerdict:
    """The single result `classify()` returns, with the trend echoed back.

    verdict — the typed `Productivity` state.
    reason  — a one-line, operator-facing summary of why that state was chosen.
    history — the `WorkHistory` the decision was made from, carried along so the
              JSON output can show the verdict together with the facts behind it.

    `to_dict` defines the JSON shape.
    """

    verdict: Productivity
    reason: str
    history: WorkHistory

    def to_dict(self) -> dict:
        """Return a JSON-serialisable view of the verdict and its evidence.

        ``last_delta`` / ``prior_delta`` are ``None`` when the history is too
        short to have them.
        """
        verdict_token = self.verdict.value
        summary = self.reason
        trend = self.history.deltas
        evidence = {
            "deltas": list(trend),
            "step_count": self.history.step_count,
            "last_delta": trend[-1] if trend else None,
            "prior_delta": trend[-2] if len(trend) >= 2 else None,
        }
        return {"verdict": verdict_token, "reason": summary, "history": evidence}
207
+
208
+
209
def classify(
    history: WorkHistory, policy: ProductivityPolicy = DEFAULT_POLICY
) -> ProductivityVerdict:
    """Classify a run's productivity from its per-step work trend. PURE — no I/O.

    The ladder is read top to bottom; the first rung that matches wins:

    1. PRODUCTIVE (too little history) — no steps at all, or fewer than
       `policy.min_steps` steps. There is not enough of a trend to accuse the
       run of fading, so the accusation is withheld. Checked FIRST, which means
       a short history is never reported as STALLED or DIMINISHING.
    2. STALLED — the most recent step landed ZERO work (`deltas[-1] == 0`).
       Checked before DIMINISHING so an exact flat-line is named precisely.
    3. DIMINISHING — a sustained low rate: a prior step exists and the last two
       deltas are BOTH strictly under `policy.floor`. Requiring both is what
       keeps a single quiet step from tripping the verdict.
    4. PRODUCTIVE — none of the above.

    A `min_steps < 2` policy can reach rungs 2-4 with a one-step history. Such a
    history has no prior delta, so it can only be STALLED or PRODUCTIVE, and the
    PRODUCTIVE reason reports the prior as "n/a" rather than inventing a number
    (matching `ProductivityVerdict.to_dict`, which reports `prior_delta: None`).

    Args:
        history: the ordered (oldest-first) per-step work deltas.
        policy: the thresholds to judge against; defaults to `DEFAULT_POLICY`.

    Returns:
        A `ProductivityVerdict` carrying the state, a one-line reason, and the
        `history` it was derived from.
    """
    n = history.step_count

    # 1. PRODUCTIVE (too little history) — not enough steps to judge a trend.
    #    A run with no steps at all also lands here, even under min_steps == 0.
    if n == 0 or n < policy.min_steps:
        if n < policy.min_steps:
            reason = (
                f"{n} work step(s) so far (< min {policy.min_steps}) — not enough "
                f"history to judge a fading rate; no productivity problem yet"
            )
        else:
            # Only reachable with n == 0 and min_steps == 0: saying "< min 0"
            # would be false, so state the actual situation instead.
            reason = (
                "no work steps recorded yet — not enough "
                "history to judge a fading rate; no productivity problem yet"
            )
        return ProductivityVerdict(
            verdict=Productivity.PRODUCTIVE,
            reason=reason,
            history=history,
        )

    last = history.deltas[-1]

    # 2. STALLED — the most recent step did zero work. Kept distinct from a
    #    merely-fading rate so "it stopped" is not blurred into DIMINISHING.
    if last == 0:
        return ProductivityVerdict(
            verdict=Productivity.STALLED,
            reason=(
                f"the most recent of {n} steps landed 0 work units — flat-lined "
                f"(zero forward work this step)"
            ),
            history=history,
        )

    # The prior delta is the second of the two ANDed signals. A one-step history
    # (only possible when min_steps < 2) has none, and must never be DIMINISHING.
    has_prior = n >= 2
    prior = history.deltas[-2] if has_prior else None

    # 3. DIMINISHING — a SUSTAINED low rate: both recent deltas under the floor.
    if has_prior and last < policy.floor and prior < policy.floor:
        return ProductivityVerdict(
            verdict=Productivity.DIMINISHING,
            reason=(
                f"the last two of {n} steps landed {prior} then {last} work units, "
                f"both under the {policy.floor}-unit floor — a sustained fading rate "
                f"(diminishing returns)"
            ),
            history=history,
        )

    # 4. PRODUCTIVE — a recent step cleared the floor, or the low rate is not
    #    sustained across the last two steps.
    prior_text = str(prior) if has_prior else "n/a"
    return ProductivityVerdict(
        verdict=Productivity.PRODUCTIVE,
        reason=(
            f"last step landed {last} work units over {n} steps "
            f"(prior {prior_text}; floor {policy.floor}) — still productive"
        ),
        history=history,
    )
dos/provider_limit.py ADDED
@@ -0,0 +1,242 @@
1
+ """Provider-limit category — the one canonical vocabulary the dispatch family
2
+ collapses every rate-limit / quota / overload signal into (the PI5 collapse
3
+ target promised in the job repo's ``agents/quota/base.py``).
4
+
5
+ Three independent taxonomies exist upstream, each correct for its own input:
6
+
7
+ * ``rate_limit_classify.Kind`` (job) — string markers on a ``claude -p``
8
+ terminal envelope ({RATE_LIMITED, OVERLOADED, CREDIT_LOW, NONE}).
9
+ * ``agents.quota.QuotaErrorClass`` (job) — provider exceptions
10
+ ({RPM_THROTTLED, DAILY_QUOTA_EXHAUSTED, SUBSCRIPTION_BLACKOUT, TRANSIENT_429}).
11
+ * apply-next-loop outcome tokens (job) — exit-code + log regex
12
+ ({LLM-QUOTA-EXHAUSTED, LLM-QUOTA-EXHAUSTED-DURABLE, CORRELATED-OUTAGE, …}).
13
+
14
+ They overlap but share no OUTPUT type, so every loop re-decided "transient vs
15
+ usage vs hard-quota" on its own and drifted. This module is **not** a fourth
16
+ classifier — it is the shared category + the canonical backoff policy that all
17
+ three map *into* via the thin pure ``from_*`` translators below.
18
+
19
+ ⚓ Provider-invariance (job CLAUDE.md "Bulkhead"): provider distinctions stay
20
+ infrastructure inside the adapter. The mapper takes the upstream enum's VALUE
21
+ (a plain ``str``), never the upstream class — so ``dos`` imports nothing from
22
+ ``agents.quota`` / ``rate_limit_classify``; the dependency arrow points the
23
+ right way (job → dos), never back.
24
+
25
+ The kernel decision logic that ACTS on a category already lives in
26
+ ``dos.loop_decide.decide`` (``OutcomeKind.OVERLOADED`` → ``retry-same-iter``
27
+ with the same backoff ladder; ``RATE_LIMITED`` → stop). This module does not
28
+ change that — it standardizes the *word*, and ``policy_for`` makes the backoff
29
+ ladder a single source of truth both sides can read.
30
+
31
+ PURE — no I/O, no clock. py.typed.
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import enum
36
+ from dataclasses import dataclass
37
+
38
+
39
class ProviderLimit(str, enum.Enum):
    """Canonical provider-limit category — the vocabulary dispatch reasons about.

    ``str``-valued so a member round-trips as a token
    (``ProviderLimit.USAGE_WINDOW == "usage_window"``), the same convention as
    ``loop_decide.OutcomeKind`` and ``gate_classify.Verdict``.

    TRANSIENT_OVERLOAD
        Server-side 529 / ``overloaded_error`` / the harness "Server is
        temporarily limiting requests (not your usage limit)" surface. Clears in
        seconds-to-minutes. Policy: retry the SAME unit of work with backoff;
        escalate to stop only after K consecutive hits (an outage, not a blip).
    USAGE_WINDOW
        A 429 / quota / 5-hour / 7-day / weekly cap. Every retry fails
        identically until the window resets on a TIMER. Policy: stop (or
        durable-defer past a measured ``window_end``); re-invoke after reset.
    HARD_QUOTA
        A billing block ("credit balance too low") or an opaque subscription
        blackout. No timer fixes it — an OPERATOR must act. Policy: stop +
        surface.
    NONE
        No provider-limit signal.

    The load-bearing split is TRANSIENT_OVERLOAD (retry) versus everything else
    (stop/defer). A real overload and a real quota window can BOTH arrive as a
    ``rejected`` rate-limit event; what tells them apart is the error TYPE
    (529/overloaded vs 429/quota) and the "(not your usage limit)" prose, NOT
    the ``rejected`` status alone.
    """

    TRANSIENT_OVERLOAD = "transient_overload"
    USAGE_WINDOW = "usage_window"
    HARD_QUOTA = "hard_quota"
    NONE = "none"

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Return the bare token instead of ``ProviderLimit.<NAME>``."""
        return str.__str__(self)
75
+
76
+
77
# Canonical backoff ladder for a transient overload retry. Mirrors
# ``loop_decide._OVERLOADED_BACKOFF`` deliberately — this module is the shared
# source of truth, ``loop_decide`` keeps its own copy for the hot decide() path
# but the two MUST stay equal (asserted by a cross-module test in both repos).
# The values feed ``LimitPolicy.backoff_seconds``, i.e. they are seconds, one
# entry per successive retry.
_OVERLOAD_BACKOFF: tuple[int, ...] = (60, 270, 1200)
# Number of consecutive TRANSIENT_OVERLOAD hits after which the policy says to
# stop retrying (feeds ``LimitPolicy.escalate_after``).
_OVERLOAD_ESCALATE_AFTER = 3  # consecutive TRANSIENT_OVERLOAD hits → stop
83
+
84
+
85
@dataclass(frozen=True)
class LimitPolicy:
    """Canonical handling policy for one :class:`ProviderLimit` category.

    A pure lookup record (see :func:`policy_for`): the single place the dispatch
    family reads "is this retryable, with what backoff, when do I escalate, does
    an operator have to act, will it clear on its own". Consumers must not
    re-derive these per loop — that drift is what this module exists to remove.
    """

    # The category this policy describes.
    category: ProviderLimit

    # True only for TRANSIENT_OVERLOAD — retry the same unit of work.
    retryable_same_iter: bool

    # Backoff ladder for the retry; ``()`` for non-retryable categories.
    backoff_seconds: tuple[int, ...]

    # Consecutive hits of this category before escalating to a hard stop:
    # ``_OVERLOAD_ESCALATE_AFTER`` (3) for TRANSIENT_OVERLOAD, ``1`` for the
    # stop-now categories (the first hit already stops).
    escalate_after: int

    # True for HARD_QUOTA — no backoff/wait resolves it; a human must act.
    operator_action_required: bool

    # True when the limit clears on its own (TRANSIENT_OVERLOAD, USAGE_WINDOW);
    # False for HARD_QUOTA (operator-gated) and NONE.
    resets_on_timer: bool
115
+
116
+
117
# One policy per ProviderLimit member, keyed by the policy's own category so a
# key can never disagree with the record stored under it. Insertion order
# follows the enum's declaration order.
_POLICIES: dict[ProviderLimit, LimitPolicy] = {
    entry.category: entry
    for entry in (
        # Retry the same unit of work with backoff; stop after repeated hits.
        LimitPolicy(
            category=ProviderLimit.TRANSIENT_OVERLOAD,
            retryable_same_iter=True,
            backoff_seconds=_OVERLOAD_BACKOFF,
            escalate_after=_OVERLOAD_ESCALATE_AFTER,
            operator_action_required=False,
            resets_on_timer=True,
        ),
        # Stop on the first hit; the window clears on a timer.
        LimitPolicy(
            category=ProviderLimit.USAGE_WINDOW,
            retryable_same_iter=False,
            backoff_seconds=(),
            escalate_after=1,
            operator_action_required=False,
            resets_on_timer=True,
        ),
        # Stop on the first hit; only an operator can clear it.
        LimitPolicy(
            category=ProviderLimit.HARD_QUOTA,
            retryable_same_iter=False,
            backoff_seconds=(),
            escalate_after=1,
            operator_action_required=True,
            resets_on_timer=False,
        ),
        # No limit signal: nothing to retry, nothing to wait for.
        LimitPolicy(
            category=ProviderLimit.NONE,
            retryable_same_iter=False,
            backoff_seconds=(),
            escalate_after=1,
            operator_action_required=False,
            resets_on_timer=False,
        ),
    )
}
151
+
152
+
153
def policy_for(category: ProviderLimit) -> LimitPolicy:
    """Look up the canonical :class:`LimitPolicy` for ``category``.

    The table is meant to be total over the enum — every :class:`ProviderLimit`
    member has an entry (a test asserts exhaustiveness, so a new category cannot
    ship without one). A key that is not in the table raises ``KeyError``.
    """
    policy = _POLICIES[category]
    return policy
160
+
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # Mappers — pure translators FROM each upstream taxonomy INTO the canonical
164
+ # category. They do NOT classify (the upstream classifier already did); they
165
+ # translate. Each takes the upstream token's str VALUE, so this module never
166
+ # imports the upstream class (keeps the job→dos dependency arrow one-way).
167
+ # ---------------------------------------------------------------------------
168
+
169
# rate_limit_classify.Kind values (job/scripts/rate_limit_classify.py).
_RATE_LIMIT_KIND_TO_CATEGORY: dict[str, ProviderLimit] = {
    "OVERLOADED": ProviderLimit.TRANSIENT_OVERLOAD,
    "RATE_LIMITED": ProviderLimit.USAGE_WINDOW,
    "CREDIT_LOW": ProviderLimit.HARD_QUOTA,
    "NONE": ProviderLimit.NONE,
}


def from_rate_limit_kind(kind: str) -> ProviderLimit:
    """Map a ``rate_limit_classify.Kind`` value → canonical category.

    Accepts either the plain ``str`` token or the enum member itself. The
    lookup first tries ``str(kind)``; if that misses and ``kind`` is an
    ``enum.Enum`` member, it retries with ``str(kind.value)``. The second step
    matters because ``str()`` of a ``(str, Enum)`` member that does not override
    ``__str__`` is ``"Kind.OVERLOADED"``, not ``"OVERLOADED"`` — without it a
    real limit passed as a member would be silently reported as NONE.

    Args:
        kind: the upstream token, or the upstream enum member carrying it.

    Returns:
        The matching :class:`ProviderLimit`; ``ProviderLimit.NONE`` for anything
        unrecognized (defensive: an unknown token must not masquerade as a real
        limit).
    """
    category = _RATE_LIMIT_KIND_TO_CATEGORY.get(str(kind))
    if category is None and isinstance(kind, enum.Enum):
        # str() gave the qualified member name; fall back to the member's value.
        category = _RATE_LIMIT_KIND_TO_CATEGORY.get(str(kind.value))
    return ProviderLimit.NONE if category is None else category
186
+
187
+
188
# agents.quota.QuotaErrorClass values (job/agents/quota/base.py).
# NOTE(review): "subscription_blackout" maps to USAGE_WINDOW here, while the
# ProviderLimit docstring lists "an opaque subscription blackout" under
# HARD_QUOTA (operator must act). One of the two is stale — confirm which
# handling is intended before relying on either.
_QUOTA_ERROR_CLASS_TO_CATEGORY: dict[str, ProviderLimit] = {
    "rpm_throttled": ProviderLimit.TRANSIENT_OVERLOAD,
    "transient_429": ProviderLimit.TRANSIENT_OVERLOAD,
    "daily_quota_exhausted": ProviderLimit.USAGE_WINDOW,
    "subscription_blackout": ProviderLimit.USAGE_WINDOW,
}


def from_quota_error_class(qec: str) -> ProviderLimit:
    """Map an ``agents.quota.QuotaErrorClass`` value → canonical category.

    This is the Bulkhead seam: the apply adapter keeps ``QuotaErrorClass``
    internally for its own backoff; at the dispatch boundary it maps UP into
    the canonical category. ``rpm_throttled``/``transient_429`` are short-timer
    server-side throttles → TRANSIENT_OVERLOAD; the daily/subscription caps are
    treated as timer-reset windows → USAGE_WINDOW. (A genuine billing block
    surfaces as HARD_QUOTA via the rate_limit_classify CREDIT_LOW path, not
    here.)

    Accepts either the plain ``str`` value or the enum member itself: the lookup
    tries ``str(qec)`` first and, if that misses and ``qec`` is an ``enum.Enum``
    member, retries with ``str(qec.value)``. ``str()`` of a ``(str, Enum)``
    member that does not override ``__str__`` is the qualified member name, so
    without the retry such a member would silently map to NONE.

    Args:
        qec: the upstream value, or the upstream enum member carrying it.

    Returns:
        The matching :class:`ProviderLimit`; ``ProviderLimit.NONE`` for anything
        unrecognized.
    """
    category = _QUOTA_ERROR_CLASS_TO_CATEGORY.get(str(qec))
    if category is None and isinstance(qec, enum.Enum):
        # str() gave the qualified member name; fall back to the member's value.
        category = _QUOTA_ERROR_CLASS_TO_CATEGORY.get(str(qec.value))
    return ProviderLimit.NONE if category is None else category
209
+
210
+
211
# apply-next-loop Step-3 outcome tokens (job/.claude/skills/apply-next-loop).
_APPLY_OUTCOME_TOKEN_TO_CATEGORY: dict[str, ProviderLimit] = {
    "LLM-QUOTA-EXHAUSTED": ProviderLimit.USAGE_WINDOW,
    "LLM-QUOTA-EXHAUSTED-DURABLE": ProviderLimit.USAGE_WINDOW,
    # CORRELATED-OUTAGE / BROWSER-SERVICE-UNAVAILABLE are NOT provider limits —
    # they are infra outages with their own stop policy; they map to NONE so a
    # caller asking "is this a provider limit?" gets a truthful no.
    "CORRELATED-OUTAGE": ProviderLimit.NONE,
    "BROWSER-SERVICE-UNAVAILABLE": ProviderLimit.NONE,
}


def from_apply_outcome_token(token: str) -> ProviderLimit:
    """Map an apply-next-loop Step-3 outcome token → canonical category.

    Both the transient (``LLM-QUOTA-EXHAUSTED``, Q==3 stop) and the durable
    (``LLM-QUOTA-EXHAUSTED-DURABLE``, measured-window stop-on-first) quota
    tokens are USAGE_WINDOW — the durability difference is a policy nuance
    (``resets_on_timer`` + a measured ``window_end``), not a different category.

    Accepts either the plain ``str`` token or an ``enum.Enum`` member carrying
    it: the lookup tries ``str(token)`` first and, if that misses and ``token``
    is an enum member, retries with ``str(token.value)``, so a member whose
    ``str()`` is its qualified name is not silently mapped to NONE. This keeps
    the three ``from_*`` translators consistent with one another.

    Args:
        token: the outcome token, or an enum member carrying it.

    Returns:
        The matching :class:`ProviderLimit`; ``ProviderLimit.NONE`` for unknown
        or non-limit tokens.
    """
    category = _APPLY_OUTCOME_TOKEN_TO_CATEGORY.get(str(token))
    if category is None and isinstance(token, enum.Enum):
        # str() gave the qualified member name; fall back to the member's value.
        category = _APPLY_OUTCOME_TOKEN_TO_CATEGORY.get(str(token.value))
    return ProviderLimit.NONE if category is None else category
233
+
234
+
235
+ __all__ = [
236
+ "ProviderLimit",
237
+ "LimitPolicy",
238
+ "policy_for",
239
+ "from_rate_limit_kind",
240
+ "from_quota_error_class",
241
+ "from_apply_outcome_token",
242
+ ]
dos/py.typed ADDED
@@ -0,0 +1,4 @@
1
+ # PEP 561 marker: the `dos` package ships inline type hints. Its presence tells
2
+ # type checkers (mypy/pyright) to read this package's annotations for downstream
3
+ # consumers (the userland app, dos_mcp). Matches the `Typing :: Typed` classifier
4
+ # in pyproject.toml. See https://peps.python.org/pep-0561/.