dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178)
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/efficiency.py ADDED
@@ -0,0 +1,318 @@
1
+ """EFF — the token-effectiveness verdict: *did the tokens this run spent buy work?*
2
+
3
+ The **token-economics completion of `productivity()`**. The kernel already has two
4
+ loop-economics verdicts and a clean gap between them:
5
+
6
+ liveness.classify (ProgressEvidence, policy) -> did state move AT ALL? (a binary, lifetime count)
7
+ productivity.classify (WorkHistory, policy) -> is the work-per-step RATE fading? (a trend over steps)
8
+ efficiency.classify (EfficiencyEvidence, policy) -> did the tokens buy WORK? (a ratio: work per token)
9
+ ^ THIS module
10
+
11
+ `liveness` reads a single since-start count; `productivity` reads a *trend* of
12
+ per-step work deltas; neither relates the work to its **price**. A run can be
13
+ ADVANCING (it committed) AND PRODUCTIVE (each step lands work) and still be
14
+ spending ten times the tokens that work was worth — the gap between *did it do
15
+ work?* and *was the work worth what it cost?* That gap is **efficiency**, and it
16
+ is the question an operator means by "token effectiveness": not "is the run
17
+ moving" but "is the run **spending well**." EFF is `productivity`'s lateral
18
+ sibling, re-aimed from a trend over time onto a **ratio**: `work / tokens`.
19
+
20
+ **Byte-clean by construction (the docs/138 invariant).** Both inputs are counts
21
+ the *runtime / environment* authors, never the judged agent's narration:
22
+
23
+ * `tokens` — what the model API billed this run (the usage record the provider
24
+ returns), the same env-authored counter `liveness.tokens_spent_since` reads.
25
+ * `work` — a count of ground-truth work the environment measured: commits
26
+ landed, bytes diffed, tests passed — the same `productivity` work-unit, the
27
+ same kind of thing `verify` confirms off git. Whatever unit the host chooses.
28
+
29
+ So WASTEFUL is "the environment recorded N tokens spent and ~0 work landed,"
30
+ never "the agent says it was inefficient." A quantity, not a self-report — and
31
+ crucially **non-forgeable in the direction that matters**: an agent cannot move
32
+ the verdict toward EFFICIENT by *narrating* productivity, because the numerator
33
+ is work the environment witnessed (a commit the git machinery authored, a test
34
+ the runner authored), not a claim the agent emitted. This is the same reason
35
+ `reward.admit` trusts the read-back and not the answer text: the bytes that move
36
+ the verdict are bytes the claimant did not write.
37
+
38
+ **EFF reports a price, never a quality.** Like `productivity` says the *rate*
39
+ fell (never that the work was *wrong*), EFF says the *cost per unit of work* is
40
+ high — it never says the work was bad. A run can be perfectly correct and
41
+ WASTEFUL (it burned tokens deliberating, re-reading, marker-spinning); it can be
42
+ EFFICIENT and wrong (cheap garbage). Quality is an advisory judge's call
43
+ (`llm_judge`), never this deterministic verb — the distrust-state / distrust-
44
+ judgment line the whole temporal-verdict family draws.
45
+
46
+ **Withhold the accusation until there is enough spend to judge.** The whole
47
+ reason EFF has a `min_tokens` floor is the `productivity.min_steps` reason: a run
48
+ that has barely started has spent too little to have an honest ratio (3 tokens
49
+ and 0 work is not a wasteful run, it is a run that has not done anything yet).
50
+ Below the floor EFF returns EFFICIENT-benign ("not enough spend to judge") — the
51
+ young-and-alive guard, lateral. The accusation (COSTLY / WASTEFUL) fires only
52
+ once the run has spent enough that a low ratio is real signal.
53
+
54
+ **No-telemetry / no-plan discipline** (the `test_verify_no_plan` sibling, the
55
+ strongest of the verdict family alongside `productivity`): EFF needs *nothing*
56
+ but the two counts the caller already has. No git, no registry, no journal, no
57
+ clock — `classify()` makes no I/O at all (EFF is timeless, like `productivity`;
58
+ it reads two numbers, not ages). A caller with a work count and a token count
59
+ gets a verdict; a caller with too few tokens gets the honest "not enough spend to
60
+ judge" (EFFICIENT-benign).
61
+ """
62
+
63
+ from __future__ import annotations
64
+
65
+ import enum
66
+ from dataclasses import dataclass
67
+
68
+
69
class Efficiency(str, enum.Enum):
    """Typed outcome of the token-effectiveness check: exactly one of three states.

    Every member is also a `str`, so a verdict can be written to a CLI stdout
    token or used as an exit-code map key directly, with no lookup table (the
    module describes this as mirroring `productivity.Productivity` and
    `liveness.Liveness`).
    """

    # Work-per-token is at/above the floor, or there is too little spend to judge.
    EFFICIENT = "EFFICIENT"
    # Nonzero work landed, but the ratio is under the floor: a lot spent per unit.
    COSTLY = "COSTLY"
    # Meaningful tokens were spent and ~0 work landed: the tokens bought nothing.
    WASTEFUL = "WASTEFUL"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Render a member as its bare value string.
        return str(self.value)
83
+
84
+
85
@dataclass(frozen=True)
class EfficiencyPolicy:
    """The thresholds that separate EFFICIENT / COSTLY / WASTEFUL — policy, not mechanism.

    The same "mechanism is kernel, thresholds are config" split as
    `productivity`'s `min_steps`/`floor` and `liveness`'s windows. The defaults
    are GENERIC; a workspace declares its own in `dos.toml [efficiency]` (the
    closed-config-as-data pattern, the forward-looking seam `productivity` also
    documents).

    min_tokens — the **minimum tokens spent** before EFF will accuse a run of
                 being COSTLY / WASTEFUL. Below it the run has spent too little
                 to have an honest ratio (a handful of tokens and no work is a
                 run that has barely started, not a wasteful one), and the
                 verdict withholds the accusation. The token analogue of
                 `productivity.min_steps` — the `liveness.grace_ms` guard,
                 measured in spend instead of steps or time.
    floor      — the **work-per-token efficiency floor**: the minimum ratio
                 `work / tokens` a run must clear to be EFFICIENT. Below it (but
                 with nonzero work) the run is COSTLY — it is doing work, but
                 paying a lot per unit. The UNIT of `work` is the host's
                 (commits, changed bytes, passed tests); the kernel only
                 compares the ratio to the floor. A float, because work-per-
                 token is normally « 1 (one commit might be tens of thousands of
                 tokens → a floor like 0.00002 commits/token, or — far more
                 legibly — the host counts work in a coarser unit so the floor
                 is a readable number).

    Defaults: `min_tokens=1000` (a run that has spent under ~1k tokens has barely
    started — too little to judge), `floor=0.0` (DISABLED by default — see below).

    **Why the default floor is 0.0 (disabled), not a guessed number.** Unlike
    `productivity`, which could lift a real constant from Claude Code's own loop
    (`tokenBudget.ts`'s 500-token diminishing threshold), there is no universal
    "good" work-per-token ratio — it depends entirely on what the host counts as a
    work unit (a ratio sensible for "changed bytes" is meaningless for "commits").
    Shipping a guessed floor would manufacture COSTLY verdicts out of a unit
    mismatch (the docs/235 slice-must-have-power lesson: a threshold that fires for
    the wrong reason is worse than none). So the default floor is 0.0 — every
    nonzero-work run is EFFICIENT until the host declares a floor that means
    something for *its* unit. The one verdict EFF always gives for free, no floor
    needed, is **WASTEFUL** (zero work for meaningful spend), because "tokens
    bought literally nothing" is unit-independent: 0 work is 0 work whatever the
    unit. That is the cost-free, always-correct half of the verdict; COSTLY is the
    opt-in half a host arms by setting a floor.

    Raises:
        ValueError: if `min_tokens` or `floor` is negative or NaN. A NaN floor
            would otherwise be accepted and silently disable COSTLY, because
            no ratio ever compares as less than NaN.
    """

    min_tokens: int = 1000  # below this spend, withhold the accusation (the productivity.min_steps analogue)
    floor: float = 0.0  # work-per-token floor; 0.0 = disabled (only WASTEFUL fires) — see docstring

    def __post_init__(self) -> None:
        # `not (x >= 0)` instead of `x < 0`: every ordering comparison against
        # NaN is False, so `x < 0` would let a NaN threshold slip through
        # (a float config value can legally be NaN).
        if not (self.min_tokens >= 0):
            raise ValueError("min_tokens must be non-negative")
        if not (self.floor >= 0):
            raise ValueError("the work-per-token floor must be non-negative")


# The generic thresholds: min_tokens=1000, floor=0.0 (COSTLY disabled).
DEFAULT_POLICY = EfficiencyPolicy()
143
+
144
+
145
@dataclass(frozen=True)
class EfficiencyEvidence:
    """Frozen pair of counts that the efficiency verdict is computed from.

    The caller measures both numbers at its own boundary and freezes them here;
    this class performs no I/O and reads no clock.

    work   — how many work units landed during the run, in whatever unit the
             host counts (the module's examples: commits landed, bytes diffed,
             tests passed). Must be >= 0: the host passes a magnitude, never a
             signed regression, and a run that landed nothing passes 0.
    tokens — how many tokens the run spent. Must be >= 0. Zero means "no spend
             yet"; the ratio is then reported as 0.0 instead of dividing by
             zero.

    Raises:
        ValueError: when either count is negative.
    """

    work: int = 0
    tokens: int = 0

    def __post_init__(self) -> None:
        # Validate in declaration order so that `work` is always reported first.
        checks = (
            (self.work, "work must be non-negative (a count of work done)"),
            (self.tokens, "tokens must be non-negative (a count of tokens spent)"),
        )
        for count, complaint in checks:
            if count < 0:
                raise ValueError(complaint)

    @property
    def ratio(self) -> float:
        """Return work units per token spent, or 0.0 when no tokens were spent."""
        spent = self.tokens
        return self.work / spent if spent > 0 else 0.0

    @classmethod
    def of(cls, work: int, tokens: int) -> "EfficiencyEvidence":
        """Alternate constructor: evidence from a work count and a token count."""
        counts = {"work": work, "tokens": tokens}
        return cls(**counts)
194
+
195
+
196
@dataclass(frozen=True)
class EfficiencyVerdict:
    """One classification result together with the facts that produced it.

    verdict  — the typed `Efficiency` state.
    reason   — a single-line, operator-facing summary of why that state was chosen.
    evidence — the `EfficiencyEvidence` the call was made on, kept alongside the
               verdict so a JSON consumer gets the verdict *and* the numbers
               behind it (tokens spent, work landed, ratio) in one object.

    `to_dict` produces the JSON-ready shape.
    """

    verdict: Efficiency
    reason: str
    evidence: EfficiencyEvidence

    def to_dict(self) -> dict:
        """Return the verdict as a plain dict: verdict value, reason, and evidence facts."""
        payload = {"verdict": self.verdict.value, "reason": self.reason}
        facts = self.evidence
        # `ratio` is included even though it is derivable from the two counts,
        # so consumers do not have to recompute it.
        payload["evidence"] = dict(
            work=facts.work,
            tokens=facts.tokens,
            ratio=facts.ratio,
        )
        return payload
224
+
225
+
226
def classify(
    evidence: EfficiencyEvidence, policy: EfficiencyPolicy = DEFAULT_POLICY
) -> EfficiencyVerdict:
    """Classify a run's token effectiveness from its work and its spend. PURE — no I/O.

    The rungs are checked top to bottom; the first one that matches wins:

    1. EFFICIENT (too little spend) — fewer than `policy.min_tokens` tokens
       spent, or zero tokens spent: there is not enough spend to have an
       honest ratio, so the COSTLY/WASTEFUL accusation is withheld. Checked
       FIRST so a just-launched run with one token and no commit is never
       mislabelled WASTEFUL.
    2. WASTEFUL — meaningful spend (`tokens >= min_tokens`) AND `work == 0`: the
       tokens bought nothing. Unit-independent (0 work is 0 work whatever the
       host's unit), so it fires with no floor configured. Checked before
       COSTLY so an exact zero is named precisely.
    3. COSTLY — meaningful spend AND nonzero work AND `ratio < policy.floor`:
       the run is doing work but paying a lot per unit. Opt-in: with the
       default `floor=0.0` this rung never fires, because the ratio is never
       negative.
    4. EFFICIENT — none of the above: the ratio is at/above the floor.

    The COSTLY test is a strict `<` against the floor, so a ratio exactly AT
    the floor is EFFICIENT — the floor is the minimum acceptable efficiency,
    inclusive.

    Args:
        evidence: the frozen work/token counts for the run.
        policy: the `min_tokens` / `floor` thresholds; defaults to
            `DEFAULT_POLICY`.

    Returns:
        An `EfficiencyVerdict` carrying the state, a one-line reason, and the
        same `evidence` object that was passed in.
    """
    tokens = evidence.tokens
    work = evidence.work

    # 1a. EFFICIENT (too little spend) — under the configured minimum, so a
    #     ratio would not be honest signal yet. Report the benign verdict.
    if tokens < policy.min_tokens:
        return EfficiencyVerdict(
            verdict=Efficiency.EFFICIENT,
            reason=(
                f"{tokens} token(s) spent (< min {policy.min_tokens}) — not enough "
                f"spend to judge token effectiveness; no efficiency problem yet"
            ),
            evidence=evidence,
        )

    # 1b. EFFICIENT (no spend at all) — only reachable when min_tokens is 0.
    #     Still benign (the ratio is undefined), but the "< min" wording above
    #     would be false here (0 is not < 0), so this case gets its own reason.
    if tokens == 0:
        return EfficiencyVerdict(
            verdict=Efficiency.EFFICIENT,
            reason=(
                "0 token(s) spent — no spend yet, so there is no ratio to judge; "
                "no efficiency problem yet"
            ),
            evidence=evidence,
        )

    # 2. WASTEFUL — meaningful spend bought ZERO work. Kept distinct from a
    #    merely-low ratio so "the tokens bought nothing" is not blurred into
    #    COSTLY. Needs no floor: 0 work is 0 work whatever the host's unit.
    if work == 0:
        return EfficiencyVerdict(
            verdict=Efficiency.WASTEFUL,
            reason=(
                f"{tokens} tokens spent and 0 work units landed — the spend bought "
                f"nothing (pure overhead)"
            ),
            evidence=evidence,
        )

    ratio = evidence.ratio

    # 3. COSTLY — a low-but-nonzero efficiency. Fires only when the host armed a
    #    floor; with the default floor=0.0 no ratio is < 0.0, so it never fires.
    if ratio < policy.floor:
        return EfficiencyVerdict(
            verdict=Efficiency.COSTLY,
            reason=(
                f"{work} work units for {tokens} tokens — {ratio:.6g} work/token, "
                f"under the {policy.floor:.6g} floor (doing work, but spending a lot "
                f"per unit)"
            ),
            evidence=evidence,
        )

    # 4. EFFICIENT — the ratio cleared the floor (or the floor is disabled and
    #    work is nonzero). The tokens bought their work.
    return EfficiencyVerdict(
        verdict=Efficiency.EFFICIENT,
        reason=(
            f"{work} work units for {tokens} tokens — {ratio:.6g} work/token "
            f"(at/above the {policy.floor:.6g} floor) — the spend bought its work"
        ),
        evidence=evidence,
    )