dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
dos/judges.py
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""The judge seam — Axis 6 of hackability: pluggable adjudicators (the JUDGE rung).
|
|
2
|
+
|
|
3
|
+
Why this exists
|
|
4
|
+
===============
|
|
5
|
+
|
|
6
|
+
Trace one blocked claim through DOS and you find a **hierarchy of adjudicators at
|
|
7
|
+
escalating cost and trust**, the scalable-oversight shape in code:
|
|
8
|
+
|
|
9
|
+
* **ORACLE** (kernel) — `verify()` / `picker_oracle`: deterministic, forgery-proof,
|
|
10
|
+
grounded in git + on-disk state. Cheap, total, but *narrow* — it can only rule on
|
|
11
|
+
what it can mechanically cross-check, and it ABSTAINS on everything else
|
|
12
|
+
(`UNCLASSIFIED`).
|
|
13
|
+
* **JUDGE** (this seam, lives in a driver) — a model, a heuristic, a debate, a
|
|
14
|
+
fine-tuned verifier: anything that can rule on the residue the oracle abstained on.
|
|
15
|
+
More expensive, *not* forgery-proof — so it is hedged by the four disciplines below.
|
|
16
|
+
* **HUMAN** (the `dos decisions` queue) — the scarce resource, for what neither rung
|
|
17
|
+
could resolve.
|
|
18
|
+
|
|
19
|
+
`decisions._resolver_for` is the router that classifies each blocked decision into
|
|
20
|
+
ORACLE / JUDGE / HUMAN; `drivers/llm_judge` is the first occupant of the JUDGE rung.
|
|
21
|
+
This module is the *seam* that occupant plugs into — a domain-neutral protocol a
|
|
22
|
+
researcher implements to drop in **their own** adjudicator (a debate judge, a
|
|
23
|
+
build/test oracle, a learned verifier) and have DOS compose it under the same
|
|
24
|
+
discipline as the built-in one. It is the bring-your-own-adjudicator surface, and the
|
|
25
|
+
companion `dos.judge_eval` is the instrument that scores what you plug in.
|
|
26
|
+
|
|
27
|
+
The unit a judge rules on is a `Claim` — a domain-neutral
|
|
28
|
+
``{claim_text, stated_reason, evidence}`` triple (the "claim → unforgeable-evidence →
|
|
29
|
+
verdict" schema). A judge is **not** told the answer; it is handed an agent's narration
|
|
30
|
+
plus the evidence the kernel could gather, and asked whether the narration is
|
|
31
|
+
believable. That decoupling is what lets a judge rule on a ship claim, a refusal, or an
|
|
32
|
+
arbitrary external assertion — not just DOS's own no-pick rows.
|
|
33
|
+
|
|
34
|
+
The four disciplines (what keeps an *open* adjudicator set honest)
|
|
35
|
+
==================================================================
|
|
36
|
+
|
|
37
|
+
This is the highest-trust-leverage axis — a judge's whole job is to rule on the claims
|
|
38
|
+
the deterministic oracle could *not* — so the guardrails are structural, mirroring the
|
|
39
|
+
renderer rule (pure presentation) and the predicate rule (conjunctive-only):
|
|
40
|
+
|
|
41
|
+
1. **Deterministic-first** is the *composition's* job, not the protocol's:
|
|
42
|
+
`judge_eval.compose_deterministic_first` (and `drivers/llm_judge.adjudicate`) run
|
|
43
|
+
the oracle FIRST and only hand the judge the residue. A judge never overrides a
|
|
44
|
+
provable verdict; it is consulted exactly where the oracle abstained.
|
|
45
|
+
2. **Advisory-only** is enforced by *shape*: a judge is handed a frozen `Claim` + a
|
|
46
|
+
read-only `config` and returns a frozen `JudgeVerdict`. It is given nothing it
|
|
47
|
+
could mutate — no lease, no registry, no writable state. A judge can no more
|
|
48
|
+
"believe itself into" a state change than a renderer can mis-verify a ship.
|
|
49
|
+
3. **Fail-to-ABSTAIN, never fail-to-AGREE.** `run_judge` converts any exception — OR
|
|
50
|
+
any non-`JudgeVerdict` return — into an `ABSTAIN`, never an `AGREE`. This is the
|
|
51
|
+
*inverse direction* from the predicate rule on purpose: a safety predicate that
|
|
52
|
+
can't answer fails CLOSED (refuse, the safe direction for admission); an advisory
|
|
53
|
+
judge that can't answer ABSTAINS (punt to the next rung up, the safe direction for
|
|
54
|
+
adjudication). Neither failure mode ever auto-clears a claim. The dangerous cell —
|
|
55
|
+
a judge that AGREES with a claim that is in fact false (a false-clear) — is exactly
|
|
56
|
+
what `judge_eval` measures and what these rules make a judge structurally unable to
|
|
57
|
+
reach *by accident*.
|
|
58
|
+
4. **Abstention is a first-class verdict, not an error.** A judge that says "I can't
|
|
59
|
+
tell" is doing its job — it routes the claim onward to a human. `ABSTAIN` is the
|
|
60
|
+
conservative default, and the built-in `AbstainJudge` (which abstains on
|
|
61
|
+
everything) is the always-available, unshadowable baseline — the judge analogue of
|
|
62
|
+
the `text` renderer: a trusted fallback a plugin can never displace.
|
|
63
|
+
|
|
64
|
+
Purity & layering
|
|
65
|
+
==================
|
|
66
|
+
|
|
67
|
+
This module is **pure** — a Protocol, two frozen value types, a built-in judge that
|
|
68
|
+
abstains, and resolver/runner helpers. It has NO provider surface, no I/O inside a
|
|
69
|
+
verdict, and names no host. So it sits in the kernel layer beside `render`/`admission`
|
|
70
|
+
(which likewise hold a pure protocol + resolver while the *implementations* live
|
|
71
|
+
outside). Every real judge with model/provider/I/O surface lives in a `drivers/*`
|
|
72
|
+
module or an installed plugin — `drivers/llm_judge` is the reference one; the kernel
|
|
73
|
+
points to it and never imports it (`drivers/__init__`: "they import the kernel; the
|
|
74
|
+
kernel never imports them"). Entry-point discovery (the one bit of I/O) happens at the
|
|
75
|
+
call boundary in `active_judges`, exactly as `active_predicates` / renderer discovery do.
|
|
76
|
+
"""
|
|
77
|
+
|
|
78
|
+
from __future__ import annotations
|
|
79
|
+
|
|
80
|
+
import enum
|
|
81
|
+
import sys
|
|
82
|
+
from dataclasses import dataclass, field
|
|
83
|
+
from typing import Protocol, runtime_checkable
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Stance(str, enum.Enum):
    """The three possible rulings a judge can hand down on a claim.

    The set is deliberately three-valued. With only agree/disagree, a judge
    that cannot tell would be forced to guess, and a wrong guess of `AGREE`
    on a false claim is the false-clear this seam is built to make hard.
    `ABSTAIN` is the honest third answer ("I can't adjudicate this; send it
    up the ladder") and the conservative default that failures degrade to.

    Members are also plain ``str`` values, so they compare equal to their
    string spelling.
    """

    # The claim is believable given its evidence.
    AGREE = "AGREE"
    # The claim looks false / unsupported; flag it.
    DISAGREE = "DISAGREE"
    # Cannot tell; punt to the next rung (a human).
    ABSTAIN = "ABSTAIN"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Render as the bare value ("AGREE") rather than "Stance.AGREE".
        return self.value
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass(frozen=True)
class Claim:
    """A narration plus its evidence: the domain-neutral thing a judge rules on.

    A `Claim` is intentionally not tied to DOS's no-pick rows. It can wrap a
    ship claim ("phase AUTH2 shipped"), a refusal, or any external assertion.
    The judge is shown the agent's narration (`claim_text` and
    `stated_reason`, the part DOS does not take on trust) next to `evidence`
    (git lines, file state, a diff) and decides whether the narration is
    supported. The kernel gathers the evidence; the judge weighs it.

    The instance is frozen, so a judge cannot mutate what it was handed.
    """

    # What was asserted: the statement being adjudicated.
    claim_text: str
    # The agent's own justification for the claim, if it gave one.
    stated_reason: str = ""
    # Forgery-resistant facts gathered for the judge to weigh.
    evidence: tuple[str, ...] = ()
    # Optional opaque correlation handle (a run-id, a phase id) carried
    # through for the caller's join; a judge MUST NOT need it to rule.
    subject: str = ""
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass(frozen=True)
class JudgeVerdict:
    """One judge's frozen, advisory ruling on a single `Claim`.

    Build a verdict through the three named constructors, one per `Stance`:
    `.agree()`, `.disagree(why)` and `.abstain(why)`. A judge's expressible
    output is therefore "believable / not / can't-tell" plus prose. The
    verdict carries nothing that could mutate state (advisory-only by
    shape): it is read by the decisions queue, the eval harness or an
    operator, and acting on it is always a separate, explicit step.

    `cost` is an optional, self-reported cost of producing the verdict
    (tokens, dollars, seconds; the unit is the judge's own). A judge that
    does not track cost leaves it at 0.0.

    The stance is stored in the private field ``_stance`` so the `.agree()`
    constructor does not collide with a public attribute. Read it through
    the ``stance`` / ``agreed`` / ``disagreed`` / ``abstained`` accessors.
    """

    # The ruling itself; read it via the `stance` property.
    _stance: Stance
    # Free-form explanation of the ruling.
    why: str = ""
    # Facts the judge cites in support of its ruling.
    evidence: tuple[str, ...] = ()
    # Self-reported cost of producing this verdict (unit chosen by the judge).
    cost: float = 0.0

    # -- named constructors -------------------------------------------------

    @classmethod
    def _with_stance(cls, stance, why, evidence, cost) -> "JudgeVerdict":
        """Shared builder: normalise ``evidence`` to a tuple and construct."""
        return cls(_stance=stance, why=why, evidence=tuple(evidence), cost=cost)

    @classmethod
    def agree(cls, why: str = "", *, evidence: tuple[str, ...] = (),
              cost: float = 0.0) -> "JudgeVerdict":
        """Rule that the claim is believable.

        This is the one verdict that can clear a claim, and the one a judge
        can never reach by failing (see `run_judge`).
        """
        return cls._with_stance(Stance.AGREE, why, evidence, cost)

    @classmethod
    def disagree(cls, why: str = "", *, evidence: tuple[str, ...] = (),
                 cost: float = 0.0) -> "JudgeVerdict":
        """Rule that the claim looks false or unsupported (flag it)."""
        return cls._with_stance(Stance.DISAGREE, why, evidence, cost)

    @classmethod
    def abstain(cls, why: str = "", *, evidence: tuple[str, ...] = (),
                cost: float = 0.0) -> "JudgeVerdict":
        """Decline to rule and punt up the ladder.

        The conservative default, and what every failure (exception, bad
        return type, no provider) degrades to.
        """
        return cls._with_stance(Stance.ABSTAIN, why, evidence, cost)

    # -- read accessors -----------------------------------------------------

    @property
    def stance(self) -> Stance:
        """The three-valued ruling (public read accessor for ``_stance``)."""
        return self._stance

    @property
    def agreed(self) -> bool:
        """True only when the stance is `Stance.AGREE`."""
        return self._stance is Stance.AGREE

    @property
    def disagreed(self) -> bool:
        """True only when the stance is `Stance.DISAGREE`."""
        return self._stance is Stance.DISAGREE

    @property
    def abstained(self) -> bool:
        """True only when the stance is `Stance.ABSTAIN`."""
        return self._stance is Stance.ABSTAIN

    def to_dict(self) -> dict:
        """Return a plain-dict view: stance as its string value, evidence as a list."""
        return dict(
            stance=self._stance.value,
            why=self.why,
            evidence=list(self.evidence),
            cost=self.cost,
        )
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@runtime_checkable
class Judge(Protocol):
    """The structural contract an adjudicator implements to plug into DOS.

    Attributes:
        name: The token `dos judge-eval --judge <name>` selects and
            `dos doctor` lists.

    ``rule`` receives one frozen `Claim` plus the active `config` and returns
    a `JudgeVerdict`. The config is treated as read-only: a judge may read
    policy from it (for example the reason vocabulary), but it is typed as a
    bare ``object`` and so offers nothing to mutate.

    Unlike a predicate or a renderer, a judge MAY do I/O inside ``rule``
    (call a model, shell out, read a file). That is why real judges live in
    a driver, outside the kernel boundary. What keeps a judge honest is not
    purity but two other disciplines: it is advisory-only (it returns a
    verdict and mutates nothing), and fail-to-abstain is enforced by
    `run_judge` rather than by trusting the judge to be careful.
    """

    name: str

    def rule(self, claim: Claim, config: object) -> JudgeVerdict:
        """Rule on ``claim`` under ``config`` and return the verdict."""
        ...
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class AbstainJudge:
    """The built-in judge that is always available and abstains on every claim.

    It plays the role the `text` renderer plays for rendering: a trusted
    fallback that a plugin cannot shadow, because `resolve_judge` looks at
    built-ins first. A workspace with no judge wired therefore still has a
    resolvable judge, and that judge routes every claim to a human, which is
    the conservative behaviour. It is also the baseline `judge_eval`
    compares real judges against: a judge that does no better than
    `abstain` on the residue has added nothing but cost.
    """

    name = "abstain"

    def rule(self, claim: Claim, config: object) -> JudgeVerdict:
        """Abstain unconditionally; neither ``claim`` nor ``config`` is consulted."""
        explanation = (
            "no adjudicator wired — the built-in judge abstains, routing this "
            "claim to a human (configure a JUDGE-rung driver to rule on it)."
        )
        return JudgeVerdict.abstain(explanation)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def run_judge(judge: Judge, claim: Claim, config: object) -> JudgeVerdict:
    """Run one judge against one claim, enforcing **fail-to-abstain**.

    Every consumer should call this wrapper instead of `judge.rule(...)`
    directly. It is what turns "a judge can never auto-clear a claim by
    failing" into a structural guarantee rather than a hope:

    * a judge that **raises** (model timeout, bad provider, a bug) yields
      `ABSTAIN`, naming the failure. The exception never propagates and
      never becomes `AGREE`.
    * a judge that returns **anything that is not a `JudgeVerdict`** (None,
      a dict, a duck-typed look-alike) yields `ABSTAIN`. A foreign object's
      `.agreed` is never read, so no false-clear can sneak through a wrong
      return type.
    * a judge that returns a **malformed `JudgeVerdict`** (one built
      directly with a stance that is not a `Stance` member) yields
      `ABSTAIN`. Such an object would otherwise report False from all of
      ``agreed`` / ``disagreed`` / ``abstained`` and break ``to_dict()``.

    Note the deliberate asymmetry with `admission.run_predicates`, which
    converts the same failures to a **refuse**. A predicate guards
    admission, so its safe failure is "deny". A judge is advisory, so its
    safe failure is "I don't know, ask a human". Both refuse to let a
    failure become an approval.

    Args:
        judge: The adjudicator to consult.
        claim: The claim to rule on; passed to ``judge.rule`` unchanged.
        config: The active configuration; passed to ``judge.rule`` unchanged.

    Returns:
        The judge's own verdict when it is a well-formed `JudgeVerdict`,
        otherwise an `ABSTAIN` verdict whose ``why`` names the failure.
    """
    # Resolve the display name defensively: a ``name`` property that raises
    # must not defeat the "never propagates" guarantee.
    try:
        name = getattr(judge, "name", type(judge).__name__)
    except Exception:
        name = type(judge).__name__
    try:
        verdict = judge.rule(claim, config)
    except Exception as e:  # fail-to-abstain: a judge that raises cannot rule
        return JudgeVerdict.abstain(
            f"judge {name!r} raised ({e!r}) — abstaining (an advisory adjudicator "
            f"that cannot answer punts to a human, it never auto-clears)."
        )
    if not isinstance(verdict, JudgeVerdict):
        return JudgeVerdict.abstain(
            f"judge {name!r} returned a {type(verdict).__name__}, not a "
            f"JudgeVerdict — abstaining (a judge that does not return the verdict "
            f"type cannot be trusted to clear a claim)."
        )
    # The dataclass __init__ does not type-check its fields, so a verdict
    # built without the named constructors can carry e.g. the plain string
    # "AGREE" or None. Treat that as "did not rule".
    stance = getattr(verdict, "_stance", None)
    if not isinstance(stance, Stance):
        return JudgeVerdict.abstain(
            f"judge {name!r} returned a JudgeVerdict whose stance is "
            f"{stance!r}, not a Stance member — abstaining (a malformed "
            f"verdict cannot be trusted to clear a claim)."
        )
    return verdict
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# ---------------------------------------------------------------------------
# Resolution: built-in judges first, then the `dos.judges` entry-point group.
# ---------------------------------------------------------------------------

# Name of the entry-point group a workspace/researcher registers a judge under.
JUDGE_ENTRY_POINT_GROUP = "dos.judges"

# Judges that ship with the kernel, keyed by the name they resolve under.
# These are UNSHADOWABLE: built-ins are looked up before plugins, so a plugin
# that registers `abstain` cannot displace this one. Only the conservative
# `abstain` baseline lives here; every judge that actually rules belongs in a
# driver/plugin, because the kernel has no provider surface.
_BUILT_IN_JUDGES: dict[str, type] = {AbstainJudge.name: AbstainJudge}
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _discover_entry_point_judges(*, _stderr=None) -> list[tuple[str, Judge]]:
    """Load every judge registered under the `dos.judges` entry-point group.

    A plugin declares ``name = "pkg.module:JudgeClass"`` in its
    ``[project.entry-points."dos.judges"]`` table. Each entry point is
    loaded; if the loaded object is a class it is instantiated with no
    arguments, otherwise it is used as-is.

    Args:
        _stderr: Stream for load-failure warnings; defaults to ``sys.stderr``.

    Returns:
        ``(entry_point_name, judge)`` pairs ordered by entry-point name, so
        the listing is deterministic. An entry point that fails to load or
        instantiate is skipped with a one-line warning instead of raising:
        a broken third-party plugin is the operator's to fix, not a kernel
        fault. If discovery itself is unavailable the list is empty.
    """
    warn_stream = sys.stderr if _stderr is None else _stderr
    found: list[tuple[str, Judge]] = []

    try:
        from importlib.metadata import entry_points
    except Exception:  # pragma: no cover - importlib.metadata always present py3.11+
        return found

    try:
        registered = entry_points(group=JUDGE_ENTRY_POINT_GROUP)
    except TypeError:  # pragma: no cover - py<3.10 selectable-API fallback
        registered = entry_points().get(JUDGE_ENTRY_POINT_GROUP, [])  # type: ignore[attr-defined]
    except Exception:  # pragma: no cover - defensive: never let discovery crash a call
        return found

    for ep in sorted(registered, key=lambda candidate: candidate.name):
        try:
            loaded = ep.load()
            if isinstance(loaded, type):
                # Registered as a class: build an instance.
                judge = loaded()
            else:
                # Registered as a ready-made object: use it directly.
                judge = loaded
        except Exception as e:  # pragma: no cover - depends on third-party plugin
            print(
                f"warning: judge plugin {ep.name!r} failed to load ({e}); skipping",
                file=warn_stream,
            )
            continue
        found.append((ep.name, judge))

    return found
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def resolve_judge(name: str, *, _stderr=None) -> Judge:
    """Look up a judge by name, trying built-ins before `dos.judges` plugins.

    Built-ins (`abstain`) win over a plugin registered under the same name;
    that is the trusted-fallback guarantee. A built-in is returned as a
    fresh instance, and entry-point discovery only runs when no built-in
    matches.

    Args:
        name: The judge name to resolve.
        _stderr: Stream for plugin load warnings, forwarded to discovery.

    Returns:
        The resolved judge.

    Raises:
        ValueError: ``name`` matches neither a built-in nor a discovered
            plugin. The message lists the known names. The lookup never
            silently falls back to `abstain`, because that would hide a
            typo'd ``--judge`` behind an unannounced substitution.
    """
    if name in _BUILT_IN_JUDGES:
        builtin_cls = _BUILT_IN_JUDGES[name]
        return builtin_cls()

    plugins = dict(_discover_entry_point_judges(_stderr=_stderr))
    if name not in plugins:
        known = ", ".join(sorted({*_BUILT_IN_JUDGES, *plugins}))
        raise ValueError(f"unknown judge {name!r}; known: {known}")
    return plugins[name]
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def active_judges(*, _stderr=None) -> list[tuple[str, Judge]]:
    """Every resolvable judge as ``(name, judge)``: built-ins, then plugins.

    This is the order `dos doctor` lists. The function performs entry-point
    discovery (I/O), so it is a call-boundary helper and must never be
    called from inside a verdict.

    A discovered plugin whose entry-point name equals a built-in name is
    left out. `resolve_judge` would never return it, because built-ins
    resolve first. Listing it would duplicate the name, and a caller doing
    ``dict(active_judges())`` would let the plugin displace the built-in.

    Args:
        _stderr: Stream for plugin load warnings, forwarded to discovery.

    Returns:
        Fresh instances of the built-in judges followed by the discovered
        plugin judges that are not shadowed by a built-in.
    """
    built = [(n, cls()) for n, cls in _BUILT_IN_JUDGES.items()]
    discovered = [
        (n, judge)
        for n, judge in _discover_entry_point_judges(_stderr=_stderr)
        if n not in _BUILT_IN_JUDGES  # shadowed by a built-in: not resolvable
    ]
    return built + discovered
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def active_judge_names(*, _stderr=None) -> list[str]:
    """Return just the names from `active_judges`, in the same order.

    This is what `dos doctor` lists so an operator can see which
    adjudicators the JUDGE rung can call (the judge analogue of "see the
    active predicates / reason set").

    Args:
        _stderr: Stream for plugin load warnings, forwarded to discovery.
    """
    names: list[str] = []
    for judge_name, _unused_judge in active_judges(_stderr=_stderr):
        names.append(judge_name)
    return names
|
dos/lane_infer.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Infer a starter lane taxonomy from a repo's top-level directory layout.
|
|
2
|
+
|
|
3
|
+
This is the **public, reusable** form of the inference `dos init` already does
|
|
4
|
+
internally when it scaffolds a `dos.toml` (`cli._render_init_config`). The CLI
|
|
5
|
+
keeps its own inline copy for the scaffold-text path; this module is the importable
|
|
6
|
+
API a *foreign caller* — a host driver, a skill, an adopter's own tooling — calls to
|
|
7
|
+
get a `LaneTaxonomy` object directly, without parsing the scaffolded TOML back out
|
|
8
|
+
or re-deriving the `"<dir>/**"` tree convention by hand.
|
|
9
|
+
|
|
10
|
+
The rule (identical to `dos init`'s): every immediate subdirectory that is not a
|
|
11
|
+
dotdir and not obvious noise (VCS / caches / build output / deps / venvs) becomes a
|
|
12
|
+
**concurrent** lane owning `<dir>/**`, plus an **exclusive** `global` over the whole
|
|
13
|
+
repo. A repo with no source dirs falls back to the honest single-writer default —
|
|
14
|
+
one exclusive `main` over `**/*`, no concurrent lanes — so `dos doctor --check`
|
|
15
|
+
stays clean (an exclusive lane never enters the disjointness algebra).
|
|
16
|
+
|
|
17
|
+
Why a separate module rather than just calling the CLI helper: the CLI helper
|
|
18
|
+
(`_render_init_config`) returns *TOML text* and is private to the scaffold path; an
|
|
19
|
+
adopter wants the *typed object* (`LaneTaxonomy`) to pass into `SubstrateConfig`,
|
|
20
|
+
compare against a declared `dos.toml [lanes]`, or render however they like. Keeping
|
|
21
|
+
this pure (Path in → LaneTaxonomy out, the only I/O a single `iterdir`) makes it
|
|
22
|
+
testable and free of the CLI's argument/scaffold concerns — the
|
|
23
|
+
"I/O at the boundary, data to the pure core" rule, applied to lane discovery.
|
|
24
|
+
|
|
25
|
+
The constants are duplicated from `cli` deliberately (not imported): this module
|
|
26
|
+
must not import the CLI (a layer-3 helper) — the dependency arrow points the other
|
|
27
|
+
way. The two copies are pinned equal by `tests/test_lane_infer.py`, the same
|
|
28
|
+
discipline `cooldown` uses for the lane-journal schema family it inlines.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
from dos.config import LaneTaxonomy
|
|
36
|
+
|
|
37
|
+
# Cap on auto-derived concurrent lanes — a repo with hundreds of top-level dirs
|
|
38
|
+
# should not scaffold a hundred-lane taxonomy; beyond a handful the operator wants
|
|
39
|
+
# to curate by hand. Mirrors `cli._INIT_LANE_MAX` (pinned equal by the test).
|
|
40
|
+
LANE_INFER_MAX = 12
|
|
41
|
+
|
|
42
|
+
# Top-level entries that are never a source lane: VCS, caches, build output,
|
|
43
|
+
# dependency trees, virtualenvs, IDE/tooling dirs. Mirrors `cli._INIT_NOISE_DIRS`
|
|
44
|
+
# (pinned equal by the test). Dotdirs are skipped wholesale by the leading-`.`
|
|
45
|
+
# check in `detect_source_dirs`, so they need no entry here.
|
|
46
|
+
LANE_INFER_NOISE_DIRS = frozenset({
|
|
47
|
+
".git", "__pycache__", "node_modules", "dist", "build", "target",
|
|
48
|
+
"venv", ".venv", "env", ".env", ".idea", ".vscode", ".pytest_cache",
|
|
49
|
+
".mypy_cache", ".ruff_cache", ".tox", "site-packages", ".dos",
|
|
50
|
+
"htmlcov", ".eggs",
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def detect_source_dirs(
|
|
55
|
+
root: Path,
|
|
56
|
+
*,
|
|
57
|
+
cap: int = LANE_INFER_MAX,
|
|
58
|
+
noise_dirs: frozenset[str] | None = None,
|
|
59
|
+
) -> list[str]:
|
|
60
|
+
"""The repo's top-level source directories — sorted, noise-filtered, capped.
|
|
61
|
+
|
|
62
|
+
A "source dir" is any immediate subdirectory of ``root`` that is not a dotdir
|
|
63
|
+
and not in ``noise_dirs``. Returns at most ``cap`` names (sorted, so the
|
|
64
|
+
selection is deterministic). On any filesystem error (``root`` missing / not a
|
|
65
|
+
dir / unreadable) returns ``[]`` — the caller then gets the single-writer
|
|
66
|
+
fallback taxonomy, which is the safe default for an unscannable root.
|
|
67
|
+
"""
|
|
68
|
+
noise = LANE_INFER_NOISE_DIRS if noise_dirs is None else noise_dirs
|
|
69
|
+
try:
|
|
70
|
+
entries = sorted(
|
|
71
|
+
p.name for p in Path(root).iterdir()
|
|
72
|
+
if p.is_dir()
|
|
73
|
+
and not p.name.startswith(".")
|
|
74
|
+
and p.name not in noise
|
|
75
|
+
)
|
|
76
|
+
except OSError:
|
|
77
|
+
return []
|
|
78
|
+
return entries[:cap]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def infer_lanes_from_directory(
    root: Path,
    *,
    cap: int = LANE_INFER_MAX,
    noise_dirs: frozenset[str] | None = None,
) -> LaneTaxonomy:
    """Build a starter ``LaneTaxonomy`` from the top-level directories of ``root``.

    Every source directory reported by :func:`detect_source_dirs` becomes a
    **concurrent** lane that owns ``<dir>/**`` and is listed in
    ``autopick``; an **exclusive** ``global`` lane owns ``**/*``. When no
    source directory is found the result is the single-writer default: one
    exclusive ``main`` lane over ``**/*`` and no concurrent lanes.

    Args:
        root: Repository root to inspect.
        cap: Forwarded to :func:`detect_source_dirs`.
        noise_dirs: Forwarded to :func:`detect_source_dirs`.

    Returns:
        A typed :class:`~dos.config.LaneTaxonomy` that a caller can pass to
        ``SubstrateConfig(lanes=…)`` or compare against a declared
        ``dos.toml``. The only I/O is the directory scan inside
        ``detect_source_dirs``.
    """
    lane_names = tuple(detect_source_dirs(root, cap=cap, noise_dirs=noise_dirs))

    if not lane_names:
        # Honest single-writer fallback: nothing to make disjoint, so one
        # exclusive whole-repo lane (matches `cli._render_init_config`'s
        # else branch).
        return LaneTaxonomy(
            concurrent=(),
            exclusive=("main",),
            autopick=(),
            trees={"main": ("**/*",)},
            aliases={},
        )

    ownership: dict[str, tuple[str, ...]] = {}
    for lane in lane_names:
        ownership[lane] = (f"{lane}/**",)
    # NOTE(review): a top-level directory literally named "global" would have
    # its tree overwritten here and appear as both concurrent and exclusive.
    # Confirm how LaneTaxonomy is expected to treat that collision.
    ownership["global"] = ("**/*",)

    return LaneTaxonomy(
        concurrent=lane_names,
        exclusive=("global",),
        autopick=lane_names,
        trees=ownership,
        aliases={},
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
__all__ = [
|
|
123
|
+
"LANE_INFER_MAX",
|
|
124
|
+
"LANE_INFER_NOISE_DIRS",
|
|
125
|
+
"detect_source_dirs",
|
|
126
|
+
"infer_lanes_from_directory",
|
|
127
|
+
]
|