dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""dos.drivers.operator_judge — the operator-decision adjudicator (a JUDGE occupant).
|
|
2
|
+
|
|
3
|
+
A host's *operator-decision queue* (the reference userland app's "judge-operator"
|
|
4
|
+
JO machine) is a JUDGE-rung adjudicator: for each open operator decision it rules
|
|
5
|
+
**accept-recommended** (the recommended option is clearly correct and reversible),
|
|
6
|
+
**escalate** (genuinely ambiguous / value-laden / irreversible — a human must
|
|
7
|
+
decide), or **defer** (evidence is stale; re-propose later). That is exactly the
|
|
8
|
+
three-valued ruling `dos.judges` is the seam for — so the JO machine should be a
|
|
9
|
+
registered occupant of the JUDGE rung, scored by the same `dos.judge_eval`
|
|
10
|
+
instrument as any other judge, rather than a wholly-separate parallel machine.
|
|
11
|
+
|
|
12
|
+
This driver is the **thin binding**, not a rewrite. The host keeps ALL of its
|
|
13
|
+
machinery — the LLM adjudication that PRODUCES the accept/escalate/defer string,
|
|
14
|
+
the decisions-pending stamps, the findings-queue rows, the reversibility
|
|
15
|
+
(`JO_AUTO_ACCEPT`) gate, the cap/cooldown/veto failsafes. This module adds two
|
|
16
|
+
things, both pure and additive:
|
|
17
|
+
|
|
18
|
+
* `OperatorDecisionJudge` — a `dos.judges.Judge` occupant whose `rule()` reads a
|
|
19
|
+
host decision-string off the `Claim` and returns the canonical `JudgeVerdict`.
|
|
20
|
+
Registered under the `dos.judges` entry-point group so `dos doctor` lists it
|
|
21
|
+
and `resolve_judge("operator-decision")` returns it.
|
|
22
|
+
* `stance_for_decision` / `verdict_for_decision` — the pure mapping the host
|
|
23
|
+
adapter calls to translate its own accept/escalate/defer into the canonical
|
|
24
|
+
`Stance` / `JudgeVerdict`, so it can then feed `(Claim, verdict, truth)` triples
|
|
25
|
+
to `dos.judge_eval.false_clear_rate` and re-ground its ≤5%-false-accept gate on
|
|
26
|
+
the kernel instrument instead of a hand-rolled number.
|
|
27
|
+
|
|
28
|
+
THE MAPPING (the only non-trivial part — it encodes WHICH host string clears a claim):
|
|
29
|
+
|
|
30
|
+
accept-recommended → AGREE — the judge believes the recommended option is
|
|
31
|
+
correct AND reversible; this is the one verdict
|
|
32
|
+
that can let the lane proceed automatically. It
|
|
33
|
+
is the false-clear surface `judge_eval` measures.
|
|
34
|
+
escalate → DISAGREE — the judge flags the decision as one it should
|
|
35
|
+
NOT auto-clear; a human must rule (the safe,
|
|
36
|
+
non-clearing direction).
|
|
37
|
+
defer → ABSTAIN — the judge cannot rule yet (stale/unverifiable);
|
|
38
|
+
punt to the next cycle / a human.
|
|
39
|
+
|
|
40
|
+
Why this is the honest split: the kernel never holds the host's stamp formats,
|
|
41
|
+
findings schema, or reversibility gate — only the three-valued mapping and the
|
|
42
|
+
discipline (`run_judge` still fail-to-abstains; the occupant mutates nothing). A
|
|
43
|
+
grep of this driver for a host directory / lane / commit prefix returns nothing —
|
|
44
|
+
it names only the three domain-neutral decision strings, which are this judge's
|
|
45
|
+
*vocabulary*, the way a build/test judge would name "pass"/"fail".
|
|
46
|
+
"""
|
|
47
|
+
from __future__ import annotations
|
|
48
|
+
|
|
49
|
+
from dos.judges import Claim, JudgeVerdict, Stance
|
|
50
|
+
|
|
51
|
+
# The judge's name — the token `resolve_judge(...)` selects and `dos doctor` lists.
|
|
52
|
+
JUDGE_NAME = "operator-decision"
|
|
53
|
+
|
|
54
|
+
# The three host decision strings this judge rules in, mapped to the canonical
|
|
55
|
+
# three-valued Stance. `accept-recommended` is the ONLY one that clears (AGREE) —
|
|
56
|
+
# the false-clear surface the eval harness measures.
|
|
57
|
+
_DECISION_TO_STANCE: dict[str, Stance] = {
|
|
58
|
+
"accept-recommended": Stance.AGREE,
|
|
59
|
+
"escalate": Stance.DISAGREE,
|
|
60
|
+
"defer": Stance.ABSTAIN,
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def stance_for_decision(decision: str) -> Stance:
    """Map a host accept/escalate/defer string to the canonical Stance.

    The lookup ignores surrounding whitespace and letter case. Anything the
    mapping table does not recognise -- an unknown string, an empty string,
    ``None``, or a value that is not a string at all -- maps to ABSTAIN, the
    conservative default: an adjudicator that produced something the seam
    doesn't recognise has not cleared the claim. PURE.

    Args:
        decision: The host's decision string (e.g. ``"accept-recommended"``).

    Returns:
        The `Stance` registered for the normalised string, else
        `Stance.ABSTAIN`.
    """
    # A truthy non-string (a number, a dict from a mis-parsed payload) used to
    # raise AttributeError on `.strip()`; treat it as unparseable instead so the
    # "unparseable -> ABSTAIN" contract holds for every input.
    if not isinstance(decision, str):
        return Stance.ABSTAIN
    return _DECISION_TO_STANCE.get(decision.strip().lower(), Stance.ABSTAIN)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def verdict_for_decision(
    decision: str, *, reason: str = "", evidence: tuple[str, ...] = (),
    cost: float = 0.0,
) -> JudgeVerdict:
    """Translate a host decision string into the kernel's `JudgeVerdict`. PURE.

    The decision is first reduced to a `Stance` by `stance_for_decision`; the
    matching `JudgeVerdict` constructor is then invoked with the caller's
    `reason`, `evidence` and `cost` passed through unchanged.

    Args:
        decision: The host's accept/escalate/defer string.
        reason: Free-text justification forwarded to the verdict.
        evidence: Evidence strings forwarded to the verdict.
        cost: Cost figure forwarded to the verdict.

    Returns:
        `JudgeVerdict.agree(...)` for an AGREE stance, `JudgeVerdict.disagree(...)`
        for DISAGREE, and `JudgeVerdict.abstain(...)` for everything else.
    """
    stance = stance_for_decision(decision)
    # Pick the constructor first, then call it once with the shared arguments.
    if stance is Stance.AGREE:
        build = JudgeVerdict.agree
    elif stance is Stance.DISAGREE:
        build = JudgeVerdict.disagree
    else:
        build = JudgeVerdict.abstain
    return build(reason, evidence=evidence, cost=cost)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class OperatorDecisionJudge:
    """Judge occupant that turns a host's operator decision into a `JudgeVerdict`.

    `rule()` expects the host's decision string in `claim.claim_text` and the
    accompanying human reason in `claim.stated_reason`. It only reads those
    attributes (plus `claim.evidence`) and delegates to `verdict_for_decision`;
    it performs no I/O and no model call of its own, and it does not modify the
    claim.

    Because the translation goes through `verdict_for_decision`, a `claim_text`
    that is not one of the known decision strings yields an ABSTAIN verdict
    rather than a clearing one.
    """

    # The token this judge is selected and listed by.
    name = JUDGE_NAME

    def rule(self, claim: Claim, config: object) -> JudgeVerdict:
        """Return the verdict for `claim`; `config` is accepted but not used."""
        decision = claim.claim_text
        why = claim.stated_reason
        cited = claim.evidence
        return verdict_for_decision(decision, reason=why, evidence=cited)
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
"""dos.drivers.os_acceptance — the acceptance-verb witness (the OS is the byte-author).
|
|
2
|
+
|
|
3
|
+
docs/121 §3.1 #4 and docs/117 §5 name the *acceptance verb* as the cheapest
|
|
4
|
+
non-forgeable witness a deployment can have: **the kernel runs a command and reads
|
|
5
|
+
the OS exit code.** It is the first concrete `dos.evidence_sources` backend — the
|
|
6
|
+
population proof that turns the `dos.evidence` seam from one built-in (`null`) into a
|
|
7
|
+
witness that can actually move `verify` toward belief.
|
|
8
|
+
|
|
9
|
+
Why the exit code is evidence and a pasted "it passed" is not
|
|
10
|
+
=============================================================
|
|
11
|
+
|
|
12
|
+
The whole `dos.evidence` thesis is "a witness is only evidence when the byte-author
|
|
13
|
+
is not the judged agent." When an agent runs a test and *tells you* it passed, the
|
|
14
|
+
agent authored every byte that reached you — the docs/103 self-report, the forgeable
|
|
15
|
+
floor (`AGENT_AUTHORED`). But when the **kernel** launches the process and the
|
|
16
|
+
**operating system** records the exit status, the agent under adjudication cannot
|
|
17
|
+
forge a `returncode == 0`: it did not run the process and does not author the OS's
|
|
18
|
+
record of how it ended. So this source is tagged `OS_RECORDED` — the non-forgeable
|
|
19
|
+
rung — and its ATTESTED facts are eligible to grant belief under
|
|
20
|
+
`evidence.believe_under_floor` (a pasted-receipt source, `AGENT_AUTHORED`, never is).
|
|
21
|
+
|
|
22
|
+
The mapping from exit status to stance is the honest, conservative one:
|
|
23
|
+
|
|
24
|
+
* exit 0 → **ATTESTED** (the effect's acceptance check passed)
|
|
25
|
+
* exit non-zero (clean run) → **REFUTED** (the check ran and said no — a
|
|
26
|
+
positive disconfirmation, stronger than "no signal")
|
|
27
|
+
* could not run the command → **NO_SIGNAL** (binary missing, timeout, OS error,
|
|
28
|
+
no command given — abstain, never a fabricated pass)
|
|
29
|
+
|
|
30
|
+
The same fail-safe-never-fail-open posture as `ci_status._run_gh`: every failure mode
|
|
31
|
+
degrades to an unreachable `no_signal`, never a raise, never an ATTESTED.
|
|
32
|
+
|
|
33
|
+
Shape & layering
|
|
34
|
+
================
|
|
35
|
+
|
|
36
|
+
A driver, outside the kernel boundary — it has the surface the kernel forbids
|
|
37
|
+
(spawning a process). It implements the `evidence.EvidenceSource` Protocol:
|
|
38
|
+
`name`/`accountability` class attributes + a boundary `gather(subject, config)` whose
|
|
39
|
+
ONE subprocess lives here, mirroring `ci_status.gather` / `git_delta`. It imports the
|
|
40
|
+
kernel; the kernel never imports it (the `drivers/__init__` rule, the existing
|
|
41
|
+
`no dos.drivers import` litmus covers it). Advisory: it reports an attestation; it
|
|
42
|
+
never refuses a lease or mutates state — a host CONSULTS it (a `dos verify` belief
|
|
43
|
+
fold, a RED row in `dos decisions`), it does not actuate.
|
|
44
|
+
|
|
45
|
+
The `subject` IS the command
|
|
46
|
+
============================
|
|
47
|
+
|
|
48
|
+
For this source the opaque `subject` correlation handle is *the acceptance command
|
|
49
|
+
itself* — the shell-free argv to run (passed as a single string, split with
|
|
50
|
+
`shlex`). "Witness that effect E happened" becomes "run the command that checks E and
|
|
51
|
+
read its exit code." The command is the host's to choose (`pytest -q`, `curl -fsS
|
|
52
|
+
https://… -o /dev/null` for an HTTP-200 re-GET of an idempotent effect, a
|
|
53
|
+
provider-CLI status probe); the kernel only runs it and reads the OS's verdict. A
|
|
54
|
+
host wires which command witnesses which effect; this driver supplies the
|
|
55
|
+
runs-it-and-reads-the-exit-code mechanism.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
from __future__ import annotations
|
|
59
|
+
|
|
60
|
+
import argparse
|
|
61
|
+
import json
|
|
62
|
+
import shlex
|
|
63
|
+
import subprocess
|
|
64
|
+
|
|
65
|
+
# Imports the kernel — never the other way round (the driver rule).
|
|
66
|
+
from dos import config as _config
|
|
67
|
+
from dos.evidence import Accountability, EvidenceFacts, believe_under_floor
|
|
68
|
+
|
|
69
|
+
# Cap the run so a hung acceptance command can't stall an evidence-gather — the
|
|
70
|
+
# `ci_status._GH_TIMEOUT_S` / `git_delta._GIT_TIMEOUT_S` discipline. A touch generous
|
|
71
|
+
# because an acceptance check (a test run, an HTTP probe) can legitimately take a
|
|
72
|
+
# little while; a host that needs a different cap passes `timeout_s`.
|
|
73
|
+
_DEFAULT_TIMEOUT_S = 120
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class OsAcceptanceEvidenceSource:
    """An `evidence.EvidenceSource`: run a command, read the OS exit code, witness it.

    `name`/`accountability` are CLASS-LEVEL and fixed — this source is always
    tagged `OS_RECORDED`. The `subject` handed to `gather` is the acceptance
    command to run, given as one string and split shell-free with `shlex`.
    `config` is accepted for Protocol conformance and is not read here.

    Args:
        timeout_s: Seconds the command may run before it is abandoned and
            reported as `no_signal`.
        cwd: Working directory for the command; `None` inherits the caller's.
    """

    name = "os_acceptance"
    accountability = Accountability.OS_RECORDED

    def __init__(self, *, timeout_s: int = _DEFAULT_TIMEOUT_S, cwd: str | None = None) -> None:
        self._timeout_s = timeout_s
        self._cwd = cwd

    def _no_signal(self, subject: str, detail: str) -> EvidenceFacts:
        """Build the abstaining facts shared by every could-not-run path."""
        return EvidenceFacts.no_signal(
            self.name,
            self.accountability,
            subject,
            detail=detail,
        )

    def gather(self, subject: str, config: object) -> EvidenceFacts:
        """Run `subject` as a command and map its exit status to an EvidenceFacts.

        Mapping:
          * exit 0                      -> `EvidenceFacts.attest`
          * non-zero exit               -> `EvidenceFacts.refute`
          * command could not be run    -> `EvidenceFacts.no_signal` (blank or
            un-parseable command, missing binary, timeout, OS error, or an
            argv the OS layer rejects)

        The ONE subprocess lives here. Every anticipated failure mode degrades
        to `no_signal`, so a failure to run can never be mistaken for either an
        attestation or a refutation.

        Args:
            subject: The acceptance command, as a single string.
            config: Unused; present for Protocol conformance.

        Returns:
            The `EvidenceFacts` describing what the exit status witnessed.
        """
        cmd = (subject or "").strip()
        if not cmd:
            return self._no_signal(
                subject, "no acceptance command given — nothing to witness"
            )
        try:
            argv = shlex.split(cmd, posix=True)
        except ValueError as e:  # unbalanced quotes etc. — not runnable, so no signal
            return self._no_signal(
                subject, f"un-parseable acceptance command ({e}) — no signal"
            )
        if not argv:
            return self._no_signal(
                subject, "empty acceptance command after parsing — no signal"
            )
        try:
            # Output is captured only to keep it off the caller's terminal; it is
            # never read. It is deliberately NOT decoded (no `text=True`): bytes
            # that are invalid in the locale encoding would otherwise raise
            # UnicodeDecodeError and discard a perfectly good exit code.
            p = subprocess.run(
                argv,
                capture_output=True,
                check=False,
                timeout=self._timeout_s,
                cwd=self._cwd,
            )
        except FileNotFoundError:
            return self._no_signal(
                subject, f"command not found: {argv[0]!r} — no signal"
            )
        except subprocess.TimeoutExpired:
            return self._no_signal(
                subject,
                f"acceptance command timed out after {self._timeout_s}s — no signal",
            )
        except OSError as e:  # pragma: no cover - environment-dependent
            return self._no_signal(
                subject,
                f"acceptance command failed to start ({e.__class__.__name__}) — no signal",
            )
        except ValueError as e:
            # e.g. an embedded NUL byte in an argument: the process never
            # started, so this is "could not run", not a refutation.
            return self._no_signal(
                subject,
                f"acceptance command failed to start ({e.__class__.__name__}) — no signal",
            )

        rc = p.returncode
        # The OS authored `rc`. exit 0 → the acceptance check passed (ATTESTED);
        # non-zero from a clean run → the check ran and said no (REFUTED).
        if rc == 0:
            return EvidenceFacts.attest(
                self.name,
                self.accountability,
                subject,
                detail=f"`{argv[0]}` exited 0 — acceptance check passed",
            )
        return EvidenceFacts.refute(
            self.name,
            self.accountability,
            subject,
            detail=f"`{argv[0]}` exited {rc} — acceptance check failed (OS-recorded)",
        )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# CLI — `python -m dos.drivers.os_acceptance "<command>"` witnesses an effect.
|
|
181
|
+
# Folds the single source through `believe_under_floor` so the operator sees the
|
|
182
|
+
# belief verdict, not just the raw stance — i.e. that an OS_RECORDED attestation
|
|
183
|
+
# DOES grant belief (whereas a forgeable-floor one would not).
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: witness one acceptance command and print the belief verdict.

    Parses the command line, runs the command through
    `OsAcceptanceEvidenceSource` via the kernel's `gather_evidence` wrapper,
    folds the single result through `believe_under_floor`, and prints either a
    JSON document (`--json`) or a short human-readable report.

    Args:
        argv: Argument list to parse; `None` lets argparse read `sys.argv`.

    Returns:
        1 when the belief is refuted, 0 when it is believed, 3 otherwise
        (no signal).
    """
    # `__doc__` is None under `python -OO`; fall back to no description instead
    # of crashing on `None.splitlines()` before argument parsing even starts.
    doc_lines = (__doc__ or "").splitlines()
    ap = argparse.ArgumentParser(
        prog="dos.drivers.os_acceptance",
        description=doc_lines[0] if doc_lines else None,
    )
    ap.add_argument("command", help="the acceptance command to run (its exit code is the witness)")
    ap.add_argument("--workspace", default=None,
                    help="workspace root, used to resolve the run cwd (default: $DISPATCH_WORKSPACE or cwd)")
    ap.add_argument("--timeout", type=int, default=_DEFAULT_TIMEOUT_S,
                    help=f"seconds before the command is abandoned as NO_SIGNAL (default: {_DEFAULT_TIMEOUT_S})")
    ap.add_argument("--json", action="store_true", help="machine-readable verdict")
    args = ap.parse_args(argv)

    cfg = _config.default_config(args.workspace)
    source = OsAcceptanceEvidenceSource(timeout_s=args.timeout, cwd=str(cfg.paths.root))
    # Use the kernel's fail-safe wrapper, exactly as a real consumer would.
    from dos.evidence import gather_evidence

    facts = gather_evidence(source, args.command, cfg)
    belief = believe_under_floor((facts,))

    if args.json:
        print(json.dumps({"facts": facts.to_dict(), "belief": belief.to_dict()}, indent=2))
    else:
        print(f"COMMAND {args.command}")
        print(f"SOURCE {facts.source_name} ({facts.accountability.value})")
        print(f"STANCE {facts.stance.value}")
        print(f"WHY {facts.detail}")
        print(f"BELIEVE {belief.believe} (refuted={belief.refuted})")
        print(f"VERDICT {belief.reason}")

    # Exit-code map: a refutation is 1, a believed attestation is 0, and
    # anything else (no signal) is 3. Refutation is checked first.
    if belief.refuted:
        return 1
    if belief.believe:
        return 0
    return 3
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
if __name__ == "__main__":
|
|
228
|
+
raise SystemExit(main())
|
dos/drivers/paste_log.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""dos.drivers.paste_log — the floor log source: operator-pasted text. A JUDGE hint.
|
|
2
|
+
|
|
3
|
+
docs/117 §7 — the worked move-B example for the `log_source` seam, and **deliberately
|
|
4
|
+
the floor of the accountability spectrum**. It exists to demonstrate the fence the
|
|
5
|
+
seam puts around the easy-to-ingest sources, not to be trusted as a verdict source.
|
|
6
|
+
|
|
7
|
+
What it is
|
|
8
|
+
==========
|
|
9
|
+
|
|
10
|
+
A `log_source.LogSource` that wraps text the operator hands in — a pasted terminal
|
|
11
|
+
buffer, a copied stack trace, a `screen`/`tmux` scrollback dumped into the prompt. It
|
|
12
|
+
is the single easiest log source to ingest (~zero integration: the text is already in
|
|
13
|
+
hand) and, by the docs/117 §2 **inversion law**, the *least* trustworthy for exactly
|
|
14
|
+
that reason: the agent (or the operator relaying the agent) chose every byte that
|
|
15
|
+
reached here, so the bytes are a self-report wearing evidence's clothes — the docs/84
|
|
16
|
+
§3.1 forgeable floor, `INFO: tests passed` in a logger rendered as a paste.
|
|
17
|
+
|
|
18
|
+
Why it is hard-tagged `AGENT_AUTHORED`
|
|
19
|
+
======================================
|
|
20
|
+
|
|
21
|
+
Its `accountability` is `AGENT_AUTHORED` and there is no way to construct it at any
|
|
22
|
+
higher rung. That is the load-bearing point of this driver as an *example*: a consumer
|
|
23
|
+
routes off the tag (`if ev.accountability.is_agent_authored: feed_a_judge(ev)`), so
|
|
24
|
+
this source has no path to an oracle verdict by construction. It answers the docs/117
|
|
25
|
+
§1 objection ("an LLM already reads logs") concretely — pasted text IS that loop, and
|
|
26
|
+
the kernel's contribution is to give it the correct, lower rung (a JUDGE *hint*,
|
|
27
|
+
advisory and fail-to-abstain — `judges` / `drivers/llm_judge`), never a deterministic
|
|
28
|
+
verdict. The slop move is to ship a paste adapter as a "verification source"; the
|
|
29
|
+
honest move is to ship it tagged as the floor with the fence visible.
|
|
30
|
+
|
|
31
|
+
How a consumer uses it (the right way)
|
|
32
|
+
======================================
|
|
33
|
+
|
|
34
|
+
from dos import log_source as _ls
|
|
35
|
+
from dos.drivers.paste_log import PasteLogSource
|
|
36
|
+
|
|
37
|
+
src = PasteLogSource(text=pasted_buffer) # or .from_stdin()
|
|
38
|
+
ev = _ls.gather_log(src, subject=run_id, config=cfg)
|
|
39
|
+
# ev.accountability.is_agent_authored is True →
|
|
40
|
+
# hand ev.lines to a JUDGE as a hint (advisory), NEVER classify as an oracle.
|
|
41
|
+
|
|
42
|
+
This driver imports the kernel (`dos.log_source`); the kernel never imports it (the
|
|
43
|
+
`drivers/__init__` one-way rule, pinned by `tests/test_log_source.py`). It is NOT in
|
|
44
|
+
`dos.drivers.__init__`'s eager imports — like `ci_status`, it is loaded on demand by a
|
|
45
|
+
consumer, so it stays off the kernel's import surface.
|
|
46
|
+
|
|
47
|
+
Pure-stdlib; the only "I/O" is reading the text it was handed (or stdin in the
|
|
48
|
+
classmethod), and even a read failure degrades through `gather_log` to NO_SIGNAL.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
from __future__ import annotations
|
|
52
|
+
|
|
53
|
+
import sys
|
|
54
|
+
|
|
55
|
+
from dos.log_source import Accountability, LogEvidence
|
|
56
|
+
|
|
57
|
+
# Imported only for type clarity / the entry-point contract; a real plugin would
|
|
58
|
+
# register this class under `[project.entry-points."dos.log_sources"]`.
|
|
59
|
+
|
|
60
|
+
# Cap how much pasted text we retain, so a multi-megabyte paste can't bloat the
|
|
61
|
+
# evidence object a judge is handed. A floor source's value is a *hint*; the first N
|
|
62
|
+
# lines are plenty, and an unbounded buffer is a footgun, not a feature.
|
|
63
|
+
_MAX_LINES = 2000
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class PasteLogSource:
    """A `LogSource` over operator-supplied text, always tagged `AGENT_AUTHORED`.

    `name` is `"paste"`. `accountability` is a class-level constant, not a
    per-call choice, so an instance cannot be constructed at a different rung.
    `gather` does not consult `subject` or `config`: it simply returns the
    lines retained at construction time. A "reached" result here only means
    "text was supplied", not "the text is trustworthy" — the `AGENT_AUTHORED`
    tag is what tells a consumer how far to trust it.
    """

    name = "paste"
    accountability = Accountability.AGENT_AUTHORED

    def __init__(self, text: str = "") -> None:
        """Split `text` into lines and keep at most the last `_MAX_LINES` of them.

        The split happens here so that `gather` has no parsing left to do.
        Falsy input (empty string, `None`) results in no retained lines.
        """
        # Slice from the end: the tail of a terminal buffer (recent output, the
        # error, the exit summary) is kept in preference to the scrolled-off head.
        tail = (text or "").splitlines()[-_MAX_LINES:]
        self._lines: tuple[str, ...] = tuple(tail)

    @classmethod
    def from_stdin(cls) -> "PasteLogSource":
        """Construct a source from everything readable on stdin.

        Any exception raised by the read is swallowed and treated as empty
        input, so a failed read yields a source with no lines rather than an
        error.
        """
        try:
            pasted = sys.stdin.read()
        except Exception:
            pasted = ""
        return cls(text=pasted)

    def gather(self, subject: str, config: object) -> LogEvidence:
        """Return the retained lines as evidence, or `no_signal` when there are none.

        `subject` and `config` are accepted but not used. With at least one
        retained line the result is `LogEvidence.reached(...)` carrying the
        lines; otherwise it is `LogEvidence.no_signal(...)`. Both carry this
        source's name and its `AGENT_AUTHORED` accountability, and a `detail`
        string explaining in plain words what was (or was not) pasted.
        """
        count = len(self._lines)
        if count:
            summary = (
                f"{count} line(s) of operator-pasted text — "
                "AGENT_AUTHORED (the forgeable floor): a JUDGE hint only, never a "
                "deterministic verdict source (docs/117 §1)."
            )
            return LogEvidence.reached(
                self.name,
                self.accountability,
                self._lines,
                detail=summary,
            )
        return LogEvidence.no_signal(
            self.name,
            self.accountability,
            detail="no text pasted — the floor source has no log signal.",
        )
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""dos.drivers.plan_scope — a reference `ScopeSource` (outside the kernel line).
|
|
2
|
+
|
|
3
|
+
The kernel ships the scope-source SEAM (`dos.scope_source`: the `ScopeSource`
|
|
4
|
+
Protocol, the `ScopeVerdict`, the `honest_under_floor` conjunction, the null
|
|
5
|
+
`AllDeclaredScope` baseline, the resolver) but NO *ruling* source — exactly as it
|
|
6
|
+
ships the judge seam but no ruling judge, and the overlap seam but no model-backed
|
|
7
|
+
scorer. A source that consults an EXTERNAL account of scope is a JUDGE-rung
|
|
8
|
+
adjudicator (it reads the world, it is not pure), so it lives **here, in a
|
|
9
|
+
driver** — outside the kernel boundary, where I/O is allowed
|
|
10
|
+
(`drivers/__init__.py`: "they import the kernel; the kernel never imports them").
|
|
11
|
+
|
|
12
|
+
What this source does
|
|
13
|
+
=====================
|
|
14
|
+
|
|
15
|
+
`PlanScopeSource` cross-checks a run's **declared** extent (`state.declared_steps`,
|
|
16
|
+
the self-reported denominator of the residual) against an **expected** set of
|
|
17
|
+
units — the *real* scope, supplied from outside the run. The expected set is the
|
|
18
|
+
external account the kernel must not trust the agent to report honestly:
|
|
19
|
+
|
|
20
|
+
* **Where it comes from.** `config.expected_scope_steps` — a workspace declares
|
|
21
|
+
its real phase list in `dos.toml` (`[completion] expected_scope`), or a host
|
|
22
|
+
driver injects it from its plan registry (the phase list of the plan the run is
|
|
23
|
+
executing). This driver is agnostic about the source; it reads the iterable off
|
|
24
|
+
the config it is handed.
|
|
25
|
+
* **The ruling.** If every expected unit appears in `declared_steps`, the extent
|
|
26
|
+
is honest → `extent_honest=True` (the run put the whole job on the books). If
|
|
27
|
+
any expected unit is MISSING from the declared steps, the run under-declared →
|
|
28
|
+
`extent_honest=False`, carrying the missing units. Fed into
|
|
29
|
+
`completion.classify`, that flips an otherwise-`COMPLETE` run to
|
|
30
|
+
`UNDERDECLARED`: the residual is empty, but it was measured against too small a
|
|
31
|
+
denominator.
|
|
32
|
+
|
|
33
|
+
This is the canonical Gap-B example from docs/117 §5.3 ("diff the declared steps
|
|
34
|
+
against the plan registry's phase list"), made concrete and deterministic.
|
|
35
|
+
|
|
36
|
+
Why it is a driver, not the kernel
|
|
37
|
+
==================================
|
|
38
|
+
|
|
39
|
+
It reads `config` to LOCATE the external scope and (in the host-injected case) the
|
|
40
|
+
plan registry is itself read from disk — that is the I/O a kernel verdict may not
|
|
41
|
+
do. The discipline that keeps it safe is the seam's, not purity: a `ScopeSource`
|
|
42
|
+
can only ever WITHHOLD `COMPLETE` (the conjunction + `run_scope` fail-to-strict
|
|
43
|
+
guarantee it), so even a buggy or lying scope source surfaces an `UNDERDECLARED`
|
|
44
|
+
decision rather than silently certifying done. The kernel imports nothing from
|
|
45
|
+
here; `completion.classify` takes the `ScopeVerdict` this produces as data.
|
|
46
|
+
|
|
47
|
+
Wiring it
|
|
48
|
+
=========
|
|
49
|
+
|
|
50
|
+
Register it under the `dos.scope_sources` entry-point group, then a workspace names
|
|
51
|
+
it in `dos.toml [completion] scope_sources = ["plan"]`; the CLI boundary resolves
|
|
52
|
+
it via `scope_source.active_scope_sources` and threads the verdict into
|
|
53
|
+
`completion.classify`. Or construct it directly and pass its verdict in (what the
|
|
54
|
+
tests do). The entry-point name is conventionally ``plan``.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
from __future__ import annotations
|
|
58
|
+
|
|
59
|
+
from typing import Iterable, Optional
|
|
60
|
+
|
|
61
|
+
# Imports the kernel — never the other way round (the driver rule).
|
|
62
|
+
from dos.intent_ledger import LedgerState
|
|
63
|
+
from dos.scope_source import ScopeVerdict
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _expected_from_config(config: object) -> Optional[tuple[str, ...]]:
    """The expected (real) scope unit ids, read off the config (the data seam).

    Reads ``config.expected_scope_steps`` and returns its items as a tuple of
    ``str`` (each item is coerced with ``str()``).

    Returns ``None`` — the "I have no external account to check against" case —
    when any of the following holds:

    * the attribute is absent or is ``None``;
    * the value is a bare ``str`` / ``bytes`` / ``bytearray``. These *are*
      iterable, but iterating them yields single characters (or byte values),
      which would fabricate one bogus "expected unit" per character and make
      every run look under-declared. A scalar where a list was expected is a
      malformed value, so it takes the same fall-back as any other;
    * the value is not iterable at all (``TypeError`` while iterating).

    Defensive on purpose: a malformed value yields ``None`` rather than
    crashing the completion path.
    """
    raw = getattr(config, "expected_scope_steps", None)
    if raw is None:
        return None
    # Guard before iterating: a lone string would otherwise be split into
    # characters and silently treated as a list of one-letter unit ids.
    if isinstance(raw, (str, bytes, bytearray)):
        return None
    try:
        return tuple(str(x) for x in raw)
    except TypeError:
        # Not iterable (e.g. an int) — treat as "no expected scope".
        return None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class PlanScopeSource:
    """Reference `ScopeSource` that diffs declared steps against an expected list.

    `name` is ``plan`` — the token a workspace lists under `dos.toml
    [completion] scope_sources`. `scope_verdict` obtains the expected units
    (injected at construction, else from the config) and reports which of them
    are absent from `state.declared_steps`.
    """

    name = "plan"

    def __init__(self, expected: Optional[Iterable[str]] = None) -> None:
        """Optionally pin the expected scope up front.

        When `expected` is given, its items are coerced to ``str`` and stored
        as a tuple; that tuple wins over ``config.expected_scope_steps``.
        When it is omitted, the config is consulted on each `scope_verdict`
        call instead.
        """
        if expected is None:
            self._expected: Optional[tuple[str, ...]] = None
        else:
            self._expected = tuple(str(unit) for unit in expected)

    def scope_verdict(self, state: LedgerState, config: object) -> ScopeVerdict:
        """Rule on whether the declared extent covers every expected unit.

        Returns an honest verdict when there is no expected scope to compare
        against or when nothing is missing; otherwise a dishonest verdict
        carrying the missing units in their expected order.
        """
        # The injected tuple takes precedence; the config is only a fallback.
        units = self._expected
        if units is None:
            units = _expected_from_config(config)

        if units is None:
            # No external account of scope exists, so there is no evidence of
            # under-declaration: the declared extent goes uncontested.
            return ScopeVerdict(
                extent_honest=True,
                reason="no expected scope configured — declared extent not contested",
                source=self.name,
            )

        on_the_books = set(state.declared_steps)
        absent = tuple(unit for unit in units if unit not in on_the_books)

        if absent:
            return ScopeVerdict(
                extent_honest=False,
                reason=(
                    f"{len(absent)} expected unit(s) absent from the declared extent "
                    "— the run under-declared its scope"
                ),
                source=self.name,
                missing=absent,
            )

        return ScopeVerdict(
            extent_honest=True,
            reason=(
                f"all {len(units)} expected unit(s) are in the declared "
                "extent — the whole job was put on the books"
            ),
            source=self.name,
        )
|