dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: dos-unstick
|
|
3
|
+
description: Sweep the run-archive trail of BLOCKED/DRAIN verdicts, normalize each to a canonical cause via the recurring-wedge fold, cluster by recurrence × stall-cost, and propose ONE structural fix per recurring cause — a contract/oracle/preflight change, never a one-off unblock. Read-only on code; surfaces via `dos decisions`. The cause taxonomy is `[reasons]` data; every path/lane comes from `dos doctor --json`. Use when a fleet keeps stalling on the same thing across runs and you want the structural fix, not another manual unblock. The DOS operator remediation sweep (SKP Axis 5, docs/207 Phase 5a).
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# dos-unstick — the recurring-blocker remediation sweep
|
|
7
|
+
|
|
8
|
+
> **Stop the cause, not the instance.** A one-off unblock fixes today's stall;
|
|
9
|
+
> `/dos-unstick` asks the different question — *what keeps blocking progress
|
|
10
|
+
> across runs, and what one change would unblock it?* It mines the run archive,
|
|
11
|
+
> normalizes every blocker to a canonical **cause key**, clusters the recurring
|
|
12
|
+
> ones, ranks by recurrence × measured stall-cost, and proposes a **structural**
|
|
13
|
+
> fix per cluster. Read-only; it writes no code and surfaces findings, never acts.
|
|
14
|
+
|
|
15
|
+
The shape: **mine the trail → key each blocker → cluster the recurring ones →
|
|
16
|
+
rank → propose a structural fix → surface.** The recurrence fold is the kernel's
|
|
17
|
+
(`recurring_wedge`); the cause TAXONOMY is `[reasons]` data — a host adds a cause
|
|
18
|
+
by declaring a reason, never by editing this skill.
|
|
19
|
+
|
|
20
|
+
## Inputs
|
|
21
|
+
|
|
22
|
+
- `--runs <N>` (optional) — how many most-recent runs to sweep (default: a recent
|
|
23
|
+
window). `--since <Nd|Nh>` overrides with a time window.
|
|
24
|
+
- `--min-recurrence <N>` (optional) — a cause is "recurring" at N+ distinct runs
|
|
25
|
+
(default 2 — the `recurring_wedge` `DEFAULT_MIN_RECURRENCE`).
|
|
26
|
+
|
|
27
|
+
## Step 0 — Discover the layout + the cause taxonomy
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
dos doctor --workspace . --json
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Read `paths.runs` (the run-archive dir to sweep) and run `dos man wedge` to read
|
|
34
|
+
the **closed reason vocabulary** — the canonical cause keys this workspace knows.
|
|
35
|
+
**Use these; never hardcode a run path or a cause string.** A cause the workspace
|
|
36
|
+
cares about is a declared `[reasons]` entry, surfaced by `dos man wedge`.
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
dos man wedge
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Step 1 — Mine the run-archive trail
|
|
43
|
+
|
|
44
|
+
Read the run records under `paths.runs` (read each once, sequentially — no
|
|
45
|
+
tailing/polling, the very anti-pattern this sweep exists to surface). For each
|
|
46
|
+
run that STOPPED on a BLOCKED / DRAIN / STALLED outcome, capture: the run id, the
|
|
47
|
+
iteration, the blocker text, and the measured stall cost (`$`/wall) if recorded.
|
|
48
|
+
|
|
49
|
+
> **No host evidence reader is wired by default.** A host that curates a
|
|
50
|
+
> postmortem stream or a hand-ranked next-hits file can expose it as a
|
|
51
|
+
> `dos.evidence_sources` driver hook; the generic sweep reads only the run-archive
|
|
52
|
+
> verdicts. **`log` a one-line note when a host evidence source is not consulted**
|
|
53
|
+
> — no silent gap (the `/dos-dispatch-loop` discipline).
|
|
54
|
+
|
|
55
|
+
## Step 2 — Key each blocker to a canonical cause
|
|
56
|
+
|
|
57
|
+
Normalize each blocker text to ONE cause key from the declared `[reasons]`
|
|
58
|
+
vocabulary (the host's cue table maps an Outcome-cell string to a key — the same
|
|
59
|
+
kernel-catalog ↔ host-cue split `dos man wedge` documents). A blocker that maps to
|
|
60
|
+
no declared cause is keyed `UNCATEGORIZED` (surfaced as a gap, never dropped).
|
|
61
|
+
|
|
62
|
+
## Step 3 — Cluster the recurring causes (the kernel fold)
|
|
63
|
+
|
|
64
|
+
Feed the keyed blockers to the recurrence fold. It clusters by cause key, ranks by
|
|
65
|
+
recurrence (dominant) × stall-cost (the tie-break), and tells you whether the
|
|
66
|
+
cause the CURRENT sweep hit spans `>= min-recurrence` distinct runs.
|
|
67
|
+
|
|
68
|
+
The fold is `recurring_wedge.classify_recurring_wedge` — pure, frozen-data-in,
|
|
69
|
+
verdict-out. A cluster spanning ≥ `min-recurrence` runs is **recurring** (worth a
|
|
70
|
+
structural fix); a one-off is noise the sweep cannot help.
|
|
71
|
+
|
|
72
|
+
## Step 4 — Propose ONE structural fix per recurring cluster
|
|
73
|
+
|
|
74
|
+
For each recurring cluster, propose a **structural** change — a contract edit, an
|
|
75
|
+
oracle rung, a preflight check — that would stop the cause RECURRING, not a
|
|
76
|
+
one-off unblock of today's instance. Rank the proposals by the cluster's
|
|
77
|
+
stall-score (recurrence × cost). The cause→fix mapping is the host's `[reasons]`
|
|
78
|
+
fix-sketch (the reason's documented remediation), not a literal in this skill.
|
|
79
|
+
|
|
80
|
+
## Step 5 — Surface, don't act
|
|
81
|
+
|
|
82
|
+
Route each proposal to the operator-decision queue:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
dos decisions add # the routing surface; the proposal is a finding, not an edit
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
This skill is **read-only**: it writes no code, edits no plan, takes no lease. A
|
|
89
|
+
recurring cause is a finding for a human (or a follow-up plan), not an
|
|
90
|
+
auto-applied change — the structural fix is a real engineering decision.
|
|
91
|
+
|
|
92
|
+
## What this skill deliberately does NOT do (no silent gap)
|
|
93
|
+
|
|
94
|
+
- **No auto-unblock.** It proposes the structural fix; a human applies it. A loop
|
|
95
|
+
that auto-unblocks would paper over the recurring cause, the exact failure mode.
|
|
96
|
+
- **No curated postmortem / next-hits ingestion** unless a host wires a
|
|
97
|
+
`dos.evidence_sources` hook. `log` when it is not consulting one.
|
|
98
|
+
- **No trajectory mining** by default (the heavier read) — the run-archive
|
|
99
|
+
verdicts are the generic floor.
|
|
100
|
+
|
|
101
|
+
## Anti-patterns
|
|
102
|
+
|
|
103
|
+
- ❌ Proposing a one-off unblock for a 3-run recurring cause — that is the bug
|
|
104
|
+
this skill exists to replace. Recurring ⇒ structural.
|
|
105
|
+
- ❌ Hardcoding a cause string or a run path — read the vocabulary from
|
|
106
|
+
`dos man wedge` and the path from `dos doctor --json`.
|
|
107
|
+
- ❌ Tailing/polling the run dir — read each record once; polling is the waste
|
|
108
|
+
this sweep surfaces in others.
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: dos-witness-claim
|
|
3
|
+
description: Route a subagent's actionable claims through the witness rung instead of folding its return string. For any worker whose deliverable is a CHECKABLE EFFECT — a shipped git phase, a created file, a DB row, a sent message — do NOT believe what the worker said it did; extract the claim at the boundary, gather an independently-authored read-back, and fold ONLY the confirmed effects. Driven by `dos` verbs and the workspace's own `dos.toml` — no host-specific paths, lanes, or commit conventions. Use at a `parallel()`/`pipeline()` barrier, a synthesis step, or any fold site where one agent's output becomes another's input. This is the DOS reference pattern for the docs/197 §7(2) witness-routing stage; the seam below is honest about which steps have a CLI verb and which are Python-API-only today.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# dos-witness-claim — fold the witnessed effect, never the narrated one
|
|
7
|
+
|
|
8
|
+
> **This is the screenplay for the one move the whole substrate exists to make:**
|
|
9
|
+
> when a subagent hands you a result, the result is a *claim*, not a fact. A claim
|
|
10
|
+
> re-narrating the agent's own work is **consistency, not grounding** (docs/138).
|
|
11
|
+
> Belief is earned only by a read-back whose **byte-author is not the judged
|
|
12
|
+
> agent** (a fresh GET, a git existence check, an OS exit code, a state diff). This
|
|
13
|
+
> skill never decides ground truth itself — it shells `dos` verbs and reads the
|
|
14
|
+
> verdict. The kernel decides; the skill narrates.
|
|
15
|
+
|
|
16
|
+
The shape is domain-free: **discover the layout → classify the claim type →
|
|
17
|
+
witness it on a non-forgeable rung → fold ONLY confirmed.** The *policy* (which
|
|
18
|
+
lanes, which plan grammar, where state lives) is data the screenplay reads from
|
|
19
|
+
`dos doctor --json`, never literals it hardcodes.
|
|
20
|
+
|
|
21
|
+
**The seed-2 payoff.** This makes the worker's MODEL TIER irrelevant to trust. A
|
|
22
|
+
weak worker and a strong worker face the SAME witness gate: both have their
|
|
23
|
+
claimed effect re-read from a surface they did not author. A confident, fluent,
|
|
24
|
+
frontier-strength narration of a success the world does not corroborate is exactly
|
|
25
|
+
the silent fail this gate catches (docs/177) — and a weak worker that actually
|
|
26
|
+
shipped the effect passes it. Distrust is aimed at the *byte-author of the
|
|
27
|
+
evidence*, not at the worker's eloquence.
|
|
28
|
+
|
|
29
|
+
## Inputs
|
|
30
|
+
|
|
31
|
+
- A set of subagent **results** to fold (return strings + their transcript paths),
|
|
32
|
+
e.g. the children of a `parallel()` barrier or a `pipeline()` stage. A result is
|
|
33
|
+
the thing you are tempted to interpolate directly (`${result}`) — don't.
|
|
34
|
+
- For each result, the **effect it claims** (a `(plan, phase)`, a created file
|
|
35
|
+
path, a DB row key, a sent-message id). If the worker emitted no checkable
|
|
36
|
+
effect, that is a real outcome (NO_CLAIM), not a pass — see Step 2.
|
|
37
|
+
|
|
38
|
+
## Step 0 — Discover the workspace layout (one call)
|
|
39
|
+
|
|
40
|
+
Run the doctor verb and read the result. **This is the WCR on-ramp: every
|
|
41
|
+
path/lane/exit-code below comes from here, never a literal.**
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
dos doctor --workspace . --json
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Parse the JSON object. The fields this skill uses:
|
|
48
|
+
|
|
49
|
+
- `exit_codes.verify` — `{shipped, not_shipped, contract_error}`. Branch on these,
|
|
50
|
+
never on parsing the prose, for the **git-phase** claim type (Step 3a).
|
|
51
|
+
- `exit_codes."verify-result"` — `{healthy, unreadable, dead, contract_error}`
|
|
52
|
+
(`dead=3`). Branch on these for the **terminal-state** witness (Step 1, the
|
|
53
|
+
prerequisite gate every claim type runs first).
|
|
54
|
+
- `paths.plans_glob` / `lanes` / `stamp` — the host's plan grammar and lane
|
|
55
|
+
taxonomy, if a claim names a `(plan, phase)`.
|
|
56
|
+
- `git` — if `false`, the git-existence witness (Step 3a) has no history to read;
|
|
57
|
+
every git-phase claim will be `source="none"`. Say so; do not silently pass it.
|
|
58
|
+
|
|
59
|
+
You may also read `admission_predicates` and `overlap_policy` here, but they are
|
|
60
|
+
not load-bearing for witnessing — they describe the arbiter, not the witness rung.
|
|
61
|
+
|
|
62
|
+
## Step 1 — For each result: is the worker even ALIVE? (the terminal-state gate)
|
|
63
|
+
|
|
64
|
+
**Run this BEFORE you read the worker's return string at all.** A return string is
|
|
65
|
+
worthless if the "worker" was a harness-synthesized death (a rate-limit / quota /
|
|
66
|
+
auth / server error the harness wrote and the worker never authored). ~32% of real
|
|
67
|
+
subagents return such a string (docs/197 §2), and it survives a naive
|
|
68
|
+
`.filter(Boolean)` to be banked as a finished finding.
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
dos verify-result --workspace . --transcript <agent-transcript.jsonl>
|
|
72
|
+
echo "exit=$?"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Branch on `exit_codes."verify-result"`:
|
|
76
|
+
|
|
77
|
+
- `3` **DEAD** — the terminal record was harness-authored (`message.model ==
|
|
78
|
+
"<synthetic>"`, the unforgeable authorship marker). **Do NOT fold this result.**
|
|
79
|
+
Route the worker's OWN unit for re-dispatch, and **count it in the denominator**
|
|
80
|
+
(a 4-of-7 fan-out is 4/7, never silently 7/7). The catch is grounding because it
|
|
81
|
+
read a DIFFERENT byte-author than the worker — the harness, not the model.
|
|
82
|
+
- `0` **HEALTHY** — a real terminal result; proceed to Step 2. (`0` also covers
|
|
83
|
+
**UNREADABLE**, the fail-safe floor: a read fault never fabricates a death. An
|
|
84
|
+
UNREADABLE result is *not* trusted as confirmed — it simply isn't classified
|
|
85
|
+
DEAD; its effect still must be witnessed in Step 3.)
|
|
86
|
+
- `2` — a contract error (no transcript). Fix the wiring; do not treat as HEALTHY.
|
|
87
|
+
|
|
88
|
+
This step is itself the cheap, shipped form of "byte-author ≠ judged agent": the
|
|
89
|
+
liveness of the worker is read from the harness's own authorship marker.
|
|
90
|
+
|
|
91
|
+
## Step 2 — Classify the claim TYPE (so you know which witness to ask)
|
|
92
|
+
|
|
93
|
+
For each HEALTHY result, decide what checkable effect it claims. **Extract the
|
|
94
|
+
claim at the boundary — abstain, never invent.** Free prose ("I'm done", "shipped
|
|
95
|
+
the auth work") yields NO claim: there is no honest way to derive the effect's
|
|
96
|
+
*identifier* from prose without inventing it (docs/134 §2.1). A claim you cannot
|
|
97
|
+
name is a **NO_CLAIM** — surface it for a human, do not manufacture one to witness.
|
|
98
|
+
|
|
99
|
+
Map each named claim to its witness rung:
|
|
100
|
+
|
|
101
|
+
| Claim type | What the worker asserts | Witness (byte-author ≠ agent) | Verb today |
|
|
102
|
+
|---|---|---|---|
|
|
103
|
+
| **git phase** | "(plan, phase) shipped" | git ancestry + ship-stamp grammar | `dos verify` (Step 3a) — SHIPPED |
|
|
104
|
+
| **terminal state** | the result itself is real | harness authorship marker | `dos verify-result` (Step 1) — SHIPPED |
|
|
105
|
+
| **created file / DB row / sent message / deploy** | "I created X / inserted row Y / sent Z" | a fresh GET / state-diff / OS exit / counterparty record | **NO CLI verb** (Step 3b) — Python API gap |
|
|
106
|
+
|
|
107
|
+
The first two are shipped CLI verbs. The third is the open seam — handled honestly
|
|
108
|
+
in Step 3b.
|
|
109
|
+
|
|
110
|
+
## Step 3 — Witness the claim on a non-forgeable rung
|
|
111
|
+
|
|
112
|
+
### Step 3a — git-phase claims: ask the truth syscall
|
|
113
|
+
|
|
114
|
+
For a `(plan, phase)` claim, never trust the worker's "I committed it" and never
|
|
115
|
+
grep commit subjects yourself:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
dos verify --workspace . <PLAN> <PHASE> --json
|
|
119
|
+
echo "exit=$?"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Read the `ShipVerdict`: `{shipped, source, sha?}`. Branch on `exit_codes.verify`
|
|
123
|
+
(`shipped:0`, `not_shipped:1`, `contract_error:2`), and **read the rung**:
|
|
124
|
+
|
|
125
|
+
- `source: "registry"` — the strongest git ship; a ship row exists. Fold it.
|
|
126
|
+
- `source: "grep-subject"` — a commit *subject* carried the phase token. SHIPPED,
|
|
127
|
+
but weaker — a subject can flip the verdict even if little was built. Fold it,
|
|
128
|
+
but mark the rung; do not treat it as equal to `registry`.
|
|
129
|
+
- `source: "none"` — no positive git evidence. The worker CLAIMED the phase but
|
|
130
|
+
git does not corroborate it. **Do NOT fold the claim** — this is the narrated
|
|
131
|
+
success the world does not back. Surface it; route the unit for re-dispatch.
|
|
132
|
+
|
|
133
|
+
This is grounding because git is a witness the worker did not author: the commit's
|
|
134
|
+
existence in ancestry is a fact about the repository, not about the worker's prose.
|
|
135
|
+
|
|
136
|
+
### Step 3b — created-file / DB-row / sent-message claims: the read-back gap
|
|
137
|
+
|
|
138
|
+
**There is NO `dos verify-effect` (or `dos witness`) CLI verb.** The effect-witness
|
|
139
|
+
join is shipped in the kernel as a **Python API**, not a command — exactly like
|
|
140
|
+
`tool_stream` has no `dos tool-stream` verb (see EXAMPLES.md Recipe 5). So for any
|
|
141
|
+
effect outside git, the skill documents the pattern and **logs the gap at
|
|
142
|
+
runtime**, rather than pretending a verb exists.
|
|
143
|
+
|
|
144
|
+
The pattern (kernel modules, all pure; the read-back I/O lives in a `drivers/*`
|
|
145
|
+
witness, never in the skill):
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from dos.claim_extract import extract_claims # (a) abstain, never invent
|
|
149
|
+
from dos.effect_witness import EffectClaim, witness_effect
|
|
150
|
+
from dos.evidence import gather_evidence, resolve_evidence_source # (b) read-back at the boundary
|
|
151
|
+
|
|
152
|
+
# (a) Extract the claim from the worker's terminal text (boundary read done already).
|
|
153
|
+
# extract_claims fires only on an explicit ID-shaped token or a DOS-CLAIM:
|
|
154
|
+
# marker — free prose yields []. That [] is a NO_CLAIM, not a pass.
|
|
155
|
+
|
|
156
|
+
# (b) Gather a read-back from a witness the agent did not author. A real witness
|
|
157
|
+
# (a fresh HTTP GET, an OS exit code, a state-snapshot diff) lives in a driver
|
|
158
|
+
# registered under the `dos.evidence_sources` entry-point group; the skill
|
|
159
|
+
# resolves it by name and runs it at the boundary:
|
|
160
|
+
source = resolve_evidence_source("<your-witness-name>") # built-in `null` if none wired
|
|
161
|
+
facts = gather_evidence(source, subject="<effect key>", config=cfg)
|
|
162
|
+
|
|
163
|
+
# (c) Fold ONLY confirmed: the join is capped by the read-back's accountability.
|
|
164
|
+
verdict = witness_effect(EffectClaim(key="<effect key>", narrated="<what worker said>"),
|
|
165
|
+
(facts,))
|
|
166
|
+
# verdict.verdict -> CONFIRMED | REFUTED | UNWITNESSED | NO_CLAIM
|
|
167
|
+
# verdict.believe -> True ONLY on CONFIRMED (a non-forgeable witness saw it PRESENT)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
The load-bearing rule, enforced structurally by `believe_under_floor`: a read-back
|
|
171
|
+
on the **forgeable floor** (`AGENT_AUTHORED` — the agent re-reading its own
|
|
172
|
+
surface, a pasted receipt, an mtime) is recorded but **structurally incapable** of
|
|
173
|
+
granting belief. Belief requires an `OS_RECORDED` / `THIRD_PARTY` witness. So the
|
|
174
|
+
worst a lying same-surface read-back can do is be ignored (a safe-direction
|
|
175
|
+
no-op), never manufacture a CONFIRMED.
|
|
176
|
+
|
|
177
|
+
**The four outcomes a consumer routes on:**
|
|
178
|
+
|
|
179
|
+
- **CONFIRMED** — fold it. A non-forgeable witness re-read the world and the effect
|
|
180
|
+
is PRESENT.
|
|
181
|
+
- **REFUTED** — do NOT fold; RED-flag it. A non-forgeable witness re-read and the
|
|
182
|
+
effect is ABSENT (the silent frontier-fail made visible). Route for re-dispatch.
|
|
183
|
+
- **UNWITNESSED** — do NOT fold as confirmed. No accountable witness was reached
|
|
184
|
+
(or only a forgeable-floor read). The honest abstain — surface it; this is the
|
|
185
|
+
runtime gap to log (see "What this skill deliberately does NOT do").
|
|
186
|
+
- **NO_CLAIM** — the worker asserted no checkable effect. Nothing to witness;
|
|
187
|
+
surface it for a human. NOT a pass.
|
|
188
|
+
|
|
189
|
+
**Log the gap, never silently skip it.** The first time a non-git effect needs
|
|
190
|
+
witnessing and **no `dos.evidence_sources` driver is wired** (so `gather_evidence`
|
|
191
|
+
runs against the built-in `null` source → UNWITNESSED), emit a one-line `log` naming the unwitnessed effect
|
|
192
|
+
and the missing witness — so the capability gap is surfaced at runtime, not buried
|
|
193
|
+
here. An UNWITNESSED effect must be reported up, never laundered into the fold.
|
|
194
|
+
|
|
195
|
+
## Step 4 — Partition the fold and carry coverage forward
|
|
196
|
+
|
|
197
|
+
Partition the results into three buckets, and **carry the count into whatever consumes
|
|
198
|
+
the fold** (a synthesis prompt, a downstream stage):
|
|
199
|
+
|
|
200
|
+
- **CONFIRMED / SHIPPED** → folded. These are the only results another agent's
|
|
201
|
+
input may be built from.
|
|
202
|
+
- **REFUTED / NOT_SHIPPED / DEAD** → routed for re-dispatch, **counted in the
|
|
203
|
+
denominator**. A fan-out of N that confirmed M is M/N — never silently N/N.
|
|
204
|
+
- **UNWITNESSED / NO_CLAIM** → surfaced for a human; held out of the fold.
|
|
205
|
+
|
|
206
|
+
The coverage fact (`confirmed M of declared N`, plus the bucket each result landed
|
|
207
|
+
in) is data the synthesizer must SEE — a synthesis told "7/7 returned" when only
|
|
208
|
+
4/7 were witnessed will confidently launder the gap.
|
|
209
|
+
|
|
210
|
+
## What this skill deliberately does NOT do (no silent gap)
|
|
211
|
+
|
|
212
|
+
- **No model-judge over the return TEXT.** It NEVER asks a model (or a heuristic,
|
|
213
|
+
or itself) "does this return string *look* right / complete / successful?" That
|
|
214
|
+
is the docs/197 §4d wishful trap: a judge reading the worker's own narrated bytes
|
|
215
|
+
is re-deriving the author's output — **consistency, never grounding**. Belief
|
|
216
|
+
comes only from a read-back whose byte-author is not the worker. (A JUDGE rung
|
|
217
|
+
exists — the `dos.judges` seam — but it is advisory, fail-to-abstain, and applies
|
|
218
|
+
to the *residue the oracle ABSTAINED on*, fed independent evidence, never the
|
|
219
|
+
worker's own string as a substitute for the witness.)
|
|
220
|
+
- **No `dos verify-effect` verb (there isn't one).** Non-git effects are witnessed
|
|
221
|
+
via the Python API of Step 3b, and the gap is logged at runtime. The skill does
|
|
222
|
+
not pretend a CLI verb exists where it does not (the EXAMPLES.md Recipe 5
|
|
223
|
+
discipline).
|
|
224
|
+
- **No host evidence source baked in.** Which witness re-reads a created file or a
|
|
225
|
+
DB row is a `dos.evidence_sources` driver the host wires; the skill resolves it
|
|
226
|
+
by name, never names a host backend as a literal.
|
|
227
|
+
- **No enforcement.** It REPORTS a fold partition and proposes a re-dispatch; it
|
|
228
|
+
does not kill a worker or block a write. DOS is a PDP, not a PEP.
|
|
229
|
+
|
|
230
|
+
## Anti-patterns
|
|
231
|
+
|
|
232
|
+
- ❌ Interpolating `${result}` (a worker's return string) directly into a synthesis
|
|
233
|
+
prompt — that folds the worker's self-report as ground truth. Witness first.
|
|
234
|
+
- ❌ Treating a non-empty return string as success — a harness-synthesized death is
|
|
235
|
+
non-empty and survives `.filter(Boolean)`. Run `dos verify-result` first.
|
|
236
|
+
- ❌ Asking a model "is this output complete/correct?" — consistency, not
|
|
237
|
+
grounding. Gather a read-back from a surface the worker did not author.
|
|
238
|
+
- ❌ Counting only the workers that returned a string in the denominator — a DEAD or
|
|
239
|
+
UNWITNESSED result must be counted, or a 4/7 fan-out launders as 7/7.
|
|
240
|
+
- ❌ Folding an UNWITNESSED / NO_CLAIM result "because it probably worked" — abstain
|
|
241
|
+
surfaces it; it never folds as confirmed.
|
|
242
|
+
- ❌ Naming a specific lane, plan dir, or witness backend as a literal — read the
|
|
243
|
+
active layout from `dos doctor --json` and resolve witnesses by name.
|
|
244
|
+
|
|
245
|
+
## The one rule under this skill
|
|
246
|
+
|
|
247
|
+
A subagent's result is a **claim with a byte-author**. Fold it only when a witness
|
|
248
|
+
whose byte-author is *not the worker* corroborates the claimed effect. The worker's
|
|
249
|
+
model tier, its confidence, and its eloquence are irrelevant to that gate — which
|
|
250
|
+
is the whole point: the part that decides ground truth is never the part being
|
|
251
|
+
judged.
|