dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
@@ -0,0 +1,630 @@
1
+ """dos.drivers.ci_status — the CI/Checks oracle (a driver, the move-B reference).
2
+
3
+ docs/85 §2 names three ways to "extend the verifiable surface," and only one is a
4
+ kernel change. This module is the canonical instance of **move (B): a new artifact
5
+ oracle for the non-git surface**. `verify()` reads the git fossil — existence +
6
+ ancestry + subject grammar — and docs/84 §3.3 is blunt that a clean `verify()`
7
+ means *shipped*, not *correct*: git confirms the bytes are reachable, never that
8
+ they compile or pass tests. The single biggest *complete → correct* jump is a
9
+ **behavioral oracle**: "the build is green at this commit." That signal does not
10
+ live in git; it lives in a CI system. So it cannot be a kernel verb (it fails
11
+ gate 3 — *domain-free* — because it speaks a specific provider's Checks API), and
12
+ it must not grow a provider branch inside `dos.oracle`. It lives **here, in a
13
+ driver**, exactly as `drivers/llm_judge` does, and for the same structural reason:
14
+ it has the surface the kernel forbids (network I/O against a third party).
15
+
16
+ Where it sits on the docs/84 §4 rung-ladder — *above* every git rung, because its
17
+ referent is more accountable than a commit subject the agent typed:
18
+
19
+ non-git oracle (build/test/CI green) ← THIS module's verdict; strongest "complete ≈ correct"
20
+ registry stamp ⋈ git ancestry ← dos.oracle source="registry"
21
+ distinctive file-path overlap ← dos.oracle grep rung, file backstop
22
+ direct-ship subject match ← dos.oracle grep rung, subject
23
+ source="none" / via="" ← git history alone / could not confirm
24
+
25
+ The accountability spectrum (docs/85 §1) is the whole reason this is worth a rung:
26
+ a CI conclusion is **mutable third-party state on infrastructure the agent does not
27
+ control** — GitHub ran the workflow, recorded the conclusion, and the agent under
28
+ adjudication cannot retroactively forge a `check_run.conclusion == "success"` on a
29
+ public commit without compromising the CI system itself. That is a strictly more
30
+ accountable referent than the commit subject the same agent authored. It is NOT the
31
+ top of the spectrum — a CI system the agent *administers* (it can edit the workflow,
32
+ disable a required check, or re-run with a patched config) is only as honest as the
33
+ branch protection around it — which is exactly why this stays a **driver oracle the
34
+ host wires**, with the strength of the signal a property of the host's CI setup, not
35
+ a kernel guarantee. The kernel ships the socket; the host decides how accountable
36
+ the thing they plug in is.
37
+
38
+ The shape is the kernel's own, lifted from two templates already in the tree:
39
+
40
+ * the **boundary reader** `gather()` mirrors `dos.git_delta`: the subprocess
41
+ (`gh api …`) happens HERE, at the caller boundary, and every failure mode (no
42
+ `gh`, unauthenticated, network error, timeout, unknown SHA, malformed JSON)
43
+ degrades to an honest `NO_SIGNAL` evidence object — never a crash, never a
44
+ propagated exception. A repo with no CI wired gets "no signal," the truthful
45
+ floor, exactly as `verify` degrades to `source="none"`.
46
+ * the **pure classifier** `classify(CiEvidence, CiPolicy) -> CiVerdict` is in the
47
+ `dos.verdict` typed-verdict family (the `classify(Evidence, Policy) -> Verdict`
48
+ ABI that `liveness`/`scope` share): a closed-enum verdict, a frozen caller-
49
+ gathered evidence dataclass, a frozen policy with a `dos.toml`-shaped seam, an
50
+ operator-facing `reason`, and a `to_dict()` for the JSON/MCP/decisions surface.
51
+ `classify()` makes NO I/O — it reads the already-gathered check tallies, so the
52
+ whole verdict is replay-testable on frozen fixtures, the family discipline.
53
+
54
+ And it obeys the three judge-driver disciplines (docs/87) — it is the deterministic
55
+ cousin of `llm_judge`, so the same fences apply:
56
+
57
+ * **Advisory.** It reports a verdict; it never refuses a lease, reverts a commit,
58
+ or mutates a registry. A host MAY consult it (a `CiPredicate` over the arbiter's
59
+ conjunctive admission seam, or a RED row in the `dos decisions` queue) — but the
60
+ CI verdict and the admission decision stay different syscalls, the line
61
+ `liveness`/SPINNING and `scope`/SCOPE_CREEP both hold.
62
+ * **Fail-safe, never fail-open.** With no provider reachable the verdict is
63
+ `NO_SIGNAL` (route to a human), and a CI system mid-run is `PENDING` (not yet
64
+ answerable) — never a fabricated GREEN. The conservative direction, the
65
+ `run_judge` fail-to-abstain discipline restated for a deterministic reader: an
66
+ absent oracle degrades to "ask a human," never to a rubber-stamp.
67
+ * **One-way import.** It imports the kernel; the kernel never imports it
68
+ (`drivers/__init__` rule, pinned by `tests/test_kernel_no_driver_import`).
69
+
70
+ "Use this pipeline ourselves" (the dog-food hook): `gather()` defaults its `repo`
71
+ to this project's own GitHub remote, so `python -m dos.drivers.ci_status <sha>`
72
+ adjudicates DOS's *own* CI run (`.github/workflows/ci.yml`) for a commit — the
73
+ substrate consulting the same green-build fossil it asks its users to trust. The
74
+ `/release` and `/stable-release` gates (which today shell `pytest` locally) can
75
+ consult this oracle instead, so "the suite is green" becomes a *verified* claim
76
+ against the third-party CI record rather than a local self-report.
77
+ """
78
+
79
+ from __future__ import annotations
80
+
81
+ import argparse
82
+ import enum
83
+ import json
84
+ import subprocess
85
+ from dataclasses import dataclass
86
+ from typing import Optional
87
+
88
+ # Imports the kernel — never the other way round (the driver rule). `config` for the
89
+ # CLI's workspace seam; the evidence vocabulary for the `EvidenceSource` face
90
+ # (`CiStatusSource`, the `dos.evidence_sources` occupant). The verdict itself is
91
+ # self-contained.
92
+ from dos import config as _config
93
+ from dos.evidence import Accountability, EvidenceFacts
94
+
95
+ # The project's own remote — the dog-food default so `python -m dos.drivers.ci_status
96
+ # <sha>` with no --repo adjudicates DOS's own pipeline. A host wiring this for its own
97
+ # repo passes --repo / the `repo=` argument; this default is only a convenience for the
98
+ # substrate verifying itself.
99
+ DEFAULT_REPO = "anthony-chaudhary/dos"
100
+
101
+ # Cap the network call so a hung API can't stall an evidence-gather — the
102
+ # `git_delta._GIT_TIMEOUT_S` discipline, a touch longer for a network round-trip.
103
+ _GH_TIMEOUT_S = 20
104
+
105
+
106
class Ci(str, enum.Enum):
    """Closed, four-state CI verdict; the states are mutually exclusive.

    Members are `str`-valued so a verdict round-trips through a CLI stdout token or
    an exit-code map without a lookup table (the same shape as `liveness.Liveness`,
    `scope.Scope`, and `gate_classify.Verdict`).

    Four states rather than a green/red boolean, on purpose: a binary answer would
    have to lie about the two situations in which there is no answer yet — checks
    still in flight (PENDING) and a provider that is unwired or unreachable
    (NO_SIGNAL). Folding either into RED would manufacture a failure; folding either
    into GREEN would manufacture a pass. Keeping them apart means the verdict never
    claims more than the evidence supports.
    """

    # Every required check concluded successfully.
    GREEN = "GREEN"
    # At least one required check failed, errored, or was cancelled.
    RED = "RED"
    # Checks exist, none has failed, but at least one is still queued or running.
    PENDING = "PENDING"
    # No checks found, or the provider is unwired/unreachable — ask a human.
    NO_SIGNAL = "NO_SIGNAL"

    def __str__(self) -> str:  # pragma: no cover - trivial
        # Print the bare token ("GREEN") instead of the qualified member name.
        return str(self._value_)
129
+
130
+
131
+ # GitHub check-run `conclusion` values that count as a hard failure. A check whose
132
+ # conclusion is none of these AND is not yet "completed" is still in flight
133
+ # (PENDING); a "neutral"/"skipped" conclusion is NOT a failure (a skipped optional
134
+ # job must not redden the verdict). This is the GitHub Checks vocabulary, named here
135
+ # because the verdict's meaning depends on it — the one place provider specifics are
136
+ # allowed (a driver, not the kernel).
137
+ _FAILING_CONCLUSIONS = frozenset({"failure", "timed_out", "cancelled", "action_required", "stale"})
138
+ _PASSING_CONCLUSIONS = frozenset({"success"})
139
+ # Conclusions that neither pass nor fail — they do not gate. A run made entirely of
140
+ # these (with none still in flight) is GREEN: nothing required failed.
141
+ _NEUTRAL_CONCLUSIONS = frozenset({"neutral", "skipped"})
142
+
143
+
144
@dataclass(frozen=True)
class CiPolicy:
    """The knobs that separate GREEN/RED/PENDING — policy, not mechanism.

    The same "mechanism is kernel, thresholds are config" split as
    `LivenessPolicy`/`ScopePolicy`. The defaults are GENERIC; a workspace declares
    its own in `dos.toml [ci]` read back through `SubstrateConfig`, the
    closed-config-as-data pattern (`[lanes]`/`[stamp]`/`[reasons]`/`[liveness]`/
    `[scope]`).

    required_checks  — when non-empty, ONLY check-runs whose name is in this set
                       gate the verdict; all others are advisory and ignored. The
                       mechanical analogue of GitHub branch-protection "required
                       status checks." Empty (default) = every check gates: the
                       strict, no-config floor (any failing check reddens).
                       Any iterable of names is accepted and stored as a
                       `frozenset`; a single bare string is treated as ONE name.
    treat_pending_as — what a still-running required check resolves to when you
                       need a binary answer downstream. Default keeps PENDING its
                       own state (the honest answer); a host that wants
                       "not-yet-green ⇒ block" can fold it. Kept as data so the
                       *verdict* never has to guess the host's risk posture.
                       A plain string token ("RED") is accepted and stored as the
                       matching `Ci` member.

    Raises:
        ValueError: `treat_pending_as` is not PENDING, RED, or NO_SIGNAL.
    """

    required_checks: frozenset[str] = frozenset()
    treat_pending_as: Ci = Ci.PENDING

    def __post_init__(self) -> None:
        # Normalize `required_checks` to a real frozenset. Without this, a bare
        # string would turn the `name in required_checks` membership test into a
        # SUBSTRING test, and a list would make the frozen dataclass unhashable.
        # The instance is frozen, so the write goes through object.__setattr__.
        required = self.required_checks
        if isinstance(required, str):
            required = (required,)
        if not isinstance(required, frozenset):
            object.__setattr__(self, "required_checks", frozenset(required or ()))

        # `Ci` is str-valued, so the plain string "RED" compares equal to `Ci.RED`
        # and would pass a bare membership test while still not being a `Ci`.
        # Resolve to the enum member first, then validate the member.
        try:
            fold = Ci(self.treat_pending_as)
        except (ValueError, TypeError):
            fold = None
        if fold not in (Ci.PENDING, Ci.RED, Ci.NO_SIGNAL):
            raise ValueError("treat_pending_as must be PENDING, RED, or NO_SIGNAL")
        if fold is not self.treat_pending_as:
            object.__setattr__(self, "treat_pending_as", fold)


# The generic, no-config policy: every check gates, PENDING stays PENDING.
DEFAULT_POLICY = CiPolicy()
175
+
176
+
177
@dataclass(frozen=True)
class CheckRun:
    """A single CI check-run, normalized from the provider's record.

    `status` carries GitHub's lifecycle value (`queued`/`in_progress`/`completed`).
    `conclusion` only means something once `status == "completed"`
    (`success`/`failure`/`neutral`/…) and is `None` until then.

    These fields are written by the CI system; the agent under adjudication cannot
    author them for a public commit. That is the gate-2 (unforgeable) property the
    whole oracle stands on.
    """

    # The check's display name — what `CiPolicy.required_checks` matches against.
    name: str
    # Provider lifecycle state.
    status: str
    # Provider outcome; None while the check has not concluded.
    conclusion: Optional[str] = None
191
+
192
+
193
@dataclass(frozen=True)
class CiEvidence:
    """The complete input to `classify()`, assembled by the CALLER beforehand.

    The verdict does no network or subprocess work (the arbiter/`git_delta` rule):
    `gather()`, the boundary, runs `gh api` and normalizes the response into this
    frozen object, and `classify()` consumes it purely.

    sha       — the commit the checks belong to. Echoed on the JSON/operator
                surface; it is not an input to the ladder.
    repo      — the `owner/name` the checks were read from: provenance telling the
                operator *which* CI record answered.
    checks    — the normalized check-runs. An EMPTY tuple is ambiguous on its own:
                it can mean "this commit genuinely has no CI" or "the provider
                could not be read." `reachable` disambiguates, so the verdict is
                NO_SIGNAL either way but the `reason` stays truthful.
    reachable — False when the provider call itself failed (no `gh`, unauthed,
                network/timeout, bad JSON). When False the verdict is always
                NO_SIGNAL whatever `checks` holds — nothing was observed, so
                nothing is asserted (fail-safe, never fail-open).
    detail    — a one-line note from the gather: the error class for an
                unreachable read, "" for a clean one. Carried into the verdict
                `reason` so the operator sees *why* there was no signal.
    """

    sha: str
    repo: str = ""
    checks: tuple[CheckRun, ...] = ()
    reachable: bool = True
    detail: str = ""
224
+
225
+
226
@dataclass(frozen=True)
class CiVerdict:
    """What `classify()` returns: one typed verdict plus the evidence behind it.

    `verdict` is the typed `Ci`. `reason` is a one-line, operator-facing summary
    that NAMES the driving checks (legible distrust — not just RED but *which*
    check failed), the RND/Axis-4 renderer seam, in the manner of `liveness`'s
    "0 commits, heartbeat 8m fresh." `failing`/`pending`/`passing` list the gating
    check names in each bucket. `to_dict()` is the JSON shape for `--json` / MCP /
    the decisions queue.

    Structurally conforms to `dos.verdict.TypedVerdict` (a `str`-enum `verdict`, a
    `str` `reason`, a JSON-shaped `to_dict()`), so a future `dos.verdicts.register`
    could expose it uniformly — though as a *driver* oracle it stays host-wired,
    not a `dos <verb>` subcommand (it fails gate 3, domain-free).
    """

    verdict: Ci
    reason: str
    evidence: CiEvidence
    failing: tuple[str, ...] = ()
    pending: tuple[str, ...] = ()
    passing: tuple[str, ...] = ()

    def to_dict(self) -> dict:
        """Return the verdict as plain dicts/lists/scalars, evidence echoed back."""
        source = self.evidence

        # One flat row per check-run, in the order the evidence holds them.
        check_rows = []
        for run in source.checks:
            check_rows.append(
                {"name": run.name, "status": run.status, "conclusion": run.conclusion}
            )

        evidence_view = {
            "sha": source.sha,
            "repo": source.repo,
            "reachable": source.reachable,
            "detail": source.detail,
            "checks": check_rows,
        }

        # Tuples become lists so the result is directly JSON-serializable.
        return {
            "verdict": self.verdict.value,
            "reason": self.reason,
            "failing": list(self.failing),
            "pending": list(self.pending),
            "passing": list(self.passing),
            "evidence": evidence_view,
        }
268
+
269
+
270
def classify(ev: CiEvidence, policy: CiPolicy = DEFAULT_POLICY) -> CiVerdict:
    """Classify one commit's CI status from already-gathered evidence. PURE — no I/O.

    Reads the ladder top to bottom (this function IS the answer to "is the build
    green at this commit?"):

      1. NO_SIGNAL — the provider was unreachable, OR there are no (gating) checks
                     at all. We observed nothing we can stand on → route to a human.
                     Checked FIRST on the unreachable path so a failed read can never
                     be mistaken for a real verdict (fail-safe).
      2. RED       — at least one *gating* check concluded in `_FAILING_CONCLUSIONS`.
                     A failure dominates: one red required check reddens the commit
                     regardless of how many others passed.
      3. PENDING   — no failure, but at least one gating check is not yet completed
                     (queued/in_progress, or completed with no conclusion), OR a
                     check named in `policy.required_checks` has not reported at
                     all while others have. Not green *yet*; not red either.
      4. NO_SIGNAL — nothing failed or is pending, but a gating check completed
                     with a conclusion outside the known pass/neutral/fail
                     vocabulary. It cannot be interpreted, so it is never counted
                     as green.
      5. GREEN     — every gating check completed with a passing or
                     neutral/skipped conclusion, and every required check reported.

    The RED-dominates ordering is the conservative one: when checks disagree, the
    failure wins, because a believer must not be told "green" while a required check
    is red. PENDING over GREEN for the same reason — an unfinished (or absent
    required) check is not a pass.

    Args:
        ev: the evidence gathered at the boundary.
        policy: only `required_checks` is read here; `treat_pending_as` is not
            applied by this function.

    Returns:
        A `CiVerdict` echoing `ev`, with `failing`/`pending`/`passing` holding the
        gating check names per bucket. `pending` also lists required checks that
        have not reported.
    """
    short_sha = ev.sha[:12]

    # 1a. NO_SIGNAL (unreachable) — the provider call failed. We saw nothing, so we
    #     assert nothing: NO_SIGNAL with the gather's error class in the reason.
    if not ev.reachable:
        return CiVerdict(
            verdict=Ci.NO_SIGNAL,
            reason=(
                f"no CI signal for {short_sha or '(no sha)'}"
                + (f" in {ev.repo}" if ev.repo else "")
                + (f" — {ev.detail}" if ev.detail else " — provider unreachable")
            ),
            evidence=ev,
        )

    # Select the gating subset: when the host declared required_checks, only those
    # gate; otherwise every check gates (the strict no-config floor).
    required = policy.required_checks
    if required:
        gating = tuple(c for c in ev.checks if c.name in required)
    else:
        gating = ev.checks

    # 1b. NO_SIGNAL (no checks) — the commit has no gating CI to read. Distinct from
    #     unreachable (the provider WAS read; there is just nothing here), and the
    #     reason says so.
    if not gating:
        if ev.checks:  # there were checks, but none matched required_checks
            reason = (
                f"none of the {len(ev.checks)} check(s) on {short_sha} match the "
                f"required set {sorted(required)} — no gating signal"
            )
        else:
            reason = (
                f"no CI checks found for {short_sha or '(no sha)'}"
                + (f" in {ev.repo}" if ev.repo else "")
                + " — commit has no CI, or none has reported yet"
            )
        return CiVerdict(verdict=Ci.NO_SIGNAL, reason=reason, evidence=ev)

    known = _FAILING_CONCLUSIONS | _PASSING_CONCLUSIONS | _NEUTRAL_CONCLUSIONS

    failing = tuple(
        c.name for c in gating
        if c.status == "completed" and (c.conclusion or "") in _FAILING_CONCLUSIONS
    )
    # Not-yet-conclusive: still queued/running, or completed without a conclusion.
    in_flight = tuple(
        c.name for c in gating
        if c.status != "completed" or not c.conclusion
    )
    passing = tuple(
        c.name for c in gating
        if c.status == "completed"
        and (c.conclusion or "") in (_PASSING_CONCLUSIONS | _NEUTRAL_CONCLUSIONS)
    )
    # Completed with a conclusion this module has no meaning for. Left out of every
    # bucket above, such a check would silently fall through to GREEN (fail-open).
    unrecognized = tuple(
        c for c in gating
        if c.status == "completed" and c.conclusion and c.conclusion not in known
    )
    # A required check with no check-run at all has not passed. Ignoring it would let
    # "every required check concluded successfully" be claimed while one is absent.
    if required:
        reported = {c.name for c in gating}
        missing = tuple(sorted(name for name in required if name not in reported))
    else:
        missing = ()
    pending = in_flight + missing

    # 2. RED — a failure dominates everything below it.
    if failing:
        return CiVerdict(
            verdict=Ci.RED,
            reason=(
                f"{len(failing)} check(s) failed at {short_sha}: "
                f"{', '.join(failing[:5])}" + (" …" if len(failing) > 5 else "")
            ),
            evidence=ev,
            failing=failing,
            pending=pending,
            passing=passing,
        )

    # 3. PENDING — no failure, but something hasn't finished (or reported). Not green yet.
    if pending:
        state = "still running or not yet reported" if missing else "still running"
        return CiVerdict(
            verdict=Ci.PENDING,
            reason=(
                f"{len(pending)} check(s) {state} at {short_sha} "
                f"({len(passing)} passed, 0 failed so far): "
                f"{', '.join(pending[:5])}" + (" …" if len(pending) > 5 else "")
            ),
            evidence=ev,
            failing=failing,
            pending=pending,
            passing=passing,
        )

    # 4. NO_SIGNAL — everything finished, nothing is known to have failed, but at
    #    least one conclusion cannot be interpreted. Route to a human rather than
    #    guess in either direction.
    if unrecognized:
        shown = ", ".join(f"{c.name}={c.conclusion}" for c in unrecognized[:5])
        return CiVerdict(
            verdict=Ci.NO_SIGNAL,
            reason=(
                f"{len(unrecognized)} check(s) at {short_sha} completed with an "
                f"unrecognized conclusion: {shown}"
                + (" …" if len(unrecognized) > 5 else "")
                + " — not counted as green"
            ),
            evidence=ev,
            failing=failing,
            pending=pending,
            passing=passing,
        )

    # 5. GREEN — every gating check finished, is understood, and none failed.
    return CiVerdict(
        verdict=Ci.GREEN,
        reason=(
            f"all {len(passing)} gating check(s) green at {short_sha}"
            + (f" in {ev.repo}" if ev.repo else "")
        ),
        evidence=ev,
        failing=failing,
        pending=pending,
        passing=passing,
    )
388
+
389
+
390
+ # ---------------------------------------------------------------------------
391
+ # The boundary reader — the ONLY I/O path (mirrors dos.git_delta).
392
+ # ---------------------------------------------------------------------------
393
+
394
+
395
def _run_gh(args: list[str]) -> tuple[Optional[str], str]:
    """Run `gh <args>` and return (stdout, "") on success, (None, error-class) else.

    The single guarded provider seam, the `llm_judge._call_provider` discipline:
    NEVER raises. Every failure mode — `gh` not installed, not authenticated, a
    non-zero exit (unknown SHA / no access), a network timeout, unusable arguments
    — returns `(None, <short reason>)` so `gather()` degrades to an unreachable
    evidence object. This is the one place the GitHub CLI is touched.

    Args:
        args: the arguments placed after `gh` (passed as a list, never a shell
            string).

    Returns:
        `(stdout, "")` when `gh` exits 0; otherwise `(None, reason)` where `reason`
        is a one-line, operator-facing description of the failure.
    """
    try:
        proc = subprocess.run(
            ["gh", *args],
            capture_output=True,
            text=True,
            # Decode explicitly instead of with the locale codec: on a non-UTF-8
            # locale (e.g. a Windows code page) a byte the codec cannot map would
            # raise UnicodeDecodeError out of this function. `errors="replace"`
            # keeps decoding total.
            encoding="utf-8",
            errors="replace",
            check=False,
            timeout=_GH_TIMEOUT_S,
        )
    except FileNotFoundError:
        return None, "gh CLI not installed"
    except subprocess.TimeoutExpired:
        return None, f"gh timed out after {_GH_TIMEOUT_S}s"
    except OSError as e:  # pragma: no cover - environment-dependent
        return None, f"gh failed to start ({e.__class__.__name__})"
    except ValueError as e:  # e.g. an embedded NUL byte in an argument
        return None, f"gh could not be invoked ({e.__class__.__name__})"

    if proc.returncode != 0:
        err = (proc.stderr or "").strip().splitlines()
        tail = err[-1] if err else f"exit {proc.returncode}"
        # The two most common, most actionable failures get a clean label.
        low = " ".join(err).lower()
        if "not logged" in low or "authentication" in low or "gh auth login" in low:
            return None, "gh not authenticated (run `gh auth login`)"
        if "not found" in low or "404" in low:
            return None, "commit/repo not found (or no access)"
        # Anything else: surface the last stderr line, capped to keep it one line.
        return None, f"gh error: {tail[:120]}"
    return proc.stdout, ""
429
+
430
+
431
+ def _parse_check_runs(raw: str) -> tuple[CheckRun, ...]:
432
+ """Parse `gh api .../check-runs` JSON into normalized `CheckRun`s.
433
+
434
+ Tolerant: malformed JSON or an unexpected shape yields `()` (the caller then
435
+ reports NO_SIGNAL "no checks"), never a raise — the `git_delta` parse-defensively
436
+ stance. The GitHub shape is `{"check_runs": [{"name", "status", "conclusion"}, …]}`.
437
+ """
438
+ try:
439
+ data = json.loads(raw)
440
+ except (ValueError, TypeError):
441
+ return ()
442
+ runs = data.get("check_runs") if isinstance(data, dict) else None
443
+ if not isinstance(runs, list):
444
+ return ()
445
+ out: list[CheckRun] = []
446
+ for r in runs:
447
+ if not isinstance(r, dict):
448
+ continue
449
+ name = str(r.get("name") or "").strip()
450
+ if not name:
451
+ continue
452
+ out.append(
453
+ CheckRun(
454
+ name=name,
455
+ status=str(r.get("status") or "").strip(),
456
+ conclusion=(str(r["conclusion"]).strip() if r.get("conclusion") else None),
457
+ )
458
+ )
459
+ return tuple(out)
460
+
461
+
462
def gather(sha: str, *, repo: str = DEFAULT_REPO) -> CiEvidence:
    """Read the CI check-runs for `sha` in `repo` via `gh api`. Boundary I/O.

    The subprocess lives HERE; the returned `CiEvidence` is pure data the
    `classify()` verdict consumes (the `git_delta`/arbiter discipline). Defaults
    `repo` to this project's own remote so the substrate can adjudicate its OWN
    pipeline (`python -m dos.drivers.ci_status <sha>`).

    Never raises. The outcomes are:

      * empty/blank `sha`            → `reachable=False`, "no commit SHA given".
      * `gh` failed                  → `reachable=False`, detail = the error class.
      * response is not the expected
        `{"check_runs": [...]}` JSON → `reachable=False` (a response that cannot
                                        be read is not evidence of "no CI").
      * response is one page of a
        longer list (`total_count`
        exceeds the rows returned)   → `reachable=False`; an unseen check could
                                        have failed, so nothing is asserted.
      * clean read                   → `reachable=True` with the parsed checks,
                                        possibly `()` when the commit has none.

    Args:
        sha: the commit (or ref) whose check-runs to read; surrounding whitespace
            is ignored.
        repo: `owner/name` of the repository to query.
    """
    sha = (sha or "").strip()
    if not sha:
        return CiEvidence(sha="", repo=repo, reachable=False, detail="no commit SHA given")

    # Ask for the largest page the endpoint allows; its default page is smaller, and
    # a failing check beyond the first page would otherwise go unseen.
    stdout, err = _run_gh(["api", f"repos/{repo}/commits/{sha}/check-runs?per_page=100"])
    if stdout is None:
        return CiEvidence(sha=sha, repo=repo, reachable=False, detail=err)

    # Validate the envelope here so an unreadable response is reported as such,
    # rather than being flattened into the same empty tuple as "no checks".
    try:
        payload = json.loads(stdout)
    except (ValueError, TypeError):
        payload = None
    runs = payload.get("check_runs") if isinstance(payload, dict) else None
    if not isinstance(runs, list):
        return CiEvidence(
            sha=sha, repo=repo, reachable=False,
            detail="malformed check-runs response from gh",
        )

    total = payload.get("total_count")
    if isinstance(total, int) and not isinstance(total, bool) and total > len(runs):
        return CiEvidence(
            sha=sha, repo=repo, reachable=False,
            detail=f"check-runs read truncated ({len(runs)} of {total} returned)",
        )

    checks = _parse_check_runs(stdout)
    return CiEvidence(sha=sha, repo=repo, checks=checks, reachable=True)
482
+
483
+
484
def status_of(sha: str, *, repo: str = DEFAULT_REPO, policy: CiPolicy = DEFAULT_POLICY) -> CiVerdict:
    """Gather, then classify, in a single call — the wired-host entry point.

    This is the natural call for a `/release` gate or a `CiPredicate`: the evidence
    is read at the boundary by `gather()` and judged purely by `classify()`. The
    function stays a two-line composition so the reader and the verdict remain
    independently testable.
    """
    evidence = gather(sha, repo=repo)
    return classify(evidence, policy)
492
+
493
+
494
+ # ---------------------------------------------------------------------------
495
+ # The EvidenceSource face — the `dos.evidence_sources` entry-point occupant.
496
+ # (docs/265 §4. The native verdict is `CiVerdict` with its four-way GREEN/RED/
497
+ # PENDING/NO_SIGNAL fidelity; the resolver/`active_evidence_sources` apparatus
498
+ # needs an `EvidenceSource` — name + accountability + gather(subject, config) —
499
+ # so this thin adapter maps the verdict onto the witness vocabulary the seam
500
+ # shares with `os_acceptance`/`paste_log`. `cmd_verify` calls `status_of` DIRECTLY
501
+ # for the richer four-way mapping (PENDING ≠ NO_SIGNAL there); this face is what a
502
+ # generic `evidence.gather_evidence` / `dos doctor` discovery consumes.)
503
+ # ---------------------------------------------------------------------------
504
+
505
+
506
class CiStatusSource:
    """An `evidence.EvidenceSource` over the CI/Checks oracle. `THIRD_PARTY`-tagged.

    The `subject` IS the commit SHA: "witness that the build is green at this
    commit" becomes "read the provider's check-runs for this SHA." `gather` calls
    `status_of(subject)` and maps the typed CI verdict onto `EvidenceFacts`:

      * GREEN               → **ATTESTED**  (every gating check concluded
                                             successfully — a third-party record
                                             the agent cannot author)
      * RED                 → **REFUTED**   (≥1 required check failed: a positive
                                             disconfirmation, stronger than "no
                                             signal")
      * PENDING / NO_SIGNAL → **NO_SIGNAL** (not answerable yet, or
                                             unwired/unreachable — abstain, never a
                                             fabricated GREEN; the fail-safe floor)

    `accountability` is CLASS-LEVEL and fixed at `THIRD_PARTY`: a CI conclusion is
    mutable state on infrastructure the agent does not control. A GREEN attestation
    is therefore eligible to grant belief under `believe_under_floor` — but only as
    the conjunctive upgrade docs/265 §1 fixes (`verify` never promotes a false git
    verdict on the strength of CI alone).

    `config` is accepted for Protocol conformance and is not read here; the
    repository and policy are fixed at construction time.
    """

    name = "ci_status"
    accountability = Accountability.THIRD_PARTY

    def __init__(self, *, repo: str = DEFAULT_REPO, policy: CiPolicy = DEFAULT_POLICY) -> None:
        # Fixed for the lifetime of the source; `gather` reads both on every call.
        self._repo = repo
        self._policy = policy

    def gather(self, subject: str, config: object) -> EvidenceFacts:
        """Witness the CI state of commit `subject` as an `EvidenceFacts`."""
        sha = (subject or "").strip()
        if not sha:
            # Nothing to look up: abstain, echoing the subject exactly as received.
            return EvidenceFacts.no_signal(
                self.name,
                self.accountability,
                subject,
                detail="no commit SHA given — nothing to read CI for",
            )

        outcome = status_of(sha, repo=self._repo, policy=self._policy)

        # Only GREEN attests and only RED refutes. PENDING and NO_SIGNAL both
        # abstain (the honest floor) — never a fabricated attestation or refutation.
        if outcome.verdict is Ci.GREEN:
            build = EvidenceFacts.attest
        elif outcome.verdict is Ci.RED:
            build = EvidenceFacts.refute
        else:
            build = EvidenceFacts.no_signal
        return build(self.name, self.accountability, sha, detail=outcome.reason)
560
+
561
+
562
+ # ---------------------------------------------------------------------------
563
+ # CLI — `python -m dos.drivers.ci_status <sha>` adjudicates a pipeline run.
564
+ # ---------------------------------------------------------------------------
565
+
566
+
567
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: read the CI verdict for one commit and print it.

    Args:
        argv: Argument vector to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 for GREEN, 1 for RED, 2 for PENDING and
        3 for NO_SIGNAL.
    """
    # `__doc__` is None under `python -OO`, and a docstring may open with a blank
    # line; in either case build the parser without a description instead of
    # crashing or showing an empty one.
    doc_lines = (__doc__ or "").strip().splitlines()
    ap = argparse.ArgumentParser(
        prog="dos.drivers.ci_status",
        description=doc_lines[0] if doc_lines else None,
    )
    ap.add_argument("sha", nargs="?", default="HEAD",
                    help="commit SHA to read CI for (default: HEAD, resolved against the workspace)")
    ap.add_argument("--repo", default=DEFAULT_REPO,
                    help=f"owner/name to read checks from (default: {DEFAULT_REPO})")
    ap.add_argument("--workspace", default=None,
                    help="workspace root, used only to resolve HEAD (default: $DISPATCH_WORKSPACE or cwd)")
    ap.add_argument("--required", default="",
                    help="comma-separated required check names; only these gate (default: all gate)")
    ap.add_argument("--json", action="store_true", help="machine-readable verdict")
    args = ap.parse_args(argv)

    # Resolve HEAD against the served workspace so `<sha>` may be a ref. Boundary I/O,
    # kept here in the CLI, never in the verdict. Degrades to the literal arg when git
    # cannot be run or cannot resolve it (the verdict will then report NO_SIGNAL if it
    # isn't a real SHA).
    sha = args.sha
    if sha == "HEAD" or not all(c in "0123456789abcdefABCDEF" for c in sha):
        cfg = _config.default_config(args.workspace)
        try:
            # `--verify` makes git emit exactly one object name or fail. Without it a
            # range-like revision (`a..b`, `rev^@`) prints several lines, which would
            # then be passed on as if they were a single SHA.
            r = subprocess.run(
                ["git", "rev-parse", "--verify", sha],
                cwd=str(cfg.paths.root), capture_output=True, text=True,
                check=False, timeout=10,
            )
        except (OSError, subprocess.SubprocessError):
            # git missing, not runnable, or timed out: keep the literal argument.
            pass
        else:
            resolved = r.stdout.strip()
            if r.returncode == 0 and resolved:
                sha = resolved

    policy = CiPolicy(
        required_checks=frozenset(
            s.strip() for s in args.required.split(",") if s.strip()
        )
    )
    verdict = status_of(sha, repo=args.repo, policy=policy)

    if args.json:
        print(json.dumps(verdict.to_dict(), indent=2, default=str))
    else:
        print(f"SHA      {verdict.evidence.sha[:12] or '(none)'}")
        print(f"REPO     {verdict.evidence.repo}")
        print(f"VERDICT  {verdict.verdict.value}")
        print(f"WHY      {verdict.reason}")
        if verdict.failing:
            print("FAILING  " + ", ".join(verdict.failing))
        if verdict.pending:
            print("PENDING  " + ", ".join(verdict.pending))
        if verdict.passing:
            print("PASSING  " + ", ".join(verdict.passing))

    # Exit-code map mirrors `dos verify` (SHIPPED=0/NOT=1): GREEN=0, everything that
    # is not a clean green is non-zero, so a CI gate can `&&` on it. RED=1 (failure),
    # PENDING=2 (not yet), NO_SIGNAL=3 (could not tell — a human's call).
    return {
        Ci.GREEN: 0, Ci.RED: 1, Ci.PENDING: 2, Ci.NO_SIGNAL: 3,
    }[verdict.verdict]
627
+
628
+
629
if __name__ == "__main__":
    # Run the CLI and hand its integer status back to the interpreter as the
    # process exit code.
    exit_status = main()
    raise SystemExit(exit_status)