dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/exporter.py ADDED
@@ -0,0 +1,320 @@
1
+ """exporter — drain the verdict journal outward to an observability backend (docs/266).
2
+
3
+ > **DOS records its own disbelief (docs/262) — but the record is a local island.**
4
+ > The verdict journal (`verdict_journal.py`) lands every adjudication a fleet makes
5
+ > as a `run_id`-correlated `VerdictEvent` in an append-only JSONL file, and `dos
6
+ > observe` reads it back. What was MISSING is the last hop: nothing ships that stream
7
+ > to where an operator's dashboards/alerts already live — Datadog, Grafana/Loki,
8
+ > Honeycomb, an OTLP collector, a syslog file. This seam is that outward connector.
9
+ > **No transport name appears in this module.**
10
+
11
+ Why this seam, not `notify`
12
+ ===========================
13
+
14
+ `notify` (docs/225) pushes a *projection snapshot* (`decisions` = "what needs a human
15
+ now", `top` = "what's running now") to a transport — it answers "page me on the
16
+ current state." That is the wrong shape for observability:
17
+
18
+ * `notify` sends ONE rendered `Notification` from a point-in-time snapshot; an exporter
19
+ ships a STREAM of structured events — every `VerdictEvent`, counts intact — so a
20
+ time-series backend can chart "liveness STALLED rate over 24h."
21
+ * `notify`'s payload is human-facing prose; an exporter's is machine-facing structure
22
+ (a metric point, an OTLP log record, a JSON line) keyed for aggregation.
23
+ * `notify` is operator-triggered / cron-cadence; an exporter is DRAIN-shaped — it
24
+ follows the journal forward and flushes new events, the way a log shipper tails a file.
25
+
26
+ So the exporter is the kernel's FIFTH pure-protocol + by-name-resolver seam — after
27
+ `judges` (the JUDGE rung), `overlap_policy` (the disjointness scorer), `hook_dialect`
28
+ (the host-hook renderer), and `notify` (the projection-delivery side) — on a new axis:
29
+ the verdict *stream*, drained outward. The shape is byte-for-byte `notify.py`'s: a pure
30
+ Protocol + a frozen result type + an unshadowable built-in + a by-name resolver + a
31
+ fail-soft wrapper. Every real connector (which names a transport as code — an OTLP
32
+ shipper is inherently OTLP-specific) lives in a driver and registers through the
33
+ `dos.exporters` entry-point group.
34
+
35
+ The neutral record is already there — `VerdictEvent`
36
+ =====================================================
37
+
38
+ The exporter does NOT invent a payload type. `verdict_journal.VerdictEvent` is already
39
+ the transport-agnostic, byte-clean fact: its `detail` carries the
40
+ environment-authored evidence counts the verdict was computed from (tokens, work,
41
+ ages), NEVER the agent's narration (the docs/138 invariant the journal enforces). An
42
+ exporter takes a batch of `VerdictEvent`s and ships them. The hard part — a clean,
43
+ correlated, forgeable-narration-free record — is done (docs/262).
44
+
45
+ Failure direction = fail-SOFT
46
+ =============================
47
+
48
+ Observability must NEVER crash the thing it observes — the same rule as `notify` and
49
+ the journal itself (`record()` logs-and-drops). So `export_safely` converts any
50
+ transport raise into a non-exported `ExportResult`. A *resolve* of an unknown exporter
51
+ name still raises (operator config error, surfaced at config time, the
52
+ `resolve_notifier` rule); a *send* never does. A down collector, a bad endpoint, an
53
+ absent optional extra → a non-exported result, never an exception into the drain loop.
54
+
55
+ The advisory floor (docs/99)
56
+ ============================
57
+
58
+ The exporter REPORTS; it never acts on the fleet. It reads the journal only — takes no
59
+ lease, mints no belief, stops no run, mutates no DOS state. It is a pure
60
+ read-of-the-journal → ship. The `decisions`/`observe`/`notify` read-only posture,
61
+ extended across the network boundary.
62
+
63
+ Pure-stdlib. The resolver is the unit-test surface; the only I/O in the whole spine is
64
+ inside a driver's `export` (and a `cmd_export` boundary that reads the journal).
65
+ """
66
+
67
+ from __future__ import annotations
68
+
69
+ import sys
70
+ from dataclasses import dataclass
71
+ from typing import TYPE_CHECKING, Protocol, Sequence, runtime_checkable
72
+
73
+ if TYPE_CHECKING: # pragma: no cover - typing only; never imported at runtime
74
+ from dos.verdict_journal import VerdictEvent
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # The result a driver returns — fail-soft: exported=0 is normal, never a raise.
79
+ # The `NotifyResult` analogue.
80
+ # ---------------------------------------------------------------------------
81
+
82
+
83
@dataclass(frozen=True)
class ExportResult:
    """What an `export` call reports back — a value, never an exception (fail-soft).

    Fields:
        exported: how many of the handed events the transport accepted. Zero is an
            ordinary outcome (a `null` sink, a `--dry-run`, a down collector).
        detail: a one-line human reason, e.g. `"wrote 12 lines to …"`, `"dry-run"`,
            `"null sink"` or `"error: …"`.
        cursor: the highest journal `seq` the drain shipped, as a string, so a later
            `--since <cursor>` resumes the forward drain without re-shipping. It is
            `""` when no event was shipped.
    """

    exported: int
    detail: str = ""
    cursor: str = ""

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict, in declaration order."""
        payload = {}
        for key in ("exported", "detail", "cursor"):
            payload[key] = getattr(self, key)
        return payload
101
+
102
+
103
+ # ---------------------------------------------------------------------------
104
+ # The Exporter protocol + the unshadowable built-in null sink.
105
+ # ---------------------------------------------------------------------------
106
+
107
+
108
@runtime_checkable
class Exporter(Protocol):
    """The driver-side seam: a transport that ships a batch of `VerdictEvent`s outward.

    Members:
        name: the name the exporter is registered under (`"file"`, `"statsd"`,
            `"otlp"`).
        export: ships the batch and returns an `ExportResult`.

    How the batch is shipped — JSONL lines, one metric per event, OTLP records — is the
    driver's decision, not the kernel's. An implementation MUST be fail-soft (it must not
    raise on a transport failure); `export_safely` remains the outer net either way.
    """

    name: str

    def export(self, events: "Sequence[VerdictEvent]") -> ExportResult:
        ...
122
+
123
+
124
class NullExporter:
    """The built-in baseline exporter: it ships nothing and cannot be shadowed.

    This is the default, so a bare `dos export` is a safe no-op: it reports how many
    events were handed over and sends them nowhere. A host opts IN to a real transport
    by naming one (`--to file`).

    A `cursor` (the highest seq seen) is still reported, so a `--since` drain advances
    even against `null` — useful for marking events as seen without shipping them.
    """

    name = "null"

    def export(self, events: "Sequence[VerdictEvent]") -> ExportResult:
        """Ship nothing; report the batch size in `detail` and the highest seq as cursor."""
        count = len(events)
        highest_seen = _max_seq_cursor(events)
        message = f"null sink (no transport configured) — {count} event(s) not shipped"
        return ExportResult(exported=0, detail=message, cursor=highest_seen)
146
+
147
+
148
+ _BUILT_IN_EXPORTERS: dict[str, type] = {"null": NullExporter}
149
+
150
+ EXPORTER_ENTRY_POINT_GROUP = "dos.exporters"
151
+
152
+
153
+ def _max_seq_cursor(events: "Sequence[VerdictEvent]") -> str:
154
+ """The highest `seq` in `events` as a string cursor, or "" when the batch is empty.
155
+
156
+ A tiny pure helper every driver reuses to fill `ExportResult.cursor` consistently —
157
+ the resumable-drain offset is the journal's own monotonic `seq`, so "ship past the
158
+ last shipped seq" is `--since <cursor>`. Robust to a non-int seq (degrades to 0)."""
159
+ mx = 0
160
+ for e in events:
161
+ try:
162
+ s = int(getattr(e, "seq", 0) or 0)
163
+ except (TypeError, ValueError):
164
+ s = 0
165
+ if s > mx:
166
+ mx = s
167
+ return str(mx) if events else ""
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # Resolver + discovery — built-ins first, then the `dos.exporters` plugins. The
172
+ # `resolve_notifier` / `resolve_judge` shape; discovery I/O at the call boundary.
173
+ # ---------------------------------------------------------------------------
174
+
175
+
176
def _discover_entry_point_exporters(*, _stderr=None) -> list[tuple[str, "Exporter"]]:
    """Load every `dos.exporters` entry-point plugin as `(name, exporter)`, sorted by name.

    An entry point that resolves to a class is instantiated with no arguments; anything
    else is used as loaded. A plugin that fails to load (or to construct) is skipped
    with a one-line warning instead of crashing — a broken third-party plugin is the
    operator's to fix, not a kernel fault.

    Args:
        _stderr: stream for the skip warnings; defaults to `sys.stderr`.

    Returns:
        The successfully loaded plugins. Empty when entry-point discovery itself is
        unavailable or fails.
    """
    sink = sys.stderr if _stderr is None else _stderr
    discovered: list[tuple[str, Exporter]] = []

    try:
        from importlib.metadata import entry_points
    except Exception:  # pragma: no cover - importlib.metadata is stdlib on supported Pythons
        return discovered

    try:
        candidates = entry_points(group=EXPORTER_ENTRY_POINT_GROUP)
    except TypeError:  # pragma: no cover - py<3.10 selectable-API fallback
        candidates = entry_points().get(EXPORTER_ENTRY_POINT_GROUP, [])  # type: ignore[attr-defined]
    except Exception:  # pragma: no cover - defensive: never let discovery crash a call
        return discovered

    for plugin in sorted(candidates, key=lambda item: item.name):
        try:
            loaded = plugin.load()
            instance = loaded() if isinstance(loaded, type) else loaded
        except Exception as err:  # pragma: no cover - depends on third-party plugin
            print(
                f"warning: exporter plugin {plugin.name!r} failed to load ({err}); skipping",
                file=sink,
            )
        else:
            discovered.append((plugin.name, instance))
    return discovered
207
+
208
+
209
+ def _accepted_kwargs(ctor: type, kwargs: dict) -> dict:
210
+ """Filter `kwargs` to the parameters `ctor.__init__` actually accepts.
211
+
212
+ The export CLI builds ONE superset bag (path/host/port/endpoint/dry_run/root) and
213
+ hands it to whichever transport was named, so it need not branch per driver. But a
214
+ transport's `__init__` is keyword-only and would raise on an unexpected kwarg (`file`
215
+ does not take `host`; `statsd` does not take `path`). So we pass only the parameters
216
+ the constructor declares — unless it declares `**kwargs` (a `VAR_KEYWORD` param), in
217
+ which case it absorbs the rest and we forward everything. Pure introspection; no I/O.
218
+ The `notify._accepted_kwargs` twin.
219
+ """
220
+ try:
221
+ import inspect
222
+
223
+ params = inspect.signature(ctor).parameters
224
+ except (TypeError, ValueError): # pragma: no cover - builtins without a signature
225
+ return kwargs
226
+ if any(p.kind is p.VAR_KEYWORD for p in params.values()):
227
+ return kwargs
228
+ return {k: v for k, v in kwargs.items() if k in params}
229
+
230
+
231
def resolve_exporter(name: str, *, _stderr=None, **kwargs) -> "Exporter":
    """Look an exporter up by name: built-ins first, then `dos.exporters` plugins.

    Built-ins (`null`) are checked FIRST, so a plugin can never shadow them. `kwargs`
    (e.g. `path=…`, `host=…`, `port=…`, `endpoint=…`, `dry_run=…`) are passed to a
    class-style occupant after being FILTERED to the parameters its constructor accepts,
    so the CLI can hand the same superset to any transport. A plugin that exposes a
    pre-built instance is returned as-is and ignores `kwargs`.

    Args:
        name: the exporter name to resolve.
        _stderr: stream for plugin-discovery warnings; defaults to `sys.stderr`.
        **kwargs: constructor arguments on offer.

    Raises:
        ValueError: when the named plugin fails to load, or when the name is unknown.
            The unknown-name message lists the known names — a typo'd `--to` is an
            operator error, never a silent degrade to `null`.
    """
    if name in _BUILT_IN_EXPORTERS:
        builtin = _BUILT_IN_EXPORTERS[name]
        ctor_args = _accepted_kwargs(builtin, kwargs)
        if not ctor_args:
            return builtin()
        return builtin(**ctor_args)

    sink = sys.stderr if _stderr is None else _stderr

    try:
        from importlib.metadata import entry_points
    except Exception:  # pragma: no cover
        entry_points = None  # type: ignore[assignment]

    target: object | None = None
    if entry_points is not None:
        try:
            candidates = entry_points(group=EXPORTER_ENTRY_POINT_GROUP)
        except TypeError:  # pragma: no cover - py<3.10
            candidates = entry_points().get(EXPORTER_ENTRY_POINT_GROUP, [])  # type: ignore[attr-defined]
        except Exception:  # pragma: no cover
            candidates = []
        # Only the first entry point carrying the requested name is considered.
        match = next((ep for ep in candidates if ep.name == name), None)
        if match is not None:
            try:
                target = match.load()
            except Exception as err:  # pragma: no cover - third-party
                raise ValueError(
                    f"exporter {name!r} failed to load: {err}"
                ) from err

    if isinstance(target, type):
        # A class occupant: construct it with the kwargs its constructor accepts.
        return target(**_accepted_kwargs(target, kwargs))  # type: ignore[return-value]
    if target is not None:
        return target  # a pre-built instance ignores kwargs

    plugin_names = [n for n, _ in _discover_entry_point_exporters(_stderr=sink)]
    known = sorted(set(_BUILT_IN_EXPORTERS) | set(plugin_names))
    raise ValueError(f"unknown exporter {name!r}; known: {', '.join(known)}")
279
+
280
+
281
def active_exporters(*, _stderr=None) -> list[tuple[str, "Exporter"]]:
    """Every RESOLVABLE exporter as `(name, exporter)` — built-ins THEN discovered.

    This performs entry-point discovery (I/O), so it is a call-boundary helper.

    A discovered plugin registered under a built-in name is left out: built-ins resolve
    first and cannot be shadowed, so such a plugin can never be reached by name. Listing
    it would advertise an exporter that does not resolve and would duplicate the name.

    Args:
        _stderr: stream for plugin-discovery warnings; passed through unchanged.

    Returns:
        Freshly constructed built-ins followed by the non-colliding discovered plugins.
    """
    listed: list[tuple[str, Exporter]] = [
        (builtin_name, cls()) for builtin_name, cls in _BUILT_IN_EXPORTERS.items()
    ]
    for plugin_name, plugin in _discover_entry_point_exporters(_stderr=_stderr):
        if plugin_name in _BUILT_IN_EXPORTERS:
            continue  # shadowed by a built-in: unreachable through the resolver
        listed.append((plugin_name, plugin))
    return listed
289
+
290
+
291
def active_exporter_names(*, _stderr=None) -> list[str]:
    """Return just the names from `active_exporters`, in the same order."""
    names: list[str] = []
    for exporter_name, _exporter in active_exporters(_stderr=_stderr):
        names.append(exporter_name)
    return names
294
+
295
+
296
+ # ---------------------------------------------------------------------------
297
+ # export_safely — the fail-soft wrapper. An export NEVER crashes the drain.
298
+ # ---------------------------------------------------------------------------
299
+
300
+
301
def export_safely(exporter: "Exporter", events: "Sequence[VerdictEvent]") -> ExportResult:
    """Ship `events` via `exporter`, converting ANY raise into a non-exported result.

    This is the fail-soft floor: observability must never take down the observed, so a
    transport that raises (a down collector, a bad endpoint, a buggy plugin) must never
    propagate into the drain loop. Contrast `resolve_exporter`, which DOES raise on an
    unknown name — that is a config-time operator error, surfaced before any drain.

    Args:
        exporter: the transport whose `export` is called.
        events: the batch handed to it.

    Returns:
        * the exporter's own `ExportResult` on a clean call;
        * `ExportResult(exported=0, detail="error: …")` when `export` raises. The detail
          carries the exception message, or the exception type name when the message is
          empty (e.g. a bare `TimeoutError()`), so the operator never sees a blank reason;
        * `ExportResult(exported=0, detail="exporter returned a non-ExportResult")` when
          the exporter returned some other shape.
    """
    try:
        outcome = exporter.export(events)
    except Exception as err:  # noqa: BLE001 - observability must not crash the observed
        reason = str(err) or type(err).__name__
        return ExportResult(exported=0, detail=f"error: {reason}")
    if not isinstance(outcome, ExportResult):
        # A misbehaving occupant returned an unknown shape; treat it as a soft failure
        # rather than trusting it downstream.
        return ExportResult(exported=0, detail="exporter returned a non-ExportResult")
    return outcome
dos/firing_label.py ADDED
@@ -0,0 +1,353 @@
1
+ """firing-label — turn each detector FIRING into a labeled (signal, ground-truth) point (docs/179).
2
+
3
+ > **The kernel already mints one git-authored label per phase (`oracle.is_shipped`).
4
+ > This module mints a SECOND, different kind of label for free: it joins a detector
5
+ > firing (an env/agent-authored event) to the run's git-minted outcome (a fact the
6
+ > judged agent did not author), producing a `(signal, was-it-real)` point the
7
+ > detector line is scored on — lift + false-alarm. It is the one fold in the
8
+ > docs/179 set that mints NEW ground truth, because it joins two independently-authored
9
+ > facts that were never compared before.**
10
+
11
+ The detector line (`tool_stream` / `terminal_error` / `dangling` / `precursor`,
12
+ docs/145/158/173) is scored by LIFT and FALSE-ALARM rate, not recall (a 76%-fail
13
+ bench makes recall meaningless — see the docs/159 naive-baseline result). Those two
14
+ numbers need LABELED firings: each time a detector fired, was it a true catch or a
15
+ false alarm? Today those labels come from hand-curated offline benchmark replays
16
+ (docs/158-163, 174, 177) — a tiny, expensive, static set. This module turns every
17
+ live run into a small batch of such labels, drawn from data the kernel already has.
18
+
19
+ The fold, stated plainly
20
+ =========================
21
+
22
+ A `DetectorFiring` is "detector D fired signal S at step N of run R." Its label is
23
+ the run's GIT-MINTED outcome read off `trace.TraceFrame` — never the agent's
24
+ `claimed` self-report (the docs/138 byte-author invariant; `StepRow.claimed_sha`
25
+ is shown by `trace` but is NEVER read here). `label_firings` joins each firing to
26
+ its run's frame and emits one `LabeledPoint` with a closed `LabelOutcome`:
27
+
28
+ * **TRUE_POSITIVE** — the detector fired AND the run's git-minted outcome
29
+ confirms the no-progress the detector accused: the run has
30
+ a non-empty residual (declared steps the kernel never
31
+ verified) and produced no commits since its start_sha. The
32
+ stall was real; the run did not recover. A true catch.
33
+ * **FALSE_ALARM** — the detector fired BUT the run's git-minted outcome shows
34
+ progress: the run verified at least one declared step OR
35
+ landed at least one commit since start. The loop was not
36
+ terminally stuck (a legitimate poll, an eventual-consistency
37
+ wait that resolved, a stall the agent recovered from). The
38
+ false-alarm count the detector is penalized on.
39
+ * **UNVERIFIABLE** — the firing joined to a run, but the run carries NO
40
+ git-minted ground truth to judge against: no INTENT
41
+ declared (nothing to have a residual of) and no commits.
42
+ Refuse-don't-guess — we will NOT call an unjudgeable firing
43
+ a TP or an FP (the §5a optimism trap, inverted).
44
+ * **BROKEN_LINK** — the firing carries no `run_id` (a pre-docs/179 record, or a
45
+ hook fired outside a DOS spine), so it cannot be joined to a
46
+ frame at all. Counted, never guessed onto a run by time (the
47
+ docs/118/137 "fail toward no-match" rule). The honest
48
+ coverage tally.
49
+
50
+ The ground-truth rule is deliberately RUN-TERMINAL, and its bias is named, not
51
+ buried: it judges a firing against the run's eventual verified-vs-declared state,
52
+ so it is most meaningful on runs that reached a terminal verdict (a long-lived run
53
+ still in flight reads as UNVERIFIABLE until it declares/verifies/commits). That
54
+ selection bias toward terminal runs is reported (`LabelSummary.unverifiable`), not
55
+ hidden — the docs/159 "no silent caps" discipline. A future phase can sharpen TP/FP
56
+ with a TIMESTAMP join (did progress land BEFORE or AFTER the firing's step), using
57
+ the `ts` the Phase-0 record already stamps; v1 is the conservative terminal rule.
58
+
59
+ Why the multiplier is honest (1-3×, not 5-15×)
60
+ ==============================================
61
+
62
+ A single REPEATING→STALLED run on the SAME stuck step is ONE firing, not many: the
63
+ Phase-0 sensor stamps `verdict_state` on a record only when it fired, and a run of
64
+ identical steps is the same `(tool, args, result)` triple — `dedupe_firings`
65
+ collapses consecutive same-`(run_id, signal, args/result identity)` firings to one
66
+ labeled point. So the audited `8bd8c736` read-loop (22 identical reads) mints
67
+ EXACTLY ONE `LabeledPoint`, not 22 — re-counting one stall as 22 labels would be
68
+ the consistency-not-grounding sin (one env fact counted many times is fake data).
69
+ The honest yield is ~1 label per DISTINCT detector-fired step that has a verified
70
+ side — typically 1-3 per run. That is still a real, free gain over the 1-label/phase
71
+ baseline, and every point has clean provenance.
72
+
73
+ ⚓ Kernel discipline (the litmus): PURE Layer-1 leaf — `label_firings`/`dedupe_firings`
74
+ are state-in / frozen-verdict-out, zero I/O (the firings + the `TraceFrame` are
75
+ gathered at the caller boundary, exactly as `liveness.classify` takes a pre-read
76
+ `ProgressEvidence`). It imports only `trace` (for the `TraceFrame`/`StepRow` types it
77
+ folds) + stdlib, names no host/driver, carries no policy. The label is read off the
78
+ git-minted columns of `TraceFrame`; `claimed_sha` is never consulted.
79
+ """
80
+
81
+ from __future__ import annotations
82
+
83
+ import enum
84
+ from dataclasses import dataclass, field
85
+ from typing import Iterable, Optional
86
+
87
+ from dos.trace import TraceFrame
88
+
89
+ # The durable_schema floor (docs/116 §6): a LabeledPoint is a record the detector
90
+ # eval reads, so it carries a schema tag. Additive fields do not bump it.
91
+ FIRING_LABEL_SCHEMA = 1
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # The firing — one detector event, the agent/env-authored INPUT to the join.
96
+ # ---------------------------------------------------------------------------
97
@dataclass(frozen=True)
class DetectorFiring:
    """A single detector event: "detector `detector` fired `signal` at step `step_index`".

    This is the INPUT half of the join. Its fields record what the detector SAID; none
    of them is a ground-truth label — the label comes from the run's git-minted outcome,
    joined in `label_firings`.

    Fields:
        run_id: the spine id that joins this firing to its run's `TraceFrame`. An
            empty/None value means `BROKEN_LINK` (cannot join; never time-guessed).
        detector: which detector fired ("tool_stream", "terminal_error", …).
        signal: the detector's verdict value ("REPEATING"/"STALLED"/…).
        step_index: the 0-based ordinal within the run's stream where it fired; the
            default is -1.
        identity: an optional opaque repeat-identity (e.g. the env-authored
            `result_digest`) so two firings on the SAME stuck step collapse to one
            labeled point. When empty, the step index stands in for it.
    """

    run_id: str
    detector: str
    signal: str
    step_index: int = -1
    identity: str = ""

    def _dedup_key(self) -> tuple[str, str, str, str]:
        """Return the tuple two firings must share to count as the same firing.

        The last element is the repeat-identity when one is set (so a repeated stall is
        one key regardless of step index), otherwise `"@<step_index>"` (so distinct
        steps stay distinct).
        """
        if self.identity:
            repeat_id = self.identity
        else:
            repeat_id = f"@{self.step_index}"
        return (self.run_id, self.detector, self.signal, repeat_id)
130
+
131
+
132
+ # ---------------------------------------------------------------------------
133
+ # The label — the closed OUTCOME vocabulary + the labeled point.
134
+ # ---------------------------------------------------------------------------
135
class LabelOutcome(str, enum.Enum):
    """The closed set of results from joining a firing to its run's git-minted truth.

    `UNVERIFIABLE` and `BROKEN_LINK` are first-class refusals: an unjudgeable firing is
    never called a catch or a false alarm.
    """

    # The firing was confirmed by the run's outcome — a true catch.
    TRUE_POSITIVE = "TRUE_POSITIVE"
    # The detector fired but the run's outcome shows progress.
    FALSE_ALARM = "FALSE_ALARM"
    # The firing joined to a run that carries no ground truth to judge against.
    UNVERIFIABLE = "UNVERIFIABLE"
    # The firing carries no `run_id`, so it cannot be joined to a run at all.
    BROKEN_LINK = "BROKEN_LINK"
146
+
147
+
148
@dataclass(frozen=True)
class LabeledPoint:
    """One (firing, git-minted outcome) calibration point — the fold's unit of new data.

    Fields:
        firing: what the detector said.
        outcome: the git-authored verdict on that firing.
        reason: names the rung that produced the label (its provenance).
        ground_truth: the compact evidence the label stands on (verified count /
            residual size / commit count), kept so the eval is auditable.
        schema: the record's schema tag; defaults to `FIRING_LABEL_SCHEMA`.
    """

    firing: DetectorFiring
    outcome: LabelOutcome
    reason: str = ""
    ground_truth: dict = field(default_factory=dict)
    schema: int = FIRING_LABEL_SCHEMA

    def to_dict(self) -> dict:
        """Flatten the point into a plain dict.

        The firing's `run_id`, `detector`, `signal` and `step_index` are lifted to the
        top level, `outcome` is emitted as its string value, and `ground_truth` is
        shallow-copied so the caller cannot mutate this point's evidence.
        """
        source = self.firing
        row: dict = {"schema": self.schema}
        row["run_id"] = source.run_id
        row["detector"] = source.detector
        row["signal"] = source.signal
        row["step_index"] = source.step_index
        row["outcome"] = self.outcome.value
        row["reason"] = self.reason
        row["ground_truth"] = dict(self.ground_truth)
        return row
175
+
176
+
177
@dataclass(frozen=True)
class LabelSummary:
    """Confusion-grid roll-up over a batch of `LabeledPoint`s.

    Only TRUE_POSITIVE and FALSE_ALARM points are "judgeable".
    `false_alarm_rate` is computed over those alone, because an UNVERIFIABLE
    or BROKEN_LINK point is neither a catch nor a false alarm. `coverage`
    reports what share of all points were judgeable, so the selection effect
    of the refusals is visible rather than hidden.

    Every count is recomputed from `points` on each access; nothing is cached.
    """

    points: tuple[LabeledPoint, ...]

    def _count(self, outcome: LabelOutcome) -> int:
        """Number of points whose outcome is exactly `outcome` (identity test)."""
        return sum(point.outcome is outcome for point in self.points)

    @property
    def true_positives(self) -> int:
        """Count of TRUE_POSITIVE points."""
        return self._count(LabelOutcome.TRUE_POSITIVE)

    @property
    def false_alarms(self) -> int:
        """Count of FALSE_ALARM points."""
        return self._count(LabelOutcome.FALSE_ALARM)

    @property
    def unverifiable(self) -> int:
        """Count of UNVERIFIABLE points."""
        return self._count(LabelOutcome.UNVERIFIABLE)

    @property
    def broken_links(self) -> int:
        """Count of BROKEN_LINK points."""
        return self._count(LabelOutcome.BROKEN_LINK)

    @property
    def judgeable(self) -> int:
        """TP + FP: the points carrying a real label, used as the rate denominator."""
        return self.true_positives + self.false_alarms

    @property
    def false_alarm_rate(self) -> Optional[float]:
        """FP / (TP + FP), or None when no point was judgeable (no 0/0 number)."""
        denominator = self.judgeable
        if denominator == 0:
            return None
        return self.false_alarms / denominator

    @property
    def coverage(self) -> Optional[float]:
        """Judgeable points / all points, or None when there are no points."""
        if not self.points:
            return None
        return self.judgeable / len(self.points)

    def to_dict(self) -> dict:
        """Serialise the total, the four per-outcome counts and both rates."""
        return dict(
            total=len(self.points),
            true_positives=self.true_positives,
            false_alarms=self.false_alarms,
            unverifiable=self.unverifiable,
            broken_links=self.broken_links,
            judgeable=self.judgeable,
            false_alarm_rate=self.false_alarm_rate,
            coverage=self.coverage,
        )
235
+
236
+
237
+ # ---------------------------------------------------------------------------
238
+ # The pure fold — firings + a TraceFrame in, labeled points out. No I/O.
239
+ # ---------------------------------------------------------------------------
240
def dedupe_firings(firings: Iterable[DetectorFiring]) -> tuple[DetectorFiring, ...]:
    """Keep the first firing for each dedup key, in first-seen order. PURE.

    Firings whose `_dedup_key()` is equal (same run, detector, signal and
    repeat-identity) are treated as one event observed more than once, so only
    the earliest is retained. One stall therefore yields one labeled point, not
    N. Firings with different keys all survive.
    """
    # Dicts preserve insertion order and `setdefault` never overwrites, so the
    # values are exactly the first firing per key in encounter order.
    first_by_key: dict[tuple[str, str, str, str], DetectorFiring] = {}
    for firing in firings:
        first_by_key.setdefault(firing._dedup_key(), firing)
    return tuple(first_by_key.values())
258
+
259
+
260
def _ground_truth(trace: TraceFrame) -> dict:
    """Summarise the evidence a label rests on as a small dict of counts. PURE.

    Reads only `trace.steps` (and each step's `state`), `trace.has_intent`,
    `trace.residual` and `trace.commits`. No other attribute of the frame is
    touched; in particular `claimed_sha` is never read.

    Returns a dict with keys `has_intent` (bool), `verified_steps`,
    `declared_steps`, `residual` and `commits_since_start` (ints).
    """
    verified_count = len([step for step in trace.steps if step.state == "VERIFIED"])
    return dict(
        has_intent=bool(trace.has_intent),
        verified_steps=verified_count,
        declared_steps=len(trace.steps),
        residual=len(trace.residual),
        commits_since_start=len(trace.commits),
    )
272
+
273
+
274
def label_one(firing: DetectorFiring, trace: Optional[TraceFrame]) -> LabeledPoint:
    """Assign one firing its outcome by walking a refusal-first ladder. PURE.

    Rungs are tried in order and the first that applies wins:

      1. the firing has no `run_id`                    -> BROKEN_LINK
      2. `trace` is None or `trace.found` is falsy     -> BROKEN_LINK
      3. no declared intent AND zero commits           -> UNVERIFIABLE
      4. at least one VERIFIED step OR one commit      -> FALSE_ALARM
      5. anything else                                 -> TRUE_POSITIVE

    Rungs 1-2 mean the firing cannot be joined to a run at all; nothing is
    guessed from timing. Rung 3 means the run offers nothing to judge against.
    Rungs 4-5 consult only the evidence from `_ground_truth`: a run that
    verified a step or landed a commit advanced, so the stall accusation was a
    false alarm whatever the agent claimed. A run with intent that verified
    nothing and committed nothing is the stall the detector caught.

    Rungs 3-5 attach the evidence dict as `ground_truth`; rungs 1-2 leave it
    at its empty default.
    """
    # Rung 1: without a run_id there is no run to join to.
    if not firing.run_id:
        return LabeledPoint(
            firing=firing,
            outcome=LabelOutcome.BROKEN_LINK,
            reason="firing carries no run_id — cannot join to a run "
                   "(pre-docs/179 record or non-spine hook)",
        )

    # Rung 2: the run is named but left no surface to join against.
    if not (trace is not None and trace.found):
        return LabeledPoint(
            firing=firing,
            outcome=LabelOutcome.BROKEN_LINK,
            reason=f"no surface found for run {firing.run_id} "
                   f"(no run.json / intent ledger / WAL event)",
        )

    evidence = _ground_truth(trace)
    verified = evidence["verified_steps"]
    commits = evidence["commits_since_start"]
    residual = evidence["residual"]

    # Rung 3: neither an intent nor a commit exists, so refuse to judge.
    if not (evidence["has_intent"] or commits):
        return LabeledPoint(
            firing=firing,
            outcome=LabelOutcome.UNVERIFIABLE,
            reason="run declared no intent and landed no commits — no git-minted "
                   "ground truth to judge the firing against (refuse, don't guess)",
            ground_truth=evidence,
        )

    # Rung 4: any verified step or commit counts as progress.
    made_progress = verified > 0 or commits > 0
    if made_progress:
        return LabeledPoint(
            firing=firing,
            outcome=LabelOutcome.FALSE_ALARM,
            reason=f"run made git-minted progress ({verified} step(s) verified, "
                   f"{commits} commit(s) since start) — the loop was not terminally "
                   f"stuck; the firing was a false alarm",
            ground_truth=evidence,
        )

    # Rung 5: intent declared, nothing verified, nothing committed.
    # NOTE(review): this rung is also reached when `residual` is 0 (intent
    # declared but no step left unverified); the wording below assumes a
    # residual remains. Confirm that case cannot occur upstream.
    return LabeledPoint(
        firing=firing,
        outcome=LabelOutcome.TRUE_POSITIVE,
        reason=f"run verified 0 of its declared steps and landed 0 commits; "
               f"{residual} step(s) remain unverified — the no-progress the detector "
               f"accused is confirmed by git (a true catch)",
        ground_truth=evidence,
    )
326
+
327
+
328
def label_firings(
    firings: Iterable[DetectorFiring],
    frame_for,
    *,
    dedupe: bool = True,
) -> tuple[LabeledPoint, ...]:
    """Label every firing in a batch against its own run's ground truth. PURE.

    `frame_for` is a caller-supplied callable mapping a run_id to that run's
    `TraceFrame`, or to None when the run is unknown (labeled BROKEN_LINK by
    `label_one`). Any I/O needed to build a frame happens inside that callable,
    not here. It is invoked at most once per distinct non-empty run_id; a None
    result is remembered too.

    With `dedupe` true (the default) same-firing duplicates are collapsed via
    `dedupe_firings` before labeling; pass False only to inspect raw firings.

    Returns one `LabeledPoint` per (deduped) firing, in input order. Wrap the
    result in `LabelSummary` for the confusion grid.
    """
    if dedupe:
        batch = dedupe_firings(firings)
    else:
        batch = tuple(firings)

    frames: dict[str, Optional[TraceFrame]] = {}
    labeled: list[LabeledPoint] = []
    for firing in batch:
        rid = firing.run_id
        # Firings without a run_id are never looked up; they resolve to None.
        if rid and rid not in frames:
            frames[rid] = frame_for(rid)
        labeled.append(label_one(firing, frames.get(rid)))
    return tuple(labeled)