dos-kernel 0.22.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. dos/__init__.py +261 -0
  2. dos/_bin/dos-hook.exe +0 -0
  3. dos/_filelock.py +255 -0
  4. dos/_job_policy.py +97 -0
  5. dos/_tree.py +145 -0
  6. dos/admission.py +433 -0
  7. dos/answer_shape.py +299 -0
  8. dos/arbiter.py +859 -0
  9. dos/archive_lock.py +266 -0
  10. dos/arg_provenance.py +814 -0
  11. dos/attest.py +472 -0
  12. dos/breaker.py +311 -0
  13. dos/churn.py +226 -0
  14. dos/claim_extract.py +229 -0
  15. dos/claim_ttl.py +150 -0
  16. dos/cli.py +8721 -0
  17. dos/commit_audit.py +666 -0
  18. dos/completion.py +466 -0
  19. dos/concurrency_class.py +154 -0
  20. dos/config.py +1380 -0
  21. dos/config_lint.py +464 -0
  22. dos/cooldown.py +390 -0
  23. dos/coverage.py +387 -0
  24. dos/dangling_intent.py +287 -0
  25. dos/data_class.py +397 -0
  26. dos/decisions.py +1274 -0
  27. dos/decisions_tui.py +251 -0
  28. dos/dispatch_top.py +740 -0
  29. dos/dispatch_top_tui.py +116 -0
  30. dos/drivers/__init__.py +40 -0
  31. dos/drivers/ci_status.py +630 -0
  32. dos/drivers/citation_resolve.py +703 -0
  33. dos/drivers/decision_stop.py +98 -0
  34. dos/drivers/export_file.py +173 -0
  35. dos/drivers/export_otlp.py +275 -0
  36. dos/drivers/export_statsd.py +242 -0
  37. dos/drivers/hook_dialects.py +391 -0
  38. dos/drivers/job.py +47 -0
  39. dos/drivers/llm_judge.py +360 -0
  40. dos/drivers/memory_recall.py +1231 -0
  41. dos/drivers/notify_slack.py +373 -0
  42. dos/drivers/notify_webhook.py +251 -0
  43. dos/drivers/operator_judge.py +114 -0
  44. dos/drivers/os_acceptance.py +228 -0
  45. dos/drivers/paste_log.py +132 -0
  46. dos/drivers/plan_scope.py +133 -0
  47. dos/drivers/self_improve.py +375 -0
  48. dos/drivers/similarity_judge.py +249 -0
  49. dos/drivers/state_diff.py +274 -0
  50. dos/drivers/supervisor.py +347 -0
  51. dos/drivers/watchdog.py +363 -0
  52. dos/drivers/workshop.py +160 -0
  53. dos/durable_schema.py +344 -0
  54. dos/effect_witness.py +393 -0
  55. dos/efficiency.py +318 -0
  56. dos/enforce.py +414 -0
  57. dos/enumerate.py +776 -0
  58. dos/env_print.py +378 -0
  59. dos/event_severity.py +258 -0
  60. dos/evidence.py +692 -0
  61. dos/exec_capability.py +256 -0
  62. dos/export_cursor.py +143 -0
  63. dos/exporter.py +320 -0
  64. dos/firing_label.py +353 -0
  65. dos/fleet_roll.py +226 -0
  66. dos/gate_classify.py +827 -0
  67. dos/gh4_coverage.py +179 -0
  68. dos/git_delta.py +122 -0
  69. dos/guard.py +215 -0
  70. dos/health.py +552 -0
  71. dos/help_summary.py +519 -0
  72. dos/home.py +934 -0
  73. dos/hook_binary.py +194 -0
  74. dos/hook_dialect.py +271 -0
  75. dos/hook_exit.py +191 -0
  76. dos/hook_install.py +437 -0
  77. dos/id_alloc.py +304 -0
  78. dos/improve.py +499 -0
  79. dos/intent_ledger.py +635 -0
  80. dos/interpret.py +176 -0
  81. dos/intervention.py +769 -0
  82. dos/intervention_eval.py +371 -0
  83. dos/journal_delta.py +308 -0
  84. dos/judge_eval.py +328 -0
  85. dos/judges.py +366 -0
  86. dos/lane_infer.py +127 -0
  87. dos/lane_journal.py +1001 -0
  88. dos/lane_lease.py +952 -0
  89. dos/lane_overlap.py +228 -0
  90. dos/lease_health.py +282 -0
  91. dos/lifecycle.py +211 -0
  92. dos/liveness.py +352 -0
  93. dos/lock_modes.py +185 -0
  94. dos/log_source.py +395 -0
  95. dos/loop_decide.py +1746 -0
  96. dos/marker_gate.py +254 -0
  97. dos/marker_sensor.py +396 -0
  98. dos/noop_streak.py +280 -0
  99. dos/notify.py +479 -0
  100. dos/observe.py +175 -0
  101. dos/oracle.py +1661 -0
  102. dos/overlap_eval.py +214 -0
  103. dos/overlap_policy.py +342 -0
  104. dos/packet_sidecar.py +267 -0
  105. dos/phase_shipped.py +1985 -0
  106. dos/pick_priority.py +225 -0
  107. dos/pickable.py +369 -0
  108. dos/picker_oracle.py +1037 -0
  109. dos/plan_board.py +513 -0
  110. dos/plan_board_tui.py +113 -0
  111. dos/plan_source.py +455 -0
  112. dos/posttool_sensor.py +528 -0
  113. dos/precursor_gate.py +499 -0
  114. dos/precursor_gate_eval.py +239 -0
  115. dos/preflight.py +825 -0
  116. dos/pretool_sensor.py +490 -0
  117. dos/proc_delta.py +181 -0
  118. dos/productivity.py +296 -0
  119. dos/provider_limit.py +242 -0
  120. dos/py.typed +4 -0
  121. dos/reason_morphology.py +299 -0
  122. dos/reasons.py +449 -0
  123. dos/reconcile.py +173 -0
  124. dos/recurring_wedge.py +206 -0
  125. dos/render.py +393 -0
  126. dos/result_state.py +468 -0
  127. dos/resume.py +578 -0
  128. dos/resume_evidence.py +293 -0
  129. dos/retention.py +344 -0
  130. dos/reward.py +372 -0
  131. dos/rewind.py +587 -0
  132. dos/rewind_evidence.py +168 -0
  133. dos/rewind_tokens.py +252 -0
  134. dos/run_id.py +342 -0
  135. dos/scope.py +520 -0
  136. dos/scope_source.py +382 -0
  137. dos/scout.py +982 -0
  138. dos/self_modify.py +209 -0
  139. dos/sibling_scan.py +569 -0
  140. dos/skills/EXAMPLES.md +584 -0
  141. dos/skills/dos-class-cycle/SKILL.md +107 -0
  142. dos/skills/dos-dispatch/SKILL.md +177 -0
  143. dos/skills/dos-dispatch-loop/SKILL.md +254 -0
  144. dos/skills/dos-goal-gate/SKILL.md +269 -0
  145. dos/skills/dos-next-up/SKILL.md +231 -0
  146. dos/skills/dos-promote/SKILL.md +114 -0
  147. dos/skills/dos-replan/SKILL.md +159 -0
  148. dos/skills/dos-replan-loop/SKILL.md +114 -0
  149. dos/skills/dos-self-improve/SKILL.md +213 -0
  150. dos/skills/dos-supervise-loop/SKILL.md +180 -0
  151. dos/skills/dos-unstick/SKILL.md +108 -0
  152. dos/skills/dos-witness-claim/SKILL.md +251 -0
  153. dos/stamp.py +1002 -0
  154. dos/state_health.py +387 -0
  155. dos/status.py +114 -0
  156. dos/stop_policy.py +334 -0
  157. dos/supervise.py +1014 -0
  158. dos/testwitness.py +392 -0
  159. dos/timeline.py +1027 -0
  160. dos/tokens.py +485 -0
  161. dos/tool_stream.py +393 -0
  162. dos/tool_stream_eval.py +226 -0
  163. dos/trace.py +524 -0
  164. dos/verdict.py +140 -0
  165. dos/verdict_cli.py +189 -0
  166. dos/verdict_journal.py +497 -0
  167. dos/verdict_rollup.py +217 -0
  168. dos/verdicts.py +181 -0
  169. dos/wedge_reason.py +282 -0
  170. dos_kernel-0.22.0.dist-info/METADATA +859 -0
  171. dos_kernel-0.22.0.dist-info/RECORD +178 -0
  172. dos_kernel-0.22.0.dist-info/WHEEL +5 -0
  173. dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
  174. dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
  175. dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
  176. dos_mcp/__init__.py +52 -0
  177. dos_mcp/py.typed +2 -0
  178. dos_mcp/server.py +779 -0
dos/__init__.py ADDED
@@ -0,0 +1,261 @@
1
+ """DOS — the Dispatch Operating System.
2
+
3
+ The domain-free trust substrate the `job` repo's dispatch family invented by
4
+ accident: a small, boring, deterministic kernel whose primary job is
5
+ **adjudicating ground truth across many unreliable, self-narrating workers** and
6
+ serializing their effects on shared state *without believing what they say they
7
+ did*. The tagline (dispatch-os-vision §0): **the kernel is the part that doesn't
8
+ believe the agents.**
9
+
10
+ This package is the "Stage-1 kernel extraction" — the spine lifted out of
11
+ the reference userland app's scripts into a standalone, pip-installable,
12
+ **workspace-parameterized** package. It carries the *mechanism* (verdict enum, ship oracle, structured
13
+ refusal, lease arbiter, correlation spine) and **no policy**: which lanes exist,
14
+ where plans live, and what counts as a ship stamp are per-workspace data the
15
+ host supplies via `dos.config.SubstrateConfig`. The package never assumes it
16
+ lives inside the repo whose state it manages.
17
+
18
+ The syscall ABI (dispatch-os-vision §4), mapped to the modules here:
19
+
20
+ verify() -> dos.oracle (the truth syscall — artifact over narration)
21
+ refuse(reason) -> dos.wedge_reason (structured refusal — the closed WedgeReason enum)
22
+ dos.picker_oracle (provable no-pick verification)
23
+ lease()/arbitrate()-> dos.arbiter (the pure admission kernel — ACR Plane ①)
24
+ spawn()/reap() -> dos.run_id (the correlation spine across subprocess boundaries)
25
+ dos.lane_journal (the write-ahead log for lease decisions)
26
+
27
+ The first userland app written against this kernel is `job` (job search), the
28
+ way `cat` was the first program for Unix.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ # Single-source the version from installed package metadata so it can never
34
+ # drift from pyproject.toml (it did: __version__ said 0.1.0 while pyproject
35
+ # shipped 0.2.0, so every `dos` CLI command misreported its version). The
36
+ # literal fallback is only hit when running from an uninstalled source tree;
37
+ # keep it equal to pyproject's version for that case.
38
+ #
39
+ # NB: the distribution name is `dos-kernel`, not `dos` (the bare `dos` name on
40
+ # PyPI is an unrelated package). The metadata lookup MUST use the dist name —
41
+ # looking up "dos" would miss our metadata and, if the squatter were installed,
42
+ # could even read ITS version. The import name is still `dos`; only the dist
43
+ # name differs.
44
+ try:
45
+ from importlib.metadata import version as _pkg_version
46
+
47
+ __version__ = _pkg_version("dos-kernel")
48
+ except Exception: # pragma: no cover - source-tree / not-installed fallback
49
+ __version__ = "0.22.0"
50
+
51
+ from dos import config # noqa: F401 (re-export the seam as the package entry)
52
+ from dos.config import ( # noqa: F401
53
+ SubstrateConfig,
54
+ LaneTaxonomy,
55
+ PathLayout,
56
+ HomeLayout,
57
+ job_config,
58
+ default_config,
59
+ active,
60
+ set_active,
61
+ resolve_dos_home,
62
+ active_home,
63
+ set_active_home,
64
+ )
65
+ from dos.reasons import ( # noqa: F401 (the refusal vocabulary, as data)
66
+ ReasonSpec,
67
+ ReasonRegistry,
68
+ BASE_REASONS,
69
+ )
70
+ from dos.stamp import ( # noqa: F401 (the ship-stamp grammar, as data)
71
+ StampConvention,
72
+ JOB_STAMP_CONVENTION,
73
+ GENERIC_STAMP_CONVENTION,
74
+ parse_phase_labels,
75
+ )
76
+ from dos.retention import ( # noqa: F401 (the scratch-retention caps, as data)
77
+ RetentionPolicy,
78
+ GENERIC_RETENTION,
79
+ UNBOUNDED_RETENTION,
80
+ should_compact,
81
+ )
82
+ from dos.intervention import ( # noqa: F401 (the actuation ladder, docs/143 §13)
83
+ Intervention,
84
+ Confidence,
85
+ InterventionSpec,
86
+ InterventionLadder,
87
+ BASE_INTERVENTIONS,
88
+ InterventionPolicy,
89
+ DEFAULT_POLICY as DEFAULT_INTERVENTION_POLICY,
90
+ InterventionDecision,
91
+ assess_confidence,
92
+ choose_intervention,
93
+ synthetic_corrective_result,
94
+ )
95
+ from dos.intervention_eval import ( # noqa: F401 (the net-task-delta eval, docs/143 §13.2)
96
+ InterventionCase,
97
+ InterventionReport,
98
+ )
99
+ from dos.enforce import ( # noqa: F401 (the enforcement-handler seam, docs/189 §A1)
100
+ EffectProposal,
101
+ EnforcementHandler,
102
+ ObserveHandler,
103
+ run_handler,
104
+ resolve_handler,
105
+ active_handlers,
106
+ active_handler_names,
107
+ )
108
+ from dos.tool_stream import ( # noqa: F401 (the loop-economics stall reader, docs/145)
109
+ StreamState,
110
+ StreamPolicy,
111
+ DEFAULT_POLICY as DEFAULT_STREAM_POLICY,
112
+ StreamStep,
113
+ ToolStream,
114
+ StreamVerdict,
115
+ classify_stream,
116
+ policy_from_table as stream_policy_from_table,
117
+ )
118
+ from dos.tool_stream_eval import ( # noqa: F401 (the stall-reader recovery eval, docs/145 §9)
119
+ StreamCase,
120
+ StreamEvalReport,
121
+ )
122
+ from dos.dangling_intent import ( # noqa: F401 (the premature-completion DETECTOR, docs/150)
123
+ Dangling,
124
+ DanglingPolicy,
125
+ DEFAULT_POLICY as DEFAULT_DANGLING_POLICY,
126
+ DEFAULT_CUES as DEFAULT_DANGLING_CUES,
127
+ StopEvidence,
128
+ DanglingVerdict,
129
+ classify_stop,
130
+ )
131
+ from dos.firing_label import ( # noqa: F401 (detector self-labeling — the data-multiplier, docs/179)
132
+ DetectorFiring,
133
+ LabelOutcome,
134
+ LabeledPoint,
135
+ LabelSummary,
136
+ label_one,
137
+ label_firings,
138
+ dedupe_firings,
139
+ )
140
+ from dos.pickable import ( # noqa: F401 (the pre-dispatch gate, docs/168 Concept 2)
141
+ HoldReason,
142
+ Pickability,
143
+ classify as pickable_classify,
144
+ )
145
+ from dos.pick_priority import ( # noqa: F401 (the freshness sort-key, docs/254)
146
+ AttemptSummary,
147
+ Freshness,
148
+ PickPriority,
149
+ classify as pick_priority_classify,
150
+ )
151
+ from dos import render # noqa: F401 (the renderer seam — Axis 4 output, RND)
152
+ from dos.render import ( # noqa: F401
153
+ Renderer,
154
+ BaseRenderer,
155
+ TextRenderer,
156
+ JsonRenderer,
157
+ BUILTIN_RENDERERS,
158
+ resolve_renderer,
159
+ known_renderers,
160
+ UnknownRenderer,
161
+ )
162
+ from dos.reward import ( # noqa: F401 (the reward-set admission verdict, docs/230/234)
163
+ RewardVerdict,
164
+ RewardLabel,
165
+ admit as reward_admit,
166
+ AcceptanceAB,
167
+ acceptance_ab as reward_acceptance_ab,
168
+ )
169
+
170
+ __all__ = [
171
+ "__version__",
172
+ "config",
173
+ "SubstrateConfig",
174
+ "LaneTaxonomy",
175
+ "PathLayout",
176
+ "HomeLayout",
177
+ "job_config",
178
+ "default_config",
179
+ "active",
180
+ "set_active",
181
+ "resolve_dos_home",
182
+ "active_home",
183
+ "set_active_home",
184
+ "ReasonSpec",
185
+ "ReasonRegistry",
186
+ "BASE_REASONS",
187
+ "StampConvention",
188
+ "JOB_STAMP_CONVENTION",
189
+ "GENERIC_STAMP_CONVENTION",
190
+ "parse_phase_labels",
191
+ "RetentionPolicy",
192
+ "GENERIC_RETENTION",
193
+ "UNBOUNDED_RETENTION",
194
+ "should_compact",
195
+ "Intervention",
196
+ "Confidence",
197
+ "InterventionSpec",
198
+ "InterventionLadder",
199
+ "BASE_INTERVENTIONS",
200
+ "InterventionPolicy",
201
+ "DEFAULT_INTERVENTION_POLICY",
202
+ "InterventionDecision",
203
+ "assess_confidence",
204
+ "choose_intervention",
205
+ "synthetic_corrective_result",
206
+ "InterventionCase",
207
+ "InterventionReport",
208
+ "EffectProposal",
209
+ "EnforcementHandler",
210
+ "ObserveHandler",
211
+ "run_handler",
212
+ "resolve_handler",
213
+ "active_handlers",
214
+ "active_handler_names",
215
+ "StreamState",
216
+ "StreamPolicy",
217
+ "DEFAULT_STREAM_POLICY",
218
+ "StreamStep",
219
+ "ToolStream",
220
+ "StreamVerdict",
221
+ "classify_stream",
222
+ "stream_policy_from_table",
223
+ "StreamCase",
224
+ "StreamEvalReport",
225
+ "DetectorFiring",
226
+ "LabelOutcome",
227
+ "LabeledPoint",
228
+ "LabelSummary",
229
+ "label_one",
230
+ "label_firings",
231
+ "dedupe_firings",
232
+ "Dangling",
233
+ "DanglingPolicy",
234
+ "DEFAULT_DANGLING_POLICY",
235
+ "DEFAULT_DANGLING_CUES",
236
+ "StopEvidence",
237
+ "DanglingVerdict",
238
+ "classify_stop",
239
+ "HoldReason",
240
+ "Pickability",
241
+ "pickable_classify",
242
+ "AttemptSummary",
243
+ "Freshness",
244
+ "PickPriority",
245
+ "pick_priority_classify",
246
+ "render",
247
+ "Renderer",
248
+ "BaseRenderer",
249
+ "TextRenderer",
250
+ "JsonRenderer",
251
+ "BUILTIN_RENDERERS",
252
+ "resolve_renderer",
253
+ "known_renderers",
254
+ "UnknownRenderer",
255
+ "reward",
256
+ "RewardVerdict",
257
+ "RewardLabel",
258
+ "reward_admit",
259
+ "AcceptanceAB",
260
+ "reward_acceptance_ab",
261
+ ]
dos/_bin/dos-hook.exe ADDED
Binary file
dos/_filelock.py ADDED
@@ -0,0 +1,255 @@
1
+ """Shared O_EXCL file-mutex primitives — the ONE home for "lock + value-keyed steal".
2
+
3
+ The package grew THREE independent hand-rolled `O_EXCL` mutexes — `archive_lock`
4
+ (the Step-9.5 archive ceremony), `lane_lease._Mutex` (the cross-process lane-grant
5
+ critical section), and `home._home_lock` (the machine-local index) — each with its
6
+ own copy of the same acquire/read/steal logic. When the steal was found to be a
7
+ non-value-keyed TOCTOU (two stealers of one stale lock could each displace the
8
+ other's fresh lock and both come away holding the mutex — a double-grant), the fix
9
+ landed in `archive_lock` ONLY, leaving the two siblings carrying the identical bug.
10
+ That is the "fix one site, the duplicate drifts" failure this module exists to end.
11
+
12
+ So the steal CAS and the O_EXCL write live HERE, parameterized on the lock path, and
13
+ every mutex routes through them — making the naive `unlink()` + `O_EXCL-create` steal
14
+ **unrepresentable** rather than a per-site choice. This is a Layer-1 leaf: pure
15
+ stdlib + a `Path`, no host names, no config, no policy. Each caller still owns its
16
+ own *path resolution* (env override + config seam) and its own retry/TTL policy; only
17
+ the three atomic FS ops — write, read, steal — are shared.
18
+
19
+ The discipline these encode:
20
+ * `write_lock` — atomic `O_CREAT|O_EXCL` create; raises `FileExistsError` if held.
21
+ The ONLY way a lock is born, so "two writers both think they created it" cannot
22
+ happen (the kernel serializes O_EXCL).
23
+ * `read_lock` — parse a lock file's `key: value` body to a dict (None if absent).
24
+ * `steal_stale` — a **value-keyed compare-and-swap**: rename the lock to a unique
25
+ temp, verify the grabbed content IS the stale lock the caller observed (else
26
+ restore-on-mismatch and concede), then drop it and O_EXCL-create. Only the actor
27
+ that displaces the EXACT stale lock it saw wins; a racer that already stole +
28
+ recreated is detected and conceded to. Two concurrent stealers → exactly one owner.
29
+ """
30
+ from __future__ import annotations
31
+
32
+ import datetime as dt
33
+ import os
34
+ import random
35
+ import sys
36
+ import time
37
+ from pathlib import Path
38
+
39
+
40
+ # ACCESS_DENIED / SHARING_VIOLATION / LOCK_VIOLATION — the three MoveFileEx
41
+ # replace-over-an-open-handle codes seen on win32 (the last from AV/indexer
42
+ # byte-range locks). Only these are retried by `atomic_replace`; any other
43
+ # OSError re-raises at once.
44
+ _REPLACE_RETRY_CODES = (5, 32, 33)
45
+
46
+
47
+ def atomic_replace(
48
+ src: os.PathLike | str,
49
+ dst: os.PathLike | str,
50
+ *,
51
+ budget_s: float = 3.0,
52
+ base_s: float = 0.05,
53
+ cap_s: float = 0.4,
54
+ _stderr=None,
55
+ ) -> None:
56
+ """`os.replace(src, dst)` with bounded exp-backoff retry on win32 rename races.
57
+
58
+ The ONE retry-hardened replace the package's atomic-write sites share
59
+ (`home._atomic_write_jsonl`, `home._write_card`, `run_id.write_run_json`) —
60
+ it belongs beside the O_EXCL write/read/steal primitives because it is the
61
+ same class of atomic FS op, parameterized on a path and carrying no policy.
62
+
63
+ On win32 `os.replace` -> `MoveFileEx` raises WinError 5 (ACCESS_DENIED) / 32
64
+ (SHARING_VIOLATION) / 33 (LOCK_VIOLATION) whenever ANY other process holds an
65
+ open handle to the DESTINATION during the rename — a lock-skipping reader, a
66
+ `dos top` tail, `git add`, an AV/Search-indexer/OneDrive scan. A bare
67
+ `os.replace` has NO retry, so the FIRST such collision kills the write
68
+ mid-ceremony. This bounds-retries only on `_REPLACE_RETRY_CODES`, and only
69
+ until `budget_s` elapses; any other OSError (or a non-Windows error, where
70
+ `winerror is None`) re-raises immediately, so on POSIX it degrades to exactly
71
+ one attempt (os.replace already overwrites atomically under open readers
72
+ there). A one-line WARN is emitted before each backoff sleep so a foreign
73
+ handle held LONGER than the budget is visible in the log rather than silently
74
+ absorbed and then crashing opaquely. Pure stdlib (os/time/random) — the
75
+ kernel "PyYAML-only" litmus holds.
76
+ """
77
+ stderr = _stderr if _stderr is not None else sys.stderr
78
+ deadline = time.monotonic() + budget_s
79
+ attempt = 0
80
+ while True:
81
+ try:
82
+ os.replace(src, dst)
83
+ return
84
+ except OSError as e:
85
+ attempt += 1
86
+ winerr = getattr(e, "winerror", None)
87
+ if winerr not in _REPLACE_RETRY_CODES or time.monotonic() >= deadline:
88
+ raise
89
+ sleep_s = min(base_s * (2 ** (attempt - 1)), cap_s) + random.uniform(0, 0.02)
90
+ print(
91
+ f"dos: warning: atomic replace onto {os.fspath(dst)} hit WinError "
92
+ f"{winerr} (attempt {attempt}); a reader/scan holds it open — "
93
+ f"retrying in {sleep_s:.2f}s",
94
+ file=stderr,
95
+ )
96
+ time.sleep(sleep_s)
97
+
98
+
99
+ def unlink_retry(
100
+ path: os.PathLike | str,
101
+ *,
102
+ budget_s: float = 1.0,
103
+ base_s: float = 0.02,
104
+ cap_s: float = 0.2,
105
+ _stderr=None,
106
+ ) -> bool:
107
+ """`os.unlink(path)` with bounded exp-backoff retry on the win32 open-handle races.
108
+
109
+ The release-side sibling of `atomic_replace`. On win32, deleting a file ANY other
110
+ process holds open raises WinError 5/32/33 (ACCESS_DENIED / SHARING_VIOLATION /
111
+ LOCK_VIOLATION) — the same MoveFileEx-family codes — so a bare `lock.unlink()` to
112
+ DROP a mutex can spuriously raise the instant a racing acquirer has the lock file
113
+ open mid-read/mid-steal. A dropped release then leaks the lock until its TTL, and a
114
+ raised release crashes the caller's `finally`. This retries only those codes within
115
+ `budget_s`; a missing file is success (the lock is already gone — the goal). Any
116
+ other OSError, or a POSIX error where `winerror is None`, re-raises at once (POSIX
117
+ unlink under an open handle succeeds anyway, so it degrades to one attempt there).
118
+ Returns True if the file is gone (unlinked or already absent), False only if the
119
+ budget elapsed with the handle still held — the caller treats that as "left for the
120
+ TTL to reap", never a crash. Pure stdlib — the kernel "PyYAML-only" litmus holds.
121
+ """
122
+ stderr = _stderr if _stderr is not None else sys.stderr
123
+ deadline = time.monotonic() + budget_s
124
+ attempt = 0
125
+ while True:
126
+ try:
127
+ os.unlink(os.fspath(path))
128
+ return True
129
+ except FileNotFoundError:
130
+ return True # already gone — the release goal is met
131
+ except OSError as e:
132
+ attempt += 1
133
+ winerr = getattr(e, "winerror", None)
134
+ if winerr not in _REPLACE_RETRY_CODES:
135
+ raise
136
+ if time.monotonic() >= deadline:
137
+ print(
138
+ f"dos: warning: could not unlink {os.fspath(path)} (WinError "
139
+ f"{winerr} — a reader/scan holds it open); leaving it for TTL reap",
140
+ file=stderr,
141
+ )
142
+ return False
143
+ sleep_s = min(base_s * (2 ** (attempt - 1)), cap_s) + random.uniform(0, 0.01)
144
+ time.sleep(sleep_s)
145
+
146
+
147
def now_iso() -> str:
    """Return the current UTC time as `YYYY-MM-DDTHH:MM:SSZ` (second precision).

    This is the timestamp format stamped into lock bodies as `acquired_at`.
    """
    utc_now = dt.datetime.now(tz=dt.timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
150
+
151
+
152
def lock_body(owner: str) -> str:
    """Render the canonical lock-file body for `owner`.

    The body is three newline-terminated `key: value` lines, in this order:
    `owner`, `acquired_at` (from `now_iso()`), and `pid` (the current process
    id). Keeping the layout in one place means the fields `steal_stale`
    compares (`owner` and `acquired_at`) are written the same way by every
    lock writer.
    """
    stamp = now_iso()
    pid = os.getpid()
    return f"owner: {owner}\n" + f"acquired_at: {stamp}\n" + f"pid: {pid}\n"
159
+
160
+
161
+ def read_lock(path: Path) -> dict | None:
162
+ """Parse a lock file's `key: value` body into a dict; None if absent/unreadable.
163
+
164
+ The single parser the canonical read AND the steal CAS share (so the value the
165
+ CAS compares is read the same way the holder wrote it). Never raises."""
166
+ if not path.exists():
167
+ return None
168
+ try:
169
+ contents = path.read_text(encoding="utf-8")
170
+ except OSError:
171
+ return None
172
+ info: dict = {}
173
+ for line in contents.splitlines():
174
+ if ":" in line:
175
+ k, _, v = line.partition(":")
176
+ info[k.strip()] = v.strip()
177
+ return info
178
+
179
+
180
def write_lock(path: Path, owner: str) -> None:
    """Create the lock file at `path` for `owner` with `O_CREAT | O_EXCL`.

    The parent directory is created first (including missing ancestors) so a
    fresh tree does not fail the create. The exclusive-create flag makes
    `os.open` raise `FileExistsError` when the lock file already exists, i.e.
    when the lock is held. On success the `lock_body(owner)` text is written
    as UTF-8 and the descriptor is always closed, even if the write fails.
    """
    lock_dir = path.parent
    lock_dir.mkdir(parents=True, exist_ok=True)
    exclusive_create = os.O_WRONLY | os.O_CREAT | os.O_EXCL
    handle = os.open(str(path), exclusive_create)
    try:
        payload = lock_body(owner).encode("utf-8")
        os.write(handle, payload)
    finally:
        os.close(handle)
192
+
193
+
194
def steal_stale(path: Path, owner: str, stale: dict) -> bool:
    """Try to take over the specific stale lock `stale` at `path`; True iff we won.

    `stale` is the dict the caller obtained from `read_lock` for the lock it
    decided to steal. The takeover is a compare-and-swap keyed on that lock's
    identity (`owner` + `acquired_at`), not merely on the path:

      1. Rename the lock file to a temp name unique to this call (pid +
         `time.monotonic_ns()`). If the rename fails, someone else already
         moved or removed the file, so we lost.
      2. Re-read the temp file and compare its `owner` / `acquired_at` with
         the observed stale values. On a mismatch we grabbed a different lock
         than the one we meant to steal, so we rename it back and concede.
      3. On a match, delete the temp file and create our own lock through
         `write_lock` (O_EXCL). If someone re-created the lock in between,
         `FileExistsError` means we lost and their lock is left untouched.

    Missing fields on either side compare as empty strings. `FileExistsError`
    from the final create is reported as False; other exceptions raised by
    `write_lock` are not caught here.
    """
    tmp = path.parent / f".{path.name}.steal.{os.getpid()}.{time.monotonic_ns()}"
    observed = stale or {}
    wanted = (str(observed.get("owner", "")), str(observed.get("acquired_at", "")))

    # Step 1: claim whatever currently sits at the path, atomically.
    try:
        os.rename(str(path), str(tmp))
    except OSError:  # includes FileNotFoundError
        return False  # already moved/removed by another stealer — we did not win

    # Step 2: the CAS check — is what we grabbed the lock we MEANT to steal?
    held = read_lock(tmp) or {}
    grabbed = (str(held.get("owner", "")), str(held.get("acquired_at", "")))
    if grabbed != wanted:
        # We displaced a different lock than observed: put it back and concede.
        # If putting it back fails, discard our temp copy instead.
        # NOTE(review): on POSIX os.rename silently replaces an existing
        # destination, so the fallback below is only reached where rename
        # refuses to overwrite (e.g. win32) — confirm that is the intent.
        try:
            os.rename(str(tmp), str(path))
        except OSError:
            try:
                os.unlink(str(tmp))
            except OSError:
                pass
        return False

    # Step 3: it WAS the stale lock — drop it and O_EXCL-create our own.
    try:
        os.unlink(str(tmp))
    except OSError:
        pass  # best-effort cleanup; a leftover temp does not block the create
    try:
        write_lock(path, owner)
    except FileExistsError:
        # A racer re-created the lock between our verified rename and our
        # create. We lost — do NOT clobber theirs.
        return False
    return True
dos/_job_policy.py ADDED
@@ -0,0 +1,97 @@
1
+ """dos._job_policy — the reference app's STRUCTURAL lane fallback, as a leaf.
2
+
3
+ This is the domain-free **structural fallback** taxonomy: only the exclusive
4
+ lanes (``orchestration`` / ``global``), no curated work-lane trees. The
5
+ authoritative work-lane taxonomy is NOT here — it lives in the consumer repo's
6
+ ``dos.toml [lanes]`` (read by ``job_config`` via ``load_workspace_config``). This
7
+ literal stands in only when a workspace has no ``[lanes]`` declaration (a foreign
8
+ checkout, a test tmp_path).
9
+
10
+ History: this module held the full job domain taxonomy (``apply`` / ``tailor`` /
11
+ ``discovery`` / ``recruiter`` / … with job-specific file globs). That was
12
+ userland policy baked into the kernel package — the layering wart the
13
+ ``dos.drivers.job`` docstring called out (2026-06-01 audit). It first moved here
14
+ from ``dos.config`` (the "third home both layers may import" relocation); then on
15
+ 2026-06-06, per dos/119 + the dynamic-claim-area model, the domain names and
16
+ their globs were removed entirely (a lane is a HANDLE resolving to a derived
17
+ per-pick claim, not a curated tree — ``--scope apply`` resolves via the host's
18
+ ``_dynamic_claim_space``). What remains is the structural fallback only. See
19
+ ``docs/_design/kernel-userland-taxonomy-split-2026-06-06.md`` in the consumer repo.
20
+
21
+ Layer position: it imports ONLY ``LaneTaxonomy`` (the domain-free dataclass) from
22
+ ``dos.config``, and nothing else from the package. ``dos.config.job_config`` reads
23
+ this literal back via a **lazy import inside the function body** (not a module-top
24
+ import), so there is no module-load cycle: ``_job_policy`` statically depends on
25
+ ``config`` (for the class); ``config`` depends on ``_job_policy`` only at
26
+ ``job_config()`` *call* time. ``dos.drivers.job`` (layer 4) re-exports from here,
27
+ so the public import surface is ``from dos.drivers.job import JOB_LANE_TAXONOMY``
28
+ while the kernel core (``dos.config``) no longer *defines* the domain taxonomy.
29
+
30
+ The de-clustering (2026-06-02, operator directive "delete the cluster concept,
31
+ it's bad"): ``concurrent`` and ``autopick`` are **empty**. The kernel arbiter
32
+ (`dos.arbiter.arbitrate`) admits concurrency purely by tree-disjointness
33
+ (`DisjointnessPredicate`) — it never consults ``concurrent``; that tuple only fed
34
+ the legacy bare-walk fallback and the TUI/`man lane` display. So an empty
35
+ ``concurrent`` does NOT serialize anything: two disjoint lanes still both acquire.
36
+ And ``autopick=()`` means a bare request no longer auto-picks a privileged trio —
37
+ the host supplies an explicit priority-first ``auto_pick_order`` (built from its
38
+ ``dispatch_lane_priority`` ladder), so a bare loop picks the top-priority pickable
39
+ plan's lane.
40
+
41
+ The dynamic-claim-area step (2026-06-06, dos/119): ``trees`` no longer carries the
42
+ work-lane regions either. ``--scope apply`` does NOT look up a curated
43
+ ``trees["apply"]`` — the host resolves it to the narrow per-pick footprint via
44
+ ``_dynamic_claim_space``. So ``trees`` here holds ONLY the ``global`` exclusive
45
+ lane's region (``**/*``). ``orchestration`` is declared exclusive but carries NO
46
+ tree: exclusive lanes are EXEMPT from ``config_lint.LANE_WITHOUT_TREE`` (the
47
+ arbiter admits them on liveness alone, never a tree — ``config_lint.py`` only
48
+ checks ``concurrent``/``autopick`` members), and ``tree_for`` returns ``[]``
49
+ cleanly for it. The host phased-plan globs that used to sit on ``orchestration``
50
+ (``scripts/next_up*.py``/``scripts/replan_*.py``/``docs/_plans/``) were userland
51
+ policy — they belong in the consumer repo's ``dos.toml [lanes.trees]``, not in
52
+ this kernel leaf — and were reaped 2026-06-08 (the userland-coupling audit; see
53
+ ``docs/_audits/USERLAND_REAP_AUDIT_2026-06-08.md``). ``aliases`` is empty (the
54
+ ``ff``/``recruiter``/``ui``/``auth`` self-aliases were userland and are gone).
55
+ The authoritative job taxonomy is the consumer repo's ``dos.toml [lanes]``; this
56
+ literal is the structural fallback.
57
+ """
58
+
59
+ from __future__ import annotations
60
+
61
+ from dos.config import LaneTaxonomy
62
+
63
+ # The reference userland app's STRUCTURAL lane fallback — domain-free, NOT the
64
+ # authoritative taxonomy. The authoritative work-lane taxonomy now lives in the
65
+ # consumer repo's `dos.toml [lanes]` (read by `job_config` via
66
+ # `load_workspace_config`); this literal is only the fallback used when a
67
+ # workspace has no `[lanes]` declaration (a foreign checkout, a test tmp_path).
68
+ #
69
+ # dos/119 + DCA (dynamic-claim-area): a lane is a HANDLE that resolves to a
70
+ # derived per-pick CLAIM, not a curated tree. So there are NO curated work-lane
71
+ # trees here anymore — the domain names (apply/tailor/discovery/recruiter/fleet/
72
+ # ui/auth) and their job-specific globs (`agents/apply_*.py`, `go/internal/ui/`,
73
+ # …) were userland policy that does not belong in this kernel package; they were
74
+ # removed 2026-06-06 (see `docs/_design/kernel-userland-taxonomy-split-2026-06-06.md`
75
+ # in the consumer repo). `--scope apply` now resolves dynamically via the host's
76
+ # `_dynamic_claim_space` (the narrow per-pick footprint), never a curated tree.
77
+ #
78
+ # What remains is structural ONLY: the two exclusive lanes, which run ALONE and
79
+ # never enter the disjointness algebra. Only `global` carries a tree (`**/*`);
80
+ # `orchestration` carries NONE — exclusive lanes are exempt from the
81
+ # `LANE_WITHOUT_TREE` lint (the arbiter admits an exclusive lane on liveness
82
+ # alone), so a tree-less `orchestration` is correct and `tree_for` returns `[]`
83
+ # for it. The host phased-plan globs it used to carry
84
+ # (`scripts/next_up*.py`/`scripts/replan_*.py`/`docs/_plans/`) were userland
85
+ # policy that does not belong in this kernel leaf; the consumer declares them in
86
+ # its own `dos.toml [lanes.trees]`. `concurrent`/`autopick` are empty
87
+ # (de-clustered 2026-06-02): concurrency is gated by tree-disjointness alone;
88
+ # bare auto-pick is priority-first via the host's ladder.
89
JOB_LANE_TAXONOMY = LaneTaxonomy(
    # The two exclusive lanes are the only lanes declared in this fallback.
    exclusive=("orchestration", "global"),
    # Only `global` carries a tree; `orchestration` deliberately has none.
    trees={"global": ("**/*",)},
    # No concurrent set, no auto-pick order, and no aliases are declared here.
    concurrent=(),
    autopick=(),
    aliases={},
)