cool-workflow 0.1.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. package/.claude-plugin/plugin.json +20 -0
  2. package/.codex-plugin/mcp.json +10 -0
  3. package/.codex-plugin/plugin.json +38 -0
  4. package/.mcp.json +10 -0
  5. package/LICENSE +24 -0
  6. package/README.md +638 -0
  7. package/apps/architecture-review/app.json +51 -0
  8. package/apps/architecture-review/workflow.js +116 -0
  9. package/apps/end-to-end-golden-path/app.json +30 -0
  10. package/apps/end-to-end-golden-path/workflow.js +33 -0
  11. package/apps/pr-review-fix-ci/app.json +59 -0
  12. package/apps/pr-review-fix-ci/workflow.js +90 -0
  13. package/apps/release-cut/app.json +54 -0
  14. package/apps/release-cut/workflow.js +82 -0
  15. package/apps/research-synthesis/app.json +50 -0
  16. package/apps/research-synthesis/workflow.js +76 -0
  17. package/apps/workflow-app-framework-demo/app.json +29 -0
  18. package/apps/workflow-app-framework-demo/workflow.js +44 -0
  19. package/dist/agent-config.js +223 -0
  20. package/dist/candidate-scoring.js +715 -0
  21. package/dist/capability-core.js +630 -0
  22. package/dist/capability-dispatcher.js +86 -0
  23. package/dist/capability-registry.js +523 -0
  24. package/dist/cli.js +1276 -0
  25. package/dist/collaboration.js +727 -0
  26. package/dist/commit.js +570 -0
  27. package/dist/contract-migration.js +234 -0
  28. package/dist/coordinator.js +1163 -0
  29. package/dist/daemon.js +44 -0
  30. package/dist/dispatch.js +201 -0
  31. package/dist/drive.js +503 -0
  32. package/dist/error-feedback.js +415 -0
  33. package/dist/evidence-grounding.js +179 -0
  34. package/dist/evidence-reasoning.js +733 -0
  35. package/dist/execution-backend.js +1279 -0
  36. package/dist/harness.js +61 -0
  37. package/dist/mcp-server.js +1615 -0
  38. package/dist/multi-agent-eval.js +857 -0
  39. package/dist/multi-agent-host.js +764 -0
  40. package/dist/multi-agent-operator-ux.js +537 -0
  41. package/dist/multi-agent-trust.js +366 -0
  42. package/dist/multi-agent.js +1173 -0
  43. package/dist/node-snapshot.js +270 -0
  44. package/dist/observability.js +922 -0
  45. package/dist/operator-ux.js +971 -0
  46. package/dist/orchestrator/audit-operations.js +182 -0
  47. package/dist/orchestrator/candidate-operations.js +117 -0
  48. package/dist/orchestrator/cli-options.js +288 -0
  49. package/dist/orchestrator/collaboration-operations.js +86 -0
  50. package/dist/orchestrator/feedback-operations.js +81 -0
  51. package/dist/orchestrator/host-operations.js +78 -0
  52. package/dist/orchestrator/lifecycle-operations.js +462 -0
  53. package/dist/orchestrator/migration-operations.js +44 -0
  54. package/dist/orchestrator/multi-agent-operations.js +362 -0
  55. package/dist/orchestrator/report.js +369 -0
  56. package/dist/orchestrator/topology-operations.js +84 -0
  57. package/dist/orchestrator.js +874 -0
  58. package/dist/pipeline-contract.js +92 -0
  59. package/dist/pipeline-runner.js +285 -0
  60. package/dist/reclamation.js +882 -0
  61. package/dist/result-normalize.js +194 -0
  62. package/dist/run-export.js +64 -0
  63. package/dist/run-registry.js +1347 -0
  64. package/dist/run-state-schema.js +67 -0
  65. package/dist/sandbox-profile.js +471 -0
  66. package/dist/scheduler.js +266 -0
  67. package/dist/scheduling.js +184 -0
  68. package/dist/schema-validate.js +98 -0
  69. package/dist/state-explosion.js +1213 -0
  70. package/dist/state-migrations.js +463 -0
  71. package/dist/state-node.js +301 -0
  72. package/dist/state.js +308 -0
  73. package/dist/telemetry-attestation.js +156 -0
  74. package/dist/telemetry-ledger.js +145 -0
  75. package/dist/topology.js +527 -0
  76. package/dist/triggers.js +159 -0
  77. package/dist/trust-audit.js +475 -0
  78. package/dist/types/blackboard.js +2 -0
  79. package/dist/types/boundary.js +29 -0
  80. package/dist/types/candidate.js +2 -0
  81. package/dist/types/collaboration.js +2 -0
  82. package/dist/types/core.js +2 -0
  83. package/dist/types/drive.js +10 -0
  84. package/dist/types/error-feedback.js +2 -0
  85. package/dist/types/evidence-reasoning.js +2 -0
  86. package/dist/types/execution-backend.js +2 -0
  87. package/dist/types/multi-agent.js +2 -0
  88. package/dist/types/observability.js +2 -0
  89. package/dist/types/pipeline.js +2 -0
  90. package/dist/types/reclamation.js +8 -0
  91. package/dist/types/result.js +2 -0
  92. package/dist/types/run-registry.js +2 -0
  93. package/dist/types/run.js +2 -0
  94. package/dist/types/sandbox.js +2 -0
  95. package/dist/types/schedule.js +2 -0
  96. package/dist/types/state-node.js +2 -0
  97. package/dist/types/topology.js +2 -0
  98. package/dist/types/trust.js +2 -0
  99. package/dist/types/workbench.js +2 -0
  100. package/dist/types/worker.js +2 -0
  101. package/dist/types/workflow-app.js +2 -0
  102. package/dist/types.js +43 -0
  103. package/dist/verifier-registry.js +46 -0
  104. package/dist/verifier.js +78 -0
  105. package/dist/version.js +8 -0
  106. package/dist/workbench-host.js +172 -0
  107. package/dist/workbench.js +190 -0
  108. package/dist/worker-isolation.js +1028 -0
  109. package/dist/workflow-api.js +98 -0
  110. package/dist/workflow-app-framework.js +626 -0
  111. package/docs/agent-delegation-drive.7.md +190 -0
  112. package/docs/agent-framework.md +176 -0
  113. package/docs/candidate-scoring.7.md +106 -0
  114. package/docs/canonical-workflow-apps.7.md +137 -0
  115. package/docs/capability-topology-registry.7.md +168 -0
  116. package/docs/cli-mcp-parity.7.md +373 -0
  117. package/docs/contract-migration-tooling.7.md +123 -0
  118. package/docs/control-plane-scheduling.7.md +110 -0
  119. package/docs/coordinator-blackboard.7.md +183 -0
  120. package/docs/dogfood/architecture-review-cool-workflow.md +16 -0
  121. package/docs/dogfood-one-real-repo.7.md +168 -0
  122. package/docs/durable-state-and-locking.7.md +107 -0
  123. package/docs/end-to-end-golden-path.7.md +117 -0
  124. package/docs/error-feedback.7.md +153 -0
  125. package/docs/evidence-adoption-reasoning-chain.7.md +270 -0
  126. package/docs/execution-backends.7.md +300 -0
  127. package/docs/getting-started.md +99 -0
  128. package/docs/index.md +41 -0
  129. package/docs/mcp-app-surface.7.md +235 -0
  130. package/docs/multi-agent-cli-mcp-surface.7.md +265 -0
  131. package/docs/multi-agent-eval-replay-harness.7.md +302 -0
  132. package/docs/multi-agent-operator-ux.7.md +314 -0
  133. package/docs/multi-agent-runtime-core.7.md +231 -0
  134. package/docs/multi-agent-topologies.7.md +103 -0
  135. package/docs/multi-agent-trust-policy-audit.7.md +154 -0
  136. package/docs/node-snapshot-diff-replay.7.md +135 -0
  137. package/docs/observability-cost-accounting.7.md +194 -0
  138. package/docs/operator-ux.7.md +180 -0
  139. package/docs/pipeline-runner.7.md +136 -0
  140. package/docs/project-index.md +261 -0
  141. package/docs/real-execution-backends.7.md +142 -0
  142. package/docs/release-and-migration.7.md +280 -0
  143. package/docs/release-tooling.7.md +159 -0
  144. package/docs/routines.md +48 -0
  145. package/docs/run-registry-control-plane.7.md +312 -0
  146. package/docs/run-retention-reclamation.7.md +191 -0
  147. package/docs/sandbox-profiles.7.md +137 -0
  148. package/docs/scheduled-tasks.md +80 -0
  149. package/docs/security-trust-hardening.7.md +117 -0
  150. package/docs/state-explosion-management.7.md +264 -0
  151. package/docs/state-node.7.md +96 -0
  152. package/docs/team-collaboration.7.md +207 -0
  153. package/docs/unix-principles.md +192 -0
  154. package/docs/verifier-gated-commit.7.md +140 -0
  155. package/docs/web-desktop-workbench.7.md +215 -0
  156. package/docs/worker-isolation.7.md +167 -0
  157. package/docs/workflow-app-framework.7.md +274 -0
  158. package/manifest/README.md +43 -0
  159. package/manifest/plugin.manifest.json +316 -0
  160. package/manifest/pricing.policy.json +14 -0
  161. package/package.json +79 -0
  162. package/scripts/agents/claude-p-agent.js +104 -0
  163. package/scripts/agents/claude-p-agent.sh +9 -0
  164. package/scripts/agents/cw-attest-keygen.js +55 -0
  165. package/scripts/agents/cw-attest-wrap.js +143 -0
  166. package/scripts/block-unapproved-tag.sh +39 -0
  167. package/scripts/bump-version.js +249 -0
  168. package/scripts/canonical-apps.js +171 -0
  169. package/scripts/cw.js +4 -0
  170. package/scripts/dist-drift-check.js +79 -0
  171. package/scripts/dogfood-architecture-review.js +237 -0
  172. package/scripts/dogfood-release.js +624 -0
  173. package/scripts/forward-ref-docs.js +73 -0
  174. package/scripts/gen-manifests.js +232 -0
  175. package/scripts/golden-path.js +300 -0
  176. package/scripts/mcp-server.js +4 -0
  177. package/scripts/new-feature.js +121 -0
  178. package/scripts/parity-check.js +213 -0
  179. package/scripts/release-check.js +118 -0
  180. package/scripts/release-flow.js +272 -0
  181. package/scripts/release-gate.sh +85 -0
  182. package/scripts/sync-project-index.js +387 -0
  183. package/scripts/validate-run-state-schema.js +126 -0
  184. package/scripts/verify-container-selfref.js +64 -0
  185. package/scripts/version-sync-check.js +237 -0
  186. package/skills/cool-workflow/SKILL.md +162 -0
  187. package/skills/cool-workflow/references/commands.md +282 -0
  188. package/tsconfig.json +16 -0
  189. package/ui/workbench/app.css +76 -0
  190. package/ui/workbench/app.js +159 -0
  191. package/ui/workbench/index.html +32 -0
  192. package/workflows/architecture-review.workflow.js +84 -0
  193. package/workflows/research-synthesis.workflow.js +47 -0
@@ -0,0 +1,312 @@
1
+ # Run Registry / Control Plane
2
+
3
+ CW v0.1.28 adds the Run Registry / Control Plane: a layer that manages MANY
4
+ workflow runs across repositories. Before v0.1.28 a run lived only under its
5
+ repo's `.cw/runs/<id>/` and was loaded from the current directory
6
+ (`loadRunFromCwd`); there was no cross-repo index and no unified lifecycle
7
+ management. This release adds search, resume, archive, a durable queue,
8
+ cross-repo history, and failed-run rerun — without changing the run-state schema
9
+ and without taking ownership of source truth.
10
+
11
+ The design follows the same base-system observability philosophy as
12
+ [State Explosion Management](state-explosion-management.7.md) and the
13
+ [Evidence Adoption Reasoning Chain](evidence-adoption-reasoning-chain.7.md):
14
+
15
+ - the per-run `.cw/runs/<id>/state.json` is the SINGLE source of truth
16
+ - the registry is a DERIVED userland index, never a replacement for source records
17
+ - plain files, stable JSON, deterministic output
18
+ - small composable commands and readable console views with full
19
+ machine-readable output available
20
+ - fail closed when the index is stale, a run's source changed, or its source is
21
+ missing — never fabricate run status from the cache
22
+ - append-only history: resume continues a run, rerun creates a NEW linked run,
23
+ and archive marks rather than deletes
24
+ - backward compatible; no hidden database; no daemon required to read state
25
+
26
+ ## Mechanism vs policy
27
+
28
+ The registry is MECHANISM: a rebuildable cache over runs. POLICY — retention
29
+ windows, queue ordering, and archive thresholds — is configurable and kept out
30
+ of the index (`RunRegistryPolicy`, explicit flags). The index can be deleted and
31
+ rebuilt from source at any time; it never holds authority a `state.json` does
32
+ not.
33
+
34
+ ## Derived index model
35
+
36
+ A `RunRecord` is derived per run and carries `schemaVersion`, `runId`, `appId`,
37
+ `appVersion`, `workflowId`, `title`, `repo` (the owning repo root), `runDir`,
38
+ `statePath`, `createdAt`, `updatedAt`, `loopStage`, a `lifecycle` and a
39
+ `derivedLifecycle`, an `archived` flag with `archivedAt`/`archiveReason`, task
40
+ counts, `commitCount`, `verifierGatedCommitCount`, `openFeedbackCount`, a bounded
41
+ `inputsDigest` for free-text search, a deterministic `sourceFingerprint`, a
42
+ per-record `freshness` (`valid`, `stale`, or `missing`), and optional
43
+ `provenance`.
44
+
45
+ A `RunRegistryIndex` aggregates records for a scope (`repo` or `home`) with its
46
+ own `sourceFingerprint`, the covered `repos`, the `queue`, and lifecycle
47
+ `counts`. A `RunRegistryReport` wraps the index with explicit freshness
48
+ (`valid`, `stale`, or `absent`) plus the `staleRuns` and `missingRuns` lists and
49
+ a `nextAction`. Every read re-derives records from source; the persisted index is
50
+ only compared against, never trusted as the live status.
51
+
52
+ ## Lifecycle state machine
53
+
54
+ Lifecycle is CLASSIFIED from existing state, never invented. `deriveLifecycle`
55
+ applies the following rules to a run's source state — first match wins:
56
+
57
+ ```text
58
+ 1. running tasks > 0 -> running
59
+ 2. open feedback > 0 -> blocked (failures under correction)
60
+ 3. failed tasks > 0 -> failed
61
+ 4. tasks > 0 and all tasks completed -> completed
62
+ 5. verifier-gated commits > 0 and nothing pending -> completed (commit-only runs)
63
+ 6. completed tasks > 0 -> running (mid-flight)
64
+ 7. otherwise -> queued
65
+ ```
66
+
67
+ `archived` is an OVERLAY disposition applied on top of this. The surfaced
68
+ `lifecycle` becomes `archived`, but `derivedLifecycle` preserves the
69
+ source-derived state so search and history can still match the underlying run.
70
+ The classifier never reads the cache; it reads source `state.json`.
71
+
72
+ ## Cross-repo layout
73
+
74
+ State is plain files, readable and diffable:
75
+
76
+ ```text
77
+ <repo>/.cw/runs/<id>/state.json source of truth (unchanged, never owned here)
78
+ <repo>/.cw/registry/index.json per-repo derived index (rebuildable)
79
+ <repo>/.cw/registry/archive.json archive overlay (mark; never deletes source)
80
+ <repo>/.cw/registry/provenance.json rerun provenance links (derived metadata)
81
+
82
+ $CW_HOME/registry/repos.json registered repo roots (explicit discovery set)
83
+ $CW_HOME/registry/index.json cross-repo derived index (rebuildable)
84
+ $CW_HOME/registry/queue.json durable run queue (plain, ordered)
85
+ ```
86
+
87
+ The home registry root resolves from `CW_HOME`, then
88
+ `XDG_STATE_HOME/cool-workflow`, then `~/.local/state/cool-workflow`. A repo is
89
+ registered into `repos.json` when it is refreshed (or when a queue entry names
90
+ it). Reads never write: a search or show computes the repo set as the union of
91
+ the registered repos and the current repo in memory, so reading the index never
92
+ mutates discovery state.
93
+
94
+ ## Search
95
+
96
+ `run search` queries runs by `--app`, `--status`, time range (`--since`,
97
+ `--until`), `--repo`, and free-text (`--text`, matched over runId, app, workflow,
98
+ title, repo, lifecycle, loop stage, and a bounded digest of run inputs).
99
+ Results are deterministic (ordered by `createdAt`, then `runId`) and paginated
100
+ (`--limit`, `--offset`). Search is cross-repo by default (`--scope home`); use
101
+ `--scope repo` to restrict to the current repo. Archived runs are included by
102
+ default and can be excluded with `--include-archived false`.
103
+
104
+ ## Resume
105
+
106
+ `run resume <run-id>` resolves a run by id across the registry — not just the
107
+ cwd — loads its durable state, and returns the next runnable tasks and next
108
+ actions for the host to execute. Resume is read-only over source: it never
109
+ mutates `state.json` and never un-archives a run.
110
+
111
+ ## Queue
112
+
113
+ `queue add` appends a durable entry to `$CW_HOME/registry/queue.json` with an
114
+ explicit `--priority` (lower drains first; ties break by enqueue time, then id).
115
+ `queue list` prints the queue in policy order; `queue show <id>` shows one entry.
116
+ `queue drain [--limit N]` marks the next ready entries drained and returns them —
117
+ CW records order and readiness; the HOST still executes the workers. Nothing in
118
+ the queue spawns work on its own.
119
+
120
+ ## Archive
121
+
122
+ `run archive <run-id>` writes an overlay mark to the owning repo's
123
+ `registry/archive.json`; the run's `state.json` is never moved or deleted, and
124
+ the run stays searchable (its `derivedLifecycle` is preserved). `--unarchive`
125
+ clears the mark. Retention is POLICY: `run archive --older-than-days N
126
+ [--state completed --state failed]` archives eligible runs older than the window
127
+ without touching source truth. The default policy archives nothing
128
+ (`archiveOlderThanDays = 0`) until a window is given.
129
+
130
+ ## Rerun
131
+
132
+ `run rerun <run-id>` re-runs a failed run as a NEW run: it reuses the original
133
+ inputs and app, lands the new run beside the original (same repo), and records a
134
+ provenance link (`rerunOf`, `rerunOfRepo`, `originRunId`, `generation`, `reason`)
135
+ in the repo's `registry/provenance.json`. The original failed run is PRESERVED
136
+ for audit — the past is never overwritten. Rerunning a rerun increments
137
+ `generation` and keeps `originRunId` pinned to the chain root.
138
+
139
+ ## Cross-repo history
140
+
141
+ `history` reads a unified timeline of runs across all registered repos
142
+ (newest first), each entry carrying its repo, lifecycle, loop stage, timestamps,
143
+ freshness, and provenance back to its `.cw/runs/<id>/`. Filter with `--app` and
144
+ `--status`; paginate with `--limit` and `--offset`.
145
+
146
+ ## CLI
147
+
148
+ ```text
149
+ node scripts/cw.js registry refresh [--scope repo|home] [--json]
150
+ node scripts/cw.js registry show [--scope repo|home] [--json]
151
+ node scripts/cw.js run search [--app ID] [--status STATE] [--text Q] [--repo PATH] [--since ISO] [--until ISO] [--limit N] [--offset N] [--scope repo|home] [--json]
152
+ node scripts/cw.js run list [--scope repo|home] [--json]
153
+ node scripts/cw.js run show <run-id> [--scope repo|home] [--json]
154
+ node scripts/cw.js run resume <run-id> [--limit N] [--json]
155
+ node scripts/cw.js run archive <run-id> [--reason TEXT] [--unarchive]
156
+ node scripts/cw.js run archive --older-than-days N [--state completed --state failed]
157
+ node scripts/cw.js run rerun <run-id> [--reason TEXT]
158
+ node scripts/cw.js queue add [--app ID|--workflow ID|--runId ID] [--repo PATH] [--priority N] [--note TEXT]
159
+ node scripts/cw.js queue list [--status STATE] [--repo PATH] [--json]
160
+ node scripts/cw.js queue show <queue-id>
161
+ node scripts/cw.js queue drain [--limit N] [--repo PATH]
162
+ node scripts/cw.js history [--app ID] [--status STATE] [--limit N] [--offset N] [--scope repo|home] [--json]
163
+ ```
164
+
165
+ Read commands print terse human panels by default (lifecycle, freshness, counts,
166
+ and next action) and full machine output under `--json` or `--format json`.
167
+
168
+ ## MCP parity
169
+
170
+ Every command above is declared once in the v0.1.28 capability registry
171
+ (`src/capability-registry.ts`) and rendered on both surfaces, so `cw <cmd>
172
+ --json` is schema-identical to the matching `cw_<tool>` result and the pair
173
+ passes `npm run parity:check`:
174
+
175
+ - `cw_registry_refresh`, `cw_registry_show`
176
+ - `cw_run_search`, `cw_run_list`, `cw_run_show`, `cw_run_resume`,
177
+ `cw_run_archive`, `cw_run_rerun`
178
+ - `cw_queue_add`, `cw_queue_list`, `cw_queue_drain`, `cw_queue_show`
179
+ - `cw_history`
180
+
181
+ See [cli-mcp-parity.7.md](cli-mcp-parity.7.md).
182
+
183
+ ## Freshness and fail-closed behavior
184
+
185
+ `registry show` recomputes the current source fingerprint for every run and
186
+ compares it to the persisted index. If a run's source changed, the report status
187
+ is `stale` and the run is named in `staleRuns`. If a persisted run's source is
188
+ gone, the run is named in `missingRuns`, it is NOT fabricated into the current
189
+ records, and the next action is `registry refresh`. `run show` of a run whose
190
+ source is missing returns `found: false` with `freshness: missing` and only the
191
+ last-known persisted record, clearly flagged — never as a live status. An
192
+ unreadable or unsupported run state is treated as missing, never as success.
193
+
194
+ ## Migration
195
+
196
+ Pre-0.1.28 single-repo runs and existing `.cw/runs/` layouts keep working with
197
+ an empty, rebuildable registry: `registry show` reports `absent` until the first
198
+ `registry refresh`, and every pre-0.1.28 CLI command and MCP tool is unchanged.
199
+ No run-state schema change ships in v0.1.28; newer unsupported run-state schemas
200
+ still fail closed. The registry, archive overlay, provenance overlay, queue, and
201
+ home discovery set are all derived files that can be deleted and rebuilt from
202
+ source at any time.
203
+
204
+ ## CLI ↔ MCP Parity (v0.1.28)
205
+
206
+ Every command and tool referenced above is declared in the capability registry
207
+ (`src/capability-registry.ts`) and validated by `npm run parity:check`, so
208
+ `cw <cmd> --json` and the matching `cw_<tool>` result render one data source.
209
+ See [cli-mcp-parity.7.md](cli-mcp-parity.7.md).
210
+
211
+ ## Execution Backends (v0.1.29)
212
+
213
+ v0.1.29 lifts execution into a pluggable driver layer: one narrow `ExecutionBackend`
214
+ contract with interchangeable `node`/`bun`/`shell`/`container`/`remote`/`ci`
215
+ drivers, selected by `--backend` (parallel to `--sandbox`) and inspected via
216
+ `backend list|show|probe`. The result/evidence envelope is schema-identical across
217
+ backends; the backend id + sandbox attestation are recorded as provenance, so this
218
+ surface is unchanged regardless of which backend executed a run. See
219
+ [execution-backends.7.md](execution-backends.7.md).
220
+ ## Web / Desktop Workbench (v0.1.30)
221
+
222
+ v0.1.30 adds the Web / Desktop Workbench: a read-only, localhost-only human
223
+ console that renders this surface (and the five operator panels — run
224
+ graph, blackboard, worker logs, candidate compare, audit timeline) for any run,
225
+ reading the SAME capability `--json` payloads. It is a THIRD FRONT DOOR alongside
226
+ the CLI and MCP that holds no authoritative state and forks no schema: each panel
227
+ equals its `cw <cmd> --json` payload byte-for-byte (parity-gated), and refresh
228
+ re-derives everything from disk. See
229
+ [web-desktop-workbench.7.md](web-desktop-workbench.7.md).
230
+
231
+ ## Observability + Cost Accounting (v0.1.31)
232
+
233
+ v0.1.31 adds Observability + Cost Accounting: `metrics show`/`metrics summary`
234
+ derive durations, failure/verifier/acceptance rates (with sample counts and
235
+ fail-closed `n/a`), and host-attested token/cost from existing durable run state
236
+ — no metrics database, no collector daemon, no hidden counter. Usage is additive
237
+ and optional (absent ⇒ `unreported`, never 0); cost is `attested` (attested usage
238
+ × a recorded pricing policy) or clearly `estimated`, with pricing as policy. Both
239
+ verbs are parity-gated and render read-only in the v0.1.30 Workbench. See
240
+ [observability-cost-accounting.7.md](observability-cost-accounting.7.md).
241
+
242
+
243
+ ## Team Collaboration (v0.1.32)
244
+
245
+ v0.1.32 adds Team Collaboration: a host-attested actor and append-only
246
+ approvals/rejections/comments/handoffs provenance-linked to a durable target,
247
+ plus a review gate that STACKS ON the verifier gate — required approvals from
248
+ authorized roles, enforced inside `resolveCommitGate` AFTER the verifier checks
249
+ and never instead of them, failing closed on quorum/authority/self-approval and
250
+ recording who approved the very artifact that shipped. Policy (required approvals,
251
+ authorized roles, self-approval) is data, default off (pre-v0.1.32 behavior
252
+ unchanged). The verbs are parity-gated and render read-only in the v0.1.30
253
+ Workbench. See [Team Collaboration](team-collaboration.7.md).
254
+
255
+ ## Release Tooling (v0.1.33)
256
+
257
+ The per-tag mechanical surfaces (version bump across 17 surfaces, feature scaffold, and the forward-reference docs) become deterministic scripts, with a de-duplicated release gate. See release-tooling(7).
258
+
259
+ ## Real Execution Backend Integrations (v0.1.34)
260
+
261
+ Container/remote/ci backends really execute (docker/podman run, remote/CI POST-and-poll) under the sandbox contract, with byte-stable evidence vs node and fail-closed refusal when a runtime/endpoint is unavailable. See real-execution-backends(7).
262
+
263
+ ## Node Snapshot / Diff / Replay (v0.1.35)
264
+
265
+ Per-node snapshot, structural diff, and isolated deterministic replay over StateNode, reusing the v0.1.23 eval harness; fail-closed on source drift (valid|stale|absent). See node-snapshot-diff-replay(7).
266
+
267
+ ## Contract Migration Tooling (v0.1.36)
268
+
269
+ First-class declared migration registry (run-state + workflow-app) with per-edge compatibility proofs, fail-closed reachability, and a round-trip/non-destruction prover. See contract-migration-tooling(7).
270
+
271
+ ## Control-Plane Scheduling (v0.1.37)
272
+
273
+ Priority + concurrency limits + lease lifecycle + retry/backoff + fail-closed park over the v0.1.28 Run Registry queue; policy-as-data, deterministic. See control-plane-scheduling(7).
274
+
275
+ ## Agent Delegation Drive (v0.1.38)
276
+
277
+ Spawn an external agent process per worker, capture result.md + attestation, auto-drive plan->dispatch->fulfill->accept->commit.
278
+
279
+ ## Run Retention & Provable Reclamation (v0.1.39)
280
+
281
+ Tiered, append-only, cryptographically-verifiable run reclamation: seal the audit skeleton, free the reconstructable bulk, prove it.
282
+
283
+ ## Durable State & Locking (v0.1.40)
284
+
285
+ Atomic temp->rename writes + fsync-durability for authoritative stores; portable stale-stealing file lock serializing the cross-process read-modify-write stores.
286
+
287
+ ## Self-Audit Hardening & Pure-Router Decomposition (v0.1.41)
288
+
289
+ Evidence grounding + durable audit append + symlink-hardened containment + deterministic worker ids + recursive redaction; BackendRegistry self-describing drivers (no per-id switches); orchestrator god-object decomposed into per-domain operation modules (pure loadRun->delegate router).
290
+
291
+ ## Robust Result Ingest (v0.1.42)
292
+
293
+ Capture findings/evidence from any reasonable agent shape (alt keys + prose), CW derives grounded evidence itself, warn on empty capture — closes the v0.1.41 live-drive 'accepted with 0 captured' failure.
294
+
295
+ ## No-False-Green Gate & Launch Prep (v0.1.43)
296
+
297
+ Hard gate blocking empty-capture verifier-gated commits, plus quickstart and launch-prep docs.
298
+
299
+ ## Release-Gate Determinism & Agents Vendor (v0.1.44)
300
+
301
+ Release-readiness checks now validate the committed blob (`git show HEAD:<path>`) instead of the mutable working tree — eliminating false-red/false-green from concurrent working-tree writes (iCloud/Spotlight/editor). Adds the `agents` vendor manifest target: a generated `.agents/plugins/cool-workflow/` adapter giving any non-Claude AI agent one common interface to CW.
302
+
303
+ ## P1-P2 Fixes & CI Content Surfaces (v0.1.49)
304
+
305
+ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.46), vendor-adapter registry (v0.1.47), state auto-compaction and P2 fixes (v0.1.48), plus CI content-surface determinism hardening (v0.1.49).
306
+ 0.1.51
307
+
308
+ 0.1.76
309
+
310
+ 0.1.77
311
+
312
+ 0.1.78
@@ -0,0 +1,191 @@
1
+ # Run Retention & Provable Reclamation
2
+
3
+ CW v0.1.39 adds Run Retention & Provable Reclamation: a tiered, append-only,
4
+ cryptographically-verifiable way to **free disk WITHOUT violating the audit/replay
5
+ moat**. A single day of dogfooding produced ~1 GB across 200+ runs under
6
+ `.cw/runs/`, and before v0.1.39 there was **zero disk reclamation** — `run archive`
7
+ only marked an overlay (it never freed bytes), `sched reclaim` reclaimed expired
8
+ leases (not disk), and worker scratch dirs were never cleaned. Naive GC is
9
+ forbidden: CW's entire value is "don't trust, verify." So reclamation is a
10
+ **verifiable, append-only state transition** — freeing bytes leaves behind
11
+ cryptographic proof that what was freed is reconstructable-or-worthless and that
12
+ the audit-essential subset is sealed.
13
+
14
+ This release builds directly on a precise lineage: v0.1.28's archive overlay
15
+ (`run-registry.ts` — "Archive is an overlay mark, not a delete"), v0.1.35's
16
+ per-node snapshot/diff/deterministic replay (`node-snapshot.ts`), v0.1.32's
17
+ append-only collaboration log, and v0.1.37's policy-as-data scheduling. It EXTENDS
18
+ them; it forks nothing.
19
+
20
+ ## The lifecycle tiers
21
+
22
+ ```
23
+ live full on disk re-runnable + verifiable
24
+ archived overlay mark, full bytes re-runnable + verifiable (v0.1.28 ceiling)
25
+ reclaimed tombstone + skeleton + digests verify-only (or re-runnable-by-reconstruction) (v0.1.39 ceiling)
26
+ ```
27
+
28
+ `archived` keeps its mark-only semantics, untouched. `reclaimed` is the NEW
29
+ disk-freeing tier above it. The lifecycle ceiling for this release is `reclaimed`;
30
+ a future `forgotten` compliance tier (discarding even the skeleton, keeping only
31
+ the chained tombstone hash) is out of scope — the `RunLifecycleState` union gains
32
+ ONLY `reclaimed`, and the hash chain is designed to extend to `forgotten` later.
33
+
34
+ ## The red line — never delete what is audit-essential AND irreproducible
35
+
36
+ A byte is freeable ONLY if it is one of two classes:
37
+
38
+ 1. **reconstructable** — deterministically re-derivable from RETAINED inputs + a
39
+ recorded recipe + an `expectDigest`, or
40
+ 2. **pure scratch** — zero audit value,
41
+
42
+ AND it is **referenced by no surviving evidence locator or audit/collaboration
43
+ event.** Any path that is neither class defaults to **RETAINED** (fail closed).
44
+ The hard ALLOW-LIST of always-retained paths — never freed under any policy — is `state.json`, `audit/`,
45
+ `commits/`, the collaboration log, the attestation chain, `report.md`, and the new
46
+ `reclaimed.json` overlay.
47
+
48
+ The **skeleton** is the machine-checkable contract for what must survive every
49
+ reclamation (`SKELETON_REQUIRED_KEYS` + `validateSkeleton()`): the final verdict,
50
+ every commit record, every evidence locator's content digest, the attestation
51
+ chain, the cost record, and the append-only audit + collaboration logs. If a
52
+ complete skeleton cannot be extracted, reclamation **refuses with
53
+ `skeleton-incomplete` and frees zero bytes.**
54
+
55
+ ## Write-ahead, fail-closed sequencing — order is the safety property
56
+
57
+ The reclamation transaction is four discrete, individually-callable steps:
58
+
59
+ 1. `extractSkeleton()` — extract + seal the audit-essential subset.
60
+ 2. `buildTombstone()` — write the full freed-manifest with a **pre-deletion
61
+ sha256 per path**, plus the hash chain.
62
+ 3. `commitTombstone()` — **fsync** the tombstone into the append-only
63
+ `reclaimed.json` overlay (temp → fsync → rename), and record the attestation
64
+ through the existing append-only trust-audit log.
65
+ 4. `freeBulk()` — ONLY THEN free the bulk bytes.
66
+
67
+ A crash between any two steps leaves **EITHER the full run OR a complete tombstone —
68
+ never a half-deleted run with no proof.** This is testable by design:
69
+ `runReclamation(run, policy, { faultAfter })` throws a synthetic `ReclamationAbort`
70
+ after the named step (`skeleton` | `tombstone-write` | `tombstone-commit`) — never
71
+ by killing the process.
72
+
73
+ ## Append-only — reclamation EXTENDS history, never rewrites it
74
+
75
+ The tombstone is a NEW `reclaimed.json` overlay (a peer of `archive.json`'s role).
76
+ Only the bulk DATA bytes are freed — no existing audit, state, or commit record is
77
+ ever rewritten. The tombstone is itself a new audit record, **hash-chained**: `tombstoneHash`
78
+ is computed from the freed-manifest + sealed skeleton + `prevTombstoneHash`
79
+ (genesis = sha256 of the sealed skeleton). `gc verify` recomputes `tombstoneHash`
80
+ **independently**, never trusting the stored value, so a tampered registry entry
81
+ is caught — flipping a per-path sha256 fails with `tombstone-digest-mismatch`;
82
+ editing a hash link fails with `tombstone-chain-broken`.
83
+
84
+ ## Capability downgrade is explicit and queryable — never silent
85
+
86
+ Reclaiming a node snapshot downgrades a run from `re-runnable` to `verify-only`,
87
+ or to `re-runnable-by-reconstruction` when the snapshot's inputs + `expectDigest`
88
+ are retained. `cw run show <id>` reports `record.tier`, `record.capability`, and an
89
+ enumerable `record.capabilityReason` (a closed set, e.g.
90
+ `snapshot-reclaimed-no-reconstruction` | `inputs-and-expectdigest-retained` |
91
+ `scratch-only-reclaimed`) — never free-text prose.
92
+
93
+ **Reconstruction is a distinct code path, NOT live `verifyNodeReplay`.** A reclaimed
94
+ artifact making `loadNodeSnapshot` return `absent` is the EXPECTED fail-closed
95
+ signal. The reconstruction verifier re-runs the recorded recipe against the
96
+ RETAINED inputs (keyed on the retained-inputs digest) and compares the result's
97
+ sha256 to the tombstoned `expectDigest` — it never routes through the freed source
98
+ bytes. Flipping one retained input byte fails with `reconstruction-digest-mismatch`.
99
+
100
+ ## The eager-scratch exception
101
+
102
+ Worker scratch is the one class reclaimed eagerly. A worker's scratch dir is pure
103
+ scratch with zero audit value, and its `result.md` is already copied to
104
+ `results/<task-id>.md` and evidence-gated. Before the scratch is freed, the result
105
+ node's `worker-result` artifact (set by `recordWorkerOutput` to a path INSIDE the
106
+ scratch dir) is **re-pointed** to the retained `results/<task-id>.md` copy, and the
107
+ result-node snapshot is proven to stay `valid` (not `absent`) — so no surviving
108
+ node references a freed path. Opt out with `--keep-scratch`.
109
+
110
+ ## CLI
111
+
112
+ ```
113
+ cw gc plan [run-id] [--reclaimAfterArchiveDays N] [--keep-scratch] [--keep-snapshots] [--scope repo|home] [--json]
114
+ cw gc run [run-id] [--reclaimAfterArchiveDays N] [--keep-scratch] [--keep-snapshots] [--limit N] [--actor NAME] [--json]
115
+ cw gc verify <run-id> [--scope repo|home] [--json]
116
+ ```
117
+
118
+ - `gc plan` is a pure **dry-run**: it computes eligible runs, the exact bytes that
119
+ WOULD be freed per kind, and the per-run capability downgrade. It frees nothing
120
+ (`plan.bytesToFree` equals the summed per-path sizes it lists).
121
+ - `gc run` executes the write-ahead transaction for eligible runs, bounded by
122
+ `maxReclaimRuns` / `maxReclaimBytes`, fail-closed on any incomplete skeleton.
123
+ - `gc verify` re-proves a reclaimed run end-to-end.
124
+
125
+ Eligibility is explicit and fail-closed: a run is reclaimable exactly when its
126
+ **derived lifecycle is `completed` or `failed` AND it is archived AND it has no
127
+ open feedback AND it is past `reclaimAfterArchiveDays`.** `running` / `blocked` /
128
+ `queued` runs are NEVER reclaimable; the check reads live source state and fails
129
+ closed (`non-terminal` | `not-archived` | `within-retention` | `open-feedback` |
130
+ `unreadable` | `already-reclaimed`). **CW never reclaims by default** — every
131
+ reclamation knob defaults to reclaiming nothing, and `gc run` is an explicit operator
132
+ action, never a daemon.
133
+
134
+ ## MCP
135
+
136
+ `cw_gc_plan`, `cw_gc_run`, and `cw_gc_verify` are the peers of the CLI verbs,
137
+ registered in the capability registry and validated by `parity:check` (fail-closed
138
+ on drift). The read-only `gc plan` / `gc verify` payloads obey the now-derived-field
139
+ rule: only ISO timestamps may be now-derived.
140
+
141
+ ## Policy-as-data
142
+
143
+ Retention/reclamation thresholds extend `RunRegistryPolicy` (alongside
144
+ `archiveOlderThanDays`), never a new policy file: `reclaimAfterArchiveDays`,
145
+ `keepSnapshots`, `keepScratch`, `reclaimStates`, `maxReclaimRuns`, `maxReclaimBytes`.
146
+ Back-compatible defaults reclaim nothing; pre-v0.1.39 runs load unchanged.
147
+
148
+ ## Compatibility
149
+
150
+ Additive. The kernel `state.json` schema is unchanged beyond the new per-run
151
+ `reclaimed.json` overlay + policy fields; pre-v0.1.39 runs load unchanged. The
152
+ `RunLifecycleState` union gains only `reclaimed`. `run archive` keeps its mark-only
153
+ semantics. Nothing in the original audit log is ever edited or erased.
154
+
155
+ ## See Also
156
+
157
+ - `docs/run-registry-control-plane.7.md` — the v0.1.28 archive overlay this extends.
158
+ - `docs/node-snapshot-diff-replay.7.md` — the v0.1.35 snapshot engine that reconstruction is layered beside.
159
+ - `docs/control-plane-scheduling.7.md` — the v0.1.37 policy-as-data lineage.
160
+ - `docs/team-collaboration.7.md` — the v0.1.32 append-only log sealed in the skeleton.
161
+
162
+ ## Durable State & Locking (v0.1.40)
163
+
164
+ Atomic temp->rename writes plus fsync durability for authoritative stores; a portable stale-stealing file lock serializes the cross-process read-modify-write stores.
165
+
166
+ ## Self-Audit Hardening & Pure-Router Decomposition (v0.1.41)
167
+
168
+ evidence grounding + durable audit append + symlink-hardened containment + deterministic worker ids + recursive redaction; BackendRegistry self-describing drivers (no per-id switches); orchestrator god-object decomposed into per-domain operation modules (pure loadRun->delegate router)
169
+
170
+ ## Robust Result Ingest (v0.1.42)
171
+
172
+ Captures findings/evidence from any reasonable agent output shape (alternate keys + prose); CW derives grounded evidence itself and warns on empty capture — closing the v0.1.41 live-drive 'accepted with 0 captured' failure.
173
+
174
+ ## No-False-Green Gate & Launch Prep (v0.1.43)
175
+
176
+ Hard gate blocking empty-capture verifier-gated commits, plus quickstart and launch-prep docs.
177
+
178
+ ## Release-Gate Determinism & Agents Vendor (v0.1.44)
179
+
180
+ Release-readiness checks now validate the committed blob (`git show HEAD:<path>`) instead of the mutable working tree — eliminating false-red/false-green from concurrent working-tree writes (iCloud/Spotlight/editor). Adds the `agents` vendor manifest target: a generated `.agents/plugins/cool-workflow/` adapter giving any non-Claude AI agent one common interface to CW.
181
+
182
+ ## P1-P2 Fixes & CI Content Surfaces (v0.1.49)
183
+
184
+ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.46), vendor-adapter registry (v0.1.47), state auto-compaction and P2 fixes (v0.1.48), plus CI content-surface determinism hardening (v0.1.49).
185
+ 0.1.51
186
+
187
+ 0.1.76
188
+
189
+ 0.1.77
190
+
191
+ 0.1.78
@@ -0,0 +1,137 @@
1
+ # SANDBOX-PROFILES(7)
2
+
3
+ ## NAME
4
+
5
+ Sandbox Profiles - named, durable worker policy contracts for Cool Workflow
6
+
7
+ ## SYNOPSIS
8
+
9
+ ```text
10
+ node dist/cli.js sandbox list
11
+ node dist/cli.js sandbox show readonly
12
+ node dist/cli.js sandbox validate ./site-sandbox.json
13
+ node dist/cli.js dispatch <run-id> --sandbox readonly
14
+ node dist/cli.js worker manifest <run-id> <worker-id>
15
+ ```
16
+
17
+ ## DESCRIPTION
18
+
19
+ A sandbox profile is a CW policy contract. It tells the agent host what a
20
+ worker may read, write, execute, access over the network, and receive through
21
+ environment variables.
22
+
23
+ It is not a container, jail, chroot, seatbelt profile, packet filter, or OS
24
+ process sandbox by itself. CW enforces profile validation, deterministic path
25
+ normalization, worker result acceptance, and durable feedback for denied worker
26
+ output. The agent host must enforce OS-level file access, process execution,
27
+ network access, and environment filtering.
28
+
29
+ The design goal is simple:
30
+
31
+ ```text
32
+ named policy -> resolved worker manifest -> host enforcement -> CW acceptance
33
+ ```
34
+
35
+ Profiles are selected at dispatch time and stored in run state, worker records,
36
+ dispatch manifests, worker manifests, feedback records, and reports.
37
+
38
+ ## BUNDLED PROFILES
39
+
40
+ `default`
41
+ : Preserves existing Worker Isolation behavior. Workers may read the workspace
42
+ and write only accepted worker output paths unless additional `allowedPaths`
43
+ are supplied by older APIs.
44
+
45
+ `readonly`
46
+ : Workers may read the workspace and write only worker-local output paths.
47
+ Network access is denied by profile. CW still relies on the host to enforce
48
+ read-only mounts or equivalent OS policy.
49
+
50
+ `workspace-write`
51
+ : Workers may read and write the workspace, plus worker-local output paths.
52
+ Use this only for workers expected to modify repository files.
53
+
54
+ `locked-down`
55
+ : Workers may read only `input.md` and write only `result.md`. Command,
56
+ network, and inherited environment access are denied by policy.
57
+
58
+ ## PROFILE SHAPE
59
+
60
+ Profile files use schema version `1`:
61
+
62
+ ```json
63
+ {
64
+ "schemaVersion": 1,
65
+ "id": "site-readonly",
66
+ "title": "Site Readonly",
67
+ "readPaths": ["$cwd"],
68
+ "writePaths": [],
69
+ "workerOutput": { "result": true, "artifacts": true, "logs": true },
70
+ "execute": { "mode": "none" },
71
+ "network": { "mode": "none" },
72
+ "env": { "inherit": false, "expose": ["PATH"] }
73
+ }
74
+ ```
75
+
76
+ Supported path tokens are `$cwd`, `$runDir`, `$workerDir`, `$inputPath`,
77
+ `$resultPath`, `$artifactsDir`, and `$logsDir`. Relative paths are resolved
78
+ from the run workspace. Empty paths, control characters, unknown tokens, and
79
+ `..` traversal are rejected.
80
+
81
+ `execute.mode` and `network.mode` are `none`, `allowlist`, or `any`.
82
+ Allowlisted commands or network targets are exact strings. Environment variable
83
+ names must use normal shell identifier syntax.
84
+
85
+ ## ENFORCEMENT
86
+
87
+ CW-enforced:
88
+
89
+ - profile existence and profile-file validation
90
+ - deterministic path resolution
91
+ - worker output acceptance against effective write paths
92
+ - rejected worker scope, error StateNode, and ErrorFeedback on denied output
93
+
94
+ Host-required:
95
+
96
+ - preventing reads outside `readPaths`
97
+ - preventing writes before CW accepts a result
98
+ - command execution restrictions
99
+ - network restrictions
100
+ - environment variable filtering
101
+
102
+ Worker manifests include both lists as `sandbox.enforcedByCW` and
103
+ `sandbox.hostRequired`. Do not present CW Sandbox Profiles as OS-level
104
+ sandboxing unless the agent host actually applies OS policy.
105
+
106
+ ## FILES
107
+
108
+ ```text
109
+ .cw/runs/<run-id>/state.json
110
+ .cw/runs/<run-id>/dispatches/<dispatch-id>.json
111
+ .cw/runs/<run-id>/workers/<worker-id>/worker.json
112
+ .cw/runs/<run-id>/workers/<worker-id>/manifest.json
113
+ .cw/runs/<run-id>/feedback/
114
+ .cw/runs/<run-id>/report.md
115
+ ```
116
+
117
+ ## FAILURE MODES
118
+
119
+ Unknown requested profiles fail closed with `sandbox-profile-not-found`.
120
+
121
+ Malformed profile files fail validation with `sandbox-profile-invalid`.
122
+
123
+ Denied worker output writes create `sandbox-write-denied` feedback. Runtime
124
+ helpers also provide `sandbox-read-denied`, `sandbox-network-denied`, and
125
+ `sandbox-command-denied` for hosts that want to record those decisions through
126
+ CW.
127
+
128
+ CW never silently downgrades a requested profile to `default`.
129
+
130
+ ## COMPATIBILITY
131
+
132
+ Sandbox Profiles were introduced in CW v0.1.8. The legacy `allowedPaths` field
133
+ remains in worker scopes and manifests as the effective write-path alias for
134
+ older callers. New hosts should read `sandboxPolicy.readPaths` and
135
+ `sandboxPolicy.writePaths`, then apply worker output allowances from
136
+ `sandboxPolicy.workerOutput`.
137
+ 0.1.51