cool-workflow 0.1.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. package/.claude-plugin/plugin.json +20 -0
  2. package/.codex-plugin/mcp.json +10 -0
  3. package/.codex-plugin/plugin.json +38 -0
  4. package/.mcp.json +10 -0
  5. package/LICENSE +24 -0
  6. package/README.md +638 -0
  7. package/apps/architecture-review/app.json +51 -0
  8. package/apps/architecture-review/workflow.js +116 -0
  9. package/apps/end-to-end-golden-path/app.json +30 -0
  10. package/apps/end-to-end-golden-path/workflow.js +33 -0
  11. package/apps/pr-review-fix-ci/app.json +59 -0
  12. package/apps/pr-review-fix-ci/workflow.js +90 -0
  13. package/apps/release-cut/app.json +54 -0
  14. package/apps/release-cut/workflow.js +82 -0
  15. package/apps/research-synthesis/app.json +50 -0
  16. package/apps/research-synthesis/workflow.js +76 -0
  17. package/apps/workflow-app-framework-demo/app.json +29 -0
  18. package/apps/workflow-app-framework-demo/workflow.js +44 -0
  19. package/dist/agent-config.js +223 -0
  20. package/dist/candidate-scoring.js +715 -0
  21. package/dist/capability-core.js +630 -0
  22. package/dist/capability-dispatcher.js +86 -0
  23. package/dist/capability-registry.js +523 -0
  24. package/dist/cli.js +1276 -0
  25. package/dist/collaboration.js +727 -0
  26. package/dist/commit.js +570 -0
  27. package/dist/contract-migration.js +234 -0
  28. package/dist/coordinator.js +1163 -0
  29. package/dist/daemon.js +44 -0
  30. package/dist/dispatch.js +201 -0
  31. package/dist/drive.js +503 -0
  32. package/dist/error-feedback.js +415 -0
  33. package/dist/evidence-grounding.js +179 -0
  34. package/dist/evidence-reasoning.js +733 -0
  35. package/dist/execution-backend.js +1279 -0
  36. package/dist/harness.js +61 -0
  37. package/dist/mcp-server.js +1615 -0
  38. package/dist/multi-agent-eval.js +857 -0
  39. package/dist/multi-agent-host.js +764 -0
  40. package/dist/multi-agent-operator-ux.js +537 -0
  41. package/dist/multi-agent-trust.js +366 -0
  42. package/dist/multi-agent.js +1173 -0
  43. package/dist/node-snapshot.js +270 -0
  44. package/dist/observability.js +922 -0
  45. package/dist/operator-ux.js +971 -0
  46. package/dist/orchestrator/audit-operations.js +182 -0
  47. package/dist/orchestrator/candidate-operations.js +117 -0
  48. package/dist/orchestrator/cli-options.js +288 -0
  49. package/dist/orchestrator/collaboration-operations.js +86 -0
  50. package/dist/orchestrator/feedback-operations.js +81 -0
  51. package/dist/orchestrator/host-operations.js +78 -0
  52. package/dist/orchestrator/lifecycle-operations.js +462 -0
  53. package/dist/orchestrator/migration-operations.js +44 -0
  54. package/dist/orchestrator/multi-agent-operations.js +362 -0
  55. package/dist/orchestrator/report.js +369 -0
  56. package/dist/orchestrator/topology-operations.js +84 -0
  57. package/dist/orchestrator.js +874 -0
  58. package/dist/pipeline-contract.js +92 -0
  59. package/dist/pipeline-runner.js +285 -0
  60. package/dist/reclamation.js +882 -0
  61. package/dist/result-normalize.js +194 -0
  62. package/dist/run-export.js +64 -0
  63. package/dist/run-registry.js +1347 -0
  64. package/dist/run-state-schema.js +67 -0
  65. package/dist/sandbox-profile.js +471 -0
  66. package/dist/scheduler.js +266 -0
  67. package/dist/scheduling.js +184 -0
  68. package/dist/schema-validate.js +98 -0
  69. package/dist/state-explosion.js +1213 -0
  70. package/dist/state-migrations.js +463 -0
  71. package/dist/state-node.js +301 -0
  72. package/dist/state.js +308 -0
  73. package/dist/telemetry-attestation.js +156 -0
  74. package/dist/telemetry-ledger.js +145 -0
  75. package/dist/topology.js +527 -0
  76. package/dist/triggers.js +159 -0
  77. package/dist/trust-audit.js +475 -0
  78. package/dist/types/blackboard.js +2 -0
  79. package/dist/types/boundary.js +29 -0
  80. package/dist/types/candidate.js +2 -0
  81. package/dist/types/collaboration.js +2 -0
  82. package/dist/types/core.js +2 -0
  83. package/dist/types/drive.js +10 -0
  84. package/dist/types/error-feedback.js +2 -0
  85. package/dist/types/evidence-reasoning.js +2 -0
  86. package/dist/types/execution-backend.js +2 -0
  87. package/dist/types/multi-agent.js +2 -0
  88. package/dist/types/observability.js +2 -0
  89. package/dist/types/pipeline.js +2 -0
  90. package/dist/types/reclamation.js +8 -0
  91. package/dist/types/result.js +2 -0
  92. package/dist/types/run-registry.js +2 -0
  93. package/dist/types/run.js +2 -0
  94. package/dist/types/sandbox.js +2 -0
  95. package/dist/types/schedule.js +2 -0
  96. package/dist/types/state-node.js +2 -0
  97. package/dist/types/topology.js +2 -0
  98. package/dist/types/trust.js +2 -0
  99. package/dist/types/workbench.js +2 -0
  100. package/dist/types/worker.js +2 -0
  101. package/dist/types/workflow-app.js +2 -0
  102. package/dist/types.js +43 -0
  103. package/dist/verifier-registry.js +46 -0
  104. package/dist/verifier.js +78 -0
  105. package/dist/version.js +8 -0
  106. package/dist/workbench-host.js +172 -0
  107. package/dist/workbench.js +190 -0
  108. package/dist/worker-isolation.js +1028 -0
  109. package/dist/workflow-api.js +98 -0
  110. package/dist/workflow-app-framework.js +626 -0
  111. package/docs/agent-delegation-drive.7.md +190 -0
  112. package/docs/agent-framework.md +176 -0
  113. package/docs/candidate-scoring.7.md +106 -0
  114. package/docs/canonical-workflow-apps.7.md +137 -0
  115. package/docs/capability-topology-registry.7.md +168 -0
  116. package/docs/cli-mcp-parity.7.md +373 -0
  117. package/docs/contract-migration-tooling.7.md +123 -0
  118. package/docs/control-plane-scheduling.7.md +110 -0
  119. package/docs/coordinator-blackboard.7.md +183 -0
  120. package/docs/dogfood/architecture-review-cool-workflow.md +16 -0
  121. package/docs/dogfood-one-real-repo.7.md +168 -0
  122. package/docs/durable-state-and-locking.7.md +107 -0
  123. package/docs/end-to-end-golden-path.7.md +117 -0
  124. package/docs/error-feedback.7.md +153 -0
  125. package/docs/evidence-adoption-reasoning-chain.7.md +270 -0
  126. package/docs/execution-backends.7.md +300 -0
  127. package/docs/getting-started.md +99 -0
  128. package/docs/index.md +41 -0
  129. package/docs/mcp-app-surface.7.md +235 -0
  130. package/docs/multi-agent-cli-mcp-surface.7.md +265 -0
  131. package/docs/multi-agent-eval-replay-harness.7.md +302 -0
  132. package/docs/multi-agent-operator-ux.7.md +314 -0
  133. package/docs/multi-agent-runtime-core.7.md +231 -0
  134. package/docs/multi-agent-topologies.7.md +103 -0
  135. package/docs/multi-agent-trust-policy-audit.7.md +154 -0
  136. package/docs/node-snapshot-diff-replay.7.md +135 -0
  137. package/docs/observability-cost-accounting.7.md +194 -0
  138. package/docs/operator-ux.7.md +180 -0
  139. package/docs/pipeline-runner.7.md +136 -0
  140. package/docs/project-index.md +261 -0
  141. package/docs/real-execution-backends.7.md +142 -0
  142. package/docs/release-and-migration.7.md +280 -0
  143. package/docs/release-tooling.7.md +159 -0
  144. package/docs/routines.md +48 -0
  145. package/docs/run-registry-control-plane.7.md +312 -0
  146. package/docs/run-retention-reclamation.7.md +191 -0
  147. package/docs/sandbox-profiles.7.md +137 -0
  148. package/docs/scheduled-tasks.md +80 -0
  149. package/docs/security-trust-hardening.7.md +117 -0
  150. package/docs/state-explosion-management.7.md +264 -0
  151. package/docs/state-node.7.md +96 -0
  152. package/docs/team-collaboration.7.md +207 -0
  153. package/docs/unix-principles.md +192 -0
  154. package/docs/verifier-gated-commit.7.md +140 -0
  155. package/docs/web-desktop-workbench.7.md +215 -0
  156. package/docs/worker-isolation.7.md +167 -0
  157. package/docs/workflow-app-framework.7.md +274 -0
  158. package/manifest/README.md +43 -0
  159. package/manifest/plugin.manifest.json +316 -0
  160. package/manifest/pricing.policy.json +14 -0
  161. package/package.json +79 -0
  162. package/scripts/agents/claude-p-agent.js +104 -0
  163. package/scripts/agents/claude-p-agent.sh +9 -0
  164. package/scripts/agents/cw-attest-keygen.js +55 -0
  165. package/scripts/agents/cw-attest-wrap.js +143 -0
  166. package/scripts/block-unapproved-tag.sh +39 -0
  167. package/scripts/bump-version.js +249 -0
  168. package/scripts/canonical-apps.js +171 -0
  169. package/scripts/cw.js +4 -0
  170. package/scripts/dist-drift-check.js +79 -0
  171. package/scripts/dogfood-architecture-review.js +237 -0
  172. package/scripts/dogfood-release.js +624 -0
  173. package/scripts/forward-ref-docs.js +73 -0
  174. package/scripts/gen-manifests.js +232 -0
  175. package/scripts/golden-path.js +300 -0
  176. package/scripts/mcp-server.js +4 -0
  177. package/scripts/new-feature.js +121 -0
  178. package/scripts/parity-check.js +213 -0
  179. package/scripts/release-check.js +118 -0
  180. package/scripts/release-flow.js +272 -0
  181. package/scripts/release-gate.sh +85 -0
  182. package/scripts/sync-project-index.js +387 -0
  183. package/scripts/validate-run-state-schema.js +126 -0
  184. package/scripts/verify-container-selfref.js +64 -0
  185. package/scripts/version-sync-check.js +237 -0
  186. package/skills/cool-workflow/SKILL.md +162 -0
  187. package/skills/cool-workflow/references/commands.md +282 -0
  188. package/tsconfig.json +16 -0
  189. package/ui/workbench/app.css +76 -0
  190. package/ui/workbench/app.js +159 -0
  191. package/ui/workbench/index.html +32 -0
  192. package/workflows/architecture-review.workflow.js +84 -0
  193. package/workflows/research-synthesis.workflow.js +47 -0
@@ -0,0 +1,190 @@
1
+ # Agent Delegation Drive
2
+
3
+ CW v0.1.38 adds Agent Delegation Drive: a way to run a natural-language-prompt
4
+ workflow **end-to-end by DELEGATING each worker to an EXTERNAL agent process**
5
+ (`claude -p` headless, `codex exec`, or a configured HTTP agent endpoint),
6
+ capturing each worker's `result.md` plus an attestation of which agent/model
7
+ produced it. It turns the `architecture-review` app into CW's first turnkey,
8
+ evidence-audited product: point CW at a repo, get an audited risk report — with
9
+ no human hand-writing any `result.md`.
10
+
11
+ Before v0.1.38, CW could `plan` a workflow, isolate workers, and accept their
12
+ output, but **nothing spawned the agent that wrote each `result.md`**. Running
13
+ `architecture-review` end-to-end meant an operator hand-writing all 14 worker
14
+ result files out-of-band, with no recorded attestation of which agent/model
15
+ produced each. The new `agent` backend + `run --drive` loop close that last mile.
16
+
17
+ ## The red line — delegate, do not internalize
18
+
19
+ CW **DELEGATES, IT DOES NOT BECOME THE EXECUTOR.** The `agent` backend does the
20
+ same thing the `container`/`remote`/`ci` backends do: it `spawnSync`s an
21
+ out-of-process child (the agent CLI, argv-style, `shell:false`) or POSTs to a
22
+ configured endpoint, then records a `BackendExecutionHandle` (`kind: "process"`)
23
+ + a `SandboxAttestation` + the canonical result envelope. **The model runs in the
24
+ agent's process, never inside CW.** CW imports no model SDK, holds no API key,
25
+ constructs no chat/completions request, and calls no model HTTP API. Any API key
26
+ flows from the agent's *own* inherited env; CW never reads or records it. Adding a
27
+ provider SDK to `package.json` would lose the neutral-audit moat and is the red
28
+ line.
29
+
30
+ ## Operator-chosen model is policy; agent-reported model is the attestation
31
+
32
+ Any model id CW passes **into** the agent invocation (`CW_AGENT_MODEL`
33
+ interpolated into `{{model}}`) is **policy-as-data the operator chose**. It is
34
+ recorded only as part of the secret-stripped command-template/args provenance and
35
+ is **never** the source of the attested `UsageRecord.model`. The recorded/attested
36
+ model id comes solely from what the external agent reports back in its output. If
37
+ the agent reports no model, CW records `unreported` — it never backfills from
38
+ `CW_AGENT_MODEL`. A configured `CW_AGENT_MODEL` that differs from the agent's
39
+ reported model does not overwrite the agent-reported model id.
40
+
41
+ ## Two layers, never conflated
42
+
43
+ 1. **Backend evidence triple.** `runAgentProcess` records the agent CHILD's
44
+ `command` + `exitCode` + `sha256(stdout)` — the identical mechanism
45
+ `runContainer`/`runHttpDelegation` use. This triple is byte-stable in SHAPE
46
+ across `node`/`container`/`remote`/`ci`/`agent`. It NEVER reads, parses, or
47
+ hashes a `result.md`.
48
+ 2. **`result.md` acceptance.** The worker's `result.md` `cw:result` envelope is
49
+ accepted in a SEPARATE layer (`recordWorkerOutput`), which validates it, copies
50
+ it into `resultsDir`, runs the verifier gate, and records trust-audit +
51
+ provenance — unchanged by this feature.
52
+
53
+ The agent handle (`kind: "process"`), the agent-reported model id, the prompt
54
+ digest, the secret-stripped args, and the result digest live in `provenance` and
55
+ the `worker.agent-delegation` trust-audit event — **never in `evidence`**.
56
+
57
+ ## The drive lifecycle
58
+
59
+ `run --drive` is a thin orchestrator over the EXISTING verbs + the v0.1.37
60
+ scheduler. For each worker the planner emits, in deterministic phase/dispatch
61
+ order:
62
+
63
+ ```
64
+ plan -> dispatch -> agent-fulfill (agent backend) -> recordWorkerOutput/verify -> commit
65
+ ```
66
+
67
+ - **dispatch** allocates the worker scope (`input.md` + manifest; the worker's own
68
+ sandbox profile, e.g. `readonly`).
69
+ - **agent-fulfill** delegates the worker to the `agent` backend out-of-process; the
70
+ agent reads the input/manifest and writes `result.md`; CW captures the child's
71
+ evidence triple + reported model.
72
+ - **accept** records + verifies `result.md`; the agent-hop attestation is folded
73
+ into the result node's metadata (so the v0.1.35 replay engine covers it).
74
+ - **commit** is verifier-gated on the Verdict node once every worker completes.
75
+
76
+ The Verdict `artifact` node is fulfilled through the SAME agent backend — it is a
77
+ worker scope with a `result.md` like any other. `--drive --once` advances exactly
78
+ one deterministic step (injected `now`); bare `--drive` runs to completion or to a
79
+ parked/blocked stop. `run drive <run-id>` (no `--step`) is the read-only,
80
+ deterministic preview of the next step.
81
+
82
+ ## Fail closed — probe vs refusal vs park
83
+
84
+ - **Probe.** `backend probe agent` reports `readiness: "ready"` iff a
85
+ command-template/endpoint is configured; otherwise `readiness: "unverified"`,
86
+ `ready: false`, with a non-empty reason — byte-identical in shape to
87
+ `backend probe remote` when unconfigured. It is NEVER a hard `refused`/`unavailable`.
88
+ - **runBackend.** Unconfigured execution (no command-template AND no endpoint)
89
+ returns a `delegation-target-missing` refusal — never a fabricated `completed`.
90
+ - **Failed hop.** A spawned agent that exits non-zero, returns no exit code,
91
+ produces no `result.md`, or produces a `result.md` that fails validation yields a
92
+ `refused`/`failed` envelope (or a rejected accept) — never a fabricated
93
+ completion.
94
+ - **Park.** In the drive loop, a worker whose agent hop keeps failing exhausts its
95
+ scheduling retry budget and lands **parked** (reusing v0.1.37 `retryOrPark`) — the
96
+ drive stops; it is never silently re-driven forever.
97
+
98
+ ## Replay determinism (bound to node-snapshot)
99
+
100
+ The attested record (model id, prompt digest, args, result digest, exit) is plain
101
+ data folded into the snapshotted node body. Replaying a recorded drive run via
102
+ `snapshotNode`/`replayNodeSnapshot`/`verifyNodeReplay` reproduces the SAME
103
+ audit/provenance graph and the same recorded digests, **without re-spawning the
104
+ agent or re-reading the live `result.md`** — even with the agent binary
105
+ unavailable. Two replays with different injected `now` are byte-identical in body
106
+ + `sourceFingerprint`/`outputFingerprint`.
107
+
108
+ ## Vendor neutrality + durable config
109
+
110
+ WHICH agent (claude / codex / ollama / an HTTP endpoint) is **policy expressed as
111
+ DATA** — a command-template and/or endpoint resolved in precedence order: flags > env
112
+ (`CW_AGENT_COMMAND` / `CW_AGENT_ENDPOINT` / `CW_AGENT_MODEL`) > a durable
113
+ `$CW_HOME/agent-config.json`. claude / codex / ollama are CONFIGS, never CW
114
+ dependencies. No secrets are written into the config or `.cw/`: it holds a
115
+ command-template + endpoint + operator-chosen model only; recorded command/args
116
+ are secret-stripped.
117
+
118
+ ## CLI
119
+
120
+ ```text
121
+ # configure the agent (policy as data; no API key is ever written)
122
+ # the bundled wrapper feeds input.md to headless claude READ-ONLY, persists
123
+ # result.md itself, and forwards claude's JSON (model+usage) for provenance.
124
+ # A bare "claude -p" or "claude -p {{input}}" does NOT complete a worker:
125
+ # headless claude gets no prompt content / cannot write result.md without it.
126
+ node dist/cli.js backend agent config set --agent-command "node $(pwd)/scripts/agents/claude-p-agent.js {{input}} {{result}}" --agent-model claude-opus-4-8
127
+ node dist/cli.js backend agent config # show the effective config (secret-stripped)
128
+ node dist/cli.js backend probe agent --json # ready iff configured, else unverified
129
+
130
+ # drive a real repo end-to-end (zero hand-written result.md)
131
+ node dist/cli.js run architecture-review --drive --repo /path/to/repo --question "Is the design sound?"
132
+ node dist/cli.js run architecture-review --drive --once --repo /path/to/repo --question "..." # one step
133
+ node dist/cli.js run drive <run-id> --json # read-only preview of the next step
134
+ ```
135
+
136
+ `{{manifest}}`, `{{input}}`, `{{result}}`, `{{workerDir}}`, `{{model}}`, and
137
+ `{{prompt}}` are substituted into DISCRETE argv elements (never a shell-interpreted
138
+ string). Each verb is declared once in `capability-registry.ts`, so `cw <cmd>
139
+ --json` is byte-identical to the matching `cw_<tool>` MCP tool for the read-only
140
+ preview/config-show verbs.
141
+
142
+ ## Compatibility
143
+
144
+ Agent Delegation Drive is introduced in CW v0.1.38. Adding the `agent` row leaves
145
+ `node`/`bun`/`shell`/`container`/`remote`/`ci` byte-identical; `backendIds()`
146
+ simply grows by one to the sorted 7-row set
147
+ `["agent","bun","ci","container","node","remote","shell"]`. A run driven manually
148
+ (plan → dispatch → `worker output` → commit) still works unchanged. Fields are
149
+ additive and optional; older run state loads unchanged. No `.cw/` layout change.
150
+
151
+ ## See Also
152
+
153
+ execution-backends(7), real-execution-backends(7), node-snapshot-diff-replay(7),
154
+ control-plane-scheduling(7), dogfood-one-real-repo(7), cli-mcp-parity(7),
155
+ observability-cost-accounting(7)
156
+
157
+ ## Run Retention & Provable Reclamation (v0.1.39)
158
+
159
+ Tiered, append-only, cryptographically-verifiable run reclamation: seal the audit skeleton, free the reconstructable bulk, prove it.
160
+
161
+ ## Durable State & Locking (v0.1.40)
162
+
163
+ Atomic temp->rename writes + fsync-durability for authoritative stores; portable stale-stealing file lock serializing the cross-process read-modify-write stores.
164
+
165
+ ## Self-Audit Hardening & Pure-Router Decomposition (v0.1.41)
166
+
167
+ Evidence grounding + durable audit append + symlink-hardened containment + deterministic worker ids + recursive redaction; BackendRegistry self-describing drivers (no per-id switches); orchestrator god-object decomposed into per-domain operation modules (pure loadRun->delegate router).
168
+
169
+ ## Robust Result Ingest (v0.1.42)
170
+
171
+ Capture findings/evidence from any reasonable agent shape (alt keys + prose); CW derives grounded evidence itself and warns on empty capture — closes the v0.1.41 live-drive 'accepted with 0 captured' failure.
172
+
173
+ ## No-False-Green Gate & Launch Prep (v0.1.43)
174
+
175
+ Hard gate blocking empty-capture verifier-gated commits, plus quickstart and launch-prep docs.
176
+
177
+ ## Release-Gate Determinism & Agents Vendor (v0.1.44)
178
+
179
+ Release-readiness checks now validate the committed blob (`git show HEAD:<path>`) instead of the mutable working tree — eliminating false-red/false-green from concurrent working-tree writes (iCloud/Spotlight/editor). Adds the `agents` vendor manifest target: a generated `.agents/plugins/cool-workflow/` adapter giving any non-Claude AI agent one common interface to CW.
180
+
181
+ ## P1-P2 Fixes & CI Content Surfaces (v0.1.49)
182
+
183
+ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.46), vendor-adapter registry (v0.1.47), state auto-compaction and P2 fixes (v0.1.48), plus CI content-surface determinism hardening (v0.1.49).
184
+ 0.1.51
185
+
186
+ 0.1.76
187
+
188
+ 0.1.77
189
+
190
+ 0.1.78
@@ -0,0 +1,176 @@
1
+ # Workflow App framework
2
+
3
+ CW is designed as an independent agent workflow control-plane.
4
+
5
+ The goal is to make agent development feel like building inside a platform
6
+ ecosystem. CW provides the runtime, contracts, storage, CLI, MCP bridge, and
7
+ package structure. Developers write workflow apps against those contracts.
8
+
9
+ The framework is guided by five practical systems principles: small kernel, explicit
10
+ state, composable pipes, isolated workers, and verifier-gated commits. See
11
+ [unix-principles.md](unix-principles.md).
12
+
13
+ ## Platform Contract
14
+
15
+ Every CW workflow follows this loop:
16
+
17
+ ```text
18
+ interpret -> act -> observe -> adjust -> checkpoint
19
+ ```
20
+
21
+ The loop maps to concrete framework operations:
22
+
23
+ | Loop stage | Framework operation | Responsibility |
24
+ | --- | --- | --- |
25
+ | Interpret | `plan()` | Load workflow, validate inputs, generate tasks |
26
+ | Act | `dispatch()` | Move runnable tasks from pending to running |
27
+ | Observe | `recordResult()` | Read Markdown/JSON-RPC result evidence |
28
+ | Adjust | verifier gates | Validate evidence and choose the next phase |
29
+ | Checkpoint | `commitState()` | Snapshot state after important transitions |
30
+
31
+ The v0.1.12 operator UX layer renders read-only summaries over run state:
32
+ human `status`, graph maps, report summaries, resource summaries, and
33
+ deterministic next-step recommendations. Scripts can keep using `--json` or
34
+ `--format json`.
35
+
36
+ The v0.1.13 MCP app surface exposes the same runtime operations to agent hosts
37
+ with stable JSON tools: app run, dispatch, worker inspection/output, candidate
38
+ scoring/selection, sandbox profile resolution, verifier-gated commit, and
39
+ operator status/graph/report summaries.
40
+
41
+ The v0.1.13 canonical app matrix validates and plans the maintained userland
42
+ apps with public CLI commands:
43
+
44
+ ```bash
45
+ npm run canonical-apps
46
+ ```
47
+
48
+ The golden path runs the full integration chain end to end:
49
+
50
+ ```bash
51
+ npm run golden-path
52
+ ```
53
+
54
+ It validates an app, plans a run, dispatches a readonly worker, accepts a
55
+ worker-local `cw:result`, scores and selects a candidate, creates a
56
+ verifier-gated commit, and renders a report. See
57
+ [end-to-end-golden-path.7.md](end-to-end-golden-path.7.md).
58
+
59
+ ## Developer Contract
60
+
61
+ A workflow app defines:
62
+
63
+ - `id`, `title`, and `summary`
64
+ - `schemaVersion`, app `version`, compatibility, and metadata when using the
65
+ first-class Workflow App framework contract
66
+ - required and repeated inputs
67
+ - phase order
68
+ - agent tasks
69
+ - artifact tasks
70
+ - concurrency limits
71
+ - evidence requirements
72
+ - sandbox profile hints
73
+
74
+ Example:
75
+
76
+ ```js
77
+ const {
78
+ defineWorkflowApp,
79
+ workflow,
80
+ phase,
81
+ agent,
82
+ artifact,
83
+ input
84
+ } = require("../dist/workflow-app-framework");
85
+
86
+ const inputs = [input("repo", { type: "path", required: true })];
87
+
88
+ module.exports = defineWorkflowApp({
89
+ schemaVersion: 1,
90
+ id: "example-review",
91
+ title: "Example Review",
92
+ summary: "Review a repository with evidence.",
93
+ version: "0.1.0",
94
+ inputs,
95
+ sandboxProfiles: ["readonly"],
96
+ compatibility: {
97
+ minVersion: "0.1.9"
98
+ },
99
+ workflow: workflow({
100
+ id: "example-review",
101
+ title: "Example Review",
102
+ inputs,
103
+ sandboxProfiles: ["readonly"],
104
+ phases: [
105
+ phase("Map", [
106
+ agent("map:system", "Map the system boundaries.", {
107
+ sandboxProfileId: "readonly"
108
+ })
109
+ ]),
110
+ phase("Verdict", [
111
+ artifact("verdict", "Write the final evidence-backed verdict.", {
112
+ requiresEvidence: true,
113
+ sandboxProfileId: "readonly"
114
+ })
115
+ ])
116
+ ]
117
+ })
118
+ });
119
+ ```
120
+
121
+ Legacy `module.exports = ({ workflow, phase, agent, artifact }) => workflow(...)`
122
+ files remain loadable. CW wraps them as compatibility apps with version `0.0.0`
123
+ so workflow files still plan and dispatch. When a canonical app owns the public
124
+ id, compatibility wrappers use explicit ids such as `legacy-research-synthesis`.
125
+
126
+ ## Language Contract
127
+
128
+ The CW platform is TypeScript:
129
+
130
+ ```text
131
+ src/*.ts -> dist/*.js
132
+ ```
133
+
134
+ Workflow apps are JavaScript modules:
135
+
136
+ ```text
137
+ workflows/*.workflow.js
138
+ apps/<app-id>/app.json
139
+ apps/<app-id>/workflow.js
140
+ ```
141
+
142
+ This is intentional. The runtime is strongly typed for maintainability, while
143
+ workflow scripts can run without `ts-node`.
144
+
145
+ See [workflow-app-framework.7.md](workflow-app-framework.7.md) for the full app contract,
146
+ validation rules, CLI commands, MCP tools, and state/report fields.
147
+ See [mcp-app-surface.7.md](mcp-app-surface.7.md) for the agent-host runtime
148
+ surface over MCP.
149
+ See [operator-ux.7.md](operator-ux.7.md) for the operator inspection surface.
150
+ See [canonical-workflow-apps.7.md](canonical-workflow-apps.7.md) for the
151
+ official app matrix.
152
+ See [end-to-end-golden-path.7.md](end-to-end-golden-path.7.md) for the
153
+ deterministic release proof that those pieces connect.
154
+
155
+ ## Evidence Contract
156
+
157
+ Verification and verdict tasks should return:
158
+
159
+ ````text
160
+ ```cw:result
161
+ {
162
+ "summary": "short summary",
163
+ "findings": [],
164
+ "evidence": ["/absolute/path/file.ts:42"]
165
+ }
166
+ ```
167
+ ````
168
+
169
+ CW rejects high-priority findings without evidence. This keeps agent work closer
170
+ to inspectable engineering output than to unconstrained conversation.
171
+
172
+ ## Boundary
173
+
174
+ CW is an independent workflow control-plane by COOLWHITE LLC. It implements dynamic workflows,
175
+ scheduled tasks, local scheduling, routine triggers, state checkpoints, and
176
+ multi-agent verification.
@@ -0,0 +1,106 @@
1
+ # CANDIDATE-SCORING(7)
2
+
3
+ ## NAME
4
+
5
+ Candidate Scoring - inspectable decision support for competing CW outputs
6
+
7
+ ## SYNOPSIS
8
+
9
+ ```ts
10
+ import {
11
+ registerCandidate,
12
+ scoreCandidate,
13
+ rankCandidates,
14
+ selectCandidate
15
+ } from "./candidate-scoring";
16
+
17
+ registerCandidate(run, { workerId, taskId, resultNodeId, verifierNodeId });
18
+ scoreCandidate(run, candidateId, {
19
+ scorer: "verifier",
20
+ criteria: { correctness: 4, evidence: 4, fit: 2 },
21
+ maxTotal: 10,
22
+ evidence: [{ id: "score:evidence", source: "test", locator: "test/file.js:1" }]
23
+ });
24
+ rankCandidates(run);
25
+ selectCandidate(run, candidateId);
26
+ ```
27
+
28
+ ```text
29
+ node dist/cli.js candidate register <run-id> --worker <worker-id>
30
+ node dist/cli.js candidate score <run-id> <candidate-id> --criterion correctness=4 --evidence path:line
31
+ node dist/cli.js candidate rank <run-id>
32
+ node dist/cli.js candidate select <run-id> <candidate-id>
33
+ ```
34
+
35
+ ## DESCRIPTION
36
+
37
+ Candidate Scoring is the small decision-support layer connecting isolated worker
38
+ outputs, result nodes, verifier evidence, candidate scores, selected winners,
39
+ ErrorFeedback, and commit/report.
40
+
41
+ It does not merge code, replace verifier judgment, spawn workers, or provide a
42
+ domain-specific ranking policy. A score is evidence, not authority.
43
+
44
+ The normal flow is:
45
+
46
+ ```text
47
+ worker output -> candidate record -> score record -> ranking
48
+ -> verifier-gated selection -> checkpoint/report
49
+ ```
50
+
51
+ Each step writes plain JSON. Rejected and failed candidates remain inspectable.
52
+
53
+ ## FILES
54
+
55
+ ```text
56
+ .cw/runs/<run-id>/candidates/index.json
57
+ .cw/runs/<run-id>/candidates/ranking.json
58
+ .cw/runs/<run-id>/candidates/<candidate-id>/candidate.json
59
+ .cw/runs/<run-id>/candidates/<candidate-id>/scores/<score-id>.json
60
+ .cw/runs/<run-id>/candidates/selections/<selection-id>.json
61
+ .cw/runs/<run-id>/nodes/
62
+ .cw/runs/<run-id>/feedback/
63
+ .cw/runs/<run-id>/report.md
64
+ ```
65
+
66
+ Candidate records point at existing worker, result, verifier, and artifact
67
+ paths. They do not copy large worker outputs by default.
68
+
69
+ ## SELECTION GATE
70
+
71
+ Selection is conservative by default:
72
+
73
+ - score records require evidence
74
+ - selection requires a linked verifier node with `verified` status
75
+ - selection failures become ErrorFeedback records
76
+ - rejected candidates remain on disk
77
+
78
+ Operators can record an unverified selection only with an explicit option. That
79
+ records selection state but does not turn the candidate into committed state.
80
+
81
+ Committed state has a stricter rule. A candidate can be promoted by
82
+ `cw.js commit --candidate` or `cw.js commit --selection` only when it has score
83
+ evidence, a verified selection, and a linked verifier node with evidence.
84
+ Rejected, failed, unscored, unselected, and unverified candidates are blocked
85
+ and produce ErrorFeedback.
86
+
87
+ ## FAILURE MODES
88
+
89
+ Missing score evidence fails scoring and records feedback.
90
+
91
+ Selecting a failed or rejected candidate fails and records feedback.
92
+
93
+ Selecting without a verified verifier node fails unless explicitly allowed.
94
+
95
+ Tie-breaking is predictable: higher normalized score wins; equal scores use the
96
+ configured tie-breaker, defaulting to earlier candidate creation time.
97
+
98
+ ## COMPATIBILITY
99
+
100
+ Candidate Scoring is introduced in CW v0.1.6. It adds optional candidate paths
101
+ and arrays to run state. Older runs remain readable because missing candidate
102
+ fields are initialized when state loads.
103
+
104
+ Existing workflow, worker, feedback, node, contract, result, commit, and report
105
+ commands remain compatible.
106
+ 0.1.51
@@ -0,0 +1,137 @@
1
+ # Canonical Workflow Apps
2
+
3
+ Canonical Workflow Apps are the official CW userland apps maintained with the
4
+ runtime. They are not loose examples. Each one lives in a first-class app
5
+ directory:
6
+
7
+ ```text
8
+ apps/<app-id>/app.json
9
+ apps/<app-id>/workflow.js
10
+ ```
11
+
12
+ The runner remains the base system. Canonical apps carry domain behavior:
13
+ inputs, phases, task prompts, evidence gates, sandbox profile hints, and app
14
+ metadata.
15
+
16
+ ## Apps
17
+
18
+ `architecture-review`
19
+
20
+ Map a repository architecture, assess risks, verify important findings, and
21
+ synthesize an evidence-backed verdict.
22
+
23
+ ```bash
24
+ node scripts/cw.js plan architecture-review \
25
+ --repo /path/to/repo \
26
+ --question "Is this architecture sound?" \
27
+ --invariant "public API stays stable" \
28
+ --focus "runtime"
29
+ ```
30
+
31
+ `pr-review-fix-ci`
32
+
33
+ Review a pull request or branch, inspect CI failures, diagnose actionable
34
+ issues, optionally patch when `--mode fix` is allowed, verify outcomes, and
35
+ summarize with evidence.
36
+
37
+ ```bash
38
+ node scripts/cw.js plan pr-review-fix-ci \
39
+ --repo /path/to/repo \
40
+ --pr 123 \
41
+ --base main \
42
+ --ci "unit-tests" \
43
+ --mode review
44
+ ```
45
+
46
+ `release-cut`
47
+
48
+ Prepare a release with checklist discipline: version checks, changelog, tests,
49
+ packaging, release notes, and final verification.
50
+
51
+ ```bash
52
+ node scripts/cw.js plan release-cut \
53
+ --repo /path/to/repo \
54
+ --version 0.1.13 \
55
+ --previousVersion 0.1.11 \
56
+ --dryRun true
57
+ ```
58
+
59
+ `research-synthesis`
60
+
61
+ Split a research question into claims, investigate sources, cross-check
62
+ evidence, verify claims, and synthesize a concise answer.
63
+
64
+ ```bash
65
+ node scripts/cw.js plan research-synthesis \
66
+ --cwd /tmp/research-run \
67
+ --question "What does the evidence support?" \
68
+ --source "official-docs" \
69
+ --scope "local sources first" \
70
+ --freshness "as of today"
71
+ ```
72
+
73
+ ## Validation Matrix
74
+
75
+ Run the canonical app matrix from the plugin root:
76
+
77
+ ```bash
78
+ cd plugins/cool-workflow
79
+ npm run canonical-apps
80
+ ```
81
+
82
+ The command uses only Node.js standard library APIs and local temporary
83
+ workspaces. It validates each canonical app, shows its app metadata, plans it
84
+ with representative inputs, checks app id/version metadata in run state, checks
85
+ evidence-required verification or synthesis/verdict tasks, checks sandbox
86
+ profile hints, checks unique task ids, and checks duplicate ids do not break
87
+ discovery.
88
+
89
+ `npm test` includes `test/canonical-workflow-apps-smoke.js`, which repeats the
90
+ same core assertions against generated `dist/`.
91
+
92
+ ## Framework Pressure
93
+
94
+ The apps intentionally stress different parts of the Workflow App framework:
95
+
96
+ - declared required, optional, and repeated inputs
97
+ - app-directory discovery and app metadata
98
+ - readonly, locked-down, and workspace-write sandbox hints
99
+ - evidence-required verifier, synthesis, summary, and verdict tasks
100
+ - deterministic planning into temporary workspaces
101
+ - compatibility between canonical app ids and legacy workflow-file wrappers
102
+
103
+ The legacy `workflows/architecture-review.workflow.js` and
104
+ `workflows/research-synthesis.workflow.js` files remain loadable with explicit
105
+ compatibility ids:
106
+
107
+ ```text
108
+ legacy-architecture-review
109
+ legacy-research-synthesis
110
+ ```
111
+
112
+ The public `architecture-review` and `research-synthesis` ids are now owned by
113
+ the canonical app directories.
114
+
115
+ ## Relationship To The Golden Path
116
+
117
+ `npm run canonical-apps` proves the official userland app matrix validates and
118
+ plans correctly. It does not run every worker for every app.
119
+
120
+ `npm run golden-path` remains the full integration proof:
121
+
122
+ ```text
123
+ workflow app -> plan -> dispatch -> isolated worker -> candidate scoring
124
+ -> verifier -> gated commit -> report
125
+ ```
126
+
127
+ Together they keep the kernel small while making the maintained userland boring,
128
+ inspectable, and useful.
129
+
130
+ Use the Operator UX commands to inspect any canonical app run:
131
+
132
+ ```bash
133
+ node scripts/cw.js status <run-id>
134
+ node scripts/cw.js graph <run-id>
135
+ node scripts/cw.js report <run-id> --summary
136
+ ```
137
+ 0.1.51