cool-workflow 0.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +20 -0
- package/.codex-plugin/mcp.json +10 -0
- package/.codex-plugin/plugin.json +38 -0
- package/.mcp.json +10 -0
- package/LICENSE +24 -0
- package/README.md +638 -0
- package/apps/architecture-review/app.json +51 -0
- package/apps/architecture-review/workflow.js +116 -0
- package/apps/end-to-end-golden-path/app.json +30 -0
- package/apps/end-to-end-golden-path/workflow.js +33 -0
- package/apps/pr-review-fix-ci/app.json +59 -0
- package/apps/pr-review-fix-ci/workflow.js +90 -0
- package/apps/release-cut/app.json +54 -0
- package/apps/release-cut/workflow.js +82 -0
- package/apps/research-synthesis/app.json +50 -0
- package/apps/research-synthesis/workflow.js +76 -0
- package/apps/workflow-app-framework-demo/app.json +29 -0
- package/apps/workflow-app-framework-demo/workflow.js +44 -0
- package/dist/agent-config.js +223 -0
- package/dist/candidate-scoring.js +715 -0
- package/dist/capability-core.js +630 -0
- package/dist/capability-dispatcher.js +86 -0
- package/dist/capability-registry.js +523 -0
- package/dist/cli.js +1276 -0
- package/dist/collaboration.js +727 -0
- package/dist/commit.js +570 -0
- package/dist/contract-migration.js +234 -0
- package/dist/coordinator.js +1163 -0
- package/dist/daemon.js +44 -0
- package/dist/dispatch.js +201 -0
- package/dist/drive.js +503 -0
- package/dist/error-feedback.js +415 -0
- package/dist/evidence-grounding.js +179 -0
- package/dist/evidence-reasoning.js +733 -0
- package/dist/execution-backend.js +1279 -0
- package/dist/harness.js +61 -0
- package/dist/mcp-server.js +1615 -0
- package/dist/multi-agent-eval.js +857 -0
- package/dist/multi-agent-host.js +764 -0
- package/dist/multi-agent-operator-ux.js +537 -0
- package/dist/multi-agent-trust.js +366 -0
- package/dist/multi-agent.js +1173 -0
- package/dist/node-snapshot.js +270 -0
- package/dist/observability.js +922 -0
- package/dist/operator-ux.js +971 -0
- package/dist/orchestrator/audit-operations.js +182 -0
- package/dist/orchestrator/candidate-operations.js +117 -0
- package/dist/orchestrator/cli-options.js +288 -0
- package/dist/orchestrator/collaboration-operations.js +86 -0
- package/dist/orchestrator/feedback-operations.js +81 -0
- package/dist/orchestrator/host-operations.js +78 -0
- package/dist/orchestrator/lifecycle-operations.js +462 -0
- package/dist/orchestrator/migration-operations.js +44 -0
- package/dist/orchestrator/multi-agent-operations.js +362 -0
- package/dist/orchestrator/report.js +369 -0
- package/dist/orchestrator/topology-operations.js +84 -0
- package/dist/orchestrator.js +874 -0
- package/dist/pipeline-contract.js +92 -0
- package/dist/pipeline-runner.js +285 -0
- package/dist/reclamation.js +882 -0
- package/dist/result-normalize.js +194 -0
- package/dist/run-export.js +64 -0
- package/dist/run-registry.js +1347 -0
- package/dist/run-state-schema.js +67 -0
- package/dist/sandbox-profile.js +471 -0
- package/dist/scheduler.js +266 -0
- package/dist/scheduling.js +184 -0
- package/dist/schema-validate.js +98 -0
- package/dist/state-explosion.js +1213 -0
- package/dist/state-migrations.js +463 -0
- package/dist/state-node.js +301 -0
- package/dist/state.js +308 -0
- package/dist/telemetry-attestation.js +156 -0
- package/dist/telemetry-ledger.js +145 -0
- package/dist/topology.js +527 -0
- package/dist/triggers.js +159 -0
- package/dist/trust-audit.js +475 -0
- package/dist/types/blackboard.js +2 -0
- package/dist/types/boundary.js +29 -0
- package/dist/types/candidate.js +2 -0
- package/dist/types/collaboration.js +2 -0
- package/dist/types/core.js +2 -0
- package/dist/types/drive.js +10 -0
- package/dist/types/error-feedback.js +2 -0
- package/dist/types/evidence-reasoning.js +2 -0
- package/dist/types/execution-backend.js +2 -0
- package/dist/types/multi-agent.js +2 -0
- package/dist/types/observability.js +2 -0
- package/dist/types/pipeline.js +2 -0
- package/dist/types/reclamation.js +8 -0
- package/dist/types/result.js +2 -0
- package/dist/types/run-registry.js +2 -0
- package/dist/types/run.js +2 -0
- package/dist/types/sandbox.js +2 -0
- package/dist/types/schedule.js +2 -0
- package/dist/types/state-node.js +2 -0
- package/dist/types/topology.js +2 -0
- package/dist/types/trust.js +2 -0
- package/dist/types/workbench.js +2 -0
- package/dist/types/worker.js +2 -0
- package/dist/types/workflow-app.js +2 -0
- package/dist/types.js +43 -0
- package/dist/verifier-registry.js +46 -0
- package/dist/verifier.js +78 -0
- package/dist/version.js +8 -0
- package/dist/workbench-host.js +172 -0
- package/dist/workbench.js +190 -0
- package/dist/worker-isolation.js +1028 -0
- package/dist/workflow-api.js +98 -0
- package/dist/workflow-app-framework.js +626 -0
- package/docs/agent-delegation-drive.7.md +190 -0
- package/docs/agent-framework.md +176 -0
- package/docs/candidate-scoring.7.md +106 -0
- package/docs/canonical-workflow-apps.7.md +137 -0
- package/docs/capability-topology-registry.7.md +168 -0
- package/docs/cli-mcp-parity.7.md +373 -0
- package/docs/contract-migration-tooling.7.md +123 -0
- package/docs/control-plane-scheduling.7.md +110 -0
- package/docs/coordinator-blackboard.7.md +183 -0
- package/docs/dogfood/architecture-review-cool-workflow.md +16 -0
- package/docs/dogfood-one-real-repo.7.md +168 -0
- package/docs/durable-state-and-locking.7.md +107 -0
- package/docs/end-to-end-golden-path.7.md +117 -0
- package/docs/error-feedback.7.md +153 -0
- package/docs/evidence-adoption-reasoning-chain.7.md +270 -0
- package/docs/execution-backends.7.md +300 -0
- package/docs/getting-started.md +99 -0
- package/docs/index.md +41 -0
- package/docs/mcp-app-surface.7.md +235 -0
- package/docs/multi-agent-cli-mcp-surface.7.md +265 -0
- package/docs/multi-agent-eval-replay-harness.7.md +302 -0
- package/docs/multi-agent-operator-ux.7.md +314 -0
- package/docs/multi-agent-runtime-core.7.md +231 -0
- package/docs/multi-agent-topologies.7.md +103 -0
- package/docs/multi-agent-trust-policy-audit.7.md +154 -0
- package/docs/node-snapshot-diff-replay.7.md +135 -0
- package/docs/observability-cost-accounting.7.md +194 -0
- package/docs/operator-ux.7.md +180 -0
- package/docs/pipeline-runner.7.md +136 -0
- package/docs/project-index.md +261 -0
- package/docs/real-execution-backends.7.md +142 -0
- package/docs/release-and-migration.7.md +280 -0
- package/docs/release-tooling.7.md +159 -0
- package/docs/routines.md +48 -0
- package/docs/run-registry-control-plane.7.md +312 -0
- package/docs/run-retention-reclamation.7.md +191 -0
- package/docs/sandbox-profiles.7.md +137 -0
- package/docs/scheduled-tasks.md +80 -0
- package/docs/security-trust-hardening.7.md +117 -0
- package/docs/state-explosion-management.7.md +264 -0
- package/docs/state-node.7.md +96 -0
- package/docs/team-collaboration.7.md +207 -0
- package/docs/unix-principles.md +192 -0
- package/docs/verifier-gated-commit.7.md +140 -0
- package/docs/web-desktop-workbench.7.md +215 -0
- package/docs/worker-isolation.7.md +167 -0
- package/docs/workflow-app-framework.7.md +274 -0
- package/manifest/README.md +43 -0
- package/manifest/plugin.manifest.json +316 -0
- package/manifest/pricing.policy.json +14 -0
- package/package.json +79 -0
- package/scripts/agents/claude-p-agent.js +104 -0
- package/scripts/agents/claude-p-agent.sh +9 -0
- package/scripts/agents/cw-attest-keygen.js +55 -0
- package/scripts/agents/cw-attest-wrap.js +143 -0
- package/scripts/block-unapproved-tag.sh +39 -0
- package/scripts/bump-version.js +249 -0
- package/scripts/canonical-apps.js +171 -0
- package/scripts/cw.js +4 -0
- package/scripts/dist-drift-check.js +79 -0
- package/scripts/dogfood-architecture-review.js +237 -0
- package/scripts/dogfood-release.js +624 -0
- package/scripts/forward-ref-docs.js +73 -0
- package/scripts/gen-manifests.js +232 -0
- package/scripts/golden-path.js +300 -0
- package/scripts/mcp-server.js +4 -0
- package/scripts/new-feature.js +121 -0
- package/scripts/parity-check.js +213 -0
- package/scripts/release-check.js +118 -0
- package/scripts/release-flow.js +272 -0
- package/scripts/release-gate.sh +85 -0
- package/scripts/sync-project-index.js +387 -0
- package/scripts/validate-run-state-schema.js +126 -0
- package/scripts/verify-container-selfref.js +64 -0
- package/scripts/version-sync-check.js +237 -0
- package/skills/cool-workflow/SKILL.md +162 -0
- package/skills/cool-workflow/references/commands.md +282 -0
- package/tsconfig.json +16 -0
- package/ui/workbench/app.css +76 -0
- package/ui/workbench/app.js +159 -0
- package/ui/workbench/index.html +32 -0
- package/workflows/architecture-review.workflow.js +84 -0
- package/workflows/research-synthesis.workflow.js +47 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# Agent Delegation Drive
|
|
2
|
+
|
|
3
|
+
CW v0.1.38 adds Agent Delegation Drive: a way to run a natural-language-prompt
|
|
4
|
+
workflow **end-to-end by DELEGATING each worker to an EXTERNAL agent process**
|
|
5
|
+
(`claude -p` headless, `codex exec`, or a configured HTTP agent endpoint),
|
|
6
|
+
capturing each worker's `result.md` plus an attestation of which agent/model
|
|
7
|
+
produced it. It turns the `architecture-review` app into CW's first turnkey,
|
|
8
|
+
evidence-audited product: point CW at a repo, get an audited risk report — with
|
|
9
|
+
no human hand-writing any `result.md`.
|
|
10
|
+
|
|
11
|
+
Before v0.1.38, CW could `plan` a workflow, isolate workers, and accept their
|
|
12
|
+
output, but **nothing spawned the agent that wrote each `result.md`**. Running
|
|
13
|
+
`architecture-review` end-to-end meant an operator hand-writing all 14 worker
|
|
14
|
+
result files out-of-band, with no recorded attestation of which agent/model
|
|
15
|
+
produced each. The new `agent` backend + `run --drive` loop close that last mile.
|
|
16
|
+
|
|
17
|
+
## The red line — delegate, do not internalize
|
|
18
|
+
|
|
19
|
+
CW **DELEGATES, IT DOES NOT BECOME THE EXECUTOR.** The `agent` backend does the
|
|
20
|
+
same thing the `container`/`remote`/`ci` backends do: it `spawnSync`s an
|
|
21
|
+
out-of-process child (the agent CLI, argv-style, `shell:false`) or POSTs to a
|
|
22
|
+
configured endpoint, then records a `BackendExecutionHandle` (`kind: "process"`)
|
|
23
|
+
plus a `SandboxAttestation` and the canonical result envelope. **The model runs in the
|
|
24
|
+
agent's process, never inside CW.** CW imports no model SDK, holds no API key,
|
|
25
|
+
constructs no chat/completions request, and calls no model HTTP API. Any API key
|
|
26
|
+
flows from the agent's *own* inherited env; CW never reads or records it. Adding a
|
|
27
|
+
provider SDK to `package.json` would lose the neutral-audit moat and is the red
|
|
28
|
+
line.
|
|
29
|
+
|
|
30
|
+
## Operator-chosen model is policy; agent-reported model is the attestation
|
|
31
|
+
|
|
32
|
+
Any model id CW passes **into** the agent invocation (`CW_AGENT_MODEL`
|
|
33
|
+
interpolated into `{{model}}`) is **policy-as-data the operator chose**. It is
|
|
34
|
+
recorded only as part of the secret-stripped command-template/args provenance and
|
|
35
|
+
is **never** the source of the attested `UsageRecord.model`. The recorded/attested
|
|
36
|
+
model id comes solely from what the external agent reports back in its output. If
|
|
37
|
+
the agent reports no model, CW records `unreported` — it never backfills from
|
|
38
|
+
`CW_AGENT_MODEL`. A configured `CW_AGENT_MODEL` that differs from the agent's
|
|
39
|
+
reported model does not overwrite the agent-reported model id.
|
|
40
|
+
|
|
41
|
+
## Two layers, never conflated
|
|
42
|
+
|
|
43
|
+
1. **Backend evidence triple.** `runAgentProcess` records the agent CHILD's
|
|
44
|
+
`command` + `exitCode` + `sha256(stdout)` — the identical mechanism
|
|
45
|
+
`runContainer`/`runHttpDelegation` use. This triple is byte-stable in SHAPE
|
|
46
|
+
across `node`/`container`/`remote`/`ci`/`agent`. It NEVER reads, parses, or
|
|
47
|
+
hashes a `result.md`.
|
|
48
|
+
2. **`result.md` acceptance.** The worker's `result.md` `cw:result` envelope is
|
|
49
|
+
accepted in a SEPARATE layer (`recordWorkerOutput`), which validates it, copies
|
|
50
|
+
it into `resultsDir`, runs the verifier gate, and records trust-audit +
|
|
51
|
+
provenance — unchanged by this feature.
|
|
52
|
+
|
|
53
|
+
The agent handle (`kind: "process"`), the agent-reported model id, the prompt
|
|
54
|
+
digest, the secret-stripped args, and the result digest live in `provenance` and
|
|
55
|
+
the `worker.agent-delegation` trust-audit event — **never in `evidence`**.
|
|
56
|
+
|
|
57
|
+
## The drive lifecycle
|
|
58
|
+
|
|
59
|
+
`run --drive` is a thin orchestrator over the EXISTING verbs + the v0.1.37
|
|
60
|
+
scheduler. For each worker the planner emits, in deterministic phase/dispatch
|
|
61
|
+
order:
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
plan -> dispatch -> agent-fulfill (agent backend) -> recordWorkerOutput/verify -> commit
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
- **dispatch** allocates the worker scope (`input.md` + manifest; the worker's own
|
|
68
|
+
sandbox profile, e.g. `readonly`).
|
|
69
|
+
- **agent-fulfill** delegates the worker to the `agent` backend out-of-process; the
|
|
70
|
+
agent reads the input/manifest and writes `result.md`; CW captures the child's
|
|
71
|
+
evidence triple + reported model.
|
|
72
|
+
- **accept** records + verifies `result.md`; the agent-hop attestation is folded
|
|
73
|
+
into the result node's metadata (so the v0.1.35 replay engine covers it).
|
|
74
|
+
- **commit** is verifier-gated on the Verdict node once every worker completes.
|
|
75
|
+
|
|
76
|
+
The Verdict `artifact` node is fulfilled through the SAME agent backend — it is a
|
|
77
|
+
worker scope with a `result.md` like any other. `--drive --once` advances exactly
|
|
78
|
+
one deterministic step (injected `now`); bare `--drive` runs to completion or to a
|
|
79
|
+
parked/blocked stop. `run drive <run-id>` (no `--step`) is the read-only,
|
|
80
|
+
deterministic preview of the next step.
|
|
81
|
+
|
|
82
|
+
## Fail closed — probe vs refusal vs park
|
|
83
|
+
|
|
84
|
+
- **Probe.** `backend probe agent` reports `readiness: "ready"` iff a
|
|
85
|
+
command-template/endpoint is configured; otherwise `readiness: "unverified"`,
|
|
86
|
+
`ready: false`, with a non-empty reason — byte-identical in shape to
|
|
87
|
+
an unconfigured `backend probe remote`. It is NEVER a hard `refused`/`unavailable`.
|
|
88
|
+
- **runBackend.** Unconfigured execution (no command-template AND no endpoint)
|
|
89
|
+
returns a `delegation-target-missing` refusal — never a fabricated `completed`.
|
|
90
|
+
- **Failed hop.** A spawned agent that exits non-zero, returns no exit code,
|
|
91
|
+
produces no `result.md`, or produces a `result.md` that fails validation yields a
|
|
92
|
+
`refused`/`failed` envelope (or a rejected accept) — never a fabricated
|
|
93
|
+
completion.
|
|
94
|
+
- **Park.** In the drive loop, a worker whose agent hop keeps failing exhausts its
|
|
95
|
+
scheduling retry budget and lands **parked** (reusing v0.1.37 `retryOrPark`) — the
|
|
96
|
+
drive stops; it is never silently re-driven forever.
|
|
97
|
+
|
|
98
|
+
## Replay determinism (bound to node-snapshot)
|
|
99
|
+
|
|
100
|
+
The attested record (model id, prompt digest, args, result digest, exit) is plain
|
|
101
|
+
data folded into the snapshotted node body. Replaying a recorded drive run via
|
|
102
|
+
`snapshotNode`/`replayNodeSnapshot`/`verifyNodeReplay` reproduces the SAME
|
|
103
|
+
audit/provenance graph and the same recorded digests, **without re-spawning the
|
|
104
|
+
agent or re-reading the live `result.md`** — even with the agent binary
|
|
105
|
+
unavailable. Two replays with different injected `now` are byte-identical in body
|
|
106
|
+
and `sourceFingerprint`/`outputFingerprint`.
|
|
107
|
+
|
|
108
|
+
## Vendor neutrality + durable config
|
|
109
|
+
|
|
110
|
+
WHICH agent (claude / codex / ollama / an HTTP endpoint) is **policy expressed as
|
|
111
|
+
DATA** — a command-template and/or endpoint resolved in precedence order: flags > env
|
|
112
|
+
(`CW_AGENT_COMMAND` / `CW_AGENT_ENDPOINT` / `CW_AGENT_MODEL`) > a durable
|
|
113
|
+
`$CW_HOME/agent-config.json`. claude / codex / ollama are CONFIGS, never CW
|
|
114
|
+
dependencies. No secrets are written into the config or `.cw/`: it holds a
|
|
115
|
+
command-template + endpoint + operator-chosen model only; recorded command/args
|
|
116
|
+
are secret-stripped.
|
|
117
|
+
|
|
118
|
+
## CLI
|
|
119
|
+
|
|
120
|
+
```text
|
|
121
|
+
# configure the agent (policy as data; no API key is ever written)
|
|
122
|
+
# the bundled wrapper feeds input.md to headless claude READ-ONLY, persists
|
|
123
|
+
# result.md itself, and forwards claude's JSON (model+usage) for provenance.
|
|
124
|
+
# A bare "claude -p" or "claude -p {{input}}" does NOT complete a worker:
|
|
125
|
+
# headless claude gets no prompt content / cannot write result.md without it.
|
|
126
|
+
node dist/cli.js backend agent config set --agent-command "node $(pwd)/scripts/agents/claude-p-agent.js {{input}} {{result}}" --agent-model claude-opus-4-8
|
|
127
|
+
node dist/cli.js backend agent config # show the effective config (secret-stripped)
|
|
128
|
+
node dist/cli.js backend probe agent --json # ready iff configured, else unverified
|
|
129
|
+
|
|
130
|
+
# drive a real repo end-to-end (zero hand-written result.md)
|
|
131
|
+
node dist/cli.js run architecture-review --drive --repo /path/to/repo --question "Is the design sound?"
|
|
132
|
+
node dist/cli.js run architecture-review --drive --once --repo /path/to/repo --question "..." # one step
|
|
133
|
+
node dist/cli.js run drive <run-id> --json # read-only preview of the next step
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
`{{manifest}}`, `{{input}}`, `{{result}}`, `{{workerDir}}`, `{{model}}`, and
|
|
137
|
+
`{{prompt}}` are substituted into DISCRETE argv elements (never a shell-interpreted
|
|
138
|
+
string). Each verb is declared once in `capability-registry.ts`, so `cw <cmd>
|
|
139
|
+
--json` is byte-identical to the matching `cw_<tool>` MCP tool for the read-only
|
|
140
|
+
preview/config-show verbs.
|
|
141
|
+
|
|
142
|
+
## Compatibility
|
|
143
|
+
|
|
144
|
+
Agent Delegation Drive is introduced in CW v0.1.38. Adding the `agent` row leaves
|
|
145
|
+
`node`/`bun`/`shell`/`container`/`remote`/`ci` byte-identical; `backendIds()`
|
|
146
|
+
simply grows by one to the sorted 7-row set
|
|
147
|
+
`["agent","bun","ci","container","node","remote","shell"]`. A run driven manually
|
|
148
|
+
(plan → dispatch → `worker output` → commit) still works unchanged. Fields are
|
|
149
|
+
additive and optional; older run state loads unchanged. No `.cw/` layout change.
|
|
150
|
+
|
|
151
|
+
## See Also
|
|
152
|
+
|
|
153
|
+
execution-backends(7), real-execution-backends(7), node-snapshot-diff-replay(7),
|
|
154
|
+
control-plane-scheduling(7), dogfood-one-real-repo(7), cli-mcp-parity(7),
|
|
155
|
+
observability-cost-accounting(7)
|
|
156
|
+
|
|
157
|
+
## Run Retention & Provable Reclamation (v0.1.39)
|
|
158
|
+
|
|
159
|
+
Tiered, append-only, cryptographically-verifiable run reclamation: seal the audit skeleton, free the reconstructable bulk, prove it.
|
|
160
|
+
|
|
161
|
+
## Durable State & Locking (v0.1.40)
|
|
162
|
+
|
|
163
|
+
Atomic temp->rename writes + fsync-durability for authoritative stores; portable stale-stealing file lock serializing the cross-process read-modify-write stores.
|
|
164
|
+
|
|
165
|
+
## Self-Audit Hardening & Pure-Router Decomposition (v0.1.41)
|
|
166
|
+
|
|
167
|
+
Evidence grounding + durable audit append + symlink-hardened containment + deterministic worker ids + recursive redaction; BackendRegistry self-describing drivers (no per-id switches); orchestrator god-object decomposed into per-domain operation modules (pure loadRun->delegate router).
|
|
168
|
+
|
|
169
|
+
## Robust Result Ingest (v0.1.42)
|
|
170
|
+
|
|
171
|
+
Captures findings/evidence from any reasonable agent shape (alt keys + prose), derives grounded evidence itself, and warns on empty capture — closing the v0.1.41 live-drive 'accepted with 0 captured' failure.
|
|
172
|
+
|
|
173
|
+
## No-False-Green Gate & Launch Prep (v0.1.43)
|
|
174
|
+
|
|
175
|
+
Hard gate blocking empty-capture verifier-gated commits, plus quickstart and launch-prep docs.
|
|
176
|
+
|
|
177
|
+
## Release-Gate Determinism & Agents Vendor (v0.1.44)
|
|
178
|
+
|
|
179
|
+
Release-readiness checks now validate the committed blob (`git show HEAD:<path>`) instead of the mutable working tree — eliminating false-red/false-green from concurrent working-tree writes (iCloud/Spotlight/editor). Adds the `agents` vendor manifest target: a generated `.agents/plugins/cool-workflow/` adapter giving any non-Claude AI agent one common interface to CW.
|
|
180
|
+
|
|
181
|
+
## P1-P2 Fixes & CI Content Surfaces (v0.1.49)
|
|
182
|
+
|
|
183
|
+
Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.46), vendor-adapter registry (v0.1.47), state auto-compaction and P2 fixes (v0.1.48), plus CI content-surface determinism hardening (v0.1.49).
|
|
184
|
+
0.1.51
|
|
185
|
+
|
|
186
|
+
0.1.76
|
|
187
|
+
|
|
188
|
+
0.1.77
|
|
189
|
+
|
|
190
|
+
0.1.78
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# Workflow App framework
|
|
2
|
+
|
|
3
|
+
CW is designed as an independent agent workflow control-plane.
|
|
4
|
+
|
|
5
|
+
The goal is to make agent development feel like building inside a platform
|
|
6
|
+
ecosystem. CW provides the runtime, contracts, storage, CLI, MCP bridge, and
|
|
7
|
+
package structure. Developers write workflow apps against those contracts.
|
|
8
|
+
|
|
9
|
+
The framework is guided by five practical systems principles: small kernel, explicit
|
|
10
|
+
state, composable pipes, isolated workers, and verifier-gated commits. See
|
|
11
|
+
[unix-principles.md](unix-principles.md).
|
|
12
|
+
|
|
13
|
+
## Platform Contract
|
|
14
|
+
|
|
15
|
+
Every CW workflow follows this loop:
|
|
16
|
+
|
|
17
|
+
```text
|
|
18
|
+
interpret -> act -> observe -> adjust -> checkpoint
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
The loop maps to concrete framework operations:
|
|
22
|
+
|
|
23
|
+
| Loop stage | Framework operation | Responsibility |
|
|
24
|
+
| --- | --- | --- |
|
|
25
|
+
| Interpret | `plan()` | Load workflow, validate inputs, generate tasks |
|
|
26
|
+
| Act | `dispatch()` | Move runnable tasks from pending to running |
|
|
27
|
+
| Observe | `recordResult()` | Read Markdown/JSON-RPC result evidence |
|
|
28
|
+
| Adjust | verifier gates | Validate evidence and choose the next phase |
|
|
29
|
+
| Checkpoint | `commitState()` | Snapshot state after important transitions |
|
|
30
|
+
|
|
31
|
+
The v0.1.12 operator UX layer renders read-only summaries over run state:
|
|
32
|
+
human `status`, graph maps, report summaries, resource summaries, and
|
|
33
|
+
deterministic next-step recommendations. Scripts can keep using `--json` or
|
|
34
|
+
`--format json`.
|
|
35
|
+
|
|
36
|
+
The v0.1.13 MCP app surface exposes the same runtime operations to agent hosts
|
|
37
|
+
with stable JSON tools: app run, dispatch, worker inspection/output, candidate
|
|
38
|
+
scoring/selection, sandbox profile resolution, verifier-gated commit, and
|
|
39
|
+
operator status/graph/report summaries.
|
|
40
|
+
|
|
41
|
+
The v0.1.13 canonical app matrix validates and plans the maintained userland
|
|
42
|
+
apps with public CLI commands:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
npm run canonical-apps
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
The golden path runs the full integration chain end to end:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
npm run golden-path
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
It validates an app, plans a run, dispatches a readonly worker, accepts a
|
|
55
|
+
worker-local `cw:result`, scores and selects a candidate, creates a
|
|
56
|
+
verifier-gated commit, and renders a report. See
|
|
57
|
+
[end-to-end-golden-path.7.md](end-to-end-golden-path.7.md).
|
|
58
|
+
|
|
59
|
+
## Developer Contract
|
|
60
|
+
|
|
61
|
+
A workflow app defines:
|
|
62
|
+
|
|
63
|
+
- `id`, `title`, and `summary`
|
|
64
|
+
- `schemaVersion`, app `version`, compatibility, and metadata when using the
|
|
65
|
+
first-class Workflow App framework contract
|
|
66
|
+
- required and repeated inputs
|
|
67
|
+
- phase order
|
|
68
|
+
- agent tasks
|
|
69
|
+
- artifact tasks
|
|
70
|
+
- concurrency limits
|
|
71
|
+
- evidence requirements
|
|
72
|
+
- sandbox profile hints
|
|
73
|
+
|
|
74
|
+
Example:
|
|
75
|
+
|
|
76
|
+
```js
|
|
77
|
+
const {
|
|
78
|
+
defineWorkflowApp,
|
|
79
|
+
workflow,
|
|
80
|
+
phase,
|
|
81
|
+
agent,
|
|
82
|
+
artifact,
|
|
83
|
+
input
|
|
84
|
+
} = require("../dist/workflow-app-framework");
|
|
85
|
+
|
|
86
|
+
const inputs = [input("repo", { type: "path", required: true })];
|
|
87
|
+
|
|
88
|
+
module.exports = defineWorkflowApp({
|
|
89
|
+
schemaVersion: 1,
|
|
90
|
+
id: "example-review",
|
|
91
|
+
title: "Example Review",
|
|
92
|
+
summary: "Review a repository with evidence.",
|
|
93
|
+
version: "0.1.0",
|
|
94
|
+
inputs,
|
|
95
|
+
sandboxProfiles: ["readonly"],
|
|
96
|
+
compatibility: {
|
|
97
|
+
minVersion: "0.1.9"
|
|
98
|
+
},
|
|
99
|
+
workflow: workflow({
|
|
100
|
+
id: "example-review",
|
|
101
|
+
title: "Example Review",
|
|
102
|
+
inputs,
|
|
103
|
+
sandboxProfiles: ["readonly"],
|
|
104
|
+
phases: [
|
|
105
|
+
phase("Map", [
|
|
106
|
+
agent("map:system", "Map the system boundaries.", {
|
|
107
|
+
sandboxProfileId: "readonly"
|
|
108
|
+
})
|
|
109
|
+
]),
|
|
110
|
+
phase("Verdict", [
|
|
111
|
+
artifact("verdict", "Write the final evidence-backed verdict.", {
|
|
112
|
+
requiresEvidence: true,
|
|
113
|
+
sandboxProfileId: "readonly"
|
|
114
|
+
})
|
|
115
|
+
])
|
|
116
|
+
]
|
|
117
|
+
})
|
|
118
|
+
});
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Legacy `module.exports = ({ workflow, phase, agent, artifact }) => workflow(...)`
|
|
122
|
+
files remain loadable. CW wraps them as compatibility apps with version `0.0.0`
|
|
123
|
+
so workflow files still plan and dispatch. When a canonical app owns the public
|
|
124
|
+
id, compatibility wrappers use explicit ids such as `legacy-research-synthesis`.
|
|
125
|
+
|
|
126
|
+
## Language Contract
|
|
127
|
+
|
|
128
|
+
The CW platform is TypeScript:
|
|
129
|
+
|
|
130
|
+
```text
|
|
131
|
+
src/*.ts -> dist/*.js
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Workflow apps are JavaScript modules:
|
|
135
|
+
|
|
136
|
+
```text
|
|
137
|
+
workflows/*.workflow.js
|
|
138
|
+
apps/<app-id>/app.json
|
|
139
|
+
apps/<app-id>/workflow.js
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
This is intentional. The runtime is strongly typed for maintainability, while
|
|
143
|
+
workflow scripts can run without `ts-node`.
|
|
144
|
+
|
|
145
|
+
See [workflow-app-framework.7.md](workflow-app-framework.7.md) for the full app contract,
|
|
146
|
+
validation rules, CLI commands, MCP tools, and state/report fields.
|
|
147
|
+
See [mcp-app-surface.7.md](mcp-app-surface.7.md) for the agent-host runtime
|
|
148
|
+
surface over MCP.
|
|
149
|
+
See [operator-ux.7.md](operator-ux.7.md) for the operator inspection surface.
|
|
150
|
+
See [canonical-workflow-apps.7.md](canonical-workflow-apps.7.md) for the
|
|
151
|
+
official app matrix.
|
|
152
|
+
See [end-to-end-golden-path.7.md](end-to-end-golden-path.7.md) for the
|
|
153
|
+
deterministic release proof that those pieces connect.
|
|
154
|
+
|
|
155
|
+
## Evidence Contract
|
|
156
|
+
|
|
157
|
+
Verification and verdict tasks should return:
|
|
158
|
+
|
|
159
|
+
````text
|
|
160
|
+
```cw:result
|
|
161
|
+
{
|
|
162
|
+
"summary": "short summary",
|
|
163
|
+
"findings": [],
|
|
164
|
+
"evidence": ["/absolute/path/file.ts:42"]
|
|
165
|
+
}
|
|
166
|
+
```
|
|
167
|
+
````
|
|
168
|
+
|
|
169
|
+
CW rejects high-priority findings without evidence. This keeps agent work closer
|
|
170
|
+
to inspectable engineering output than unconstrained conversation.
|
|
171
|
+
|
|
172
|
+
## Boundary
|
|
173
|
+
|
|
174
|
+
CW is an independent workflow control-plane by COOLWHITE LLC. It implements dynamic workflows,
|
|
175
|
+
scheduled tasks, local scheduling, routine triggers, state checkpoints, and
|
|
176
|
+
multi-agent verification.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# CANDIDATE-SCORING(7)
|
|
2
|
+
|
|
3
|
+
## NAME
|
|
4
|
+
|
|
5
|
+
Candidate Scoring - inspectable decision support for competing CW outputs
|
|
6
|
+
|
|
7
|
+
## SYNOPSIS
|
|
8
|
+
|
|
9
|
+
```ts
|
|
10
|
+
import {
|
|
11
|
+
registerCandidate,
|
|
12
|
+
scoreCandidate,
|
|
13
|
+
rankCandidates,
|
|
14
|
+
selectCandidate
|
|
15
|
+
} from "./candidate-scoring";
|
|
16
|
+
|
|
17
|
+
registerCandidate(run, { workerId, taskId, resultNodeId, verifierNodeId });
|
|
18
|
+
scoreCandidate(run, candidateId, {
|
|
19
|
+
scorer: "verifier",
|
|
20
|
+
criteria: { correctness: 4, evidence: 4, fit: 2 },
|
|
21
|
+
maxTotal: 10,
|
|
22
|
+
evidence: [{ id: "score:evidence", source: "test", locator: "test/file.js:1" }]
|
|
23
|
+
});
|
|
24
|
+
rankCandidates(run);
|
|
25
|
+
selectCandidate(run, candidateId);
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
```text
|
|
29
|
+
node dist/cli.js candidate register <run-id> --worker <worker-id>
|
|
30
|
+
node dist/cli.js candidate score <run-id> <candidate-id> --criterion correctness=4 --evidence path:line
|
|
31
|
+
node dist/cli.js candidate rank <run-id>
|
|
32
|
+
node dist/cli.js candidate select <run-id> <candidate-id>
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## DESCRIPTION
|
|
36
|
+
|
|
37
|
+
Candidate Scoring is the small decision-support layer that links isolated worker
|
|
38
|
+
outputs, result nodes, verifier evidence, candidate scores, selected winners,
|
|
39
|
+
ErrorFeedback, and commit/report.
|
|
40
|
+
|
|
41
|
+
It does not merge code, replace verifier judgment, spawn workers, or provide a
|
|
42
|
+
domain-specific ranking policy. A score is evidence, not authority.
|
|
43
|
+
|
|
44
|
+
The normal flow is:
|
|
45
|
+
|
|
46
|
+
```text
|
|
47
|
+
worker output -> candidate record -> score record -> ranking
|
|
48
|
+
-> verifier-gated selection -> checkpoint/report
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Each step writes plain JSON. Rejected and failed candidates remain inspectable.
|
|
52
|
+
|
|
53
|
+
## FILES
|
|
54
|
+
|
|
55
|
+
```text
|
|
56
|
+
.cw/runs/<run-id>/candidates/index.json
|
|
57
|
+
.cw/runs/<run-id>/candidates/ranking.json
|
|
58
|
+
.cw/runs/<run-id>/candidates/<candidate-id>/candidate.json
|
|
59
|
+
.cw/runs/<run-id>/candidates/<candidate-id>/scores/<score-id>.json
|
|
60
|
+
.cw/runs/<run-id>/candidates/selections/<selection-id>.json
|
|
61
|
+
.cw/runs/<run-id>/nodes/
|
|
62
|
+
.cw/runs/<run-id>/feedback/
|
|
63
|
+
.cw/runs/<run-id>/report.md
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Candidate records point at existing worker, result, verifier, and artifact
|
|
67
|
+
paths. They do not copy large worker outputs by default.
|
|
68
|
+
|
|
69
|
+
## SELECTION GATE
|
|
70
|
+
|
|
71
|
+
Selection is conservative by default:
|
|
72
|
+
|
|
73
|
+
- score records require evidence
|
|
74
|
+
- selection requires a linked verifier node with `verified` status
|
|
75
|
+
- selection failures become ErrorFeedback records
|
|
76
|
+
- rejected candidates remain on disk
|
|
77
|
+
|
|
78
|
+
Operators can record an unverified selection only with an explicit option. That
|
|
79
|
+
records selection state but does not turn the candidate into committed state.
|
|
80
|
+
|
|
81
|
+
Committed state has a stricter rule. A candidate can be promoted by
|
|
82
|
+
`cw.js commit --candidate` or `cw.js commit --selection` only when it has score
|
|
83
|
+
evidence, a verified selection, and a linked verifier node with evidence.
|
|
84
|
+
Rejected, failed, unscored, unselected, and unverified candidates are blocked
|
|
85
|
+
and produce ErrorFeedback.
|
|
86
|
+
|
|
87
|
+
## FAILURE MODES
|
|
88
|
+
|
|
89
|
+
Missing score evidence fails scoring and records feedback.
|
|
90
|
+
|
|
91
|
+
Selecting a failed or rejected candidate fails and records feedback.
|
|
92
|
+
|
|
93
|
+
Selecting without a verified verifier node fails unless explicitly allowed.
|
|
94
|
+
|
|
95
|
+
Tie-breaking is predictable: higher normalized score wins; equal scores use the
|
|
96
|
+
configured tie breaker, defaulting to earlier candidate creation time.
|
|
97
|
+
|
|
98
|
+
## COMPATIBILITY
|
|
99
|
+
|
|
100
|
+
Candidate Scoring is introduced in CW v0.1.6. It adds optional candidate paths
|
|
101
|
+
and arrays to run state. Older runs remain readable because missing candidate
|
|
102
|
+
fields are initialized when state loads.
|
|
103
|
+
|
|
104
|
+
Existing workflow, worker, feedback, node, contract, result, commit, and report
|
|
105
|
+
commands remain compatible.
|
|
106
|
+
0.1.51
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Canonical Workflow Apps
|
|
2
|
+
|
|
3
|
+
Canonical Workflow Apps are the official CW userland apps maintained with the
|
|
4
|
+
runtime. They are not loose examples. Each one lives in a first-class app
|
|
5
|
+
directory:
|
|
6
|
+
|
|
7
|
+
```text
|
|
8
|
+
apps/<app-id>/app.json
|
|
9
|
+
apps/<app-id>/workflow.js
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
The runner remains the base system. Canonical apps carry domain behavior:
|
|
13
|
+
inputs, phases, task prompts, evidence gates, sandbox profile hints, and app
|
|
14
|
+
metadata.
|
|
15
|
+
|
|
16
|
+
## Apps
|
|
17
|
+
|
|
18
|
+
`architecture-review`
|
|
19
|
+
|
|
20
|
+
Map a repository architecture, assess risks, verify important findings, and
|
|
21
|
+
synthesize an evidence-backed verdict.
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
node scripts/cw.js plan architecture-review \
|
|
25
|
+
--repo /path/to/repo \
|
|
26
|
+
--question "Is this architecture sound?" \
|
|
27
|
+
--invariant "public API stays stable" \
|
|
28
|
+
--focus "runtime"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
`pr-review-fix-ci`
|
|
32
|
+
|
|
33
|
+
Review a pull request or branch, inspect CI failures, diagnose actionable
|
|
34
|
+
issues, optionally patch when `--mode fix` is allowed, verify outcomes, and
|
|
35
|
+
summarize with evidence.
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
node scripts/cw.js plan pr-review-fix-ci \
|
|
39
|
+
--repo /path/to/repo \
|
|
40
|
+
--pr 123 \
|
|
41
|
+
--base main \
|
|
42
|
+
--ci "unit-tests" \
|
|
43
|
+
--mode review
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
`release-cut`
|
|
47
|
+
|
|
48
|
+
Prepare a release with checklist discipline: version checks, changelog, tests,
|
|
49
|
+
packaging, release notes, and final verification.
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
node scripts/cw.js plan release-cut \
|
|
53
|
+
--repo /path/to/repo \
|
|
54
|
+
--version 0.1.13 \
|
|
55
|
+
--previousVersion 0.1.11 \
|
|
56
|
+
--dryRun true
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`research-synthesis`
|
|
60
|
+
|
|
61
|
+
Split a research question into claims, investigate sources, cross-check
|
|
62
|
+
evidence, verify claims, and synthesize a concise answer.
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
node scripts/cw.js plan research-synthesis \
|
|
66
|
+
--cwd /tmp/research-run \
|
|
67
|
+
--question "What does the evidence support?" \
|
|
68
|
+
--source "official-docs" \
|
|
69
|
+
--scope "local sources first" \
|
|
70
|
+
--freshness "as of today"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Validation Matrix
|
|
74
|
+
|
|
75
|
+
Run the canonical app matrix from the plugin root:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
cd plugins/cool-workflow
|
|
79
|
+
npm run canonical-apps
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
The command uses only Node.js standard library APIs and local temporary
|
|
83
|
+
workspaces. It validates each canonical app, shows its app metadata, plans it
|
|
84
|
+
with representative inputs, checks app id/version metadata in run state, checks
|
|
85
|
+
evidence-required verification or synthesis/verdict tasks, checks sandbox
|
|
86
|
+
profile hints, checks that task ids are unique, and checks that duplicate ids do not break
|
|
87
|
+
discovery.
|
|
88
|
+
|
|
89
|
+
`npm test` includes `test/canonical-workflow-apps-smoke.js`, which repeats the
|
|
90
|
+
same core assertions against generated `dist/`.
|
|
91
|
+
|
|
92
|
+
## Framework Pressure
|
|
93
|
+
|
|
94
|
+
The apps intentionally stress different parts of the Workflow App framework:
|
|
95
|
+
|
|
96
|
+
- declared required, optional, and repeated inputs
|
|
97
|
+
- app-directory discovery and app metadata
|
|
98
|
+
- readonly, locked-down, and workspace-write sandbox hints
|
|
99
|
+
- evidence-required verifier, synthesis, summary, and verdict tasks
|
|
100
|
+
- deterministic planning into temporary workspaces
|
|
101
|
+
- compatibility between canonical app ids and legacy workflow-file wrappers
|
|
102
|
+
|
|
103
|
+
The legacy `workflows/architecture-review.workflow.js` and
|
|
104
|
+
`workflows/research-synthesis.workflow.js` files remain loadable with explicit
|
|
105
|
+
compatibility ids:
|
|
106
|
+
|
|
107
|
+
```text
|
|
108
|
+
legacy-architecture-review
|
|
109
|
+
legacy-research-synthesis
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The public `architecture-review` and `research-synthesis` ids are now owned by
|
|
113
|
+
the canonical app directories.
|
|
114
|
+
|
|
115
|
+
## Relationship To The Golden Path
|
|
116
|
+
|
|
117
|
+
`npm run canonical-apps` proves the official userland app matrix validates and
|
|
118
|
+
plans correctly. It does not run every worker for every app.
|
|
119
|
+
|
|
120
|
+
`npm run golden-path` remains the full integration proof:
|
|
121
|
+
|
|
122
|
+
```text
|
|
123
|
+
workflow app -> plan -> dispatch -> isolated worker -> candidate scoring
|
|
124
|
+
-> verifier -> gated commit -> report
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Together they keep the kernel small while making the maintained userland boring,
|
|
128
|
+
inspectable, and useful.
|
|
129
|
+
|
|
130
|
+
Use the Operator UX commands to inspect any canonical app run:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
node scripts/cw.js status <run-id>
|
|
134
|
+
node scripts/cw.js graph <run-id>
|
|
135
|
+
node scripts/cw.js report <run-id> --summary
|
|
136
|
+
```
|
|
137
|
+
0.1.51
|