cool-workflow 0.1.79 → 0.1.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.codex-plugin/plugin.json +1 -1
- package/README.md +51 -3
- package/apps/architecture-review/app.json +1 -1
- package/apps/architecture-review-fast/app.json +64 -0
- package/apps/architecture-review-fast/workflow.js +153 -0
- package/apps/end-to-end-golden-path/app.json +1 -1
- package/apps/pr-review-fix-ci/app.json +1 -1
- package/apps/release-cut/app.json +1 -1
- package/apps/research-synthesis/app.json +1 -1
- package/dist/agent-config.js +21 -7
- package/dist/candidate-scoring.js +42 -22
- package/dist/capability-core.js +132 -17
- package/dist/capability-registry.js +138 -168
- package/dist/cli.js +97 -98
- package/dist/collaboration.js +5 -6
- package/dist/commit.js +20 -6
- package/dist/compare.js +18 -0
- package/dist/coordinator/classify.js +45 -0
- package/dist/coordinator/paths.js +42 -0
- package/dist/coordinator/util.js +129 -0
- package/dist/coordinator.js +127 -300
- package/dist/dispatch.js +35 -0
- package/dist/drive.js +79 -6
- package/dist/error-feedback.js +8 -4
- package/dist/evidence-reasoning.js +3 -3
- package/dist/execution-backend/agent.js +331 -0
- package/dist/execution-backend/probes.js +96 -0
- package/dist/execution-backend/util.js +47 -0
- package/dist/execution-backend.js +73 -421
- package/dist/mcp-server.js +79 -183
- package/dist/multi-agent/graph.js +84 -0
- package/dist/multi-agent/helpers.js +145 -0
- package/dist/multi-agent/paths.js +22 -0
- package/dist/multi-agent-eval/format.js +194 -0
- package/dist/multi-agent-eval/normalize.js +51 -0
- package/dist/multi-agent-eval.js +39 -244
- package/dist/multi-agent-host.js +0 -19
- package/dist/multi-agent.js +125 -314
- package/dist/node-snapshot.js +3 -3
- package/dist/observability/format.js +61 -0
- package/dist/observability/intake.js +98 -0
- package/dist/observability.js +14 -160
- package/dist/operator-ux/format.js +364 -0
- package/dist/operator-ux.js +22 -363
- package/dist/orchestrator/lifecycle-operations.js +2 -1
- package/dist/orchestrator/report.js +8 -0
- package/dist/orchestrator.js +26 -9
- package/dist/reclamation.js +26 -21
- package/dist/run-export.js +494 -25
- package/dist/run-registry/derive.js +172 -0
- package/dist/run-registry/format.js +124 -0
- package/dist/run-registry/gc.js +251 -0
- package/dist/run-registry/policy.js +16 -0
- package/dist/run-registry/queue.js +116 -0
- package/dist/run-registry.js +89 -597
- package/dist/run-state-schema.js +1 -0
- package/dist/sandbox-profile.js +43 -2
- package/dist/state-explosion/format.js +159 -0
- package/dist/state-explosion/helpers.js +82 -0
- package/dist/state-explosion.js +165 -304
- package/dist/state-node.js +19 -4
- package/dist/telemetry-attestation.js +55 -0
- package/dist/telemetry-demo.js +15 -3
- package/dist/telemetry-ledger.js +60 -15
- package/dist/topology.js +25 -8
- package/dist/triggers.js +33 -14
- package/dist/trust-audit.js +145 -33
- package/dist/version.js +1 -1
- package/dist/worker-isolation/helpers.js +51 -0
- package/dist/worker-isolation/paths.js +46 -0
- package/dist/worker-isolation.js +39 -115
- package/docs/agent-delegation-drive.7.md +71 -0
- package/docs/canonical-workflow-apps.7.md +37 -0
- package/docs/cli-mcp-parity.7.md +16 -0
- package/docs/contract-migration-tooling.7.md +6 -0
- package/docs/control-plane-scheduling.7.md +6 -0
- package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
- package/docs/durable-state-and-locking.7.md +8 -0
- package/docs/evidence-adoption-reasoning-chain.7.md +6 -0
- package/docs/execution-backends.7.md +6 -0
- package/docs/index.md +2 -0
- package/docs/launch/demo.tape +28 -0
- package/docs/launch/launch-kit.md +96 -17
- package/docs/launch/pre-launch-checklist.md +53 -0
- package/docs/multi-agent-cli-mcp-surface.7.md +8 -0
- package/docs/multi-agent-eval-replay-harness.7.md +6 -0
- package/docs/multi-agent-operator-ux.7.md +6 -0
- package/docs/multi-agent-trust-policy-audit.7.md +27 -0
- package/docs/node-snapshot-diff-replay.7.md +6 -0
- package/docs/observability-cost-accounting.7.md +6 -0
- package/docs/project-index.md +27 -6
- package/docs/real-execution-backends.7.md +6 -0
- package/docs/release-and-migration.7.md +8 -0
- package/docs/release-tooling.7.md +6 -0
- package/docs/routines.md +23 -0
- package/docs/run-registry-control-plane.7.md +89 -2
- package/docs/run-retention-reclamation.7.md +8 -0
- package/docs/source-context-profiles.7.md +119 -0
- package/docs/state-explosion-management.7.md +13 -0
- package/docs/team-collaboration.7.md +6 -0
- package/docs/trust-model.md +267 -0
- package/docs/unix-principles.md +49 -1
- package/docs/vendor-manifest-loadability.7.md +43 -0
- package/docs/web-desktop-workbench.7.md +6 -0
- package/manifest/plugin.manifest.json +1 -1
- package/manifest/source-context-profiles.json +142 -0
- package/package.json +4 -1
- package/scripts/agents/builtin-templates.json +7 -0
- package/scripts/agents/claude-p-agent.js +129 -43
- package/scripts/architecture-review-fast.js +362 -0
- package/scripts/bump-version.js +5 -10
- package/scripts/canonical-apps-list.js +64 -0
- package/scripts/canonical-apps.js +36 -4
- package/scripts/coverage-gate.js +211 -0
- package/scripts/dogfood-release.js +1 -1
- package/scripts/golden-path.js +4 -4
- package/scripts/parity-check.js +5 -0
- package/scripts/release-check.js +5 -1
- package/scripts/source-context.js +291 -0
- package/scripts/version-sync-check.js +5 -7
- package/skills/ci-triage/SKILL.md +50 -0
- package/skills/ci-triage/agents/openai.yaml +4 -0
- package/skills/cool-workflow/SKILL.md +4 -1
- package/skills/deploy-check/SKILL.md +55 -0
- package/skills/deploy-check/agents/openai.yaml +4 -0
- package/skills/design-qa/SKILL.md +49 -0
- package/skills/design-qa/agents/openai.yaml +4 -0
- package/skills/pr-review/SKILL.md +45 -0
- package/skills/pr-review/agents/openai.yaml +4 -0
- package/dist/capability-dispatcher.js +0 -86
|
@@ -191,3 +191,11 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
|
|
|
191
191
|
0.1.78
|
|
192
192
|
|
|
193
193
|
0.1.79
|
|
194
|
+
|
|
195
|
+
## Fast Architecture Review (v0.1.80)
|
|
196
|
+
|
|
197
|
+
Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
|
|
198
|
+
|
|
199
|
+
## Deterministic Freed Manifest (v0.1.81)
|
|
200
|
+
|
|
201
|
+
The freed manifest is path-sorted before it feeds `tombstoneHash`, so reclamation's write-ahead tombstone hash-chain is reproducible across hosts regardless of filesystem enumeration order. Reclaimed tiers, the re-point seam, and the default (reclaim-nothing) policy are unchanged.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Source Context Profiles
|
|
2
|
+
|
|
3
|
+
CW keeps source-context slimming out of the runtime kernel. The profile is policy
|
|
4
|
+
data in `manifest/source-context-profiles.json`; `scripts/source-context.js` is a
|
|
5
|
+
small mechanism that reads a git ref and writes JSONL to stdout.
|
|
6
|
+
|
|
7
|
+
## Core Profile
|
|
8
|
+
|
|
9
|
+
The default `core` profile is the project memory for AI source imports. It keeps
|
|
10
|
+
runtime source and app/userland entrypoints, and leaves generated artifacts,
|
|
11
|
+
tests, docs, release records, and long logs as manifest-only records.
|
|
12
|
+
|
|
13
|
+
Included:
|
|
14
|
+
|
|
15
|
+
- `plugins/cool-workflow/src/**`
|
|
16
|
+
- `plugins/cool-workflow/apps/**`
|
|
17
|
+
- `plugins/cool-workflow/package.json`
|
|
18
|
+
- `plugins/cool-workflow/tsconfig.json`
|
|
19
|
+
- `plugins/cool-workflow/scripts/cw.js`
|
|
20
|
+
- `plugins/cool-workflow/scripts/mcp-server.js`
|
|
21
|
+
- `plugins/cool-workflow/scripts/agents/**`
|
|
22
|
+
|
|
23
|
+
Excluded from exported content:
|
|
24
|
+
|
|
25
|
+
- `plugins/cool-workflow/dist/**`
|
|
26
|
+
- `plugins/cool-workflow/test/**`
|
|
27
|
+
- `plugins/cool-workflow/docs/**`
|
|
28
|
+
- `docs/assets/**`
|
|
29
|
+
- `.cw-release/**`
|
|
30
|
+
- `CHANGELOG.md`
|
|
31
|
+
- `ITERATION_LOG.md`
|
|
32
|
+
|
|
33
|
+
Exclusion does not delete files and does not change release behavior. `dist/`
|
|
34
|
+
remains a committed release artifact until the release contract is explicitly
|
|
35
|
+
changed.
|
|
36
|
+
|
|
37
|
+
## Narrow Profiles
|
|
38
|
+
|
|
39
|
+
Use a narrower opt-in profile when the question is already scoped:
|
|
40
|
+
|
|
41
|
+
- `runtime`: the full `src/**` runtime kernel plus package and TypeScript
|
|
42
|
+
metadata.
|
|
43
|
+
- `mcp`: capability core/registry, CLI routing, MCP server, MCP launcher scripts,
|
|
44
|
+
and shared types.
|
|
45
|
+
- `workflow-apps`: canonical apps plus the Workflow App framework and app
|
|
46
|
+
planning/orchestration surface.
|
|
47
|
+
- `release`: release flow, gates, manifest/version tooling, package metadata, and
|
|
48
|
+
release-tooling docs.
|
|
49
|
+
- `agent-wrappers`: external agent wrappers, agent config, execution backend,
|
|
50
|
+
drive loop, and agent-delegation docs.
|
|
51
|
+
|
|
52
|
+
The narrow profiles are policy data only. Selecting one changes only the JSONL
|
|
53
|
+
context pack; it does not change runtime behavior, release contents, or the
|
|
54
|
+
default `core` profile.
|
|
55
|
+
|
|
56
|
+
## Commands
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
node scripts/source-context.js profiles
|
|
60
|
+
node scripts/source-context.js manifest --profile core --ref HEAD --repo-root /path/to/repo > manifest.jsonl
|
|
61
|
+
node scripts/source-context.js export --profile core --ref HEAD --repo-root /path/to/repo > core-source.jsonl
|
|
62
|
+
node scripts/source-context.js export --profile mcp --ref HEAD --repo-root /path/to/repo > mcp-source.jsonl
|
|
63
|
+
node scripts/source-context.js export --profile mcp --changed-from origin/main --ref HEAD --repo-root /path/to/repo > mcp-changed.jsonl
|
|
64
|
+
node scripts/source-context.js export --profile core --ref HEAD --repo-root /path/to/repo --cache-dir .cw/cache/source-context > core-source.jsonl
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
`manifest` emits one JSON object per tracked file at the selected ref:
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{"path":"plugins/cool-workflow/src/state.ts","included":true,"reason":"included:plugins/cool-workflow/src/**","sha256":"..."}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`export` emits only included text files and adds `content`. Both commands use
|
|
74
|
+
stdout for JSONL data only. Diagnostics and refusal messages go to stderr.
|
|
75
|
+
|
|
76
|
+
`--changed-from REF` is opt-in diff-aware mode. It filters `manifest` and
|
|
77
|
+
`export` to paths changed between the resolved base commit and `--ref`, then
|
|
78
|
+
applies the selected profile include/exclude rules. Deleted files are omitted
|
|
79
|
+
because there is no blob at the target ref. Records include `changedFrom` with
|
|
80
|
+
the resolved base commit. Empty diffs are valid and emit empty JSONL.
|
|
81
|
+
|
|
82
|
+
`export --cache-dir DIR` is opt-in. The cache key is the resolved git commit SHA
|
|
83
|
+
plus a digest of the selected source profile, so changing either the ref or the
|
|
84
|
+
include/exclude policy produces a different JSONL cache file. Cache hits write the
|
|
85
|
+
same JSONL bytes to stdout and stay silent on stderr. Corrupt or mismatched cache
|
|
86
|
+
records fail closed instead of falling back silently. Diff-aware exports include
|
|
87
|
+
the resolved `--changed-from` commit in the cache key, so full and changed exports
|
|
88
|
+
do not share cache files.
|
|
89
|
+
|
|
90
|
+
`--repo-root DIR` is also opt-in; when omitted, the script keeps its historical
|
|
91
|
+
default and reads the Cool Workflow repository root.
|
|
92
|
+
|
|
93
|
+
## Verification
|
|
94
|
+
|
|
95
|
+
The smoke test checks that:
|
|
96
|
+
|
|
97
|
+
- the profile includes and excludes exactly the paths listed in its include/exclude policy;
|
|
98
|
+
- `dist/`, tests, docs, release records, and long logs are manifest-only;
|
|
99
|
+
- exported records are parseable JSONL with content and sha256;
|
|
100
|
+
- narrow profiles are slimmer than `core` and include/exclude their intended
|
|
101
|
+
surfaces;
|
|
102
|
+
- `--changed-from` emits only changed current-ref files, still honors excludes,
|
|
103
|
+
and caches separately from full exports;
|
|
104
|
+
- cached exports are byte-identical to uncached exports and corrupt cache hits
|
|
105
|
+
fail closed;
|
|
106
|
+
- the `core` profile stays under its `maxLines` guard.
|
|
107
|
+
|
|
108
|
+
Run:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
node test/source-context-profile-smoke.js
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## FreeBSD Discipline
|
|
115
|
+
|
|
116
|
+
This feature is opt-in and does not alter existing CLI output. It is mechanism,
|
|
117
|
+
not policy: profile selection lives in data, and vendor prompt/stream behavior
|
|
118
|
+
stays in wrappers. It fails closed on invalid profiles, unknown refs, binary
|
|
119
|
+
included files, and line-count drift past the configured guard.
|
|
@@ -45,6 +45,13 @@ Summaries are written under `.cw/runs/<run-id>/summaries/` as plain JSON. Raw
|
|
|
45
45
|
blackboard messages, graph nodes, graph edges, audit events, evidence refs, and
|
|
46
46
|
eval artifacts are never deleted or overwritten.
|
|
47
47
|
|
|
48
|
+
Within a single summary build, CW shares the derived full operator graph,
|
|
49
|
+
operator status, blackboard digest, state-size record, and graph view records
|
|
50
|
+
through a short-lived in-memory context. This avoids rebuilding the same graph
|
|
51
|
+
for `summary refresh`, `summary show`, and the top-level state-explosion report.
|
|
52
|
+
It is not a daemon or persistent cache: the next command re-reads run state from
|
|
53
|
+
disk, recomputes source fingerprints, and still fails closed on stale summaries.
|
|
54
|
+
|
|
48
55
|
## Blackboard summarization
|
|
49
56
|
|
|
50
57
|
`blackboard summarize <run-id>` (MCP: `cw_blackboard_summarize`) returns a
|
|
@@ -264,3 +271,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
|
|
|
264
271
|
0.1.78
|
|
265
272
|
|
|
266
273
|
0.1.79
|
|
274
|
+
|
|
275
|
+
## Fast Architecture Review (v0.1.80)
|
|
276
|
+
|
|
277
|
+
Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
|
|
278
|
+
|
|
279
|
+
_No changes to the state-explosion management surface in v0.1.81 (the module was carved into behavior-preserving siblings; output is byte-identical)._
|
|
@@ -207,3 +207,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
|
|
|
207
207
|
0.1.78
|
|
208
208
|
|
|
209
209
|
0.1.79
|
|
210
|
+
|
|
211
|
+
## Fast Architecture Review (v0.1.80)
|
|
212
|
+
|
|
213
|
+
Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
|
|
214
|
+
|
|
215
|
+
_No changes to the team-collaboration surface in v0.1.81._
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# Trust Model & Limitations
|
|
2
|
+
|
|
3
|
+
> **Read this before you trust a cool-workflow record.** This document states
|
|
4
|
+
> exactly what CW's cryptographic guarantees prove, and — just as important —
|
|
5
|
+
> what they do **not** prove. We would rather lose a skeptical reader here than
|
|
6
|
+
> have them over-trust a green checkmark in production. If anything below reads
|
|
7
|
+
> as an overclaim, it is a bug; please file it.
|
|
8
|
+
|
|
9
|
+
CW is an **auditable control-plane**. It plans, dispatches, records, and verifies
|
|
10
|
+
agent work — it does **not** run the model itself. That single architectural
|
|
11
|
+
choice is what the guarantees below rest on, and it is also the source of their
|
|
12
|
+
honest ceiling.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## TL;DR
|
|
17
|
+
|
|
18
|
+
- CW's ed25519 signature + hash-chained ledger prove **integrity and
|
|
19
|
+
attribution**: a recorded usage figure was signed by the keyholder and has not
|
|
20
|
+
been edited since it was recorded. Both re-verify **offline** — the recorded
|
|
21
|
+
ledger's integrity with **no key at all** (`cw telemetry verify`), and each
|
|
22
|
+
`attested` signature with the **public key alone** (`cw telemetry verify
|
|
23
|
+
--pubkey <public.pem>`; also reproduced by `cw demo tamper`).
|
|
24
|
+
- They do **not** prove the original number was **true**. A dishonest signer can
|
|
25
|
+
sign a lie; the lie is then cryptographically bound to its signer, but it is
|
|
26
|
+
still a lie.
|
|
27
|
+
- **CW holds no private key.** It can verify, but it can neither forge a
|
|
28
|
+
signature nor measure usage itself (by design — see the red line below).
|
|
29
|
+
- The honest gap is **single-keyholder / no second party**: when the same
|
|
30
|
+
operator runs CW *and* holds the only signing key, integrity is real but there
|
|
31
|
+
is no independent party attesting that the source was honest. **This is exactly
|
|
32
|
+
why we are seeking early integration partners** who supply an independent
|
|
33
|
+
second party / co-signer. See [Closing the gap](#closing-the-gap-the-second-party).
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## What the cryptography is, precisely
|
|
38
|
+
|
|
39
|
+
There are two distinct mechanisms. Conflating them is the most common way to
|
|
40
|
+
over- or under-state the guarantee, so they are kept separate here.
|
|
41
|
+
|
|
42
|
+
### 1. The telemetry signature (ed25519) — attribution of a reported number
|
|
43
|
+
|
|
44
|
+
The agent (the **executor**) self-reports its token usage. A control-plane that
|
|
45
|
+
records that number verbatim is recording a **claim**. To turn the claim into an
|
|
46
|
+
**attestation**, the executor signs a canonical payload with its **private key**:
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
sign({ usage, runId, taskId, promptDigest }) // ed25519, executor-side
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The `runId` / `taskId` / `promptDigest` binding is load-bearing: it ties the
|
|
53
|
+
signature to **this** hop, so a valid signature from one task cannot be replayed
|
|
54
|
+
onto another. `promptDigest` is the sha256 of the exact worker prompt CW handed
|
|
55
|
+
the agent.
|
|
56
|
+
|
|
57
|
+
CW then **verifies** that signature against an **operator-provisioned public
|
|
58
|
+
key**. CW holds *only* the public half. From `telemetry-attestation.ts`:
|
|
59
|
+
|
|
60
|
+
> CW VERIFIES that signature against an operator-provisioned PUBLIC key. CW holds
|
|
61
|
+
> ONLY the public key — it can verify, but can neither forge a signature nor (the
|
|
62
|
+
> red line) call a model to measure usage itself.
|
|
63
|
+
|
|
64
|
+
The result is one of three honest states, surfaced loudly and never silently
|
|
65
|
+
upgraded to "trusted":
|
|
66
|
+
|
|
67
|
+
| State | Meaning |
|
|
68
|
+
|---|---|
|
|
69
|
+
| `attested` | A valid ed25519 signature over the reported usage, bound to this run/task/prompt, verified against the configured public key. |
|
|
70
|
+
| `unattested` | Usage was reported but the signature is missing, malformed, made with the wrong key, or does not match the payload (tampered or replayed). Also: no trust key configured. |
|
|
71
|
+
| `absent` | The agent reported no usage at all. |
|
|
72
|
+
|
|
73
|
+
Defaults are honest: no signature ⇒ `unattested`; no usage ⇒ `absent`. **Usage
|
|
74
|
+
is never silently recorded as trusted.** The opt-in `require-attested-telemetry`
|
|
75
|
+
policy fails the run closed on anything other than `attested`.
|
|
76
|
+
|
|
77
|
+
### 2. The hash-chained ledgers — tamper-evidence of the recorded log
|
|
78
|
+
|
|
79
|
+
A signature proves the agent *said* a number in flight. It does not, by itself,
|
|
80
|
+
prove that **CW recorded exactly that** and that **nobody edited the record
|
|
81
|
+
afterward**. That is the job of the append-only, hash-chained ledgers:
|
|
82
|
+
|
|
83
|
+
- **Telemetry ledger** (`telemetry.json`, one entry per agent hop): each entry
|
|
84
|
+
chains to the previous via `prevHash`, and `recordHash = sha256(canonical
|
|
85
|
+
entry)`. Flip a recorded verdict (`unattested` → `attested`) or edit a recorded
|
|
86
|
+
usage digest, and the chain no longer recomputes.
|
|
87
|
+
- **Trust-audit event log** (`events.jsonl`): the same discipline applied to
|
|
88
|
+
every recorded decision — sandbox path allow/deny, policy snapshots,
|
|
89
|
+
verifier-gated commits, collaboration approvals.
|
|
90
|
+
|
|
91
|
+
Verification **recomputes every hash independently and never trusts the stored
|
|
92
|
+
value**, so an edited, reordered, removed, or truncated entry flips
|
|
93
|
+
`verified = false`. A ledger that exists but cannot be parsed **fails closed** —
|
|
94
|
+
it is treated as corrupt, never silently as the clean empty chain.
|
|
95
|
+
|
|
96
|
+
This is all **offline**. The chain re-proof needs **no key at all**; add
|
|
97
|
+
`--pubkey <public.pem>` to re-run the signature **attribution** check against the
|
|
98
|
+
stored raw usage for every `attested` record. There is no telemetry service to
|
|
99
|
+
trust or breach — the record proves its own integrity, and a third-party auditor
|
|
100
|
+
can re-run both checks on their own machine.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## What this DOES prove
|
|
105
|
+
|
|
106
|
+
For telemetry, if `cw telemetry verify <run> --pubkey <public.pem>` reports green,
|
|
107
|
+
you can rely on **all** of the following, and only these:
|
|
108
|
+
|
|
109
|
+
1. **Attribution.** Each `attested` usage figure was signed by the holder of the
|
|
110
|
+
configured private key, over a payload bound to that specific run, task, and
|
|
111
|
+
prompt. It is **non-repudiable**: the signer cannot later disown it, and it
|
|
112
|
+
could not have been replayed from a different hop.
|
|
113
|
+
2. **Tamper-evidence of the record.** The recorded ledger — verdicts, usage
|
|
114
|
+
digests, audit decisions — has not been edited, reordered, truncated, or had
|
|
115
|
+
entries removed since it was written, *to the extent a self-recomputable chain
|
|
116
|
+
can detect* (see the threat-model caveat below). Casual or partial tampering,
|
|
117
|
+
accidental corruption, truncation, and forged unchained lines are all caught.
|
|
118
|
+
3. **Offline, independent re-verification.** Re-proving the recorded ledger needs
|
|
119
|
+
no network, no CW service, and no trust in our infrastructure — `cw telemetry
|
|
120
|
+
verify` recomputes the chain on your machine (and needs no key to do it). With
|
|
121
|
+
`--pubkey`, the ed25519 **attribution** is independently re-checked with the
|
|
122
|
+
**public key alone**; `cw demo tamper` reproduces that sign-and-catch
|
|
123
|
+
end-to-end, offline. The integrity claim does not depend on trusting us.
|
|
124
|
+
4. **CW never forged or measured anything.** CW holds no private key and never
|
|
125
|
+
calls a model. It cannot mint a signature, and it cannot fabricate a usage
|
|
126
|
+
number to sign. What it records, it received and verified.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## What this DOES NOT prove
|
|
131
|
+
|
|
132
|
+
Equally load-bearing. None of the following are within the guarantee, and we will
|
|
133
|
+
not imply otherwise:
|
|
134
|
+
|
|
135
|
+
1. **It does not prove the reported number is true.** A signature proves *who*
|
|
136
|
+
said it and that it *wasn't altered* — **not** that it was correct at the
|
|
137
|
+
source. Quoting the code's own honest ceiling:
|
|
138
|
+
|
|
139
|
+
> A dishonest keyholder can still sign a lie, but the lie is now
|
|
140
|
+
> cryptographically bound to its signer.
|
|
141
|
+
|
|
142
|
+
CW deliberately does **not** independently measure usage (doing so would mean
|
|
143
|
+
calling the model — the red line it refuses to cross). So the strongest honest
|
|
144
|
+
claim is **attribution, not ground-truth measurement**.
|
|
145
|
+
|
|
146
|
+
2. **It does not defend against a single party who holds both roles.** If the
|
|
147
|
+
same operator runs CW, holds the signing private key, *and* controls the
|
|
148
|
+
machine the ledger lives on, then a green verdict attests that **that party**
|
|
149
|
+
signed and that **that party's** record is internally consistent. It does not
|
|
150
|
+
bring in any *independent* party. Self-consistency is not third-party
|
|
151
|
+
verification.
|
|
152
|
+
|
|
153
|
+
3. **A determined local writer can re-chain the whole log.** The hash-chain's
|
|
154
|
+
genesis is `sha256(runId)` — a value the local writer knows. So the chain
|
|
155
|
+
detects edits to *part* of a log, but a writer who edits an entry and then
|
|
156
|
+
**re-computes every subsequent hash** with CW's own sha256 produces a log that
|
|
157
|
+
re-verifies green. From `trust-audit.ts`:
|
|
158
|
+
|
|
159
|
+
> THREAT MODEL (be honest about the limit): the genesis is sha256(runId), so
|
|
160
|
+
> this detects casual/partial tampering, accidental corruption, truncation,
|
|
161
|
+
> removal, and forged-unchained lines — but NOT a determined local writer who
|
|
162
|
+
> re-chains the WHOLE log with this module's own sha256 after an edit.
|
|
163
|
+
|
|
164
|
+
This is **inherent** to any local, self-recomputable chain. Closing it needs an
|
|
165
|
+
anchor the writer cannot reproduce. CW **cannot mint that anchor itself** —
|
|
166
|
+
because by design it holds no private key. The one cryptographic anchor that
|
|
167
|
+
exists is the **agent's** telemetry signature, which covers agent-reported
|
|
168
|
+
*usage* — it does **not** cover CW-only decisions (sandbox / policy /
|
|
169
|
+
commit-gate), which have no external signer.
|
|
170
|
+
|
|
171
|
+
For those CW-only decisions, the only stronger guarantee available today is
|
|
172
|
+
**operational**, not cryptographic: commit `events.jsonl` to an external
|
|
173
|
+
append-only medium (git history, a remote append-only log) that the local
|
|
174
|
+
writer cannot rewrite. The chain is a **strict upgrade** over a bare
|
|
175
|
+
append-only log — not a substitute for an external anchor.
|
|
176
|
+
|
|
177
|
+
4. **It says nothing about the quality, safety, or correctness of the work.**
|
|
178
|
+
Attestation is about *provenance and integrity of records*, not about whether
|
|
179
|
+
the agent's output is good, secure, or even functional. Other CW mechanisms
|
|
180
|
+
(verifier gate, schema validation, evidence grounding) speak to that; the
|
|
181
|
+
cryptography here does not.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## The single-keyholder limitation (stated plainly)
|
|
186
|
+
|
|
187
|
+
> **The core honest gap:** when the same operator runs CW and holds the only
|
|
188
|
+
> verification/signing key, tamper-evidence can at best show that **records were not edited
|
|
189
|
+
> after the fact** — it does **not** prove that the **original signer was
|
|
190
|
+
> honest**. Integrity, yes. A trustworthy source, not necessarily.
|
|
191
|
+
|
|
192
|
+
Concretely, in a single-party setup:
|
|
193
|
+
|
|
194
|
+
- The operator provisions the keypair.
|
|
195
|
+
- The operator's agent process signs usage with the private key.
|
|
196
|
+
- CW (run by the same operator) verifies with the public key and writes the
|
|
197
|
+
ledger to the operator's disk.
|
|
198
|
+
|
|
199
|
+
Every cryptographic check can pass while a motivated single party fabricates the
|
|
200
|
+
source number, or — given the genesis caveat above — rewrites the whole local
|
|
201
|
+
chain. **Cryptography cannot manufacture a second party that does not exist.**
|
|
202
|
+
Separation of duties is the property auditors require everywhere; with one
|
|
203
|
+
operator wearing both hats, it is structurally absent no matter how good the
|
|
204
|
+
math is.
|
|
205
|
+
|
|
206
|
+
We are not going to argue this point away. It is real, it is the most important
|
|
207
|
+
limitation in this document, and it is the right critique to raise.
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Closing the gap: the second party
|
|
212
|
+
|
|
213
|
+
The fix is **not** more cryptography on one machine — it is an **independent
|
|
214
|
+
second party**, which is precisely the thing a single operator cannot self-supply.
|
|
215
|
+
This is why CW's near-term priority is **early integration partners**, and what we
|
|
216
|
+
mean by that concretely:
|
|
217
|
+
|
|
218
|
+
- **An independent co-signer / second keyholder.** A second party (a different
|
|
219
|
+
team, a CI identity outside the operator's control, or a partner's signing
|
|
220
|
+
service) holds a key the operator does not. When that party counter-signs runs —
|
|
221
|
+
or *is* the executor that signs usage — a green verdict starts to mean
|
|
222
|
+
"two parties who do not fully trust each other agree," which is the property
|
|
223
|
+
single-party attestation structurally cannot provide.
|
|
224
|
+
- **An external append-only anchor.** Pushing `events.jsonl` to a medium the local
|
|
225
|
+
operator cannot rewrite (a partner-held log, a public transparency log, signed
|
|
226
|
+
git history on a remote the operator doesn't control) closes the re-chain gap
|
|
227
|
+
for CW-only decisions described above.
|
|
228
|
+
- **Separated execution and verification.** The party that *spends the money*
|
|
229
|
+
(runs the model) and the party that *keeps the books* (CW) being genuinely
|
|
230
|
+
different entities turns CW's separation-of-duties design from an architectural
|
|
231
|
+
intent into an enforced fact.
|
|
232
|
+
|
|
233
|
+
If you are a potential partner who can supply an independent second party — a
|
|
234
|
+
co-signer, an external anchor, or separated execution — **that is the
|
|
235
|
+
collaboration we are actively looking for.** We would rather ship this honestly
|
|
236
|
+
and earn the second party than paper over the gap with a stronger-sounding claim
|
|
237
|
+
than the math supports.
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
## How to verify for yourself
|
|
242
|
+
|
|
243
|
+
- `cw telemetry verify <run>` — re-proves the telemetry ledger's **integrity**:
|
|
244
|
+
chain linkage + an independent per-record hash recompute, so any edit to a
|
|
245
|
+
recorded verdict or usage digest since record time flips it red. It needs **no
|
|
246
|
+
key** (it re-proves the *recording*). Add `--pubkey <pem-or-path>` to re-run the
|
|
247
|
+
ed25519 **signature** check for every `attested` record against the stored raw
|
|
248
|
+
usage; unreadable keys, missing raw usage, digest mismatches, wrong keys, and
|
|
249
|
+
signature mismatches fail closed. Mirrored as `cw_telemetry_verify` on the MCP
|
|
250
|
+
surface.
|
|
251
|
+
- `cw demo tamper` — a hermetic, offline, one-command proof: it builds a real
|
|
252
|
+
ed25519-signed ledger and then forges it two ways — flips a recorded verdict and
|
|
253
|
+
re-computes the *local* record hash (the chain still breaks), and reuses a
|
|
254
|
+
signature over inflated tokens (ed25519 rejects it). Everything is verified with
|
|
255
|
+
the public key only. The `✗ DETECTED` lines are the point.
|
|
256
|
+
- Re-run either with **only the public key** on a machine we do not control. If it
|
|
257
|
+
doesn't reproduce, our integrity claim is false — hold us to it.
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## One-line summary
|
|
262
|
+
|
|
263
|
+
CW's cryptography proves **records weren't edited and were signed by the
|
|
264
|
+
keyholder** — strong, offline, public-key-verifiable **integrity and
|
|
265
|
+
attribution**. It does **not** prove the **source was honest**, and a single
|
|
266
|
+
operator holding both roles is the honest limit we are explicitly recruiting
|
|
267
|
+
integration partners to close.
|
package/docs/unix-principles.md
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
# Unix-Inspired Workflow Principles
|
|
2
2
|
|
|
3
3
|
CW borrows a small set of durable systems ideas and applies them to agent
|
|
4
|
-
workflow engineering. These are design principles, not platform claims
|
|
4
|
+
workflow engineering. These are design principles, not platform claims — but
|
|
5
|
+
they are not optional: this project strictly follows the FreeBSD programming
|
|
6
|
+
philosophy, and §7 below states the binding rules every change is reviewed
|
|
7
|
+
against (mirrored as hard constraints in the repository's `AGENTS.md`).
|
|
5
8
|
|
|
6
9
|
## 1. Everything Is State
|
|
7
10
|
|
|
@@ -190,3 +193,48 @@ Hosts enforce runtime sandbox policy.
|
|
|
190
193
|
```
|
|
191
194
|
|
|
192
195
|
This keeps CW small, inspectable, and extensible.
|
|
196
|
+
|
|
197
|
+
## 7. FreeBSD Discipline (Binding Rules)
|
|
198
|
+
|
|
199
|
+
The principles above descend from one tradition — the FreeBSD school of
|
|
200
|
+
systems engineering — and CW adheres to it strictly. Concretely:
|
|
201
|
+
|
|
202
|
+
**POLA — Principle of Least Astonishment.** An existing output, file layout,
|
|
203
|
+
exit code, or flag never changes meaning or bytes underneath an operator. New
|
|
204
|
+
behavior ships behind a new verb/flag or an env toggle, with the prior
|
|
205
|
+
behavior byte-identical by default. (Example: live drive output is additive —
|
|
206
|
+
stderr only, TTY-gated, `CW_NO_STREAM=1` opt-out; the stdout payload and
|
|
207
|
+
evidence digest are unchanged.)
|
|
208
|
+
|
|
209
|
+
**Mechanism, not policy.** The kernel provides mechanisms; policy is data in
|
|
210
|
+
userland. WHICH agent runs is config (`CW_AGENT_COMMAND` / agent-config), not
|
|
211
|
+
code; vendor-specific rendering lives in wrappers under `scripts/agents/`,
|
|
212
|
+
never in core. Core may forward a vendor's stream; it never parses one.
|
|
213
|
+
|
|
214
|
+
**Rule of Silence.** stdout is data, stderr is diagnostics, and a
|
|
215
|
+
non-interactive run is silent on success. Anything human-friendly is TTY-gated
|
|
216
|
+
and can be disabled; `--json` output is stable and undecorated so it composes
|
|
217
|
+
in pipes.
|
|
218
|
+
|
|
219
|
+
**Fail closed, conservative defaults.** Unconfigured backends probe as
|
|
220
|
+
`unverified`, unverifiable telemetry is surfaced loudly (or refused in strict
|
|
221
|
+
mode), and invalid results park the hop. CW never fabricates a success and never
|
|
222
|
+
falls back silently. Boring correctness beats clever features.
|
|
223
|
+
|
|
224
|
+
**Tools, not frameworks.** Zero runtime dependencies is a red line. Verbs do
|
|
225
|
+
one thing; composition happens through durable files (`.cw/`) and pipes, not
|
|
226
|
+
hidden in-process coupling.
|
|
227
|
+
|
|
228
|
+
**Man pages are the contract.** Every shipped capability has a `docs/*.7.md`
|
|
229
|
+
page updated in the same change, and doc-drift guards in the test suite keep
|
|
230
|
+
the documented commands honest. Undocumented behavior is unfinished behavior.
|
|
231
|
+
|
|
232
|
+
**style(9) spirit.** One consistent style per layer; a diff matches the file
|
|
233
|
+
it touches and never reformats code it does not change.
|
|
234
|
+
|
|
235
|
+
**Release engineering.** Main is -CURRENT; a tag is -RELEASE: it exists only
|
|
236
|
+
after the deterministic gate and an independent review pass, and cadence never
|
|
237
|
+
overrides the gate.
|
|
238
|
+
|
|
239
|
+
A change that violates any rule in this section is rejected in review even if
|
|
240
|
+
the capability it ships is otherwise desirable.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Vendor Manifest Loadability
|
|
2
|
+
|
|
3
|
+
CW ships one kernel to many AI clients. A single `manifest/plugin.manifest.json`
|
|
4
|
+
generates every vendor's plugin files (Claude, Codex, the `agents` marketplace,
|
|
5
|
+
Gemini, OpenCode) — see `gen-manifests(1)`. Each vendor that exposes the MCP
|
|
6
|
+
server gets a generated `mcp.json` telling that client how to launch it.
|
|
7
|
+
|
|
8
|
+
## The gap this closes
|
|
9
|
+
|
|
10
|
+
Two gates already guard the manifests, but neither proves a vendor manifest
|
|
11
|
+
actually *boots*:
|
|
12
|
+
|
|
13
|
+
- `npm run gen:manifests -- --check` diffs the generated bytes against the
|
|
14
|
+
manifest source. It catches drift, not a wrong-but-consistent command.
|
|
15
|
+
- `parity-check` boots `dist/mcp-server.js` **directly** — it never reads any
|
|
16
|
+
vendor's `mcp.json`, never resolves a `pluginRootVar`.
|
|
17
|
+
|
|
18
|
+
So a manifest could declare a broken `command`, `args`, or path and every gate
|
|
19
|
+
would stay green while no client could load it. Track C ("multi-vendor manifest
|
|
20
|
+
actually loaded by ≥2 real clients") was asserted, not proven.
|
|
21
|
+
|
|
22
|
+
## The load proof
|
|
23
|
+
|
|
24
|
+
`npm run manifest:load-check` (the `vendor-manifest-load-smoke`, run automatically
|
|
25
|
+
by `npm test`) closes that gap. For every vendor in `targets` that declares an `mcp`
|
|
26
|
+
output it:
|
|
27
|
+
|
|
28
|
+
1. reads the generated `mcp.json`;
|
|
29
|
+
2. resolves the server `command` + `args` exactly as that client does —
|
|
30
|
+
substituting the vendor's `pluginRootVar` (`${CLAUDE_PLUGIN_ROOT}/` for Claude,
|
|
31
|
+
`./` for the rest) with the real plugin root;
|
|
32
|
+
3. spawns the server with `shell:false` (argv spawn, no shell);
|
|
33
|
+
4. completes a JSON-RPC `initialize` + `tools/list` round-trip.
|
|
34
|
+
|
|
35
|
+
Every vendor launches the same kernel, so the proof asserts they **agree**: one
|
|
36
|
+
`serverInfo.name` and an identical tool count across all of them. A vendor whose
|
|
37
|
+
manifest drifted to an unbootable shape — wrong path, wrong command, bad
|
|
38
|
+
`pluginRootVar` — fails this check instead of shipping a dead plugin.
|
|
39
|
+
|
|
40
|
+
## See also
|
|
41
|
+
|
|
42
|
+
- `gen-manifests(1)` — one source generates every vendor manifest.
|
|
43
|
+
- `cli-mcp-parity(7)` — the CLI ↔ MCP capability-parity gate.
|
|
@@ -215,3 +215,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
|
|
|
215
215
|
0.1.78
|
|
216
216
|
|
|
217
217
|
0.1.79
|
|
218
|
+
|
|
219
|
+
## Fast Architecture Review (v0.1.80)
|
|
220
|
+
|
|
221
|
+
Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
|
|
222
|
+
|
|
223
|
+
_No changes to the Web / Desktop Workbench in v0.1.81._
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"_comment": "SINGLE SOURCE OF TRUTH for every vendor manifest. Edit THIS file, then run `npm run gen:manifests`. Do NOT hand-edit the generated vendor manifests (.claude-plugin/, .codex-plugin/, .agents/, .mcp.json) — `npm run gen:manifests -- --check` (run by release:check) will fail if they drift from this source.",
|
|
3
3
|
"identity": {
|
|
4
4
|
"name": "cool-workflow",
|
|
5
|
-
"version": "0.1.
|
|
5
|
+
"version": "0.1.81",
|
|
6
6
|
"license": "BSD-2-Clause",
|
|
7
7
|
"homepage": "https://github.com/coo1white/cool-workflow",
|
|
8
8
|
"author": {
|