cool-workflow 0.1.79 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.codex-plugin/plugin.json +1 -1
  3. package/README.md +51 -3
  4. package/apps/architecture-review/app.json +1 -1
  5. package/apps/architecture-review-fast/app.json +64 -0
  6. package/apps/architecture-review-fast/workflow.js +153 -0
  7. package/apps/end-to-end-golden-path/app.json +1 -1
  8. package/apps/pr-review-fix-ci/app.json +1 -1
  9. package/apps/release-cut/app.json +1 -1
  10. package/apps/research-synthesis/app.json +1 -1
  11. package/dist/agent-config.js +21 -7
  12. package/dist/candidate-scoring.js +42 -22
  13. package/dist/capability-core.js +132 -17
  14. package/dist/capability-registry.js +138 -168
  15. package/dist/cli.js +97 -98
  16. package/dist/collaboration.js +5 -6
  17. package/dist/commit.js +20 -6
  18. package/dist/compare.js +18 -0
  19. package/dist/coordinator/classify.js +45 -0
  20. package/dist/coordinator/paths.js +42 -0
  21. package/dist/coordinator/util.js +129 -0
  22. package/dist/coordinator.js +127 -300
  23. package/dist/dispatch.js +35 -0
  24. package/dist/drive.js +79 -6
  25. package/dist/error-feedback.js +8 -4
  26. package/dist/evidence-reasoning.js +3 -3
  27. package/dist/execution-backend/agent.js +331 -0
  28. package/dist/execution-backend/probes.js +96 -0
  29. package/dist/execution-backend/util.js +47 -0
  30. package/dist/execution-backend.js +73 -421
  31. package/dist/mcp-server.js +79 -183
  32. package/dist/multi-agent/graph.js +84 -0
  33. package/dist/multi-agent/helpers.js +145 -0
  34. package/dist/multi-agent/paths.js +22 -0
  35. package/dist/multi-agent-eval/format.js +194 -0
  36. package/dist/multi-agent-eval/normalize.js +51 -0
  37. package/dist/multi-agent-eval.js +39 -244
  38. package/dist/multi-agent-host.js +0 -19
  39. package/dist/multi-agent.js +125 -314
  40. package/dist/node-snapshot.js +3 -3
  41. package/dist/observability/format.js +61 -0
  42. package/dist/observability/intake.js +98 -0
  43. package/dist/observability.js +14 -160
  44. package/dist/operator-ux/format.js +364 -0
  45. package/dist/operator-ux.js +22 -363
  46. package/dist/orchestrator/lifecycle-operations.js +2 -1
  47. package/dist/orchestrator/report.js +8 -0
  48. package/dist/orchestrator.js +26 -9
  49. package/dist/reclamation.js +26 -21
  50. package/dist/run-export.js +494 -25
  51. package/dist/run-registry/derive.js +172 -0
  52. package/dist/run-registry/format.js +124 -0
  53. package/dist/run-registry/gc.js +251 -0
  54. package/dist/run-registry/policy.js +16 -0
  55. package/dist/run-registry/queue.js +116 -0
  56. package/dist/run-registry.js +89 -597
  57. package/dist/run-state-schema.js +1 -0
  58. package/dist/sandbox-profile.js +43 -2
  59. package/dist/state-explosion/format.js +159 -0
  60. package/dist/state-explosion/helpers.js +82 -0
  61. package/dist/state-explosion.js +165 -304
  62. package/dist/state-node.js +19 -4
  63. package/dist/telemetry-attestation.js +55 -0
  64. package/dist/telemetry-demo.js +15 -3
  65. package/dist/telemetry-ledger.js +60 -15
  66. package/dist/topology.js +25 -8
  67. package/dist/triggers.js +33 -14
  68. package/dist/trust-audit.js +145 -33
  69. package/dist/version.js +1 -1
  70. package/dist/worker-isolation/helpers.js +51 -0
  71. package/dist/worker-isolation/paths.js +46 -0
  72. package/dist/worker-isolation.js +39 -115
  73. package/docs/agent-delegation-drive.7.md +71 -0
  74. package/docs/canonical-workflow-apps.7.md +37 -0
  75. package/docs/cli-mcp-parity.7.md +16 -0
  76. package/docs/contract-migration-tooling.7.md +6 -0
  77. package/docs/control-plane-scheduling.7.md +6 -0
  78. package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
  79. package/docs/durable-state-and-locking.7.md +8 -0
  80. package/docs/evidence-adoption-reasoning-chain.7.md +6 -0
  81. package/docs/execution-backends.7.md +6 -0
  82. package/docs/index.md +2 -0
  83. package/docs/launch/demo.tape +28 -0
  84. package/docs/launch/launch-kit.md +96 -17
  85. package/docs/launch/pre-launch-checklist.md +53 -0
  86. package/docs/multi-agent-cli-mcp-surface.7.md +8 -0
  87. package/docs/multi-agent-eval-replay-harness.7.md +6 -0
  88. package/docs/multi-agent-operator-ux.7.md +6 -0
  89. package/docs/multi-agent-trust-policy-audit.7.md +27 -0
  90. package/docs/node-snapshot-diff-replay.7.md +6 -0
  91. package/docs/observability-cost-accounting.7.md +6 -0
  92. package/docs/project-index.md +27 -6
  93. package/docs/real-execution-backends.7.md +6 -0
  94. package/docs/release-and-migration.7.md +8 -0
  95. package/docs/release-tooling.7.md +6 -0
  96. package/docs/routines.md +23 -0
  97. package/docs/run-registry-control-plane.7.md +89 -2
  98. package/docs/run-retention-reclamation.7.md +8 -0
  99. package/docs/source-context-profiles.7.md +119 -0
  100. package/docs/state-explosion-management.7.md +13 -0
  101. package/docs/team-collaboration.7.md +6 -0
  102. package/docs/trust-model.md +267 -0
  103. package/docs/unix-principles.md +49 -1
  104. package/docs/vendor-manifest-loadability.7.md +43 -0
  105. package/docs/web-desktop-workbench.7.md +6 -0
  106. package/manifest/plugin.manifest.json +1 -1
  107. package/manifest/source-context-profiles.json +142 -0
  108. package/package.json +4 -1
  109. package/scripts/agents/builtin-templates.json +7 -0
  110. package/scripts/agents/claude-p-agent.js +129 -43
  111. package/scripts/architecture-review-fast.js +362 -0
  112. package/scripts/bump-version.js +5 -10
  113. package/scripts/canonical-apps-list.js +64 -0
  114. package/scripts/canonical-apps.js +36 -4
  115. package/scripts/coverage-gate.js +211 -0
  116. package/scripts/dogfood-release.js +1 -1
  117. package/scripts/golden-path.js +4 -4
  118. package/scripts/parity-check.js +5 -0
  119. package/scripts/release-check.js +5 -1
  120. package/scripts/source-context.js +291 -0
  121. package/scripts/version-sync-check.js +5 -7
  122. package/skills/ci-triage/SKILL.md +50 -0
  123. package/skills/ci-triage/agents/openai.yaml +4 -0
  124. package/skills/cool-workflow/SKILL.md +4 -1
  125. package/skills/deploy-check/SKILL.md +55 -0
  126. package/skills/deploy-check/agents/openai.yaml +4 -0
  127. package/skills/design-qa/SKILL.md +49 -0
  128. package/skills/design-qa/agents/openai.yaml +4 -0
  129. package/skills/pr-review/SKILL.md +45 -0
  130. package/skills/pr-review/agents/openai.yaml +4 -0
  131. package/dist/capability-dispatcher.js +0 -86
@@ -191,3 +191,11 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
191
191
  0.1.78
192
192
 
193
193
  0.1.79
194
+
195
+ ## Fast Architecture Review (v0.1.80)
196
+
197
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
198
+
199
+ ## Deterministic Freed Manifest (v0.1.81)
200
+
201
+ The freed manifest is path-sorted before it feeds `tombstoneHash`, so reclamation's write-ahead tombstone hash-chain is reproducible across hosts regardless of filesystem enumeration order. Reclaimed tiers, the re-point seam, and the default (reclaim-nothing) policy are unchanged.
@@ -0,0 +1,119 @@
1
+ # Source Context Profiles
2
+
3
+ CW keeps source-context slimming out of the runtime kernel. The profile is policy
4
+ data in `manifest/source-context-profiles.json`; `scripts/source-context.js` is a
5
+ small mechanism that reads a git ref and writes JSONL to stdout.
6
+
7
+ ## Core Profile
8
+
9
+ The default `core` profile is the project memory for AI source imports. It keeps
10
+ runtime source and app/userland entrypoints, and leaves generated artifacts,
11
+ tests, docs, release records, and long logs as manifest-only records.
12
+
13
+ Included:
14
+
15
+ - `plugins/cool-workflow/src/**`
16
+ - `plugins/cool-workflow/apps/**`
17
+ - `plugins/cool-workflow/package.json`
18
+ - `plugins/cool-workflow/tsconfig.json`
19
+ - `plugins/cool-workflow/scripts/cw.js`
20
+ - `plugins/cool-workflow/scripts/mcp-server.js`
21
+ - `plugins/cool-workflow/scripts/agents/**`
22
+
23
+ Excluded from exported content:
24
+
25
+ - `plugins/cool-workflow/dist/**`
26
+ - `plugins/cool-workflow/test/**`
27
+ - `plugins/cool-workflow/docs/**`
28
+ - `docs/assets/**`
29
+ - `.cw-release/**`
30
+ - `CHANGELOG.md`
31
+ - `ITERATION_LOG.md`
32
+
33
+ Exclusion does not delete files and does not change release behavior. `dist/`
34
+ remains a committed release artifact until the release contract is explicitly
35
+ changed.
36
+
37
+ ## Narrow Profiles
38
+
39
+ Use a narrower opt-in profile when the question is already scoped:
40
+
41
+ - `runtime`: the full `src/**` runtime kernel plus package and TypeScript
42
+ metadata.
43
+ - `mcp`: capability core/registry, CLI routing, MCP server, MCP launcher scripts,
44
+ and shared types.
45
+ - `workflow-apps`: canonical apps plus the Workflow App framework and app
46
+ planning/orchestration surface.
47
+ - `release`: release flow, gates, manifest/version tooling, package metadata, and
48
+ release-tooling docs.
49
+ - `agent-wrappers`: external agent wrappers, agent config, execution backend,
50
+ drive loop, and agent-delegation docs.
51
+
52
+ The narrow profiles are policy data only. Selecting one changes only the JSONL
53
+ context pack; it does not change runtime behavior, release contents, or the
54
+ default `core` profile.
55
+
56
+ ## Commands
57
+
58
+ ```bash
59
+ node scripts/source-context.js profiles
60
+ node scripts/source-context.js manifest --profile core --ref HEAD --repo-root /path/to/repo > manifest.jsonl
61
+ node scripts/source-context.js export --profile core --ref HEAD --repo-root /path/to/repo > core-source.jsonl
62
+ node scripts/source-context.js export --profile mcp --ref HEAD --repo-root /path/to/repo > mcp-source.jsonl
63
+ node scripts/source-context.js export --profile mcp --changed-from origin/main --ref HEAD --repo-root /path/to/repo > mcp-changed.jsonl
64
+ node scripts/source-context.js export --profile core --ref HEAD --repo-root /path/to/repo --cache-dir .cw/cache/source-context > core-source.jsonl
65
+ ```
66
+
67
+ `manifest` emits one JSON object per tracked file at the selected ref:
68
+
69
+ ```json
70
+ {"path":"plugins/cool-workflow/src/state.ts","included":true,"reason":"included:plugins/cool-workflow/src/**","sha256":"..."}
71
+ ```
72
+
73
+ `export` emits only included text files and adds `content`. Both commands use
74
+ stdout for JSONL data only. Diagnostics and refusal messages go to stderr.
75
+
76
+ `--changed-from REF` is opt-in diff-aware mode. It filters `manifest` and
77
+ `export` to paths changed between the resolved base commit and `--ref`, then
78
+ applies the selected profile include/exclude rules. Deleted files are omitted
79
+ because there is no blob at the target ref. Records include `changedFrom` with
80
+ the resolved base commit. Empty diffs are valid and emit empty JSONL.
81
+
82
+ `export --cache-dir DIR` is opt-in. The cache key is the resolved git commit SHA
83
+ plus a digest of the selected source profile, so changing either the ref or the
84
+ include/exclude policy produces a different JSONL cache file. Cache hits write the
85
+ same JSONL bytes to stdout and stay silent on stderr. Corrupt or mismatched cache
86
+ records fail closed instead of falling back silently. Diff-aware exports include
87
+ the resolved `--changed-from` commit in the cache key, so full and changed exports
88
+ do not share cache files.
89
+
90
+ `--repo-root DIR` is also opt-in; when omitted, the script keeps its historical
91
+ default and reads the Cool Workflow repository root.
92
+
93
+ ## Verification
94
+
95
+ The smoke test checks that:
96
+
97
+ - the profile includes and excludes exactly the remembered paths;
98
+ - `dist/`, tests, docs, release records, and long logs are manifest-only;
99
+ - exported records are parseable JSONL with content and sha256;
100
+ - narrow profiles are slimmer than `core` and include/exclude their intended
101
+ surfaces;
102
+ - `--changed-from` emits only changed current-ref files, still honors excludes,
103
+ and caches separately from full exports;
104
+ - cached exports are byte-identical to uncached exports and corrupt cache hits
105
+ fail closed;
106
+ - the `core` profile stays under its `maxLines` guard.
107
+
108
+ Run:
109
+
110
+ ```bash
111
+ node test/source-context-profile-smoke.js
112
+ ```
113
+
114
+ ## FreeBSD Discipline
115
+
116
+ This feature is opt-in and does not alter existing CLI output. It is mechanism,
117
+ not policy: profile selection lives in data, and vendor prompt/stream behavior
118
+ stays in wrappers. It fails closed on invalid profiles, unknown refs, binary
119
+ included files, and line-count drift past the configured guard.
@@ -45,6 +45,13 @@ Summaries are written under `.cw/runs/<run-id>/summaries/` as plain JSON. Raw
45
45
  blackboard messages, graph nodes, graph edges, audit events, evidence refs, and
46
46
  eval artifacts are never deleted or overwritten.
47
47
 
48
+ Within a single summary build, CW shares the derived full operator graph,
49
+ operator status, blackboard digest, state-size record, and graph view records
50
+ through a short-lived in-memory context. This avoids rebuilding the same graph
51
+ for `summary refresh`, `summary show`, and the top-level state-explosion report.
52
+ It is not a daemon or persistent cache: the next command re-reads run state from
53
+ disk, recomputes source fingerprints, and still fails closed on stale summaries.
54
+
48
55
  ## Blackboard summarization
49
56
 
50
57
  `blackboard summarize <run-id>` (MCP: `cw_blackboard_summarize`) returns a
@@ -264,3 +271,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
264
271
  0.1.78
265
272
 
266
273
  0.1.79
274
+
275
+ ## Fast Architecture Review (v0.1.80)
276
+
277
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
278
+
279
+ _No changes to the state-explosion management surface in v0.1.81 (the module was carved into behavior-preserving siblings; output is byte-identical)._
@@ -207,3 +207,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
207
207
  0.1.78
208
208
 
209
209
  0.1.79
210
+
211
+ ## Fast Architecture Review (v0.1.80)
212
+
213
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
214
+
215
+ _No changes to the team-collaboration surface in v0.1.81._
@@ -0,0 +1,267 @@
1
+ # Trust Model & Limitations
2
+
3
+ > **Read this before you trust a cool-workflow record.** This document states
4
+ > exactly what CW's cryptographic guarantees prove, and — just as important —
5
+ > what they do **not** prove. We would rather lose a skeptical reader here than
6
+ > have them over-trust a green checkmark in production. If anything below reads
7
+ > as an overclaim, it is a bug; please file it.
8
+
9
+ CW is an **auditable control-plane**. It plans, dispatches, records, and verifies
10
+ agent work — it does **not** run the model itself. That single architectural
11
+ choice is what the guarantees below rest on, and it is also the source of their
12
+ honest ceiling.
13
+
14
+ ---
15
+
16
+ ## TL;DR
17
+
18
+ - CW's ed25519 signature + hash-chained ledger prove **integrity and
19
+ attribution**: a recorded usage figure was signed by the keyholder and has not
20
+ been edited since it was recorded. Both re-verify **offline** — the recorded
21
+ ledger's integrity with **no key at all** (`cw telemetry verify`), and each
22
+ `attested` signature with the **public key alone** (`cw telemetry verify
23
+ --pubkey <public.pem>`; also reproduced by `cw demo tamper`).
24
+ - They do **not** prove the original number was **true**. A dishonest signer can
25
+ sign a lie; the lie is then cryptographically bound to its signer, but it is
26
+ still a lie.
27
+ - **CW holds no private key.** It can verify, but it can neither forge a
28
+ signature nor measure usage itself (by design — see the red line below).
29
+ - The honest gap is **single-keyholder / no second party**: when the same
30
+ operator runs CW *and* holds the only signing key, integrity is real but there
31
+ is no independent party attesting that the source was honest. **This is exactly
32
+ why we are seeking early integration partners** who supply an independent
33
+ second party / co-signer. See [Closing the gap](#closing-the-gap-the-second-party).
34
+
35
+ ---
36
+
37
+ ## What the cryptography is, precisely
38
+
39
+ There are two distinct mechanisms. Conflating them is the most common way to
40
+ over- or under-state the guarantee, so they are kept separate here.
41
+
42
+ ### 1. The telemetry signature (ed25519) — attribution of a reported number
43
+
44
+ The agent (the **executor**) self-reports its token usage. A control-plane that
45
+ records that number verbatim is recording a **claim**. To turn the claim into an
46
+ **attestation**, the executor signs a canonical payload with its **private key**:
47
+
48
+ ```
49
+ sign({ usage, runId, taskId, promptDigest }) // ed25519, executor-side
50
+ ```
51
+
52
+ The `runId` / `taskId` / `promptDigest` binding is load-bearing: it ties the
53
+ signature to **this** hop, so a valid signature from one task cannot be replayed
54
+ onto another. `promptDigest` is the sha256 of the exact worker prompt CW handed
55
+ the agent.
56
+
57
+ CW then **verifies** that signature against an **operator-provisioned public
58
+ key**. CW holds *only* the public half. From `telemetry-attestation.ts`:
59
+
60
+ > CW VERIFIES that signature against an operator-provisioned PUBLIC key. CW holds
61
+ > ONLY the public key — it can verify, but can neither forge a signature nor (the
62
+ > red line) call a model to measure usage itself.
63
+
64
+ The result is one of three honest states, surfaced loudly and never silently
65
+ upgraded to "trusted":
66
+
67
+ | State | Meaning |
68
+ |---|---|
69
+ | `attested` | A valid ed25519 signature over the reported usage, bound to this run/task/prompt, verified against the configured public key. |
70
+ | `unattested` | Usage was reported but the signature is missing, malformed, made with the wrong key, or does not match the payload (tampered or replayed). This is also the state when no trust key is configured. |
71
+ | `absent` | The agent reported no usage at all. |
72
+
73
+ Defaults are honest: no signature ⇒ `unattested`; no usage ⇒ `absent`. **Usage
74
+ is never silently recorded as trusted.** The opt-in `require-attested-telemetry`
75
+ policy fails the run closed on anything other than `attested`.
76
+
77
+ ### 2. The hash-chained ledgers — tamper-evidence of the recorded log
78
+
79
+ A signature proves the agent *said* a number in flight. It does not, by itself,
80
+ prove that **CW recorded exactly that** and that **nobody edited the record
81
+ afterward**. That is the job of the append-only, hash-chained ledgers:
82
+
83
+ - **Telemetry ledger** (`telemetry.json`, one entry per agent hop): each entry
84
+ chains to the previous via `prevHash`, and `recordHash = sha256(canonical
85
+ entry)`. Flip a recorded verdict (`unattested` → `attested`) or edit a recorded
86
+ usage digest, and the chain no longer recomputes.
87
+ - **Trust-audit event log** (`events.jsonl`): the same discipline applied to
88
+ every recorded decision — sandbox path allow/deny, policy snapshots,
89
+ verifier-gated commits, collaboration approvals.
90
+
91
+ Verification **recomputes every hash independently and never trusts the stored
92
+ value**, so an edited, reordered, removed, or truncated entry flips
93
+ `verified = false`. A ledger that exists but cannot be parsed **fails closed** —
94
+ it is treated as corrupt, never silently as the clean empty chain.
95
+
96
+ This is all **offline**. The chain re-proof needs **no key at all**; add
97
+ `--pubkey <public.pem>` to re-run the signature **attribution** check against the
98
+ stored raw usage for every `attested` record. There is no telemetry service to
99
+ trust or breach — the record proves its own integrity, and a third-party auditor
100
+ can re-run both checks on their own machine.
101
+
102
+ ---
103
+
104
+ ## What this DOES prove
105
+
106
+ For telemetry, if `cw telemetry verify <run> --pubkey <public.pem>` reports green,
107
+ you can rely on **all** of the following, and only these:
108
+
109
+ 1. **Attribution.** Each `attested` usage figure was signed by the holder of the
110
+ configured private key, over a payload bound to that specific run, task, and
111
+ prompt. It is **non-repudiable**: the signer cannot later disown it, and it
112
+ could not have been replayed from a different hop.
113
+ 2. **Tamper-evidence of the record.** The recorded ledger — verdicts, usage
114
+ digests, audit decisions — has not been edited, reordered, truncated, or had
115
+ entries removed since it was written, *to the extent a self-recomputable chain
116
+ can detect* (see the threat-model caveat below). Casual or partial tampering,
117
+ accidental corruption, truncation, and forged unchained lines are all caught.
118
+ 3. **Offline, independent re-verification.** Re-proving the recorded ledger needs
119
+ no network, no CW service, and no trust in our infrastructure — `cw telemetry
120
+ verify` recomputes the chain on your machine (and needs no key to do it). With
121
+ `--pubkey`, the ed25519 **attribution** is independently re-checked with the
122
+ **public key alone**; `cw demo tamper` reproduces that sign-and-catch
123
+ end-to-end, offline. The integrity claim does not depend on trusting us.
124
+ 4. **CW never forged or measured anything.** CW holds no private key and never
125
+ calls a model. It cannot mint a signature, and it cannot fabricate a usage
126
+ number to sign. What it records, it received and verified.
127
+
128
+ ---
129
+
130
+ ## What this DOES NOT prove
131
+
132
+ Equally load-bearing. None of the following are within the guarantee, and we will
133
+ not imply otherwise:
134
+
135
+ 1. **It does not prove the reported number is true.** A signature proves *who*
136
+ said it and that it *wasn't altered* — **not** that it was correct at the
137
+ source. Quoting the code's own honest ceiling:
138
+
139
+ > A dishonest keyholder can still sign a lie, but the lie is now
140
+ > cryptographically bound to its signer.
141
+
142
+ CW deliberately does **not** independently measure usage (doing so would mean
143
+ calling the model — the red line it refuses to cross). So the strongest honest
144
+ claim is **attribution, not ground-truth measurement**.
145
+
146
+ 2. **It does not defend against a single party who holds both roles.** If the
147
+ same operator runs CW, holds the signing private key, *and* controls the
148
+ machine the ledger lives on, then a green verdict attests that **that party**
149
+ signed and that **that party's** record is internally consistent. It does not
150
+ bring in any *independent* party. Self-consistency is not third-party
151
+ verification.
152
+
153
+ 3. **A determined local writer can re-chain the whole log.** The hash-chain's
154
+ genesis is `sha256(runId)` — a value the local writer knows. So the chain
155
+ detects edits to *part* of a log, but a writer who edits an entry and then
156
+ **re-computes every subsequent hash** with CW's own sha256 produces a log that
157
+ re-verifies green. From `trust-audit.ts`:
158
+
159
+ > THREAT MODEL (be honest about the limit): the genesis is sha256(runId), so
160
+ > this detects casual/partial tampering, accidental corruption, truncation,
161
+ > removal, and forged-unchained lines — but NOT a determined local writer who
162
+ > re-chains the WHOLE log with this module's own sha256 after an edit.
163
+
164
+ This is **inherent** to any local, self-recomputable chain. Closing it needs an
165
+ anchor the writer cannot reproduce. CW **cannot mint that anchor itself** —
166
+ because by design it holds no private key. The one cryptographic anchor that
167
+ exists is the **agent's** telemetry signature, which covers agent-reported
168
+ *usage* — it does **not** cover CW-only decisions (sandbox / policy /
169
+ commit-gate), which have no external signer.
170
+
171
+ For those CW-only decisions, the only stronger guarantee available today is
172
+ **operational**, not cryptographic: commit `events.jsonl` to an external
173
+ append-only medium (git history, a remote append-only log) that the local
174
+ writer cannot rewrite. The chain is a **strict upgrade** over a bare
175
+ append-only log — not a substitute for an external anchor.
176
+
177
+ 4. **It says nothing about the quality, safety, or correctness of the work.**
178
+ Attestation is about *provenance and integrity of records*, not about whether
179
+ the agent's output is good, secure, or even functional. Other CW mechanisms
180
+ (verifier gate, schema validation, evidence grounding) speak to that; the
181
+ cryptography here does not.
182
+
183
+ ---
184
+
185
+ ## The single-keyholder limitation (stated plainly)
186
+
187
+ > **The core honest gap:** when the same operator runs CW and holds the only
188
+ > signing key, tamper-evidence proves that **records were not edited
189
+ > after the fact** — it does **not** prove that the **original signer was
190
+ > honest**. Integrity, yes. A trustworthy source, not necessarily.
191
+
192
+ Concretely, in a single-party setup:
193
+
194
+ - The operator provisions the keypair.
195
+ - The operator's agent process signs usage with the private key.
196
+ - CW (run by the same operator) verifies with the public key and writes the
197
+ ledger to the operator's disk.
198
+
199
+ Every cryptographic check can pass while a motivated single party fabricates the
200
+ source number, or — given the genesis caveat above — rewrites the whole local
201
+ chain. **Cryptography cannot manufacture a second party that does not exist.**
202
+ Separation of duties is the property auditors require everywhere; with one
203
+ operator wearing both hats, it is structurally absent no matter how good the
204
+ math is.
205
+
206
+ We are not going to argue this point away. It is real, it is the most important
207
+ limitation in this document, and it is the right critique to raise.
208
+
209
+ ---
210
+
211
+ ## Closing the gap: the second party
212
+
213
+ The fix is **not** more cryptography on one machine — it is an **independent
214
+ second party**, which is precisely the thing a single operator cannot self-supply.
215
+ This is why CW's near-term priority is **early integration partners**, and what we
216
+ mean by that concretely:
217
+
218
+ - **An independent co-signer / second keyholder.** A second party (a different
219
+ team, a CI identity outside the operator's control, or a partner's signing
220
+ service) holds a key the operator does not. When that party counter-signs runs —
221
+ or *is* the executor that signs usage — a green verdict starts to mean
222
+ "two parties who do not fully trust each other agree," which is the property
223
+ single-party attestation structurally cannot provide.
224
+ - **An external append-only anchor.** Pushing `events.jsonl` to a medium the local
225
+ operator cannot rewrite (a partner-held log, a public transparency log, signed
226
+ git history on a remote the operator doesn't control) closes the re-chain gap
227
+ for CW-only decisions described above.
228
+ - **Separated execution and verification.** The party that *spends the money*
229
+ (runs the model) and the party that *keeps the books* (CW) being genuinely
230
+ different entities turns CW's separation-of-duties design from an architectural
231
+ intent into an enforced fact.
232
+
233
+ If you are a potential partner who can supply an independent second party — a
234
+ co-signer, an external anchor, or separated execution — **that is the
235
+ collaboration we are actively looking for.** We would rather ship this honestly
236
+ and earn the second party than paper over the gap with a stronger-sounding claim
237
+ than the math supports.
238
+
239
+ ---
240
+
241
+ ## How to verify for yourself
242
+
243
+ - `cw telemetry verify <run>` — re-proves the telemetry ledger's **integrity**:
244
+ chain linkage + an independent per-record hash recompute, so any edit to a
245
+ recorded verdict or usage digest since record time flips it red. It needs **no
246
+ key** (it re-proves the *recording*). Add `--pubkey <pem-or-path>` to re-run the
247
+ ed25519 **signature** check for every `attested` record against the stored raw
248
+ usage; unreadable keys, missing raw usage, digest mismatches, wrong keys, and
249
+ signature mismatches fail closed. Mirrored as `cw_telemetry_verify` on the MCP
250
+ surface.
251
+ - `cw demo tamper` — a hermetic, offline, one-command proof: it builds a real
252
+ ed25519-signed ledger and then forges it two ways — flips a recorded verdict and
253
+ re-computes the *local* record hash (the chain still breaks), and reuses a
254
+ signature over inflated tokens (ed25519 rejects it). Everything is verified with
255
+ the public key only. The `✗ DETECTED` lines are the point.
256
+ - Re-run either with **only the public key** on a machine we do not control. If it
257
+ doesn't reproduce, our integrity claim is false — hold us to it.
258
+
259
+ ---
260
+
261
+ ## One-line summary
262
+
263
+ CW's cryptography proves **records weren't edited and were signed by the
264
+ keyholder** — strong, offline, public-key-verifiable **integrity and
265
+ attribution**. It does **not** prove the **source was honest**, and a single
266
+ operator holding both roles is the honest limit we are explicitly recruiting
267
+ integration partners to close.
@@ -1,7 +1,10 @@
1
1
  # Unix-Inspired Workflow Principles
2
2
 
3
3
  CW borrows a small set of durable systems ideas and applies them to agent
4
- workflow engineering. These are design principles, not platform claims.
4
+ workflow engineering. These are design principles, not platform claims — but
5
+ they are not optional: this project strictly follows the FreeBSD programming
6
+ philosophy, and §7 below states the binding rules every change is reviewed
7
+ against (mirrored as hard constraints in the repository's `AGENTS.md`).
5
8
 
6
9
  ## 1. Everything Is State
7
10
 
@@ -190,3 +193,48 @@ Hosts enforce runtime sandbox policy.
190
193
  ```
191
194
 
192
195
  This keeps CW small, inspectable, and extensible.
196
+
197
+ ## 7. FreeBSD Discipline (Binding Rules)
198
+
199
+ The principles above descend from one tradition — the FreeBSD school of
200
+ systems engineering — and CW adheres to it strictly. Concretely:
201
+
202
+ **POLA — Principle of Least Astonishment.** An existing output, file layout,
203
+ exit code, or flag never changes meaning or bytes underneath an operator. New
204
+ behavior ships behind a new verb/flag or an env toggle, with the prior
205
+ behavior byte-identical by default. (Example: live drive output is additive —
206
+ stderr only, TTY-gated, `CW_NO_STREAM=1` opt-out; the stdout payload and
207
+ evidence digest are unchanged.)
208
+
209
+ **Mechanism, not policy.** The kernel provides mechanisms; policy is data in
210
+ userland. WHICH agent runs is config (`CW_AGENT_COMMAND` / agent-config), not
211
+ code; vendor-specific rendering lives in wrappers under `scripts/agents/`,
212
+ never in core. Core may forward a vendor's stream; it never parses one.
213
+
214
+ **Rule of Silence.** stdout is data, stderr is diagnostics, and a
215
+ non-interactive run is silent on success. Anything human-friendly is TTY-gated
216
+ and can be disabled; `--json` output is stable and undecorated so it composes
217
+ in pipes.
218
+
219
+ **Fail closed, conservative defaults.** Unconfigured backends probe as
220
+ `unverified`, unverifiable telemetry is surfaced loudly (or refused in strict
221
+ mode), and invalid results park the hop. CW never fabricates a success and never
222
+ falls back silently. Boring correctness beats clever features.
223
+
224
+ **Tools, not frameworks.** Zero runtime dependencies is a red line. Verbs do
225
+ one thing; composition happens through durable files (`.cw/`) and pipes, not
226
+ hidden in-process coupling.
227
+
228
+ **Man pages are the contract.** Every shipped capability has a `docs/*.7.md`
229
+ page updated in the same change, and doc-drift guards in the test suite keep
230
+ the documented commands honest. Undocumented behavior is unfinished behavior.
231
+
232
+ **style(9) spirit.** One consistent style per layer; a diff matches the file
233
+ it touches and never reformats code it does not change.
234
+
235
+ **Release engineering.** Main is -CURRENT; a tag is -RELEASE: it exists only
236
+ after the deterministic gate and an independent review pass, and cadence never
237
+ overrides the gate.
238
+
239
+ A change that violates any rule in this section is rejected in review even if
240
+ the capability it ships is otherwise desirable.
@@ -0,0 +1,43 @@
1
+ # Vendor Manifest Loadability
2
+
3
+ CW ships one kernel to many AI clients. A single `manifest/plugin.manifest.json`
4
+ generates every vendor's plugin files (Claude, Codex, the `agents` marketplace,
5
+ Gemini, OpenCode) — see `gen-manifests(1)`. Each vendor that exposes the MCP
6
+ server gets a generated `mcp.json` telling that client how to launch it.
7
+
8
+ ## The gap this closes
9
+
10
+ Two gates already guard the manifests, but neither proves a vendor manifest
11
+ actually *boots*:
12
+
13
+ - `npm run gen:manifests -- --check` diffs the generated bytes against the
14
+ manifest source. It catches drift, not a wrong-but-consistent command.
15
+ - `parity-check` boots `dist/mcp-server.js` **directly** — it never reads any
16
+ vendor's `mcp.json`, never resolves a `pluginRootVar`.
17
+
18
+ So a manifest could declare a broken `command`, `args`, or path and every gate
19
+ would stay green while no client could load it. Track C ("multi-vendor manifest
20
+ actually loaded by ≥2 real clients") was asserted, not proven.
21
+
22
+ ## The load proof
23
+
24
+ `npm run manifest:load-check` (the `vendor-manifest-load-smoke` check, run automatically
25
+ by `npm test`) closes it. For every vendor in `targets` that declares an `mcp`
26
+ output it:
27
+
28
+ 1. reads the generated `mcp.json`;
29
+ 2. resolves the server `command` + `args` exactly as that client does —
30
+ substituting the vendor's `pluginRootVar` (`${CLAUDE_PLUGIN_ROOT}/` for Claude,
31
+ `./` for the rest) with the real plugin root;
32
+ 3. spawns the server with `shell:false` (argv spawn, no shell);
33
+ 4. completes a JSON-RPC `initialize` + `tools/list` round-trip.
34
+
35
+ Every vendor launches the same kernel, so the proof asserts they **agree**: one
36
+ `serverInfo.name` and an identical tool count across all of them. A vendor whose
37
+ manifest drifted to an unbootable shape — wrong path, wrong command, bad
38
+ `pluginRootVar` — fails this check instead of shipping a dead plugin.
39
+
40
+ ## See also
41
+
42
+ - `gen-manifests(1)` — one source generates every vendor manifest.
43
+ - `cli-mcp-parity(7)` — the CLI ↔ MCP capability-parity gate.
@@ -215,3 +215,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
215
215
  0.1.78
216
216
 
217
217
  0.1.79
218
+
219
+ ## Fast Architecture Review (v0.1.80)
220
+
221
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
222
+
223
+ _No changes to the Web / Desktop Workbench in v0.1.81._
@@ -2,7 +2,7 @@
2
2
  "_comment": "SINGLE SOURCE OF TRUTH for every vendor manifest. Edit THIS file, then run `npm run gen:manifests`. Do NOT hand-edit the generated vendor manifests (.claude-plugin/, .codex-plugin/, .agents/, .mcp.json) — `npm run gen:manifests -- --check` (run by release:check) will fail if they drift from this source.",
3
3
  "identity": {
4
4
  "name": "cool-workflow",
5
- "version": "0.1.79",
5
+ "version": "0.1.81",
6
6
  "license": "BSD-2-Clause",
7
7
  "homepage": "https://github.com/coo1white/cool-workflow",
8
8
  "author": {