cool-workflow 0.1.78 → 0.1.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.codex-plugin/plugin.json +1 -1
  3. package/README.md +29 -3
  4. package/apps/architecture-review/app.json +1 -1
  5. package/apps/architecture-review-fast/app.json +64 -0
  6. package/apps/architecture-review-fast/workflow.js +153 -0
  7. package/apps/end-to-end-golden-path/app.json +1 -1
  8. package/apps/pr-review-fix-ci/app.json +1 -1
  9. package/apps/release-cut/app.json +1 -1
  10. package/apps/research-synthesis/app.json +1 -1
  11. package/dist/capability-core.js +71 -0
  12. package/dist/capability-registry.js +13 -8
  13. package/dist/cli.js +49 -1
  14. package/dist/drive.js +74 -1
  15. package/dist/evidence-reasoning.js +2 -2
  16. package/dist/execution-backend.js +6 -1
  17. package/dist/mcp-server.js +56 -13
  18. package/dist/orchestrator/lifecycle-operations.js +2 -1
  19. package/dist/orchestrator.js +1 -1
  20. package/dist/run-export.js +370 -25
  21. package/dist/run-registry.js +11 -4
  22. package/dist/state-explosion.js +100 -21
  23. package/dist/telemetry-demo.js +154 -0
  24. package/dist/version.js +1 -1
  25. package/docs/agent-delegation-drive.7.md +60 -0
  26. package/docs/canonical-workflow-apps.7.md +37 -0
  27. package/docs/cli-mcp-parity.7.md +14 -0
  28. package/docs/contract-migration-tooling.7.md +6 -0
  29. package/docs/control-plane-scheduling.7.md +6 -0
  30. package/docs/durable-state-and-locking.7.md +6 -0
  31. package/docs/evidence-adoption-reasoning-chain.7.md +6 -0
  32. package/docs/execution-backends.7.md +6 -0
  33. package/docs/index.md +1 -0
  34. package/docs/launch/demo.tape +28 -0
  35. package/docs/launch/launch-kit.md +172 -0
  36. package/docs/launch/pre-launch-checklist.md +53 -0
  37. package/docs/multi-agent-cli-mcp-surface.7.md +6 -0
  38. package/docs/multi-agent-eval-replay-harness.7.md +6 -0
  39. package/docs/multi-agent-operator-ux.7.md +6 -0
  40. package/docs/node-snapshot-diff-replay.7.md +6 -0
  41. package/docs/observability-cost-accounting.7.md +6 -0
  42. package/docs/project-index.md +16 -6
  43. package/docs/real-execution-backends.7.md +6 -0
  44. package/docs/release-and-migration.7.md +6 -0
  45. package/docs/release-tooling.7.md +6 -0
  46. package/docs/routines.md +23 -0
  47. package/docs/run-registry-control-plane.7.md +44 -1
  48. package/docs/run-retention-reclamation.7.md +6 -0
  49. package/docs/source-context-profiles.7.md +119 -0
  50. package/docs/state-explosion-management.7.md +13 -0
  51. package/docs/team-collaboration.7.md +6 -0
  52. package/docs/unix-principles.md +49 -1
  53. package/docs/web-desktop-workbench.7.md +6 -0
  54. package/manifest/plugin.manifest.json +1 -1
  55. package/manifest/source-context-profiles.json +142 -0
  56. package/package.json +2 -1
  57. package/scripts/agents/claude-p-agent.js +129 -43
  58. package/scripts/architecture-review-fast.js +362 -0
  59. package/scripts/bump-version.js +1 -0
  60. package/scripts/canonical-apps.js +21 -4
  61. package/scripts/coverage-gate.js +211 -0
  62. package/scripts/dogfood-release.js +1 -1
  63. package/scripts/golden-path.js +4 -4
  64. package/scripts/source-context.js +291 -0
  65. package/scripts/version-sync-check.js +1 -0
  66. package/skills/ci-triage/SKILL.md +50 -0
  67. package/skills/ci-triage/agents/openai.yaml +4 -0
  68. package/skills/cool-workflow/SKILL.md +4 -1
  69. package/skills/deploy-check/SKILL.md +55 -0
  70. package/skills/deploy-check/agents/openai.yaml +4 -0
  71. package/skills/design-qa/SKILL.md +49 -0
  72. package/skills/design-qa/agents/openai.yaml +4 -0
  73. package/skills/pr-review/SKILL.md +45 -0
  74. package/skills/pr-review/agents/openai.yaml +4 -0
@@ -48,13 +48,55 @@ exports.GRAPH_VIEWS = [
48
48
  "candidate",
49
49
  "commit-gate"
50
50
  ];
51
/**
 * Create a fresh memoization context for one state-explosion build pass.
 *
 * The context carries two kinds of cache:
 *  - keyed Maps: `stateSizes` (keyed by thresholdsKey), `blackboardDigests`
 *    (keyed by blackboard id, "" when none is given) and `graphRecords`
 *    (keyed by graphKey);
 *  - single-value slots: `fullGraph`, `operator` and `reasoningCriticalIds`,
 *    filled lazily by fullGraphFor, operatorFor and reasoningCriticalIdsFor.
 *
 * The lazy slots are declared here as `undefined` so the complete shape of the
 * context is visible in one place instead of growing as helpers run.
 *
 * @returns {{stateSizes: Map, blackboardDigests: Map, graphRecords: Map,
 *   fullGraph: (object|undefined), operator: (object|undefined),
 *   reasoningCriticalIds: (object|undefined)}} an empty context
 */
function createStateExplosionBuildContext() {
    return {
        stateSizes: new Map(),
        blackboardDigests: new Map(),
        graphRecords: new Map(),
        // Lazily populated; `undefined` means "not computed yet".
        fullGraph: undefined,
        operator: undefined,
        reasoningCriticalIds: undefined
    };
}
58
/**
 * Return the full multi-agent operator graph for `run`, building it on the
 * first call and reusing the value stored on `context.fullGraph` afterwards.
 *
 * The memo is not keyed by run, so one context must only ever be used with a
 * single run.
 *
 * @param {object} run - run passed to buildMultiAgentOperatorGraph on a miss
 * @param {object} context - build context holding the `fullGraph` slot
 * @returns {object} the (possibly memoized) full graph
 */
function fullGraphFor(run, context) {
    const memoized = context.fullGraph;
    if (memoized)
        return memoized;
    const built = (0, multi_agent_operator_ux_1.buildMultiAgentOperatorGraph)(run);
    context.fullGraph = built;
    return built;
}
63
/**
 * Return the multi-agent operator summary for `run`, computing it on the
 * first call and reusing the value stored on `context.operator` afterwards.
 *
 * The memo is not keyed by run, so one context must only ever be used with a
 * single run.
 *
 * @param {object} run - run passed to summarizeMultiAgentOperator on a miss
 * @param {object} context - build context holding the `operator` slot
 * @returns {object} the (possibly memoized) operator summary
 */
function operatorFor(run, context) {
    const memoized = context.operator;
    if (memoized)
        return memoized;
    const summary = (0, multi_agent_operator_ux_1.summarizeMultiAgentOperator)(run);
    context.operator = summary;
    return summary;
}
68
/**
 * Return the reasoning-critical node ids for `run`, computing them on the
 * first call and reusing the value stored on `context.reasoningCriticalIds`.
 *
 * On a miss the operator summary is obtained through operatorFor, so it is
 * shared with every other consumer of the same context.
 *
 * @param {object} run - run passed to reasoningCriticalNodeIds on a miss
 * @param {object} context - build context holding the memo slots
 * @returns {Iterable} the (possibly memoized) ids, as returned by
 *   reasoningCriticalNodeIds
 */
function reasoningCriticalIdsFor(run, context) {
    const memoized = context.reasoningCriticalIds;
    if (memoized)
        return memoized;
    const ids = (0, evidence_reasoning_1.reasoningCriticalNodeIds)(run, operatorFor(run, context));
    context.reasoningCriticalIds = ids;
    return ids;
}
73
/**
 * Build a cache key from a thresholds object by joining its six limit fields,
 * in a fixed order, with ":".
 *
 * Missing (undefined/null) fields contribute an empty segment, per
 * Array.prototype.join.
 *
 * @param {object} thresholds - object carrying the threshold fields
 * @returns {string} colon-separated key
 */
function thresholdsKey(thresholds) {
    const orderedFields = [
        "graphNodes",
        "graphEdges",
        "blackboardMessages",
        "blackboardRecords",
        "collapseBucket",
        "totalRecords"
    ];
    return orderedFields.map((field) => thresholds[field]).join(":");
}
83
/**
 * Build the cache key for a compact-graph record from the view name and the
 * options that are folded into the key: focus, depth and thresholds.
 *
 * Segments are separated by NUL ("\0"). A falsy focus and an undefined depth
 * both contribute an empty segment; thresholds default to
 * DEFAULT_STATE_EXPLOSION_THRESHOLDS when not supplied.
 *
 * @param {string} view - graph view name
 * @param {object} options - graph options (`focus`, `depth`, `thresholds`)
 * @returns {string} NUL-separated key
 */
function graphKey(view, options) {
    const focusPart = options.focus || "";
    const depthPart = options.depth === undefined ? "" : String(options.depth);
    const thresholdsPart = thresholdsKey(options.thresholds || exports.DEFAULT_STATE_EXPLOSION_THRESHOLDS);
    return [view, focusPart, depthPart, thresholdsPart].join("\0");
}
51
91
  // ---------------------------------------------------------------------------
52
92
  // State size
53
93
  // ---------------------------------------------------------------------------
54
94
  function computeStateSize(run, thresholds = exports.DEFAULT_STATE_EXPLOSION_THRESHOLDS) {
95
+ return computeStateSizeWithGraph(run, thresholds, (0, multi_agent_operator_ux_1.buildMultiAgentOperatorGraph)(run));
96
+ }
97
+ function computeStateSizeWithGraph(run, thresholds, graph) {
55
98
  const ma = run.multiAgent || { runs: [], roles: [], groups: [], memberships: [], fanouts: [], fanins: [] };
56
99
  const bb = run.blackboard || { topics: [], messages: [], contexts: [], artifacts: [], snapshots: [], decisions: [] };
57
- const graph = (0, multi_agent_operator_ux_1.buildMultiAgentOperatorGraph)(run);
58
100
  const counts = {
59
101
  multiAgentRuns: (ma.runs || []).length,
60
102
  roles: (ma.roles || []).length,
@@ -97,6 +139,15 @@ function computeStateSize(run, thresholds = exports.DEFAULT_STATE_EXPLOSION_THRE
97
139
  reasons.push(`run has ${total} multi-agent records (> ${thresholds.totalRecords})`);
98
140
  return { ...counts, total, compactionRecommended: reasons.length > 0, reasons: reasons.sort() };
99
141
  }
142
/**
 * Return the state-size record for `run` under `thresholds`, memoized on
 * `context.stateSizes` by thresholdsKey(thresholds).
 *
 * On a miss the size is computed with computeStateSizeWithGraph, reusing the
 * context's full graph (via fullGraphFor) instead of rebuilding it.
 *
 * @param {object} run - run to measure
 * @param {object} thresholds - thresholds used for both the key and the computation
 * @param {object} context - build context holding the `stateSizes` Map
 * @returns {object} the (possibly memoized) state-size record
 */
function stateSizeFor(run, thresholds, context) {
    const cacheKey = thresholdsKey(thresholds);
    const hit = context.stateSizes.get(cacheKey);
    if (hit)
        return hit;
    const computed = computeStateSizeWithGraph(run, thresholds, fullGraphFor(run, context));
    context.stateSizes.set(cacheKey, computed);
    return computed;
}
100
151
  // ---------------------------------------------------------------------------
101
152
  // Blackboard digest (deterministic structural summary)
102
153
  // ---------------------------------------------------------------------------
@@ -310,10 +361,26 @@ function summarizeBlackboardDigest(run, blackboardId) {
310
361
  highSignal
311
362
  };
312
363
  }
364
/**
 * Return the blackboard digest for `run`, memoized on
 * `context.blackboardDigests`.
 *
 * The cache key is the blackboard id, or "" when no id is given; the original
 * `blackboardId` argument (not the key) is what gets forwarded to
 * summarizeBlackboardDigest on a miss.
 *
 * @param {object} run - run whose blackboard is summarized
 * @param {object} context - build context holding the `blackboardDigests` Map
 * @param {string} [blackboardId] - optional blackboard to scope the digest to
 * @returns {object} the (possibly memoized) digest
 */
function blackboardDigestFor(run, context, blackboardId) {
    const cacheKey = blackboardId || "";
    const hit = context.blackboardDigests.get(cacheKey);
    if (hit)
        return hit;
    const computed = summarizeBlackboardDigest(run, blackboardId);
    context.blackboardDigests.set(cacheKey, computed);
    return computed;
}
313
373
  function buildCompactGraph(run, view = "compact", options = {}) {
374
+ return buildCompactGraphWithContext(run, view, options, createStateExplosionBuildContext());
375
+ }
376
+ function buildCompactGraphWithContext(run, view, options, context) {
314
377
  const thresholds = options.thresholds || exports.DEFAULT_STATE_EXPLOSION_THRESHOLDS;
315
- const full = (0, multi_agent_operator_ux_1.buildMultiAgentOperatorGraph)(run);
316
- const operator = (0, multi_agent_operator_ux_1.summarizeMultiAgentOperator)(run);
378
+ const key = graphKey(view, { ...options, thresholds });
379
+ const cached = context.graphRecords.get(key);
380
+ if (cached)
381
+ return cached;
382
+ const full = fullGraphFor(run, context);
383
+ const operator = operatorFor(run, context);
317
384
  const critical = criticalPathNodeIds(run, operator);
318
385
  const protectedIds = new Set(critical);
319
386
  // Failures, blocked, rejected, conflicting nodes are always preserved.
@@ -324,7 +391,7 @@ function buildCompactGraph(run, view = "compact", options = {}) {
324
391
  // v0.1.26: reasoning steps are on the critical path and must never be collapsed
325
392
  // into a synthetic summary node — protect every decision-gate node backing an
326
393
  // adopted reasoning chain (notably score nodes, which are otherwise collapsed).
327
- for (const id of (0, evidence_reasoning_1.reasoningCriticalNodeIds)(run))
394
+ for (const id of reasoningCriticalIdsFor(run, context))
328
395
  protectedIds.add(id);
329
396
  for (const failure of operator.failures) {
330
397
  if (failure.linked)
@@ -349,13 +416,15 @@ function buildCompactGraph(run, view = "compact", options = {}) {
349
416
  const collapseEnabled = view === "compact" || view === "critical-path" || Boolean(options.focus);
350
417
  if (view === "full" || !collapseEnabled) {
351
418
  // No collapse: emit scoped graph verbatim (still records provenance + critical path).
352
- return finalizeGraphRecord(run, view, options, full, {
419
+ const record = finalizeGraphRecord(run, view, options, full, {
353
420
  nodes: scopeNodes.map((node) => ({ ...node })),
354
421
  edges: scopeEdges.map((edge) => ({ ...edge })),
355
422
  syntheticNodes: [],
356
423
  critical,
357
424
  operator
358
425
  });
426
+ context.graphRecords.set(key, record);
427
+ return record;
359
428
  }
360
429
  // Determine collapse buckets per node.
361
430
  const rule = collapseRuleFor(view);
@@ -436,13 +505,15 @@ function buildCompactGraph(run, view = "compact", options = {}) {
436
505
  edgeSeen.add(key);
437
506
  edges.push({ from, to, label: edge.label });
438
507
  }
439
- return finalizeGraphRecord(run, view, options, full, {
508
+ const record = finalizeGraphRecord(run, view, options, full, {
440
509
  nodes: nodes.sort((a, b) => a.kind.localeCompare(b.kind) || a.id.localeCompare(b.id)),
441
510
  edges: edges.sort((a, b) => a.from.localeCompare(b.from) || a.to.localeCompare(b.to) || (a.label || "").localeCompare(b.label || "")),
442
511
  syntheticNodes: synthetic.sort((a, b) => a.id.localeCompare(b.id)),
443
512
  critical,
444
513
  operator
445
514
  });
515
+ context.graphRecords.set(key, record);
516
+ return record;
446
517
  }
447
518
  function finalizeGraphRecord(run, view, options, full, built) {
448
519
  const collapsedNodeCount = built.syntheticNodes.reduce((acc, syn) => acc + syn.collapsedNodeCount, 0);
@@ -671,10 +742,13 @@ function expansionCommandFor(run, view, key) {
671
742
  // Operator digest
672
743
  // ---------------------------------------------------------------------------
673
744
  function buildOperatorDigest(run, thresholds = exports.DEFAULT_STATE_EXPLOSION_THRESHOLDS) {
674
- const stateSize = computeStateSize(run, thresholds);
675
- const operator = (0, multi_agent_operator_ux_1.summarizeMultiAgentOperator)(run);
676
- const compact = buildCompactGraph(run, "compact", { thresholds });
677
- const blackboard = summarizeBlackboardDigest(run);
745
+ return buildOperatorDigestWithContext(run, thresholds, createStateExplosionBuildContext());
746
+ }
747
+ function buildOperatorDigestWithContext(run, thresholds, context) {
748
+ const stateSize = stateSizeFor(run, thresholds, context);
749
+ const operator = operatorFor(run, context);
750
+ const compact = buildCompactGraphWithContext(run, "compact", { thresholds }, context);
751
+ const blackboard = blackboardDigestFor(run, context);
678
752
  const evidence = operator.evidence;
679
753
  const adopted = evidence.filter((e) => e.status === "adopted");
680
754
  const missing = evidence.filter((e) => e.status === "missing" || e.status === "pending" || e.status === "conflicting");
@@ -753,12 +827,15 @@ function buildOperatorDigest(run, thresholds = exports.DEFAULT_STATE_EXPLOSION_T
753
827
  // State explosion report (combines all derived indexes)
754
828
  // ---------------------------------------------------------------------------
755
829
  function buildStateExplosionReport(run, options = {}) {
830
+ return buildStateExplosionReportWithContext(run, options, createStateExplosionBuildContext());
831
+ }
832
+ function buildStateExplosionReportWithContext(run, options, context) {
756
833
  const thresholds = options.thresholds || exports.DEFAULT_STATE_EXPLOSION_THRESHOLDS;
757
- const stateSize = computeStateSize(run, thresholds);
758
- const compactGraph = buildCompactGraph(run, "compact", { thresholds });
759
- const criticalPathGraph = buildCompactGraph(run, "critical-path", { thresholds });
760
- const blackboardDigest = summarizeBlackboardDigest(run);
761
- const operatorDigest = buildOperatorDigest(run, thresholds);
834
+ const stateSize = stateSizeFor(run, thresholds, context);
835
+ const compactGraph = buildCompactGraphWithContext(run, "compact", { thresholds }, context);
836
+ const criticalPathGraph = buildCompactGraphWithContext(run, "critical-path", { thresholds }, context);
837
+ const blackboardDigest = blackboardDigestFor(run, context);
838
+ const operatorDigest = buildOperatorDigestWithContext(run, thresholds, context);
762
839
  const currentFingerprint = fingerprintStrings([
763
840
  compactGraph.sourceFingerprint,
764
841
  blackboardDigest.sourceFingerprint,
@@ -837,12 +914,13 @@ function summariesDir(run) {
837
914
  }
838
915
  function refreshStateExplosionSummaries(run, options = {}) {
839
916
  const thresholds = options.thresholds || exports.DEFAULT_STATE_EXPLOSION_THRESHOLDS;
917
+ const context = createStateExplosionBuildContext();
840
918
  const dir = summariesDir(run);
841
919
  node_fs_1.default.mkdirSync(dir, { recursive: true });
842
920
  const views = options.views || ["full", "compact", "critical-path", "failures", "evidence", "trust", "topology", "blackboard", "candidate", "commit-gate"];
843
- const blackboardDigest = summarizeBlackboardDigest(run);
844
- const operatorDigest = buildOperatorDigest(run, thresholds);
845
- const graphRecords = views.map((view) => buildCompactGraph(run, view, { thresholds }));
921
+ const blackboardDigest = blackboardDigestFor(run, context);
922
+ const operatorDigest = buildOperatorDigestWithContext(run, thresholds, context);
923
+ const graphRecords = views.map((view) => buildCompactGraphWithContext(run, view, { thresholds }, context));
846
924
  const entries = [];
847
925
  const writeRecord = (id, record, scope, fingerprint, included, omitted) => {
848
926
  const file = node_path_1.default.join(dir, `${(0, state_1.safeFileName)(id)}.json`);
@@ -854,13 +932,14 @@ function refreshStateExplosionSummaries(run, options = {}) {
854
932
  for (const record of graphRecords) {
855
933
  writeRecord(record.id, record, "run", record.sourceFingerprint, record.compactNodeCount, record.collapsedNodeCount);
856
934
  }
857
- const stateSize = computeStateSize(run, thresholds);
935
+ const stateSize = stateSizeFor(run, thresholds, context);
858
936
  const indexFingerprint = fingerprintStrings([
859
937
  operatorDigest.sourceFingerprint,
860
938
  blackboardDigest.sourceFingerprint,
861
939
  ...graphRecords.map((r) => r.sourceFingerprint),
862
940
  String(stateSize.total)
863
941
  ]);
942
+ const compactGraph = buildCompactGraphWithContext(run, "compact", { thresholds }, context);
864
943
  const reportPath = node_path_1.default.join(dir, "state-explosion-report.json");
865
944
  const index = {
866
945
  schemaVersion: exports.STATE_EXPLOSION_SCHEMA_VERSION,
@@ -869,7 +948,7 @@ function refreshStateExplosionSummaries(run, options = {}) {
869
948
  scope: "run",
870
949
  sourceRecordIds: unique([...blackboardDigest.sourceRecordIds, ...operatorDigest.sourceRecordIds]),
871
950
  sourceFingerprint: fingerprintStrings([
872
- buildCompactGraph(run, "compact", { thresholds }).sourceFingerprint,
951
+ compactGraph.sourceFingerprint,
873
952
  blackboardDigest.sourceFingerprint,
874
953
  operatorDigest.sourceFingerprint,
875
954
  String(stateSize.total)
@@ -893,7 +972,7 @@ function refreshStateExplosionSummaries(run, options = {}) {
893
972
  };
894
973
  void indexFingerprint;
895
974
  (0, state_1.writeJson)(index.paths.indexPath, index);
896
- const report = buildStateExplosionReport(run, { thresholds, index });
975
+ const report = buildStateExplosionReportWithContext(run, { thresholds, index }, context);
897
976
  (0, state_1.writeJson)(reportPath, report);
898
977
  (0, trust_audit_1.recordTrustAuditEvent)(run, {
899
978
  kind: "summary.refresh",
@@ -0,0 +1,154 @@
1
+ "use strict";
2
+ // Tamper-evidence demo (the one-command proof) — make CW's central claim VISIBLE:
3
+ // an audit record proves its own integrity, and ANYONE can re-verify it offline
4
+ // with only the public key. No competitor's pipeline telemetry can do this.
5
+ //
6
+ // Fully hermetic (deterministic except for key material): generates an EPHEMERAL ed25519 keypair, builds
7
+ // a REAL telemetry ledger through the production append API (appendTelemetryAttestation
8
+ // + signTelemetry — byte-identical to what a live attested run writes), then
9
+ // demonstrates BOTH tamper-evidence layers catching a forgery:
10
+ // A) LEDGER layer — flip a recorded verdict on disk (unattested -> attested, the
11
+ // canonical "forge a green record" attack) -> verifyTelemetryLedger recomputes
12
+ // every hash independently, so the edited record's hash mismatches AND every
13
+ // record after it breaks the chain (cascade).
14
+ // B) SIGNATURE layer — inflate the reported tokens but keep the original ed25519
15
+ // signature -> verifyTelemetryAttestation rejects it ("signature does not match").
16
+ //
17
+ // No model, no network, no API key, no second repo — runs in a private tmpdir.
18
+ var __importDefault = (this && this.__importDefault) || function (mod) {
19
+ return (mod && mod.__esModule) ? mod : { "default": mod };
20
+ };
21
+ Object.defineProperty(exports, "__esModule", { value: true });
22
+ exports.formatTelemetryVerify = formatTelemetryVerify;
23
+ exports.formatTamperDemo = formatTamperDemo;
24
+ exports.runTamperDemo = runTamperDemo;
25
+ const node_crypto_1 = __importDefault(require("node:crypto"));
26
+ const node_fs_1 = __importDefault(require("node:fs"));
27
+ const node_os_1 = __importDefault(require("node:os"));
28
+ const node_path_1 = __importDefault(require("node:path"));
29
+ const telemetry_ledger_1 = require("./telemetry-ledger");
30
+ const telemetry_attestation_1 = require("./telemetry-attestation");
31
+ const execution_backend_1 = require("./execution-backend");
32
/**
 * Human-facing render of `telemetry verify <run>`.
 *
 * @param {object} r - verification result. Reads `present`, `runId`,
 *   `verified`, `records`, `attested`, `unattested`, `absent` and
 *   `failedChecks` (array of `{name, code?}`; treated as empty when missing).
 * @returns {string} a one-line notice when the run has no ledger, otherwise a
 *   multi-line report: title, verdict headline, tally, then one line per
 *   failed check.
 */
function formatTelemetryVerify(r) {
    if (!r.present)
        return `telemetry: run ${r.runId} has no attestation ledger (nothing to verify)`;
    // A verified result may legitimately carry no failedChecks array at all.
    const failedChecks = r.failedChecks || [];
    const head = r.verified
        ? `✓ VERIFIED — ${r.records} record(s), chain intact, every hash recomputed independently`
        : `✗ TAMPERING DETECTED — ${failedChecks.length} check(s) failed`;
    const tally = ` attested ${r.attested} · unattested ${r.unattested} · absent ${r.absent}`;
    // Only append the code when there is one, so a code-less check does not
    // leave a dangling trailing space on its line.
    const failLines = failedChecks.map((c) => (c.code ? ` ✗ ${c.name} ${c.code}` : ` ✗ ${c.name}`));
    const fails = failLines.length ? "\n" + failLines.join("\n") : "";
    return `telemetry verify ${r.runId}\n${head}\n${tally}${fails}`;
}
41
/**
 * Human-facing render of `demo tamper` — the visible tamper-evidence proof.
 *
 * @param {object} r - demo result as returned by runTamperDemo. Reads
 *   `trustKey`, `workers`, `baseline`, `layers` and `proven`.
 * @returns {string} newline-joined report: header, baseline summary, one
 *   section per tamper layer, and a final verdict line.
 */
function formatTamperDemo(r) {
    const ledgerMark = r.baseline.ledgerVerified ? "✓" : "✗";
    const out = [
        `cw demo tamper — tamper-evidence proof (hermetic, ${r.trustKey} key)`,
        "",
        `▶ Built an attested telemetry ledger: ${r.workers} hops, ${r.baseline.records} records`,
        ` ${ledgerMark} ledger verifies ${r.baseline.signaturesValid} signed hop(s) verify against the public key`
    ];
    for (const layer of r.layers) {
        const beforeMark = layer.before.verified ? "✓ verified" : "✗";
        // A tamper that still verifies afterwards is the failure case.
        const afterMark = layer.after.verified ? "✓ (UNDETECTED!)" : "✗ DETECTED";
        out.push(
            "",
            `▶ ${layer.layer.toUpperCase()} tamper`,
            ` edit: ${layer.tamper}`,
            ` before: ${beforeMark} — ${layer.before.detail}`,
            ` after: ${afterMark} — ${layer.after.detail}`
        );
    }
    let verdict;
    if (r.proven) {
        verdict = "VERDICT: tamper-evidence holds ✓ — every forgery was caught offline, with only the public key. No server was trusted.";
    }
    else {
        verdict = "VERDICT: PROOF FAILED ✗ — a tamper went undetected. This is a regression in the integrity guarantee.";
    }
    out.push("", verdict);
    return out.join("\n");
}
61
// Demo fixture: three hops, deliberately mixed — two signed/attested and one
// unattested — so the ledger-layer tamper has an "unattested" verdict to forge
// into "attested" (the exact threat the ledger exists to catch).
// Each row is [workerId, taskId, input_tokens, output_tokens, attestation];
// the prompt digest of every hop is the sha256 of its task id.
const HOPS = [
    ["w-map", "map:server-api", 2117, 1911, "attested"],
    ["w-assess", "assess:security", 1840, 1502, "unattested"],
    ["w-verdict", "verdict:synthesis", 980, 770, "attested"]
].map(([workerId, taskId, input_tokens, output_tokens, attestation]) => ({
    workerId,
    taskId,
    promptDigest: (0, execution_backend_1.sha256)(taskId),
    usage: { input_tokens, output_tokens },
    attestation
}));
// Fixed timestamp stamped on every appended record so the demo never reads the clock.
const DEMO_NOW = "2026-01-01T00:00:00.000Z";
70
/**
 * Summarize the checks that did not pass as human-readable strings.
 *
 * @param {Array<{name: string, pass: boolean, code?: string}>} checks -
 *   verification checks
 * @returns {string[]} one entry per failing check, in input order:
 *   "name: code", or just "name" when the check carries no code (so the
 *   output never contains a literal "undefined").
 */
function failingChecks(checks) {
    return checks
        .filter((c) => !c.pass)
        .map((c) => (c.code ? `${c.name}: ${c.code}` : `${c.name}`));
}
73
/**
 * Run the full tamper-evidence demonstration.
 *
 * Builds a ledger through the production append API, verifies the clean
 * baseline, then applies one ledger-layer and one signature-layer forgery and
 * records whether each was detected. Pure of clock/network; the only
 * nondeterminism is the ephemeral keypair, which never leaves this function.
 *
 * Directory handling: when `options.dir` is given the demo runs there and the
 * directory is never removed. Otherwise a private tmpdir is created and removed
 * on the way out — including when a step throws — unless `options.keepDir` is set.
 *
 * @param {object} [options]
 * @param {string} [options.dir] - directory to run in (created if missing, never deleted)
 * @param {boolean} [options.keepDir] - keep the auto-created tmpdir for inspection
 * @returns {{schemaVersion: number, runId: string, workers: number, trustKey: string,
 *   baseline: object, layers: object[], proven: boolean}} demo result; `proven`
 *   is true only when the baseline verified and every tamper was detected.
 */
function runTamperDemo(options = {}) {
    const runDir = options.dir || node_fs_1.default.mkdtempSync(node_path_1.default.join(node_os_1.default.tmpdir(), "cw-tamper-demo-"));
    // Only a tmpdir this call created is ever deleted, and only when not asked to keep it.
    const removeRunDir = !options.keepDir && !options.dir;
    try {
        node_fs_1.default.mkdirSync(runDir, { recursive: true });
        const runId = "demo-tamper-run";
        // Minimal run shape: the ledger API uses only id + paths.runDir.
        const run = { id: runId, paths: { runDir } };
        const { publicKey, privateKey } = node_crypto_1.default.generateKeyPairSync("ed25519");
        const publicKeyPem = publicKey.export({ type: "spki", format: "pem" }).toString();
        const privateKeyPem = privateKey.export({ type: "pkcs8", format: "pem" }).toString();
        // 1. Build a REAL ledger through the production append API, signing each
        //    attested hop's usage with the ephemeral key.
        const signed = [];
        for (const hop of HOPS) {
            const ctx = { runId, taskId: hop.taskId, promptDigest: hop.promptDigest };
            const signature = hop.attestation === "attested" ? (0, telemetry_attestation_1.signTelemetry)(hop.usage, privateKeyPem, ctx) : undefined;
            (0, telemetry_ledger_1.appendTelemetryAttestation)(run, {
                workerId: hop.workerId,
                taskId: hop.taskId,
                promptDigest: hop.promptDigest,
                reportedUsage: hop.usage,
                usageSignature: signature,
                attestation: hop.attestation,
                now: DEMO_NOW
            });
            signed.push({ hop, signature });
        }
        // 2. Baseline: the clean ledger verifies, and every signed hop's signature is valid.
        const clean = (0, telemetry_ledger_1.verifyTelemetryLedger)(run);
        const signaturesValid = signed.filter((s) => s.signature && (0, telemetry_attestation_1.verifyTelemetryAttestation)(s.hop.usage, s.signature, publicKeyPem, { runId, taskId: s.hop.taskId, promptDigest: s.hop.promptDigest }).status === "attested").length;
        const baseline = { ledgerVerified: clean.verified, signaturesValid, records: clean.records.length };
        const layers = [];
        // 3a. LEDGER layer — the SOPHISTICATED forgery: flip record[1]'s verdict
        //     "unattested" -> "attested" AND recompute its recordHash to cover the edit,
        //     so the per-record digest check passes. The chain still catches it: record[2]
        //     was linked to the ORIGINAL record[1] hash, so chain-link[2] now breaks. This
        //     is the point of the chain over a flat per-record hash — fixing one record's
        //     hash cannot be hidden without rewriting every record after it too.
        const ledgerFile = (0, telemetry_ledger_1.telemetryLedgerPath)(run);
        const ledgerJson = JSON.parse(node_fs_1.default.readFileSync(ledgerFile, "utf8"));
        ledgerJson.records[1].attestation = "attested";
        const { recordHash: _stale, ...rest1 } = ledgerJson.records[1];
        ledgerJson.records[1].recordHash = (0, telemetry_ledger_1.computeRecordHash)(rest1); // attacker re-seals the local hash
        node_fs_1.default.writeFileSync(ledgerFile, JSON.stringify(ledgerJson, null, 2));
        const afterLedger = (0, telemetry_ledger_1.verifyTelemetryLedger)(run);
        const ledgerFailures = failingChecks(afterLedger.checks);
        layers.push({
            layer: "ledger",
            tamper: `forged record[1] verdict "unattested" -> "attested" AND recomputed its recordHash to cover the edit`,
            before: { verified: clean.verified, detail: `${clean.records.length} records: chain intact, all hashes recompute` },
            after: { verified: afterLedger.verified, detail: `the hash chain caught it: ${ledgerFailures.join(", ")}` },
            failures: ledgerFailures
        });
        // 3b. SIGNATURE layer — inflate hop-0's reported output tokens, keep the original
        //     signature. The ed25519 verify binds the exact usage bytes, so it rejects.
        const target = signed[0];
        const inflated = { ...target.hop.usage, output_tokens: target.hop.usage.output_tokens * 10 };
        const sigContext = { runId, taskId: target.hop.taskId, promptDigest: target.hop.promptDigest };
        const sigCheck = (0, telemetry_attestation_1.verifyTelemetryAttestation)(inflated, target.signature, publicKeyPem, { ...sigContext });
        const sigCleanCheck = (0, telemetry_attestation_1.verifyTelemetryAttestation)(target.hop.usage, target.signature, publicKeyPem, { ...sigContext });
        // Same fallback for the detail line and the failure entry, so a rejection
        // without a reason never renders as "signature: undefined".
        const sigOutcome = sigCheck.reason || sigCheck.status;
        layers.push({
            layer: "signature",
            tamper: `inflated record[0] reported output_tokens ${target.hop.usage.output_tokens} -> ${inflated.output_tokens}, reused the original ed25519 signature`,
            before: { verified: sigCleanCheck.status === "attested", detail: `signature verifies against the reported usage (${sigCleanCheck.algorithm || "ed25519"})` },
            after: { verified: sigCheck.status === "attested", detail: sigOutcome },
            failures: sigCheck.status === "attested" ? [] : [`signature: ${sigOutcome}`]
        });
        // The proof holds only if the clean state verified everywhere AND each
        // layer went from verified to rejected with at least one named failure.
        const proven = baseline.ledgerVerified &&
            baseline.signaturesValid === signed.filter((s) => s.signature).length &&
            layers.every((l) => l.before.verified && !l.after.verified && l.failures.length > 0);
        return { schemaVersion: 1, runId, workers: HOPS.length, trustKey: "ephemeral-ed25519", baseline, layers, proven };
    }
    finally {
        // Runs on success and on failure, so a throwing step cannot leak the tmpdir.
        if (removeRunDir)
            node_fs_1.default.rmSync(runDir, { recursive: true, force: true });
    }
}
package/dist/version.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.MIN_SUPPORTED_RUN_STATE_SCHEMA_VERSION = exports.LEGACY_RUN_STATE_SCHEMA_VERSION = exports.CURRENT_RUN_STATE_SCHEMA_VERSION = exports.WORKFLOW_APP_SCHEMA_VERSION = exports.CURRENT_COOL_WORKFLOW_VERSION = void 0;
4
- exports.CURRENT_COOL_WORKFLOW_VERSION = "0.1.78";
4
+ exports.CURRENT_COOL_WORKFLOW_VERSION = "0.1.80";
5
5
  exports.WORKFLOW_APP_SCHEMA_VERSION = 1;
6
6
  exports.CURRENT_RUN_STATE_SCHEMA_VERSION = 1;
7
7
  exports.LEGACY_RUN_STATE_SCHEMA_VERSION = 0;
@@ -133,12 +133,66 @@ node dist/cli.js run architecture-review --drive --once --repo /path/to/repo --q
133
133
  node dist/cli.js run drive <run-id> --json # read-only preview of the next step
134
134
  ```
135
135
 
136
+ For faster first results, use the opt-in fast app instead of changing the full
137
+ review contract:
138
+
139
+ ```text
140
+ node scripts/architecture-review-fast.js --repo /path/to/repo --question "Is the design sound?" --fast-model gpt-5.5-high --strong-model gpt-5.5-extra-high --metrics --schedule-full
141
+ ```
142
+
143
+ `architecture-review-fast` has six workers: two Map and two Assess workers in
144
+ parallel, then sequential Verify and Verdict workers. The original
145
+ `architecture-review` app remains the full 14-worker review and is the right
146
+ target for background routines when a deep audit can finish outside the user's
147
+ foreground wait.
148
+
149
+ The model flags are policy, not attestation: they set task-level `{{model}}`
150
+ hints for the delegated agent process. The recorded model still comes only from
151
+ the agent-reported output.
152
+
153
+ The wrapper computes the source-context digest and supplies it to the fast app.
154
+ For external repositories, the documented no-profile command creates a repo-local
155
+ default `repo` profile over common tracked text surfaces. If the selected profile
156
+ exports zero records, the wrapper refuses rather than handing the app an empty
157
+ context digest.
158
+ The two Map workers opt in to result caching keyed by source-context digest plus
159
+ prompt digest. The two Assess workers also opt in, but their cache key includes
160
+ the completed previous-phase result digests so stale Map outputs do not satisfy
161
+ an Assess cache hit. A cache hit still passes through `recordWorkerOutput`
162
+ validation; a corrupt cached result parks/fails closed rather than spawning a
163
+ silent fallback.
164
+
165
+ `--metrics` is diagnostic and opt-in. It adds elapsed milliseconds, step counts,
166
+ agent-spawn counts, and `result-cache` hit counts to the wrapper JSON payload;
167
+ without it, the wrapper's default output shape stays unchanged.
168
+
136
169
  `{{manifest}}`, `{{input}}`, `{{result}}`, `{{workerDir}}`, `{{model}}`, and
137
170
  `{{prompt}}` are substituted into DISCRETE argv elements (never a shell-interpreted
138
171
  string). Each verb is declared once in `capability-registry.ts`, so `cw <cmd>
139
172
  --json` is byte-identical to the matching `cw_<tool>` MCP tool for the read-only
140
173
  preview/config-show verbs.
141
174
 
175
+ ## Live output — opt-in stderr passthrough (Unix-clean)
176
+
177
+ A drive can show the agent's activity live, without touching the evidence
178
+ contract, when the operator opts in with `CW_AGENT_STREAM=1`:
179
+
180
+ - **Default stays buffered.** Without `CW_AGENT_STREAM=1`, the bundled wrapper
181
+ preserves the legacy `--output-format json` path and forwards claude's JSON
182
+ stdout verbatim after writing `result.md`.
183
+ - **The opt-in wrapper renders; stderr only.** With `CW_AGENT_STREAM=1`, the
184
+ bundled wrapper runs claude in `--output-format stream-json` and renders a
185
+ concise human trace (tool uses, assistant text, per-turn summaries) to its
186
+ **stderr** — diagnostics, never data. It reconstructs the single
187
+ `{model, usage, result}` object for stdout only on that opt-in path.
188
+ - **Core forwards, never parses.** `runAgentProcess` passes the agent child's
189
+ stderr straight through to the operator's terminal (`stdio` inherit) only when
190
+ `CW_AGENT_STREAM=1`, CW's own stderr is a TTY, and `CW_NO_STREAM` is not set.
191
+ Piped / CI runs stay silent (the Rule of Silence). Vendor-specific rendering
192
+ lives in the wrapper (policy), not the kernel (mechanism).
193
+ - **Determinism intact.** The backend evidence triple hashes stdout only, so
194
+ the live stderr stream never affects recorded evidence or replay.
195
+
142
196
  ## Compatibility
143
197
 
144
198
  Agent Delegation Drive is introduced in CW v0.1.38. Adding the `agent` row leaves
@@ -188,3 +242,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
188
242
  0.1.77
189
243
 
190
244
  0.1.78
245
+
246
+ 0.1.79
247
+
248
+ ## Fast Architecture Review (v0.1.80)
249
+
250
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
@@ -28,6 +28,43 @@ node scripts/cw.js plan architecture-review \
28
28
  --focus "runtime"
29
29
  ```
30
30
 
31
+ `architecture-review-fast`
32
+
33
+ Run a shorter architecture review for a fast first result. The app keeps the
34
+ full `architecture-review` contract available under its original id, but uses two
35
+ parallel Map workers, two parallel Assess workers, one verifier, and one verdict
36
+ worker. Operators can optionally provide a pinned JSONL source context and route
37
+ mapping/assessment work to a faster model while reserving stronger models for
38
+ verification and synthesis.
39
+
40
+ ```bash
41
+ node scripts/architecture-review-fast.js \
42
+ --repo /path/to/repo \
43
+ --question "Is this architecture sound?" \
44
+ --fast-model gpt-5.5-high \
45
+ --strong-model gpt-5.5-extra-high \
46
+ --metrics \
47
+ --schedule-full
48
+ ```
49
+
50
+ The wrapper prepares one cached JSONL source context, passes its sha256 digest to
51
+ the fast app, runs `quickstart architecture-review-fast`, and optionally creates
52
+ a one-shot background schedule for the full `architecture-review` app. When run
53
+ against an external repo without `--profile` or `--profile-file`, it writes a
54
+ small repo-local `repo` profile covering common tracked text surfaces such as
55
+ README/package metadata, `src/`, `lib/`, `apps/`, `scripts/`, docs, and tests.
56
+ If the selected profile exports zero records, the wrapper fails closed instead of
57
+ passing an empty context digest to the app.
58
+ `--fast-model` and `--strong-model` are userland policy flags; internally they
59
+ set the same task-level hints as `CW_ARCHITECTURE_REVIEW_FAST_MODEL` and
60
+ `CW_ARCHITECTURE_REVIEW_STRONG_MODEL`.
61
+ `--metrics` is opt-in; when present the wrapper adds elapsed-time, worker-step,
62
+ agent-spawn, and result-cache-hit counts to the JSON payload so operators can
63
+ measure foreground wait reductions without changing the default output shape.
64
+
65
+ For long full reviews, use the existing routine or schedule surfaces to run
66
+ `architecture-review` in the background after the fast report has returned.
67
+
31
68
  `pr-review-fix-ci`
32
69
 
33
70
  Review a pull request or branch, inspect CI failures, diagnose actionable
@@ -39,6 +39,14 @@ A new runtime capability is added once, in the registry, against one core entry.
39
39
  The CLI command and the MCP tool are then two policies over that one mechanism —
40
40
  which is exactly what the parity gate checks.
41
41
 
42
+ The MCP tool list is also being collapsed toward that single source. The first
43
+ read-only inspection group (`operator.status`, `graph`, `operator.report`,
44
+ worker/candidate/feedback/commit summaries, and the basic multi-agent inspection
45
+ views) derives its MCP tool name and description directly from the capability
46
+ registry; `mcp-server.ts` still owns the MCP input schema for those tools. This
47
+ keeps the public `tools/list` output unchanged while removing one duplicate
48
+ description table at a time.
49
+
42
50
  ## Human vs Machine Contract
43
51
 
44
52
  The two surfaces have different contracts and must not interfere:
@@ -371,3 +379,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
371
379
  0.1.77
372
380
 
373
381
  0.1.78
382
+
383
+ 0.1.79
384
+
385
+ ## Fast Architecture Review (v0.1.80)
386
+
387
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
@@ -121,3 +121,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
121
121
  0.1.77
122
122
 
123
123
  0.1.78
124
+
125
+ 0.1.79
126
+
127
+ ## Fast Architecture Review (v0.1.80)
128
+
129
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
@@ -108,3 +108,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
108
108
  0.1.77
109
109
 
110
110
  0.1.78
111
+
112
+ 0.1.79
113
+
114
+ ## Fast Architecture Review (v0.1.80)
115
+
116
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
@@ -105,3 +105,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
105
105
  0.1.77
106
106
 
107
107
  0.1.78
108
+
109
+ 0.1.79
110
+
111
+ ## Fast Architecture Review (v0.1.80)
112
+
113
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
@@ -268,3 +268,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
268
268
  0.1.77
269
269
 
270
270
  0.1.78
271
+
272
+ 0.1.79
273
+
274
+ ## Fast Architecture Review (v0.1.80)
275
+
276
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
@@ -298,3 +298,9 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
298
298
  0.1.77
299
299
 
300
300
  0.1.78
301
+
302
+ 0.1.79
303
+
304
+ ## Fast Architecture Review (v0.1.80)
305
+
306
+ Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
package/docs/index.md CHANGED
@@ -35,6 +35,7 @@ Read these in order when you are new to CW:
35
35
  31. [Agent Delegation Drive](agent-delegation-drive.7.md) - the `agent` backend delegates each worker to an EXTERNAL agent process (claude/codex/HTTP endpoint) and `run --drive` auto-advances plan→dispatch→fulfill→accept→commit; the model runs in the agent's process, never in CW. Two-layer evidence, operator-vs-attested model, fail-closed park, replay without re-spawn.
36
36
  32. [Run Retention & Provable Reclamation](run-retention-reclamation.7.md) - tiered, append-only, cryptographically-verifiable disk reclamation over the v0.1.28 archive overlay: seal the audit skeleton, free the reconstructable/scratch bulk, and prove it via a hash-chained tombstone; `gc plan|run|verify`, write-ahead + fail-closed, explicit capability downgrade.
37
37
  33. [Durable State & Locking](durable-state-and-locking.7.md) - atomic (temp→rename) writes for every authoritative store with fsync-durability for the audit-essential ones, plus a portable stale-stealing file lock serializing the cross-process read-modify-write stores (home queue, archive overlay, reclamation chain); closes the prior verdict's non-atomic/unlocked P1.
38
+ 34. [Source Context Profiles](source-context-profiles.7.md) - opt-in JSONL source exports for AI context slimming, with profile policy in manifest data and manifest records proving every included or omitted tracked file.
38
39
 
39
40
  CW is the base system. Workflow apps are userland. Release and migration rules
40
41
  must preserve that line: stable contracts, explicit compatibility checks, and
@@ -0,0 +1,28 @@
1
+ # VHS tape — records the `cw demo tamper` proof to a GIF for the README hero.
2
+ #
3
+ # Reproducible, deterministic capture (no manual screen-recording). Install VHS
4
+ # (https://github.com/charmbracelet/vhs), then from the repo root:
5
+ #
6
+ # vhs plugins/cool-workflow/docs/launch/demo.tape
7
+ #
8
+ # Output: plugins/cool-workflow/docs/launch/demo-tamper.gif. Then in the root README, replace the
9
+ # fenced demo output block under "See it in 30 seconds" with:
10
+ # ![cw demo tamper](plugins/cool-workflow/docs/launch/demo-tamper.gif)
11
+
12
+ Output plugins/cool-workflow/docs/launch/demo-tamper.gif
13
+
14
+ Set FontSize 15
15
+ Set Width 1180
16
+ Set Height 760
17
+ Set Padding 18
18
+ Set Theme "Dracula"
19
+ Set Framerate 24
20
+ Set PlaybackSpeed 1.0
21
+
22
+ # Use the published package so the GIF reflects exactly what a new user runs.
23
+ Type "npx cool-workflow demo tamper"
24
+ Sleep 600ms
25
+ Enter
26
+
27
+ # Allow time for npx to fetch the package and for the demo to run and print the verdict.
28
+ Sleep 14s