cool-workflow 0.1.80 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.codex-plugin/plugin.json +1 -1
  3. package/README.md +42 -2
  4. package/apps/architecture-review/app.json +1 -1
  5. package/apps/architecture-review-fast/app.json +1 -1
  6. package/apps/end-to-end-golden-path/app.json +1 -1
  7. package/apps/pr-review-fix-ci/app.json +1 -1
  8. package/apps/release-cut/app.json +1 -1
  9. package/apps/research-synthesis/app.json +1 -1
  10. package/dist/agent-config.js +21 -7
  11. package/dist/candidate-scoring.js +42 -22
  12. package/dist/capability-core.js +94 -17
  13. package/dist/capability-registry.js +138 -171
  14. package/dist/cli.js +90 -100
  15. package/dist/collaboration.js +5 -6
  16. package/dist/commit.js +20 -6
  17. package/dist/compare.js +18 -0
  18. package/dist/coordinator/classify.js +45 -0
  19. package/dist/coordinator/paths.js +42 -0
  20. package/dist/coordinator/util.js +129 -0
  21. package/dist/coordinator.js +127 -300
  22. package/dist/dispatch.js +35 -0
  23. package/dist/drive.js +7 -7
  24. package/dist/error-feedback.js +8 -4
  25. package/dist/evidence-reasoning.js +1 -1
  26. package/dist/execution-backend/agent.js +331 -0
  27. package/dist/execution-backend/probes.js +96 -0
  28. package/dist/execution-backend/util.js +47 -0
  29. package/dist/execution-backend.js +67 -420
  30. package/dist/mcp-server.js +34 -173
  31. package/dist/multi-agent/graph.js +84 -0
  32. package/dist/multi-agent/helpers.js +145 -0
  33. package/dist/multi-agent/paths.js +22 -0
  34. package/dist/multi-agent-eval/format.js +194 -0
  35. package/dist/multi-agent-eval/normalize.js +51 -0
  36. package/dist/multi-agent-eval.js +39 -244
  37. package/dist/multi-agent-host.js +0 -19
  38. package/dist/multi-agent.js +125 -314
  39. package/dist/node-snapshot.js +3 -3
  40. package/dist/observability/format.js +61 -0
  41. package/dist/observability/intake.js +98 -0
  42. package/dist/observability.js +14 -160
  43. package/dist/operator-ux/format.js +364 -0
  44. package/dist/operator-ux.js +22 -363
  45. package/dist/orchestrator/report.js +8 -0
  46. package/dist/orchestrator.js +25 -8
  47. package/dist/reclamation.js +26 -21
  48. package/dist/run-export.js +138 -14
  49. package/dist/run-registry/derive.js +172 -0
  50. package/dist/run-registry/format.js +124 -0
  51. package/dist/run-registry/gc.js +251 -0
  52. package/dist/run-registry/policy.js +16 -0
  53. package/dist/run-registry/queue.js +116 -0
  54. package/dist/run-registry.js +78 -593
  55. package/dist/run-state-schema.js +1 -0
  56. package/dist/sandbox-profile.js +43 -2
  57. package/dist/state-explosion/format.js +159 -0
  58. package/dist/state-explosion/helpers.js +82 -0
  59. package/dist/state-explosion.js +65 -283
  60. package/dist/state-node.js +19 -4
  61. package/dist/telemetry-attestation.js +55 -0
  62. package/dist/telemetry-demo.js +15 -3
  63. package/dist/telemetry-ledger.js +60 -15
  64. package/dist/topology.js +25 -8
  65. package/dist/triggers.js +33 -14
  66. package/dist/trust-audit.js +145 -33
  67. package/dist/version.js +1 -1
  68. package/dist/worker-isolation/helpers.js +51 -0
  69. package/dist/worker-isolation/paths.js +46 -0
  70. package/dist/worker-isolation.js +39 -115
  71. package/docs/agent-delegation-drive.7.md +13 -0
  72. package/docs/cli-mcp-parity.7.md +4 -0
  73. package/docs/contract-migration-tooling.7.md +2 -0
  74. package/docs/control-plane-scheduling.7.md +2 -0
  75. package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
  76. package/docs/durable-state-and-locking.7.md +4 -0
  77. package/docs/evidence-adoption-reasoning-chain.7.md +2 -0
  78. package/docs/execution-backends.7.md +2 -0
  79. package/docs/index.md +1 -0
  80. package/docs/launch/launch-kit.md +46 -23
  81. package/docs/launch/pre-launch-checklist.md +14 -14
  82. package/docs/multi-agent-cli-mcp-surface.7.md +4 -0
  83. package/docs/multi-agent-eval-replay-harness.7.md +2 -0
  84. package/docs/multi-agent-operator-ux.7.md +2 -0
  85. package/docs/multi-agent-trust-policy-audit.7.md +27 -0
  86. package/docs/node-snapshot-diff-replay.7.md +2 -0
  87. package/docs/observability-cost-accounting.7.md +2 -0
  88. package/docs/project-index.md +18 -5
  89. package/docs/real-execution-backends.7.md +2 -0
  90. package/docs/release-and-migration.7.md +4 -0
  91. package/docs/release-tooling.7.md +2 -0
  92. package/docs/run-registry-control-plane.7.md +54 -8
  93. package/docs/run-retention-reclamation.7.md +4 -0
  94. package/docs/state-explosion-management.7.md +2 -0
  95. package/docs/team-collaboration.7.md +2 -0
  96. package/docs/trust-model.md +267 -0
  97. package/docs/vendor-manifest-loadability.7.md +43 -0
  98. package/docs/web-desktop-workbench.7.md +2 -0
  99. package/manifest/plugin.manifest.json +1 -1
  100. package/package.json +4 -2
  101. package/scripts/agents/builtin-templates.json +7 -0
  102. package/scripts/bump-version.js +5 -11
  103. package/scripts/canonical-apps-list.js +64 -0
  104. package/scripts/canonical-apps.js +19 -4
  105. package/scripts/dogfood-release.js +1 -1
  106. package/scripts/golden-path.js +4 -4
  107. package/scripts/parity-check.js +5 -0
  108. package/scripts/release-check.js +5 -1
  109. package/scripts/version-sync-check.js +5 -8
  110. package/dist/capability-dispatcher.js +0 -86
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "cool-workflow",
3
3
  "description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
4
- "version": "0.1.80",
4
+ "version": "0.1.81",
5
5
  "author": {
6
6
  "name": "COOLWHITE LLC"
7
7
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cool-workflow",
3
- "version": "0.1.80",
3
+ "version": "0.1.81",
4
4
  "description": "Auditable workflow control-plane and orchestration runtime: TypeScript dispatch, evidence-gated verification, state commits, scheduling, routines, multi-agent coordination, and MCP. Delegates execution to external agents — never runs models.",
5
5
  "author": {
6
6
  "name": "COOLWHITE LLC"
package/README.md CHANGED
@@ -263,6 +263,40 @@ write paths, command execution, network access, and environment exposure. CW
263
263
  stores and validates the policy, while the agent host enforces OS/process
264
264
  runtime controls. See [docs/sandbox-profiles.7.md](docs/sandbox-profiles.7.md).
265
265
 
266
+ ## Quickstart
267
+
268
+ **30-second proof, no install** — see that a recorded telemetry verdict can't be forged:
269
+
270
+ ```bash
271
+ npx cool-workflow demo tamper
272
+ # builds a signed ed25519 ledger, forges it 2 ways, both caught offline
273
+ # -> VERDICT: tamper-evidence holds ✓
274
+ ```
275
+
276
+ **Try a real run** — no clone needed; drive an architecture review with your own agent:
277
+
278
+ ```bash
279
+ npx cool-workflow quickstart architecture-review --repo /path/to/repo \
280
+ --question "Is this architecture sound?" --agent-command builtin:claude
281
+ ```
282
+
283
+ CW DELEGATES worker execution to your own agent. With no `--agent-command` (or
284
+ `CW_AGENT_COMMAND`) the drive fails closed (status `blocked`) — it never fabricates a
285
+ result. `--agent-command builtin:claude` resolves to a bundled read-only `claude -p`
286
+ wrapper (needs `claude` on your PATH).
287
+
288
+ **Re-prove a finished run, offline** (`cw` is the installed bin; or `npx cool-workflow <cmd>`):
289
+
290
+ ```bash
291
+ cw telemetry verify <run-id> # re-checks the hash-chained ledger
292
+ cw telemetry verify <run-id> --pubkey pub.pem # also re-runs ed25519 signature checks
293
+ cw audit verify <run-id> # re-proves the trust-audit hash chain
294
+ ```
295
+
296
+ More: `cw quickstart <app> --preview` (read-only dry run), `cw run resume <run-id> --drive`
297
+ (continue an interrupted run), `cw run inspect-archive <archive>` (integrity-check a
298
+ portable run archive without importing it).
299
+
266
300
  ## Structure
267
301
 
268
302
  ```text
@@ -300,6 +334,10 @@ cool-workflow
300
334
 
301
335
  ## Commands
302
336
 
337
+ Installed via npm, the bin is `cw` (alias `cool-workflow`): e.g. `cw list`,
338
+ `cw quickstart …`. From a cloned source checkout, before `npm run build`, use the
339
+ equivalent `node scripts/cw.js <cmd>` form shown in the examples below.
340
+
303
341
  List bundled workflows:
304
342
 
305
343
  ```bash
@@ -619,7 +657,7 @@ Replaces the linear migration chain with a BFS graph path resolver (`findMigrati
619
657
 
620
658
  ## Vendor-Adapter Registry (v0.1.47)
621
659
 
622
- Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data.
660
+ Data-driven manifest generation: vendor JSON shapes extracted from `gen-manifests.js` into declarative templates in `plugin.manifest.json`. A `_resolveTemplate()` engine resolves `{{path.to.field}}` markers. Adding a new AI platform is pure data. Cross-vendor is proven by boot, not just by generation: `npm run manifest:load-check` (`node test/vendor-manifest-load-smoke.js`) loads every generated manifest (claude, codex, agents, gemini, opencode) and asserts each exposes the full tool surface (184 tools).
623
661
 
624
662
  ## P2 Fixes (v0.1.48)
625
663
 
@@ -651,7 +689,7 @@ The orchestration vision landed in one release, all reviewer-gated:
651
689
 
652
690
  ## Tamper-evidence demo (v0.1.79)
653
691
 
654
- `cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` is the operator-facing half (`cw_telemetry_verify` on MCP).
692
+ `cw demo tamper` — a hermetic, one-command proof that a recorded telemetry verdict cannot be forged undetected: it builds a real ed25519-signed ledger, forges it at the ledger layer (verdict flip + recomputed local hash → the chain still breaks) and the signature layer (inflated tokens, reused signature → ed25519 rejects), all verified offline with only the public key. `cw telemetry verify <run>` (`cw_telemetry_verify` on MCP) is the operator-facing re-proof: by default it recomputes the hash chain on disk so any later edit to a recorded verdict or usage digest is caught; add `--pubkey <pem-or-path>` to re-run each `attested` hop's ed25519 signature check against the stored raw usage too. What this does and does **not** prove — including the single-keyholder ceiling — is documented honestly in [Trust Model & Limitations](docs/trust-model.md); read it before relying on a green verdict.
655
693
 
656
694
  ## Opt-in live agent output during a drive (on main, ships next)
657
695
 
@@ -662,3 +700,5 @@ v0.1.79
662
700
  ## Fast Architecture Review (v0.1.80)
663
701
 
664
702
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
703
+
704
+ _This documentation tracks Cool Workflow v0.1.81. See [CHANGELOG](../../CHANGELOG.md) for the release notes._
@@ -3,7 +3,7 @@
3
3
  "id": "architecture-review",
4
4
  "title": "Architecture Review",
5
5
  "summary": "Map a repository architecture, assess risks, verify important findings, and synthesize an evidence-backed verdict.",
6
- "version": "0.1.80",
6
+ "version": "0.1.81",
7
7
  "author": "COOLWHITE LLC",
8
8
  "inputs": [
9
9
  {
@@ -3,7 +3,7 @@
3
3
  "id": "architecture-review-fast",
4
4
  "title": "Architecture Review Fast",
5
5
  "summary": "Run a shorter architecture review with parallel map and assess phases for faster first results.",
6
- "version": "0.1.80",
6
+ "version": "0.1.81",
7
7
  "author": "COOLWHITE LLC",
8
8
  "inputs": [
9
9
  {
@@ -3,7 +3,7 @@
3
3
  "id": "end-to-end-golden-path",
4
4
  "title": "End-to-End Golden Path",
5
5
  "summary": "Deterministic one-worker workflow app for proving the CW integration chain.",
6
- "version": "0.1.80",
6
+ "version": "0.1.81",
7
7
  "author": "COOLWHITE LLC",
8
8
  "inputs": [
9
9
  {
@@ -3,7 +3,7 @@
3
3
  "id": "pr-review-fix-ci",
4
4
  "title": "PR Review Fix CI",
5
5
  "summary": "Review a pull request or branch, inspect CI failures, diagnose actionable issues, optionally patch, verify, and summarize with evidence.",
6
- "version": "0.1.80",
6
+ "version": "0.1.81",
7
7
  "author": "COOLWHITE LLC",
8
8
  "inputs": [
9
9
  {
@@ -3,7 +3,7 @@
3
3
  "id": "release-cut",
4
4
  "title": "Release Cut",
5
5
  "summary": "Prepare a release with checklist discipline: version checks, changelog, tests, packaging, release notes, and final verification.",
6
- "version": "0.1.80",
6
+ "version": "0.1.81",
7
7
  "author": "COOLWHITE LLC",
8
8
  "inputs": [
9
9
  {
@@ -3,7 +3,7 @@
3
3
  "id": "research-synthesis",
4
4
  "title": "Research Synthesis",
5
5
  "summary": "Split a research question into claims, investigate sources, cross-check evidence, verify claims, and synthesize a concise answer.",
6
- "version": "0.1.80",
6
+ "version": "0.1.81",
7
7
  "author": "COOLWHITE LLC",
8
8
  "inputs": [
9
9
  {
@@ -139,18 +139,30 @@ function agentConfigFromArgs(args) {
139
139
  // npx/global install, where $(pwd)-relative paths don't exist) can configure a
140
140
  // WORKING agent without knowing where the package landed on disk:
141
141
  // --agent-command builtin:claude (or CW_AGENT_COMMAND=builtin:claude)
142
- // resolves to the packaged claude wrapper invocation. Still pure config — the
143
- // template is an out-of-process delegation script; CW never calls a model API.
144
- const BUILTIN_AGENT_TEMPLATES = {
145
- claude: `node ${node_path_1.default.join(__dirname, "..", "scripts", "agents", "claude-p-agent.js")} {{input}} {{result}}`
146
- };
142
+ // resolves to the packaged wrapper invocation. Still pure config — the template
143
+ // is an out-of-process delegation script; CW never calls a model API.
144
+ //
145
+ // The builtin set is DATA, not a kernel TS literal (FreeBSD-audit L15): it lives
146
+ // in scripts/agents/builtin-templates.json (vendor name -> wrapper script name).
147
+ // Adding a vendor is a content/distribution step (drop a wrapper + a JSON line),
148
+ // not a kernel edit — keeping CW vendor-agnostic at the source level.
149
+ function builtinAgentTemplates() {
150
+ const agentsDir = node_path_1.default.join(__dirname, "..", "scripts", "agents");
151
+ const manifest = JSON.parse(node_fs_1.default.readFileSync(node_path_1.default.join(agentsDir, "builtin-templates.json"), "utf8"));
152
+ const out = {};
153
+ for (const [name, script] of Object.entries(manifest.templates || {})) {
154
+ out[name] = `node ${node_path_1.default.join(agentsDir, script)} {{input}} {{result}}`;
155
+ }
156
+ return out;
157
+ }
147
158
  function expandBuiltinAgentCommand(command) {
148
159
  if (!command || !command.startsWith("builtin:"))
149
160
  return command;
150
161
  const name = command.slice("builtin:".length).trim();
151
- const template = BUILTIN_AGENT_TEMPLATES[name];
162
+ const templates = builtinAgentTemplates();
163
+ const template = templates[name];
152
164
  if (!template) {
153
- throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(BUILTIN_AGENT_TEMPLATES).join(", ")}`);
165
+ throw new Error(`Unknown builtin agent template "${name}" — available: ${Object.keys(templates).join(", ")}`);
154
166
  }
155
167
  return template;
156
168
  }
@@ -200,6 +212,8 @@ function setAgentConfigFile(patch, env = process.env) {
200
212
  endpoint: firstDefined(incoming.endpoint, current.endpoint),
201
213
  model: firstDefined(incoming.model, current.model),
202
214
  timeoutMs: firstDefined(incoming.timeoutMs, current.timeoutMs),
215
+ attestPublicKey: firstDefined(incoming.attestPublicKey, current.attestPublicKey),
216
+ requireAttestedTelemetry: firstDefined(incoming.requireAttestedTelemetry, current.requireAttestedTelemetry),
203
217
  source: "file"
204
218
  };
205
219
  const stored = redacted(merged);
@@ -20,7 +20,14 @@ const state_1 = require("./state");
20
20
  const state_node_1 = require("./state-node");
21
21
  const trust_audit_1 = require("./trust-audit");
22
22
  const collaboration_1 = require("./collaboration");
23
+ const compare_1 = require("./compare");
23
24
  exports.CANDIDATE_SCHEMA_VERSION = 1;
25
+ /** Verdict thresholds on a score's normalized value [0,1], declared once so the
26
+ * numbers carry intent instead of being buried as literals in verdictFor(). A
27
+ * normalized score at-or-above PASS is "pass"; at-or-above WARN (but below
28
+ * PASS) is "warn"; anything lower is "fail". Same numbers as before. */
29
+ const VERDICT_PASS_THRESHOLD = 0.7;
30
+ const VERDICT_WARN_THRESHOLD = 0.4;
24
31
  function createCandidateScoring(options = {}) {
25
32
  return {
26
33
  registerCandidate: (run, input) => registerCandidate(run, input, options),
@@ -39,7 +46,7 @@ function registerCandidate(run, input, options = {}) {
39
46
  if (existing)
40
47
  return existing;
41
48
  const now = new Date().toISOString();
42
- const id = input.id || createCandidateId(input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
49
+ const id = input.id || createCandidateId(run, input.kind || "manual", input.workerId || input.taskId || input.resultNodeId);
43
50
  const candidate = {
44
51
  schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
45
52
  id,
@@ -109,7 +116,7 @@ function getCandidate(run, candidateId) {
109
116
  }
110
117
  function scoreCandidate(run, candidateId, input, options = {}) {
111
118
  const candidate = requireCandidate(run, candidateId);
112
- const scoreId = input.id || createScoreId(candidateId);
119
+ const scoreId = input.id || createScoreId(candidate);
113
120
  const evidence = (0, trust_audit_1.normalizeEvidence)(run, input.evidence || [], {
114
121
  source: "operator-recorded",
115
122
  candidateId,
@@ -279,7 +286,7 @@ function selectCandidate(run, candidateId, options = {}, scoringOptions = {}) {
279
286
  const now = new Date().toISOString();
280
287
  const selection = {
281
288
  schemaVersion: exports.CANDIDATE_SCHEMA_VERSION,
282
- id: createSelectionId(candidateId),
289
+ id: createSelectionId(run, candidateId),
283
290
  runId: run.id,
284
291
  candidateId,
285
292
  selectedAt: now,
@@ -558,16 +565,16 @@ function inferCandidateKind(input) {
558
565
  return "manual";
559
566
  }
560
567
  function bestScore(scores) {
561
- return [...scores].sort((left, right) => right.normalized - left.normalized || left.createdAt.localeCompare(right.createdAt))[0];
568
+ return [...scores].sort((left, right) => right.normalized - left.normalized || (0, compare_1.compareBytes)(left.createdAt, right.createdAt))[0];
562
569
  }
563
570
  function compareRows(left, right, policy) {
564
571
  const byScore = right.normalized - left.normalized;
565
572
  if (byScore !== 0)
566
573
  return byScore;
567
574
  if (policy.tieBreaker === "candidateId")
568
- return left.candidate.id.localeCompare(right.candidate.id);
569
- const byCreated = left.candidate.createdAt.localeCompare(right.candidate.createdAt);
570
- return byCreated || left.candidate.id.localeCompare(right.candidate.id);
575
+ return (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
576
+ const byCreated = (0, compare_1.compareBytes)(left.candidate.createdAt, right.candidate.createdAt);
577
+ return byCreated || (0, compare_1.compareBytes)(left.candidate.id, right.candidate.id);
571
578
  }
572
579
  function detectTies(candidates) {
573
580
  const groups = new Map();
@@ -578,10 +585,15 @@ function detectTies(candidates) {
578
585
  return Array.from(groups.values()).filter((group) => group.length > 1);
579
586
  }
580
587
  function mergePolicy(policy = {}) {
588
+ // NOTE: `policy.criteria` (string[]) is intentionally NOT carried here. A
589
+ // whole-repo grep shows it has no read points — scoring reads each score's
590
+ // own `input.criteria` (Record<string, number>), not this list. Emitting a
591
+ // default `criteria: []` advertised a guarantee the code never honored and
592
+ // could silently drift, so it is dropped. The field stays OPTIONAL on
593
+ // CandidateScoringPolicy / CandidateRanking.policy for forward-compat input.
581
594
  return {
582
595
  id: policy.id || "cw.candidate.default",
583
596
  title: policy.title || "Default Candidate Scoring",
584
- criteria: policy.criteria || [],
585
597
  requireEvidence: policy.requireEvidence ?? true,
586
598
  requireVerifierGate: policy.requireVerifierGate ?? true,
587
599
  minNormalized: policy.minNormalized,
@@ -591,9 +603,9 @@ function mergePolicy(policy = {}) {
591
603
  function verdictFor(normalized, policy) {
592
604
  if (policy.minNormalized !== undefined && normalized < policy.minNormalized)
593
605
  return "fail";
594
- if (normalized >= 0.7)
606
+ if (normalized >= VERDICT_PASS_THRESHOLD)
595
607
  return "pass";
596
- if (normalized >= 0.4)
608
+ if (normalized >= VERDICT_WARN_THRESHOLD)
597
609
  return "warn";
598
610
  return "fail";
599
611
  }
@@ -616,18 +628,26 @@ function indexPath(run) {
616
628
  function rankingPath(run) {
617
629
  return node_path_1.default.join(candidateRoot(run), "ranking.json");
618
630
  }
619
- function createCandidateId(kind, seed) {
620
- const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
621
- const suffix = Math.random().toString(36).slice(2, 8);
622
- return `candidate-${(0, state_1.safeFileName)(kind)}-${seed ? `${(0, state_1.safeFileName)(seed)}-` : ""}${stamp}-${suffix}`;
623
- }
624
- function createScoreId(candidateId) {
625
- const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
626
- return `score-${(0, state_1.safeFileName)(candidateId)}-${stamp}-${Math.random().toString(36).slice(2, 8)}`;
627
- }
628
- function createSelectionId(candidateId) {
629
- const stamp = new Date().toISOString().replace(/[-:]/g, "").replace(/\..+/, "Z");
630
- return `selection-${(0, state_1.safeFileName)(candidateId)}-${stamp}-${Math.random().toString(36).slice(2, 8)}`;
631
+ // Deterministic candidate id (FreeBSD-audit L12/L13): the candidate's POSITION in
632
+ // the run's candidate set, qualified by kind + seed (a stable worker/task/result
633
+ // id) for readability. No wall-clock stamp, no PRNG suffix — re-running the same
634
+ // workflow mints byte-identical candidate ids, keeping fingerprints replay-stable.
635
+ function createCandidateId(run, kind, seed) {
636
+ const seq = (run.candidates || []).length + 1;
637
+ return `candidate-${(0, state_1.safeFileName)(kind)}-${seed ? `${(0, state_1.safeFileName)(seed)}-` : ""}${String(seq).padStart(4, "0")}`;
638
+ }
639
+ // Deterministic score id (FreeBSD-audit L12/L13): the score's POSITION within its
640
+ // candidate's score list. Scores only ever append, so the sequence is unique per
641
+ // candidate and stable across replays.
642
+ function createScoreId(candidate) {
643
+ const seq = (candidate.scores || []).length + 1;
644
+ return `score-${(0, state_1.safeFileName)(candidate.id)}-${String(seq).padStart(4, "0")}`;
645
+ }
646
+ // Deterministic selection id (FreeBSD-audit L12/L13): the selection's POSITION in
647
+ // the run's append-only selection log. No clock, no PRNG.
648
+ function createSelectionId(run, candidateId) {
649
+ const seq = (run.candidateSelections || []).length + 1;
650
+ return `selection-${(0, state_1.safeFileName)(candidateId)}-${String(seq).padStart(4, "0")}`;
631
651
  }
632
652
  function shouldPersist(options) {
633
653
  return options.persist !== false;
@@ -9,11 +9,12 @@
9
9
  // expressed here and called identically by both surfaces. A composite that lives
10
10
  // in only one surface is exactly the cross-surface drift v0.1.27 forbids.
11
11
  //
12
- // From v0.1.46: each exported entry function SHOULD self-register its capability
13
- // metadata via registerCapability() from capability-registry.ts. This replaces
14
- // the manual "add an entry to the giant array in capability-registry.ts" workflow
15
- // with automatic discovery. New capabilities just add a registerCapability() call
16
- // next to their implementation no need to touch capability-registry.ts.
12
+ // Capability metadata (which entry is on which surface, tool names, jsonMode) is
13
+ // declared in ONE place — the BUILTIN_CAPABILITIES table in capability-registry.ts,
14
+ // the single source of truth both surfaces and the parity gate read. New
15
+ // capabilities add a row there. (A v0.1.46 "self-register at load time" mechanism
16
+ // was removed: the registry snapshot was taken before those registrations ran, so
17
+ // they were silently dead duplicates of the table — see capability-registry.ts.)
17
18
  //
18
19
  // See docs/cli-mcp-parity.7.md and src/capability-registry.ts.
19
20
  var __importDefault = (this && this.__importDefault) || function (mod) {
@@ -37,6 +38,7 @@ exports.runArchive = runArchive;
37
38
  exports.runRerun = runRerun;
38
39
  exports.runExportArchive = runExportArchive;
39
40
  exports.runImportArchive = runImportArchive;
41
+ exports.runInspectArchive = runInspectArchive;
40
42
  exports.runVerifyImport = runVerifyImport;
41
43
  exports.queueAdd = queueAdd;
42
44
  exports.queueList = queueList;
@@ -65,13 +67,15 @@ exports.withoutRuntimeKeys = withoutRuntimeKeys;
65
67
  exports.optionalString = optionalString;
66
68
  exports.isRecord = isRecord;
67
69
  exports.telemetryVerify = telemetryVerify;
70
+ exports.auditVerify = auditVerify;
68
71
  exports.demoTamper = demoTamper;
69
- const capability_registry_1 = require("./capability-registry");
70
72
  const drive_1 = require("./drive");
71
73
  const agent_config_1 = require("./agent-config");
72
74
  const run_registry_1 = require("./run-registry");
73
75
  const observability_1 = require("./observability");
74
76
  const telemetry_ledger_1 = require("./telemetry-ledger");
77
+ const telemetry_attestation_1 = require("./telemetry-attestation");
78
+ const trust_audit_1 = require("./trust-audit");
75
79
  const telemetry_demo_1 = require("./telemetry-demo");
76
80
  const state_1 = require("./state");
77
81
  const run_export_1 = require("./run-export");
@@ -90,8 +94,6 @@ function planSummary(runner, workflowId, options) {
90
94
  pendingTasks: run.tasks.filter((task) => task.status === "pending").length
91
95
  };
92
96
  }
93
- // Auto-register with the capability registry (v0.1.46 — no need to edit capability-registry.ts)
94
- (0, capability_registry_1.registerCapability)({ capability: "plan", summary: "Plan a workflow run on a repo + app.", entry: "planSummary", surface: "both", cli: { path: ["plan"], jsonMode: "default" }, mcp: { tool: "cw_plan" } });
95
97
  // ---- canonical app-run payload --------------------------------------------
96
98
  // Both `cw app run` and `cw_app_run` resolve to this exact object. Structured
97
99
  // app inputs + optional sandbox resolution, then a compact operator status.
@@ -117,7 +119,6 @@ function appRun(runner, args) {
117
119
  sandboxProfile: resolvedSandbox
118
120
  };
119
121
  }
120
- (0, capability_registry_1.registerCapability)({ capability: "app.run", summary: "Plan a run on a named app with structured inputs.", entry: "appRun", surface: "both", cli: { path: ["app", "run"], caseTokens: ["app"], jsonMode: "default" }, mcp: { tool: "cw_app_run" } });
121
122
  // ---- canonical sandbox choice payload -------------------------------------
122
123
  // Both `cw sandbox choose|resolve` and `cw_sandbox_choose|cw_sandbox_resolve`
123
124
  // resolve to this exact object.
@@ -154,7 +155,6 @@ function commitEnvelope(runner, runId, args) {
154
155
  commit
155
156
  };
156
157
  }
157
- (0, capability_registry_1.registerCapability)({ capability: "commit", summary: "Create a verifier-gated state commit with evidence.", entry: "commitEnvelope", surface: "both", cli: { path: ["commit"], jsonMode: "default" }, mcp: { tool: "cw_commit" }, payloadIdentical: false, reason: "CLI renders a human summary with path hints; MCP returns the structured commit payload alone." });
158
158
  function compactOperatorStatus(status) {
159
159
  return {
160
160
  runId: status.runId,
@@ -242,11 +242,19 @@ function runList(reg, args) {
242
242
  function runShow(reg, runId, args) {
243
243
  return reg.showRun(runId, { scope: scopeOf(args, "home") });
244
244
  }
245
- function runResume(reg, runId, args) {
246
- return reg.resume(runId, {
245
+ function runResume(reg, runner, runId, args) {
246
+ const base = reg.resume(runId, {
247
247
  scope: scopeOf(args, "home"),
248
248
  limit: args.limit === undefined ? undefined : Number(args.limit)
249
249
  });
250
+ // Default (no --drive/--once): read-only, byte-identical to before.
251
+ if (!isTrue(args.drive) && !isTrue(args.once))
252
+ return base;
253
+ // Opt-in continuation: hand the resolved run to the EXISTING agent-delegation
254
+ // drive loop (re-plans nothing; picks up pending/running tasks from durable
255
+ // state). An unconfigured agent surfaces drive.status="blocked" (fail-closed).
256
+ const drive = runDrive(runner, { ...args, runId: base.runId, repo: base.repo, once: isTrue(args.once) });
257
+ return { ...base, drive };
250
258
  }
251
259
  function runArchive(reg, runId, args) {
252
260
  if (runId) {
@@ -286,6 +294,17 @@ function runImportArchive(runner, args) {
286
294
  return { ...imported, registry: registryReport };
287
295
  });
288
296
  }
297
+ // Read-only: inspect a portable archive's integrity WITHOUT importing it. Routes
298
+ // both surfaces through one shared core entry. The runner is unused (no registry
299
+ // touch — inspection writes nothing) but kept for dispatch-signature symmetry.
300
+ function runInspectArchive(_runner, args) {
301
+ return withInvocationCwd(args, () => {
302
+ const archive = optionalString(args.archive || args.path || args.file);
303
+ if (!archive)
304
+ throw new Error("run inspect-archive requires an archive path (positional, --archive, --path, or --file)");
305
+ return (0, run_export_1.inspectArchive)(node_path_1.default.resolve(archive));
306
+ });
307
+ }
289
308
  function runVerifyImport(runner, runId, args) {
290
309
  return withInvocationCwd(args, () => (0, run_export_1.verifyImportedRun)(runner.loadRun(runId)));
291
310
  }
@@ -422,7 +441,8 @@ const DRIVE_RUNTIME_KEYS = [
422
441
  "agentModel",
423
442
  "agent-model",
424
443
  "agentTimeoutMs",
425
- "agent-timeout-ms"
444
+ "agent-timeout-ms",
445
+ "resume"
426
446
  ];
427
447
  function planInputsFor(args) {
428
448
  const copy = withoutRuntimeKeys(args);
@@ -474,6 +494,12 @@ exports.QUICKSTART_DEFAULT_APP = "architecture-review";
474
494
  function quickstart(runner, args) {
475
495
  const appId = String(args.appId || args.app || args.workflowId || exports.QUICKSTART_DEFAULT_APP);
476
496
  const agentConfigured = Boolean((0, agent_config_1.resolveAgentConfig)(args).command || (0, agent_config_1.resolveAgentConfig)(args).endpoint);
497
+ // `--resume`: a discoverability flag over the existing continuation. With no
498
+ // `--run`, advance exactly ONE step (reuse the `--once` path) and print a
499
+ // copy-pasteable continue line; with `--run <id>`, continue that run to
500
+ // completion (the default drive). It adds no new execution path.
501
+ const resume = isTrue(args.resume);
502
+ const resumeRunId = resume ? optionalString(args.runId || args.run) : undefined;
477
503
  // `--preview`: read-only, deterministic next-step projection (no spawn, no commit).
478
504
  // Plan a fresh run (the read-only first verb) then project the next drive step.
479
505
  if (isTrue(args.preview)) {
@@ -498,7 +524,10 @@ function quickstart(runner, args) {
498
524
  // Drive end-to-end (or one `--once` step). runDrive plans the run, delegates each
499
525
  // worker to the agent backend, and commits — we add only the report write + a
500
526
  // single assembled payload. No orchestration is duplicated here.
501
- const result = runDrive(runner, { ...args, appId });
527
+ // `--resume` with no run id advances a single step so a newcomer WITNESSES the
528
+ // stop-then-resume; with a run id it continues to completion. Non-resume paths
529
+ // are untouched (byte-identical default).
530
+ const result = runDrive(runner, { ...args, appId, ...(resume && !resumeRunId ? { once: true } : {}) });
502
531
  // Always (re)write the report so the one command yields a report.md on disk, even
503
532
  // when the drive blocked/parked (a partial report is still useful triage).
504
533
  const cwd0 = process.cwd();
@@ -530,7 +559,9 @@ function quickstart(runner, args) {
530
559
  hint = `the drive is blocked — inspect: cw run drive ${result.runId}`;
531
560
  }
532
561
  else if (result.status === "in-progress") {
533
- hint = `one step advanced (--once) — continue: cw quickstart ${appId} --run ${result.runId} --once`;
562
+ hint = resume
563
+ ? `one step advanced — continue: cw quickstart ${appId} --run ${result.runId} --resume`
564
+ : `one step advanced (--once) — continue: cw quickstart ${appId} --run ${result.runId} --once`;
534
565
  }
535
566
  return {
536
567
  schemaVersion: 1,
@@ -546,7 +577,11 @@ function quickstart(runner, args) {
546
577
  statePath: result.statePath,
547
578
  agentConfigured,
548
579
  steps: result.steps,
549
- hint
580
+ hint,
581
+ // Stamp resumedFrom ONLY when we continued an explicit run. Conditional spread
582
+ // keeps the key absent on the default/fresh path (own-property absent + omitted
583
+ // by JSON.stringify), so default output is byte-identical.
584
+ ...(resumeRunId ? { resumedFrom: resumeRunId } : {})
550
585
  };
551
586
  }
552
587
  /** Read-only, deterministic projection of the effective agent config (secret-stripped). */
@@ -680,15 +715,57 @@ function telemetryVerify(runner, args) {
680
715
  throw new Error("telemetry verify requires a run id (cw telemetry verify <run-id>)");
681
716
  const run = runner.loadRun(runId);
682
717
  const v = (0, telemetry_ledger_1.verifyTelemetryLedger)(run);
718
+ // Opt-in independent signature re-verification. verifyTelemetryLedger re-proves
719
+ // the chain (so the stored attestation verdicts were not edited); supplying the
720
+ // trust public key (--pubkey / CW_AGENT_ATTEST_PUBKEY) additionally RE-RUNS the
721
+ // ed25519 check over each `attested` record's stored raw usage rather than
722
+ // trusting that verdict, so a forged signature can no longer ride a green chain.
723
+ const trustPublicKeyInput = optionalString(args.pubkey || args.pubKey || args.publicKey) || process.env.CW_AGENT_ATTEST_PUBKEY;
724
+ const trustPublicKey = (0, telemetry_attestation_1.resolveTrustPublicKey)(trustPublicKeyInput);
725
+ const keyChecks = trustPublicKeyInput && !trustPublicKey
726
+ ? [{ name: "signature-key", pass: false, code: "telemetry-pubkey-unreadable" }]
727
+ : [];
728
+ const sig = (0, telemetry_attestation_1.verifyTelemetrySignatures)(v.records, trustPublicKey);
729
+ const failedChecks = [...v.checks.filter((c) => !c.pass), ...keyChecks, ...sig.checks.filter((c) => !c.pass)];
683
730
  return {
684
731
  schemaVersion: 1,
685
732
  runId: run.id,
686
733
  present: v.present,
687
- verified: v.verified,
734
+ // Chain integrity AND (when a key was supplied) every attested signature must
735
+ // re-verify. With no key, sig.failed is 0 → unchanged chain-only behavior.
736
+ verified: v.verified && keyChecks.length === 0 && sig.failed === 0,
688
737
  records: v.records.length,
689
738
  attested: v.attested,
690
739
  unattested: v.unattested,
691
740
  absent: v.absent,
741
+ signatureKeyProvided: sig.keyProvided,
742
+ signaturesChecked: sig.checked,
743
+ signaturesReverified: sig.reverified,
744
+ signaturesFailed: sig.failed,
745
+ failedChecks: failedChecks.map((c) => ({ name: c.name, code: c.code }))
746
+ };
747
+ }
748
+ // audit.verify — fail-closed re-prove of a run's trust-audit hash chain. The peer
749
+ // of telemetry.verify for the sandbox/policy/commit-gate decision log: recomputes
750
+ // every event hash from genesis, checks chain linkage, and catches the
751
+ // unchained-event forgery. Exposed as a verb (not just embedded in `audit summary`,
752
+ // which always exits 0) so `cw audit verify <run> && deploy` can gate on the exit
753
+ // code. POLA: a run with no audit log is present:false / verified:true / exit 0.
754
+ function auditVerify(runner, args) {
755
+ const runId = optionalString(args.runId || args.run);
756
+ if (!runId)
757
+ throw new Error("audit verify requires a run id (cw audit verify <run-id>)");
758
+ const run = runner.loadRun(runId);
759
+ const v = (0, trust_audit_1.verifyTrustAudit)(run);
760
+ return {
761
+ schemaVersion: 1,
762
+ runId: run.id,
763
+ present: v.present,
764
+ verified: v.verified,
765
+ eventCount: v.eventCount,
766
+ chained: v.chained,
767
+ unchained: v.unchained,
768
+ corruptLines: v.corruptLines,
692
769
  failedChecks: v.checks.filter((c) => !c.pass).map((c) => ({ name: c.name, code: c.code }))
693
770
  };
694
771
  }