cool-workflow 0.1.80 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.codex-plugin/plugin.json +1 -1
  3. package/README.md +42 -2
  4. package/apps/architecture-review/app.json +1 -1
  5. package/apps/architecture-review-fast/app.json +1 -1
  6. package/apps/end-to-end-golden-path/app.json +1 -1
  7. package/apps/pr-review-fix-ci/app.json +1 -1
  8. package/apps/release-cut/app.json +1 -1
  9. package/apps/research-synthesis/app.json +1 -1
  10. package/dist/agent-config.js +21 -7
  11. package/dist/candidate-scoring.js +42 -22
  12. package/dist/capability-core.js +94 -17
  13. package/dist/capability-registry.js +138 -171
  14. package/dist/cli.js +90 -100
  15. package/dist/collaboration.js +5 -6
  16. package/dist/commit.js +20 -6
  17. package/dist/compare.js +18 -0
  18. package/dist/coordinator/classify.js +45 -0
  19. package/dist/coordinator/paths.js +42 -0
  20. package/dist/coordinator/util.js +129 -0
  21. package/dist/coordinator.js +127 -300
  22. package/dist/dispatch.js +35 -0
  23. package/dist/drive.js +7 -7
  24. package/dist/error-feedback.js +8 -4
  25. package/dist/evidence-reasoning.js +1 -1
  26. package/dist/execution-backend/agent.js +331 -0
  27. package/dist/execution-backend/probes.js +96 -0
  28. package/dist/execution-backend/util.js +47 -0
  29. package/dist/execution-backend.js +67 -420
  30. package/dist/mcp-server.js +34 -173
  31. package/dist/multi-agent/graph.js +84 -0
  32. package/dist/multi-agent/helpers.js +145 -0
  33. package/dist/multi-agent/paths.js +22 -0
  34. package/dist/multi-agent-eval/format.js +194 -0
  35. package/dist/multi-agent-eval/normalize.js +51 -0
  36. package/dist/multi-agent-eval.js +39 -244
  37. package/dist/multi-agent-host.js +0 -19
  38. package/dist/multi-agent.js +125 -314
  39. package/dist/node-snapshot.js +3 -3
  40. package/dist/observability/format.js +61 -0
  41. package/dist/observability/intake.js +98 -0
  42. package/dist/observability.js +14 -160
  43. package/dist/operator-ux/format.js +364 -0
  44. package/dist/operator-ux.js +22 -363
  45. package/dist/orchestrator/report.js +8 -0
  46. package/dist/orchestrator.js +25 -8
  47. package/dist/reclamation.js +26 -21
  48. package/dist/run-export.js +138 -14
  49. package/dist/run-registry/derive.js +172 -0
  50. package/dist/run-registry/format.js +124 -0
  51. package/dist/run-registry/gc.js +251 -0
  52. package/dist/run-registry/policy.js +16 -0
  53. package/dist/run-registry/queue.js +116 -0
  54. package/dist/run-registry.js +78 -593
  55. package/dist/run-state-schema.js +1 -0
  56. package/dist/sandbox-profile.js +43 -2
  57. package/dist/state-explosion/format.js +159 -0
  58. package/dist/state-explosion/helpers.js +82 -0
  59. package/dist/state-explosion.js +65 -283
  60. package/dist/state-node.js +19 -4
  61. package/dist/telemetry-attestation.js +55 -0
  62. package/dist/telemetry-demo.js +15 -3
  63. package/dist/telemetry-ledger.js +60 -15
  64. package/dist/topology.js +25 -8
  65. package/dist/triggers.js +33 -14
  66. package/dist/trust-audit.js +145 -33
  67. package/dist/version.js +1 -1
  68. package/dist/worker-isolation/helpers.js +51 -0
  69. package/dist/worker-isolation/paths.js +46 -0
  70. package/dist/worker-isolation.js +39 -115
  71. package/docs/agent-delegation-drive.7.md +13 -0
  72. package/docs/cli-mcp-parity.7.md +4 -0
  73. package/docs/contract-migration-tooling.7.md +2 -0
  74. package/docs/control-plane-scheduling.7.md +2 -0
  75. package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
  76. package/docs/durable-state-and-locking.7.md +4 -0
  77. package/docs/evidence-adoption-reasoning-chain.7.md +2 -0
  78. package/docs/execution-backends.7.md +2 -0
  79. package/docs/index.md +1 -0
  80. package/docs/launch/launch-kit.md +46 -23
  81. package/docs/launch/pre-launch-checklist.md +14 -14
  82. package/docs/multi-agent-cli-mcp-surface.7.md +4 -0
  83. package/docs/multi-agent-eval-replay-harness.7.md +2 -0
  84. package/docs/multi-agent-operator-ux.7.md +2 -0
  85. package/docs/multi-agent-trust-policy-audit.7.md +27 -0
  86. package/docs/node-snapshot-diff-replay.7.md +2 -0
  87. package/docs/observability-cost-accounting.7.md +2 -0
  88. package/docs/project-index.md +18 -5
  89. package/docs/real-execution-backends.7.md +2 -0
  90. package/docs/release-and-migration.7.md +4 -0
  91. package/docs/release-tooling.7.md +2 -0
  92. package/docs/run-registry-control-plane.7.md +54 -8
  93. package/docs/run-retention-reclamation.7.md +4 -0
  94. package/docs/state-explosion-management.7.md +2 -0
  95. package/docs/team-collaboration.7.md +2 -0
  96. package/docs/trust-model.md +267 -0
  97. package/docs/vendor-manifest-loadability.7.md +43 -0
  98. package/docs/web-desktop-workbench.7.md +2 -0
  99. package/manifest/plugin.manifest.json +1 -1
  100. package/package.json +4 -2
  101. package/scripts/agents/builtin-templates.json +7 -0
  102. package/scripts/bump-version.js +5 -11
  103. package/scripts/canonical-apps-list.js +64 -0
  104. package/scripts/canonical-apps.js +19 -4
  105. package/scripts/dogfood-release.js +1 -1
  106. package/scripts/golden-path.js +4 -4
  107. package/scripts/parity-check.js +5 -0
  108. package/scripts/release-check.js +5 -1
  109. package/scripts/version-sync-check.js +5 -8
  110. package/dist/capability-dispatcher.js +0 -86
@@ -4,7 +4,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.WORKER_ISOLATION_SCHEMA_VERSION = void 0;
7
- exports.createWorkerIsolation = createWorkerIsolation;
8
7
  exports.allocateWorkerScope = allocateWorkerScope;
9
8
  exports.writeWorkerManifest = writeWorkerManifest;
10
9
  exports.syncWorkerScopeFromTask = syncWorkerScopeFromTask;
@@ -33,21 +32,9 @@ const multi_agent_1 = require("./multi-agent");
33
32
  const telemetry_attestation_1 = require("./telemetry-attestation");
34
33
  const telemetry_ledger_1 = require("./telemetry-ledger");
35
34
  const coordinator_1 = require("./coordinator");
35
+ const helpers_1 = require("./worker-isolation/helpers");
36
+ const paths_1 = require("./worker-isolation/paths");
36
37
  exports.WORKER_ISOLATION_SCHEMA_VERSION = 1;
37
- const WORKER_SCOPE_FILE = "worker.json";
38
- const WORKER_MANIFEST_FILE = "manifest.json";
39
- function createWorkerIsolation(options = {}) {
40
- return {
41
- allocateWorkerScope: (run, task, allocateOptions) => allocateWorkerScope(run, task, { ...options, ...allocateOptions }),
42
- writeWorkerManifest,
43
- listWorkerScopes: (run, listOptions) => listWorkerScopes(run, listOptions),
44
- getWorkerScope,
45
- recordWorkerOutput,
46
- recordWorkerFailure,
47
- validateWorkerBoundary,
48
- summarizeWorkers
49
- };
50
- }
51
38
  function allocateWorkerScope(run, task, options = {}) {
52
39
  ensureWorkerState(run);
53
40
  const existing = task.workerId ? getWorkerScope(run, task.workerId) : undefined;
@@ -64,7 +51,7 @@ function allocateWorkerScope(run, task, options = {}) {
64
51
  return existing;
65
52
  }
66
53
  const now = new Date().toISOString();
67
- const workerId = options.workerId || createWorkerId(run, task.id);
54
+ const workerId = options.workerId || (0, paths_1.createWorkerId)(run, task.id);
68
55
  const workerDir = node_path_1.default.join(workerRoot(run), (0, state_1.safeFileName)(workerId));
69
56
  const inputPath = node_path_1.default.join(workerDir, "input.md");
70
57
  const resultPath = node_path_1.default.join(workerDir, "result.md");
@@ -82,7 +69,10 @@ function allocateWorkerScope(run, task, options = {}) {
82
69
  extraReadPaths: options.policy?.readPaths || [],
83
70
  extraWritePaths: [...(options.policy?.writePaths || []), ...(options.policy?.allowedPaths || [])],
84
71
  allowArtifacts: options.policy?.allowArtifacts,
85
- allowLogs: options.policy?.allowLogs
72
+ allowLogs: options.policy?.allowLogs,
73
+ // H7: persisted custom profile definitions so a custom logical id resolves
74
+ // against THIS worker's context (worker-specific path tokens bind correctly).
75
+ customProfiles: run.customSandboxProfiles
86
76
  });
87
77
  const allowedPaths = (0, sandbox_profile_1.effectiveSandboxWritePaths)(sandboxPolicy);
88
78
  (0, sandbox_profile_1.upsertRunSandboxPolicy)(run, sandboxPolicy);
@@ -123,7 +113,7 @@ function allocateWorkerScope(run, task, options = {}) {
123
113
  feedbackIds: [],
124
114
  errors: [],
125
115
  multiAgent: options.multiAgent,
126
- metadata: compactMetadata({
116
+ metadata: (0, helpers_1.compactMetadata)({
127
117
  ...options.metadata,
128
118
  multiAgent: options.multiAgent,
129
119
  phase: task.phase,
@@ -165,7 +155,7 @@ function allocateWorkerScope(run, task, options = {}) {
165
155
  });
166
156
  }
167
157
  task.workerId = scope.id;
168
- task.workerManifestPath = manifestPath(scope);
158
+ task.workerManifestPath = (0, paths_1.manifestPath)(scope);
169
159
  task.sandboxProfileId = sandboxPolicy.id;
170
160
  task.sandboxPolicy = sandboxPolicy;
171
161
  task.backendId = backendId;
@@ -180,8 +170,8 @@ function writeWorkerManifest(run, scope) {
180
170
  const task = run.tasks.find((candidate) => candidate.id === scope.taskId);
181
171
  const sandboxPolicy = scope.sandboxPolicy || sandboxPolicyForBoundary(run, scope);
182
172
  const sandboxProfileId = scope.sandboxProfileId || sandboxPolicy.id;
183
- const scopePath = workerScopePath(scope);
184
- const workerManifestPath = manifestPath(scope);
173
+ const scopePath = (0, paths_1.workerScopePath)(scope);
174
+ const workerManifestPath = (0, paths_1.manifestPath)(scope);
185
175
  const manifest = {
186
176
  schemaVersion: exports.WORKER_ISOLATION_SCHEMA_VERSION,
187
177
  id: scope.id,
@@ -257,7 +247,7 @@ function syncWorkerScopeFromTask(run, workerId) {
257
247
  ...scope,
258
248
  updatedAt: new Date().toISOString(),
259
249
  multiAgent: task.multiAgent,
260
- metadata: compactMetadata({
250
+ metadata: (0, helpers_1.compactMetadata)({
261
251
  ...(scope.metadata || {}),
262
252
  multiAgent: task.multiAgent
263
253
  })
@@ -267,7 +257,7 @@ function syncWorkerScopeFromTask(run, workerId) {
267
257
  function listWorkerScopes(run, options = {}) {
268
258
  ensureWorkerState(run);
269
259
  const scopes = loadWorkerScopesFromDisk(run);
270
- run.workers = mergeScopes(run.workers || [], scopes);
260
+ run.workers = (0, helpers_1.mergeScopes)(run.workers || [], scopes);
271
261
  const listed = run.workers || [];
272
262
  return options.status ? listed.filter((scope) => scope.status === options.status) : listed;
273
263
  }
@@ -276,7 +266,7 @@ function getWorkerScope(run, workerId) {
276
266
  const existing = (run.workers || []).find((scope) => scope.id === workerId);
277
267
  if (existing)
278
268
  return existing;
279
- const file = node_path_1.default.join(workerRoot(run), (0, state_1.safeFileName)(workerId), WORKER_SCOPE_FILE);
269
+ const file = node_path_1.default.join(workerRoot(run), (0, state_1.safeFileName)(workerId), paths_1.WORKER_SCOPE_FILE);
280
270
  if (!node_fs_1.default.existsSync(file))
281
271
  return undefined;
282
272
  const scope = JSON.parse(node_fs_1.default.readFileSync(file, "utf8"));
@@ -302,7 +292,7 @@ function recordWorkerOutput(run, workerId, resultPath, options = {}) {
302
292
  throw new Error(violation.message);
303
293
  }
304
294
  if (!node_fs_1.default.existsSync(absoluteResultPath)) {
305
- const error = structuredError("worker-result-missing", `Worker result file does not exist: ${absoluteResultPath}`, {
295
+ const error = (0, helpers_1.structuredError)("worker-result-missing", `Worker result file does not exist: ${absoluteResultPath}`, {
306
296
  path: absoluteResultPath,
307
297
  retryable: true
308
298
  });
@@ -321,7 +311,7 @@ function recordWorkerOutput(run, workerId, resultPath, options = {}) {
321
311
  const baseDirs = Array.from(new Set([run.cwd, process.cwd(), scope.workerDir, run.paths.runDir].filter(Boolean)));
322
312
  const unresolved = (0, evidence_grounding_1.unresolvedFileEvidence)(parsedResult.evidence, baseDirs);
323
313
  if (unresolved.length) {
324
- const error = structuredError("worker-evidence-unresolvable", `Worker ${workerId} result cites file evidence that does not resolve on disk: ${unresolved.join(", ")}`, { path: absoluteResultPath, retryable: false });
314
+ const error = (0, helpers_1.structuredError)("worker-evidence-unresolvable", `Worker ${workerId} result cites file evidence that does not resolve on disk: ${unresolved.join(", ")}`, { path: absoluteResultPath, retryable: false });
325
315
  recordWorkerFailure(run, workerId, error, { ...options, persist: options.persist });
326
316
  throw new Error(error.message);
327
317
  }
@@ -345,7 +335,7 @@ function recordWorkerOutput(run, workerId, resultPath, options = {}) {
345
335
  // it instead of recording unverifiable usage. Default behavior is unchanged
346
336
  // (flag-and-surface). Non-agent hops carry no verdict and are never blocked.
347
337
  if (options.requireAttestedTelemetry && telemetry && telemetry.status !== "attested") {
348
- const error = structuredError("telemetry-unattested-blocked", `Worker ${workerId} telemetry is ${telemetry.status} (${telemetry.reason || "unverified"}) and require-attested-telemetry is enabled — refusing to accept a hop whose usage cannot be cryptographically verified`, { path: absoluteResultPath, retryable: false });
338
+ const error = (0, helpers_1.structuredError)("telemetry-unattested-blocked", `Worker ${workerId} telemetry is ${telemetry.status} (${telemetry.reason || "unverified"}) and require-attested-telemetry is enabled — refusing to accept a hop whose usage cannot be cryptographically verified`, { path: absoluteResultPath, retryable: false });
349
339
  recordWorkerFailure(run, workerId, error, { ...options, persist: options.persist });
350
340
  throw new Error(error.message);
351
341
  }
@@ -581,7 +571,7 @@ function recordWorkerFailure(run, workerId, error, options = {}) {
581
571
  status: "pending",
582
572
  loopStage: "adjust",
583
573
  inputs: { workerId, taskId: task.id, dispatchId: scope.dispatchId },
584
- artifacts: workerArtifacts(scope),
574
+ artifacts: (0, paths_1.workerArtifacts)(scope),
585
575
  parents: task.stateNodeId ? [task.stateNodeId] : [],
586
576
  contractId: pipeline_contract_1.DEFAULT_PIPELINE_CONTRACT_ID,
587
577
  metadata: { workerId, taskId: task.id, dispatchId: scope.dispatchId, workerDir: scope.workerDir, sandboxProfileId: scope.sandboxProfileId }
@@ -636,7 +626,7 @@ function recordWorkerFailure(run, workerId, error, options = {}) {
636
626
  updatedAt: new Date().toISOString(),
637
627
  status: structured.code === "worker-boundary-violation" || structured.code.startsWith("sandbox-") ? "rejected" : "failed",
638
628
  retryCount: typeof options.retryCount === "number" ? options.retryCount : scope.retryCount,
639
- feedbackIds: unique([...(scope.feedbackIds || []), feedback.id]),
629
+ feedbackIds: (0, helpers_1.unique)([...(scope.feedbackIds || []), feedback.id]),
640
630
  errors: [...(scope.errors || []), structured]
641
631
  });
642
632
  if (options.persist !== false)
@@ -649,7 +639,7 @@ function recordWorkerRetryAttempt(run, workerId, attempts, reason, options = {})
649
639
  ...scope,
650
640
  updatedAt: new Date().toISOString(),
651
641
  retryCount: attempts,
652
- metadata: compactMetadata({
642
+ metadata: (0, helpers_1.compactMetadata)({
653
643
  ...scope.metadata,
654
644
  agentDelegationAttempts: attempts,
655
645
  agentDelegationLastFailure: reason
@@ -668,8 +658,8 @@ function summarizeWorkers(run) {
668
658
  const workers = listWorkerScopes(run);
669
659
  return {
670
660
  total: workers.length,
671
- byStatus: countBy(workers, (scope) => scope.status),
672
- manifestPaths: workers.map(manifestPath),
661
+ byStatus: (0, helpers_1.countBy)(workers, (scope) => scope.status),
662
+ manifestPaths: workers.map(paths_1.manifestPath),
673
663
  failed: workers
674
664
  .filter((scope) => scope.status === "failed" || scope.status === "rejected")
675
665
  .map((scope) => ({ id: scope.id, status: scope.status, feedbackIds: scope.feedbackIds || [] }))
@@ -705,15 +695,9 @@ function reclaimOrphans(run, now) {
705
695
  }
706
696
  if (orphans.length) {
707
697
  writeWorkerIndex(run);
708
- saveWorkerCheckpoint(run);
709
698
  }
710
699
  return { runId: run.id, reclaimed: orphans.length, orphans };
711
700
  }
712
- function saveWorkerCheckpoint(run) {
713
- // Durable write via atomic temp+rename (same contract as saveCheckpoint)
714
- // For worker index, the atomic write in writeWorkerIndex already handles it.
715
- // This is a no-op wrapper that signals the checkpoint boundary.
716
- }
717
701
  function ensureWorkerState(run) {
718
702
  run.paths.workersDir = run.paths.workersDir || node_path_1.default.join(run.paths.runDir, "workers");
719
703
  node_fs_1.default.mkdirSync(run.paths.workersDir, { recursive: true });
@@ -771,7 +755,7 @@ function updateWorkerScope(run, scope) {
771
755
  return updated;
772
756
  }
773
757
  function writeWorkerScope(scope) {
774
- (0, state_1.writeJson)(workerScopePath(scope), scope);
758
+ (0, state_1.writeJson)((0, paths_1.workerScopePath)(scope), scope);
775
759
  }
776
760
  function writeWorkerIndex(run) {
777
761
  ensureWorkerState(run);
@@ -784,7 +768,7 @@ function writeWorkerIndex(run) {
784
768
  dispatchId: scope.dispatchId,
785
769
  status: scope.status,
786
770
  workerDir: scope.workerDir,
787
- manifestPath: manifestPath(scope),
771
+ manifestPath: (0, paths_1.manifestPath)(scope),
788
772
  resultPath: scope.resultPath,
789
773
  sandboxProfileId: scope.sandboxProfileId,
790
774
  backendId: scope.backendId,
@@ -800,7 +784,7 @@ function loadWorkerScopesFromDisk(run) {
800
784
  return node_fs_1.default
801
785
  .readdirSync(workerRoot(run), { withFileTypes: true })
802
786
  .filter((entry) => entry.isDirectory())
803
- .map((entry) => node_path_1.default.join(workerRoot(run), entry.name, WORKER_SCOPE_FILE))
787
+ .map((entry) => node_path_1.default.join(workerRoot(run), entry.name, paths_1.WORKER_SCOPE_FILE))
804
788
  .filter((file) => node_fs_1.default.existsSync(file))
805
789
  .map((file) => JSON.parse(node_fs_1.default.readFileSync(file, "utf8")));
806
790
  }
@@ -823,6 +807,12 @@ function sandboxPolicyForBoundary(run, scope, options = {}) {
823
807
  if (scope.sandboxPolicy && !options.policy && !options.sandboxProfileId)
824
808
  return scope.sandboxPolicy;
825
809
  const profileId = options.sandboxProfileId || options.policy?.sandboxProfileId || scope.sandboxProfileId || sandbox_profile_1.DEFAULT_SANDBOX_PROFILE_ID;
810
+ // H7: when the scope.sandboxPolicy snapshot is LOST, this re-resolves the policy
811
+ // by its logical profileId against the WORKER's paths (scope.workerDir etc.). For
812
+ // a CUSTOM profile the bundled lookup would throw not-found; threading
813
+ // run.customSandboxProfiles lets resolveSandboxProfileById re-resolve the persisted
814
+ // DEFINITION here — re-enforcing the same policy with worker-correct path tokens
815
+ // (NOT the dispatch-time paths), so a legitimate worker write is not falsely denied.
826
816
  return (0, sandbox_profile_1.sandboxPolicyForWorker)(profileId, {
827
817
  cwd: run.cwd,
828
818
  runDir: run.paths.runDir,
@@ -838,7 +828,8 @@ function sandboxPolicyForBoundary(run, scope, options = {}) {
838
828
  ...(!scope.sandboxPolicy ? scope.allowedPaths || [] : [])
839
829
  ],
840
830
  allowArtifacts: options.policy?.allowArtifacts,
841
- allowLogs: options.policy?.allowLogs
831
+ allowLogs: options.policy?.allowLogs,
832
+ customProfiles: run.customSandboxProfiles
842
833
  });
843
834
  }
844
835
  function blackboardManifest(run, scope) {
@@ -927,7 +918,7 @@ function blackboardLinkage(run, scope) {
927
918
  const role = scope.multiAgent?.roleId ? run.multiAgent?.roles.find((entry) => entry.id === scope.multiAgent?.roleId) : undefined;
928
919
  const multiAgentRun = scope.multiAgent?.runId ? run.multiAgent?.runs.find((entry) => entry.id === scope.multiAgent?.runId) : undefined;
929
920
  const blackboardId = membership?.blackboardId || group?.blackboardId || role?.blackboardId || multiAgentRun?.blackboardId;
930
- const topicIds = unique([
921
+ const topicIds = (0, helpers_1.unique)([
931
922
  ...(membership?.topicIds || []),
932
923
  ...(group?.topicIds || []),
933
924
  ...(role?.topicIds || []),
@@ -935,94 +926,27 @@ function blackboardLinkage(run, scope) {
935
926
  ]);
936
927
  return { blackboardId, topicIds };
937
928
  }
938
- function manifestPath(scope) {
939
- return node_path_1.default.join(scope.workerDir, WORKER_MANIFEST_FILE);
940
- }
941
- function workerScopePath(scope) {
942
- return node_path_1.default.join(scope.workerDir, WORKER_SCOPE_FILE);
943
- }
944
- // Deterministic worker id (v0.1.40 self-audit P2): a wall-clock stamp + Math.random()
945
- // made every dispatch mint a different id, so audit references were not reproducible
946
- // across re-runs of the same inputs. The id is now derived from the task plus a
947
- // per-task sequence (count of worker scopes already allocated for that task + 1),
948
- // so re-running the same workflow yields byte-identical worker ids while retries of
949
- // the SAME task still get a fresh, unique id. (workerId is excluded from the
950
- // snapshot source fingerprint, so this does not change replay digests.)
951
- function createWorkerId(run, taskId) {
952
- const prefix = `worker-${(0, state_1.safeFileName)(taskId)}-`;
953
- const seq = (run.workers || []).filter((scope) => scope.id.startsWith(prefix)).length + 1;
954
- return `${prefix}${String(seq).padStart(4, "0")}`;
955
- }
956
- function workerArtifacts(scope) {
957
- return [
958
- { id: "worker", kind: "json", path: workerScopePath(scope) },
959
- { id: "worker-manifest", kind: "json", path: manifestPath(scope) },
960
- { id: "worker-input", kind: "markdown", path: scope.inputPath }
961
- ];
962
- }
963
929
  function normalizeWorkerError(error, scope, options) {
964
- if (isBoundaryViolation(error)) {
965
- return structuredError(error.code, error.message, {
930
+ if ((0, helpers_1.isBoundaryViolation)(error)) {
931
+ return (0, helpers_1.structuredError)(error.code, error.message, {
966
932
  path: error.path,
967
933
  retryable: false,
968
934
  details: { allowedPaths: error.allowedPaths, workerId: scope.id, taskId: scope.taskId, sandboxProfileId: scope.sandboxProfileId }
969
935
  });
970
936
  }
971
- if (isStateNodeError(error)) {
937
+ if ((0, helpers_1.isStateNodeError)(error)) {
972
938
  return {
973
939
  ...error,
974
940
  at: error.at || new Date().toISOString(),
975
941
  path: options.path || error.path,
976
942
  retryable: options.retryable ?? error.retryable ?? false,
977
- details: compactMetadata({ ...(error.details || {}), workerId: scope.id, taskId: scope.taskId })
943
+ details: (0, helpers_1.compactMetadata)({ ...(error.details || {}), workerId: scope.id, taskId: scope.taskId })
978
944
  };
979
945
  }
980
946
  const message = error instanceof Error ? error.message : String(error);
981
- return structuredError(options.code || "worker-runtime-error", message, {
947
+ return (0, helpers_1.structuredError)(options.code || "worker-runtime-error", message, {
982
948
  path: options.path,
983
949
  retryable: options.retryable ?? false,
984
950
  details: { workerId: scope.id, taskId: scope.taskId }
985
951
  });
986
952
  }
987
- function structuredError(code, message, options = {}) {
988
- return {
989
- code,
990
- message,
991
- at: new Date().toISOString(),
992
- path: options.path,
993
- retryable: options.retryable,
994
- details: options.details
995
- };
996
- }
997
- function isBoundaryViolation(value) {
998
- return Boolean(value && typeof value === "object" && "allowedPaths" in value && "message" in value);
999
- }
1000
- function isStateNodeError(value) {
1001
- return Boolean(value && typeof value === "object" && "code" in value && "message" in value);
1002
- }
1003
- function mergeScopes(left, right) {
1004
- const merged = [...left];
1005
- for (const scope of right) {
1006
- const index = merged.findIndex((candidate) => candidate.id === scope.id);
1007
- if (index >= 0)
1008
- merged[index] = scope;
1009
- else
1010
- merged.push(scope);
1011
- }
1012
- return merged;
1013
- }
1014
- function unique(values) {
1015
- return Array.from(new Set(values.filter(Boolean)));
1016
- }
1017
- function compactMetadata(value) {
1018
- const entries = Object.entries(value).filter(([, entry]) => entry !== undefined);
1019
- return entries.length ? Object.fromEntries(entries) : undefined;
1020
- }
1021
- function countBy(items, key) {
1022
- const counts = {};
1023
- for (const item of items) {
1024
- const value = key(item);
1025
- counts[value] = (counts[value] || 0) + 1;
1026
- }
1027
- return counts;
1028
- }
@@ -131,8 +131,17 @@ node dist/cli.js backend probe agent --json # ready iff configured, else un
131
131
  node dist/cli.js run architecture-review --drive --repo /path/to/repo --question "Is the design sound?"
132
132
  node dist/cli.js run architecture-review --drive --once --repo /path/to/repo --question "..." # one step
133
133
  node dist/cli.js run drive <run-id> --json # read-only preview of the next step
134
+
135
+ # quickstart --resume: a guided stop-then-resume a newcomer can WITNESS in <5 min
136
+ node dist/cli.js quickstart --resume --repo /path/to/repo --question "..." # advances ONE step, prints a continue line
137
+ node dist/cli.js quickstart --run <run-id> --resume # continues that run to completion
134
138
  ```
135
139
 
140
+ `quickstart --resume` with no `--run` drives a single step and prints a
141
+ copy-pasteable `cw quickstart --run <id> --resume` continue line; rerun it with
142
+ `--run <id>` to finish. The continuing invocation echoes `resumedFrom: <id>`. Bare
143
+ `quickstart` (no `--resume`) is unchanged — it drives straight to completion.
144
+
136
145
  For faster first results, use the opt-in fast app instead of changing the full
137
146
  review contract:
138
147
 
@@ -248,3 +257,7 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
248
257
  ## Fast Architecture Review (v0.1.80)
249
258
 
250
259
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
260
+
261
+ ## Resumable Drive & Resume Routing (v0.1.81)
262
+
263
+ Adds `run resume <id> --drive/--once` alongside `quickstart --resume`: a stopped pipeline resumes in-place, advancing to completion (`--drive`) or one deterministic step (`--once`) over the same plan->dispatch->agent-fulfill->accept->commit lifecycle, echoing `resumedFrom: <id>`. Fixes the `run resume --drive` CLI routing so the drive flag reaches the resumed run instead of being read as an app name. Replay determinism and the agent evidence triple are unchanged.
@@ -385,3 +385,7 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
385
385
  ## Fast Architecture Review (v0.1.80)
386
386
 
387
387
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
388
+
389
+ ## Re-Prove Verbs on Both Surfaces (v0.1.81)
390
+
391
+ v0.1.81 grows the parity surface with two new both-surface, fail-closed verbs declared once in the capability registry: `cw audit verify` / `cw_audit_verify` re-proves the trust-audit chain and exits non-zero on any unverified or corrupt chain, and `cw run inspect-archive` / `cw_run_inspect_archive` is a read-only archive integrity check. Each `cw <cmd> --json` is schema-identical to its `cw_<tool>` and validated by the same parity gate.
@@ -127,3 +127,5 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
127
127
  ## Fast Architecture Review (v0.1.80)
128
128
 
129
129
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
130
+
131
+ _No changes to the contract-migration subsystem in v0.1.81._
@@ -114,3 +114,5 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
114
114
  ## Fast Architecture Review (v0.1.80)
115
115
 
116
116
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
117
+
118
+ _No changes to the control-plane scheduling surface in v0.1.81._
@@ -0,0 +1,40 @@
1
+ # Dogfood: real `builtin:claude` agent + `run resume --drive` (2026-06-14)
2
+
3
+ A live dogfood run with a REAL external agent (`CW_AGENT_COMMAND=builtin:claude`, the
4
+ bundled read-only claude wrapper). The model ran in the agent's own process; CW
5
+ spawned it and recorded the attested output — CW holds no API key and imports no
6
+ model SDK. This run had two purposes and delivered both: it confirmed the real-agent
7
+ delegation path works end to end, and — because it exercised the **CLI** rather than
8
+ the unit-test function path — it **caught a real shipped bug** in `run resume --drive`.
9
+
10
+ ## What ran
11
+
12
+ - `cw run architecture-review --drive --once --repo <tmp> --question "…"` with a real
13
+ `builtin:claude` agent: **1 worker completed** end-to-end with zero hand-written
14
+ result.md — the worker's `result.md` was produced by real claude, passed the
15
+ evidence-gated acceptance, and a `report.md` (7.5 KB, "# Architecture Review …")
16
+ plus 3 state commits were written. The real-agent path (spawn → attested output →
17
+ evidence gate → commit) works.
18
+ - Run: `architecture-review-20260614T104416Z-upkor2`, status `in-progress` (1/14)
19
+ after the single `--once` step.
20
+
21
+ ## The bug it caught (and the fix)
22
+
23
+ Resuming the partway run with `cw run resume <id> --drive` failed:
24
+ `cw: Workflow app not found: resume`. The `run` command's early `--drive` branch
25
+ (the `cw run <app> --drive` one-command form) intercepted the invocation *before* the
26
+ subcommand switch, so the `resume` keyword was misread as an app name and never
27
+ reached the `runResume` continuation shipped in #155.
28
+
29
+ The A1 unit smoke (`run-resume-drive-smoke`) had tested `runResume()` **directly**, so
30
+ it never exercised the CLI dispatch — only a real CLI run surfaced it. Fixed by
31
+ guarding the early app-drive route so a leading run-registry subcommand keyword
32
+ (resume/show/export/…) falls through to the switch; `run-resume-drive-smoke` now drives
33
+ `cw run resume <id> --drive` through the actual CLI and asserts it routes to the verb,
34
+ plus a regression guard that `run <app> --drive` still routes to the app drive.
35
+
36
+ ## Takeaway
37
+
38
+ Unit tests that call the capability function directly can miss CLI-dispatch bugs.
39
+ Every both-surface verb that adds a flag needs at least one test through the real CLI
40
+ argv path, not just the exported function.
@@ -111,3 +111,7 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
111
111
  ## Fast Architecture Review (v0.1.80)
112
112
 
113
113
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
114
+
115
+ ## Deterministic Tombstone Hash (v0.1.81)
116
+
117
+ The reclamation tombstone's freed-manifest is now path-sorted before it feeds `tombstoneHash`, so the same freed set always yields the same hash regardless of filesystem enumeration order. This removes a source of non-determinism from the write-ahead chain (v0.1.39/v0.1.40), keeping the per-run tombstone hash-chain replayable and stable across hosts. Atomicity, locking, and the durable re-point seam are unchanged. v0.1.81 also adds import-time refusal (`CW_REQUIRE_ARCHIVE_INTEGRITY=1`) and restore-time trust-audit re-proving — see run-registry-control-plane(7).
@@ -274,3 +274,5 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
274
274
  ## Fast Architecture Review (v0.1.80)
275
275
 
276
276
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
277
+
278
+ _No changes to the Evidence Adoption Reasoning Chain surface in v0.1.81. The v0.1.81 trust-audit `computeEventHash` fix hardens the underlying audit records this chain links to by reference, but the derived reasoning view, its commands, and its eval gates are unchanged._
@@ -304,3 +304,5 @@ Migration DAG with reversible edges (v0.1.45), capability auto-discovery (v0.1.4
304
304
  ## Fast Architecture Review (v0.1.80)
305
305
 
306
306
  Adds the opt-in fast architecture-review lane: scoped JSONL source contexts, diff-aware exports, reusable Map and Assess results, measurable wrapper metrics, actionable background full-review handoff, and userland model policy flags for routing fast/strong workers without changing the full review contract.
307
+
308
+ _No changes to the execution-backends surface in v0.1.81._
package/docs/index.md CHANGED
@@ -7,6 +7,7 @@ Read these in order when you are new to CW:
7
7
  3. [Workflow App framework](workflow-app-framework.7.md) - userland app manifests, entrypoints, compatibility, and validation.
8
8
  4. [Sandbox Profiles](sandbox-profiles.7.md) - named worker policy contracts for read/write/execute/network/env handling.
9
9
  5. [Security / Trust Hardening](security-trust-hardening.7.md) - audit records, provenance, sandbox attestations, and acceptance rationale.
10
+ - [Trust Model & Limitations](trust-model.md) - what the ed25519 + hash-chain tamper-evidence proves and, honestly, what it does **not** (the single-keyholder ceiling). Read this before relying on a green verdict.
10
11
  6. [Multi-Agent Runtime Core](multi-agent-runtime-core.7.md) - first-class MultiAgentRun, roles, groups, memberships, fanout, fanin, and lifecycle state.
11
12
  7. [Coordinator / Blackboard](coordinator-blackboard.7.md) - shared topics, messages, context frames, artifact refs, snapshots, decisions, conflicts, and fanin evidence.
12
13
  8. [Multi-Agent Topologies](multi-agent-topologies.7.md) - official map-reduce, debate, and judge-panel recipes built on multi-agent and blackboard records.
@@ -9,7 +9,7 @@ Everything leads with the 30-second `npx cool-workflow demo tamper` proof.
9
9
  ## ✅ FINAL — Show HN (copy-paste ready)
10
10
 
11
11
  **Pre-flight (do these first):**
12
- 1. Record the demo GIF: `vhs plugins/cool-workflow/docs/launch/demo.tape` → swap it into the README hero (replace the fenced output block with the GIF).
12
+ 1. Record the demo GIF: `vhs plugins/cool-workflow/docs/launch/demo.tape` → add it to the README hero (insert the GIF near the badges/intro).
13
13
  2. Confirm on a clean machine: `npx cool-workflow demo tamper` runs and prints `VERDICT: tamper-evidence holds ✓`.
14
14
  3. Post during US morning (HN traffic peak); reply to the first comment with the npm + provenance link.
15
15
 
@@ -34,25 +34,33 @@ agent you configure (claude -p, codex exec, an HTTP endpoint) and never embeds a
34
34
  model SDK or holds an API key. What it owns is the audit trail: each agent hop's
35
35
  reported usage is signed (ed25519) and appended to a hash-chained ledger, so
36
36
  editing any record — or even recomputing its local hash to cover the edit — breaks
37
- the chain downstream. You re-verify a finished run offline, with only the public
38
- key. No telemetry service to trust or breach.
37
+ the chain downstream. You re-verify a finished run offline — no telemetry service
38
+ to trust or breach.
39
39
 
40
40
  30-second proof, no install:
41
41
 
42
42
  npx cool-workflow demo tamper
43
43
 
44
44
  It builds a real signed ledger, forges it two ways (flip a verdict + re-seal its
45
- hash; inflate reported tokens + reuse the signature), and shows both caught. On a
46
- real run, `cw telemetry verify <run>` does the same against what's on disk.
45
+ hash; inflate reported tokens + reuse the signature), and catches both offline with
46
+ only the public key. On a real run, `cw telemetry verify <run>` re-proves the
47
+ recorded ledger on disk — recomputing the chain so any later edit to a verdict or
48
+ usage digest is caught; add `--pubkey <public.pem>` to re-run each attested hop's
49
+ signature check offline too. I keep an
50
+ honest trust-model doc (what it does and does NOT prove, incl. the single-keyholder
51
+ ceiling): https://github.com/coo1white/cool-workflow/blob/main/plugins/cool-workflow/docs/trust-model.md
47
52
 
48
53
  Also: concurrent parallel() phases with declared collapse semantics (collect-all +
49
54
  kill-on-timeout — 16 agents with a forced hang/crash/dirty-return finish without
50
55
  deadlock and replay who-passed-who-failed), per-task output-schema gates, token
51
- budgets enforced against attested usage, and a one-way executor boundary welded
56
+ budgets enforced against the host's recorded usage (opt-in gate fails closed on
57
+ unattested telemetry), and a one-way executor boundary welded
52
58
  into the type system (a callable that could reach a model API fails `npm run
53
- build`). Zero runtime deps, BSD-2, published to npm with provenance.
59
+ build`). Zero runtime deps, BSD-2, published to npm with provenance. Ships generated
60
+ plugin manifests for 5 agent platforms (claude, codex, agents, gemini, opencode);
61
+ `npm run manifest:load-check` boots all five from one source of truth.
54
62
 
55
- It's early (v0.1.79) — I'd genuinely like to hear where the "delegate, prove,
63
+ It's early (v0.1.81) — I'd genuinely like to hear where the "delegate, prove,
56
64
  replay" model breaks down for your workflows.
57
65
 
58
66
  npm: https://www.npmjs.com/package/cool-workflow
@@ -64,8 +72,8 @@ npm: https://www.npmjs.com/package/cool-workflow
64
72
 
65
73
  > Cool Workflow is an auditable control-plane for multi-agent workflows. It
66
74
  > *delegates* model execution — never embeds it — and makes every recorded agent
67
- > telemetry verdict tamper-evident: anyone can re-verify a run offline with only a
68
- > public key.
75
+ > telemetry verdict tamper-evident: anyone can re-verify a run's integrity offline,
76
+ > and check the ed25519 attribution with the public key alone.
69
77
 
70
78
  ## Elevator (2 sentences)
71
79
 
@@ -94,8 +102,8 @@ npm: https://www.npmjs.com/package/cool-workflow
94
102
  > holds an API key. What it *does* own is the audit trail: each agent hop's reported
95
103
  > usage is signed (ed25519) and appended to a hash-chained ledger, so editing any
96
104
  > record — or even recomputing its local hash to cover the edit — breaks the chain
97
- > downstream. You can re-verify a finished run with only the public key, no network,
98
- > no trusted server.
105
+ > downstream. You can re-verify a finished run offline — no network, no trusted
106
+ > server.
99
107
  >
100
108
  > The 30-second proof, no install:
101
109
  >
@@ -104,18 +112,23 @@ npm: https://www.npmjs.com/package/cool-workflow
104
112
  > ```
105
113
  >
106
114
  > It builds a real signed ledger, forges it two ways (flip a verdict + re-seal its
107
- > hash; inflate reported tokens + reuse the signature), and shows both forgeries
108
- > caught offline. On a real run, `cw telemetry verify <run>` does the same against
109
- > what's on disk.
115
+ > hash; inflate reported tokens + reuse the signature), and catches both offline with
116
+ > only the public key. On a real run, `cw telemetry verify <run>` re-proves the
117
+ > recorded ledger on disk — recomputing the chain so any later edit to a verdict or
118
+ > usage digest is caught; add `--pubkey <public.pem>` to re-run each attested hop's
119
+ > signature check offline too. I keep an
120
+ > honest [trust model & limitations](https://github.com/coo1white/cool-workflow/blob/main/plugins/cool-workflow/docs/trust-model.md)
121
+ > doc, including the single-keyholder ceiling.
110
122
  >
111
123
  > Other things it does: concurrent `parallel()` phases with declared collapse
112
124
  > semantics (collect-all + kill-on-timeout — 16 agents with a forced hang/crash/
113
125
  > dirty-return finish without deadlock and replay "who passed/who failed"), per-task
114
- > output-schema gates, token budgets enforced against attested usage, and a one-way
126
+ > output-schema gates, token budgets enforced against the host's recorded usage
127
+ > (an opt-in gate fails closed on unattested telemetry), and a one-way
115
128
  > executor boundary welded into the type system (a callable that could reach a model
116
129
  > API fails `npm run build`).
117
130
  >
118
- > Runs anywhere Node runs; `dist/` is committed; BSD-2. It's early (v0.1.79) and I'd
131
+ > Runs anywhere Node runs; `dist/` is committed; BSD-2. It's early (v0.1.81) and I'd
119
132
  > genuinely like to hear where the "delegate, prove, replay" model breaks down for
120
133
  > your workflows.
121
134
  >
@@ -134,8 +147,9 @@ catches both offline with only the public key. A control-plane that delegates
134
147
  model execution but can still prove the bill is real.
135
148
 
136
149
  3/ Also: concurrent batches that don't deadlock when an agent hangs, schema-gated
137
- outputs, token budgets vs *attested* usage, and a red line (never call a model
138
- API) enforced at compile time. Zero deps, BSD-2.
150
+ outputs, token budgets vs the host's recorded usage (attested-telemetry gate is
151
+ opt-in), and a red line (never call a model API) enforced at compile time. Zero
152
+ deps, BSD-2.
139
153
  → https://github.com/coo1white/cool-workflow
140
154
 
141
155
  ---
@@ -146,20 +160,29 @@ API) enforced at compile time. Zero deps, BSD-2.
146
160
  reported usage. The thing that *spends the money* is not the thing that *keeps
147
161
  the books* — the property auditors require everywhere except, so far, agent
148
162
  infra.
149
- - **Offline, public-key verification.** No telemetry service to trust or breach.
150
- The record proves its own integrity; the verifier needs only the public key.
163
+ - **Offline verification.** No telemetry service to trust or breach. The record
164
+ proves its own integrity offline — re-proving the chain needs no key at all — and
165
+ the ed25519 attribution checks against the public key alone.
151
166
  - **Replayable, not just logged.** CW breaks at dispatch and writes to disk, so a
152
167
  run replays deterministically — "who passed / who failed" is reconstructable, not
153
- a scrollback of a fused process.
168
+ a scrollback of a fused process. A finished run is portable and self-proving:
169
+ `cw run inspect-archive <archive>` re-proves every file digest, the manifest, and
170
+ the whole-archive hash without importing it; `cw run import` then
171
+ `cw run verify-import <run-id>` restores it and re-proves the restored digests +
172
+ telemetry chain — a tampered archive is caught before it is trusted.
154
173
  - **Fail-closed by default where it counts.** Schema mismatch parks the hop;
155
174
  unverifiable usage can be refused (opt-in); an empty-capture result can't be
156
175
  presented as a clean commit.
176
+ - **Cross-vendor, and it actually boots.** One source manifest
177
+ (`manifest/plugin.manifest.json`) generates Claude / Codex / Gemini / OpenCode /
178
+ agents adapters, and `npm run manifest:load-check` boots all five (184 tools each)
179
+ — the neutrality moat is executable, not aspirational.
157
180
 
158
181
  ## Assets to capture before posting
159
182
 
160
183
  - [ ] **Demo GIF** — reproducible, no manual screen-recording: `vhs
161
184
  plugins/cool-workflow/docs/launch/demo.tape` → `docs/launch/demo-tamper.gif`,
162
- then swap it into the README hero (replace the fenced output block). The
185
+ then add it to the README hero (insert it near the badges/intro). The
163
186
  ✗ DETECTED lines are the hook.
164
187
  - [ ] Confirm `npx cool-workflow demo tamper` works from a clean machine (no clone).
165
188
  - [ ] Pin the npm version badge / release + provenance link in the first comment.