cool-workflow 0.1.80 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.codex-plugin/plugin.json +1 -1
  3. package/README.md +42 -2
  4. package/apps/architecture-review/app.json +1 -1
  5. package/apps/architecture-review-fast/app.json +1 -1
  6. package/apps/end-to-end-golden-path/app.json +1 -1
  7. package/apps/pr-review-fix-ci/app.json +1 -1
  8. package/apps/release-cut/app.json +1 -1
  9. package/apps/research-synthesis/app.json +1 -1
  10. package/dist/agent-config.js +21 -7
  11. package/dist/candidate-scoring.js +42 -22
  12. package/dist/capability-core.js +94 -17
  13. package/dist/capability-registry.js +138 -171
  14. package/dist/cli.js +90 -100
  15. package/dist/collaboration.js +5 -6
  16. package/dist/commit.js +20 -6
  17. package/dist/compare.js +18 -0
  18. package/dist/coordinator/classify.js +45 -0
  19. package/dist/coordinator/paths.js +42 -0
  20. package/dist/coordinator/util.js +129 -0
  21. package/dist/coordinator.js +127 -300
  22. package/dist/dispatch.js +35 -0
  23. package/dist/drive.js +7 -7
  24. package/dist/error-feedback.js +8 -4
  25. package/dist/evidence-reasoning.js +1 -1
  26. package/dist/execution-backend/agent.js +331 -0
  27. package/dist/execution-backend/probes.js +96 -0
  28. package/dist/execution-backend/util.js +47 -0
  29. package/dist/execution-backend.js +67 -420
  30. package/dist/mcp-server.js +34 -173
  31. package/dist/multi-agent/graph.js +84 -0
  32. package/dist/multi-agent/helpers.js +145 -0
  33. package/dist/multi-agent/paths.js +22 -0
  34. package/dist/multi-agent-eval/format.js +194 -0
  35. package/dist/multi-agent-eval/normalize.js +51 -0
  36. package/dist/multi-agent-eval.js +39 -244
  37. package/dist/multi-agent-host.js +0 -19
  38. package/dist/multi-agent.js +125 -314
  39. package/dist/node-snapshot.js +3 -3
  40. package/dist/observability/format.js +61 -0
  41. package/dist/observability/intake.js +98 -0
  42. package/dist/observability.js +14 -160
  43. package/dist/operator-ux/format.js +364 -0
  44. package/dist/operator-ux.js +22 -363
  45. package/dist/orchestrator/report.js +8 -0
  46. package/dist/orchestrator.js +25 -8
  47. package/dist/reclamation.js +26 -21
  48. package/dist/run-export.js +138 -14
  49. package/dist/run-registry/derive.js +172 -0
  50. package/dist/run-registry/format.js +124 -0
  51. package/dist/run-registry/gc.js +251 -0
  52. package/dist/run-registry/policy.js +16 -0
  53. package/dist/run-registry/queue.js +116 -0
  54. package/dist/run-registry.js +78 -593
  55. package/dist/run-state-schema.js +1 -0
  56. package/dist/sandbox-profile.js +43 -2
  57. package/dist/state-explosion/format.js +159 -0
  58. package/dist/state-explosion/helpers.js +82 -0
  59. package/dist/state-explosion.js +65 -283
  60. package/dist/state-node.js +19 -4
  61. package/dist/telemetry-attestation.js +55 -0
  62. package/dist/telemetry-demo.js +15 -3
  63. package/dist/telemetry-ledger.js +60 -15
  64. package/dist/topology.js +25 -8
  65. package/dist/triggers.js +33 -14
  66. package/dist/trust-audit.js +145 -33
  67. package/dist/version.js +1 -1
  68. package/dist/worker-isolation/helpers.js +51 -0
  69. package/dist/worker-isolation/paths.js +46 -0
  70. package/dist/worker-isolation.js +39 -115
  71. package/docs/agent-delegation-drive.7.md +13 -0
  72. package/docs/cli-mcp-parity.7.md +4 -0
  73. package/docs/contract-migration-tooling.7.md +2 -0
  74. package/docs/control-plane-scheduling.7.md +2 -0
  75. package/docs/dogfood/resume-drive-real-agent-2026-06-14.md +40 -0
  76. package/docs/durable-state-and-locking.7.md +4 -0
  77. package/docs/evidence-adoption-reasoning-chain.7.md +2 -0
  78. package/docs/execution-backends.7.md +2 -0
  79. package/docs/index.md +1 -0
  80. package/docs/launch/launch-kit.md +46 -23
  81. package/docs/launch/pre-launch-checklist.md +14 -14
  82. package/docs/multi-agent-cli-mcp-surface.7.md +4 -0
  83. package/docs/multi-agent-eval-replay-harness.7.md +2 -0
  84. package/docs/multi-agent-operator-ux.7.md +2 -0
  85. package/docs/multi-agent-trust-policy-audit.7.md +27 -0
  86. package/docs/node-snapshot-diff-replay.7.md +2 -0
  87. package/docs/observability-cost-accounting.7.md +2 -0
  88. package/docs/project-index.md +18 -5
  89. package/docs/real-execution-backends.7.md +2 -0
  90. package/docs/release-and-migration.7.md +4 -0
  91. package/docs/release-tooling.7.md +2 -0
  92. package/docs/run-registry-control-plane.7.md +54 -8
  93. package/docs/run-retention-reclamation.7.md +4 -0
  94. package/docs/state-explosion-management.7.md +2 -0
  95. package/docs/team-collaboration.7.md +2 -0
  96. package/docs/trust-model.md +267 -0
  97. package/docs/vendor-manifest-loadability.7.md +43 -0
  98. package/docs/web-desktop-workbench.7.md +2 -0
  99. package/manifest/plugin.manifest.json +1 -1
  100. package/package.json +4 -2
  101. package/scripts/agents/builtin-templates.json +7 -0
  102. package/scripts/bump-version.js +5 -11
  103. package/scripts/canonical-apps-list.js +64 -0
  104. package/scripts/canonical-apps.js +19 -4
  105. package/scripts/dogfood-release.js +1 -1
  106. package/scripts/golden-path.js +4 -4
  107. package/scripts/parity-check.js +5 -0
  108. package/scripts/release-check.js +5 -1
  109. package/scripts/version-sync-check.js +5 -8
  110. package/dist/capability-dispatcher.js +0 -86
@@ -0,0 +1,194 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.formatMultiAgentEval = formatMultiAgentEval;
7
+ // Human formatting for the multi-agent eval replay layer (CLI-only; never affects
8
+ // --json / MCP payloads). Pure functions — a result object in, a string out —
9
+ // carved out of multi-agent-eval.ts (FreeBSD-audit god-module split) so the eval
10
+ // router no longer bundles the rendering layer. The runtime-discriminating type
11
+ // guards travel with the renderer that is their only consumer. Re-exported from
12
+ // multi-agent-eval.ts to keep the public surface byte-unchanged.
13
+ //
14
+ // Types are imported type-only from the parent module: `import type` is fully
15
+ // erased at runtime, so there is no import cycle despite the parent re-exporting
16
+ // formatMultiAgentEval from here.
17
+ const node_path_1 = __importDefault(require("node:path"));
18
/**
 * Render a multi-agent eval result as sectioned, human-readable text.
 *
 * The shape of `value` is detected with the runtime guards in this module,
 * tried in a fixed order: gate, score, comparison, replay, snapshot, report.
 * The first guard that matches decides the layout. A value matching none of
 * them is returned as pretty-printed JSON.
 *
 * @param {unknown} value - An eval result object (or anything else).
 * @returns {string} Newline-joined report text, or indented JSON as a fallback.
 */
function formatMultiAgentEval(value) {
    const pathApi = node_path_1.default;
    // One line per regression finding, or a single " none" placeholder.
    const findingRows = () => {
        if (!value.findings.length) {
            return [" none"];
        }
        return value.findings.map((entry) => ` ${entry.severity} ${entry.category}: ${entry.reason}`);
    };
    // "id=status" pairs joined by "; " for score metrics / comparison sections.
    const metricRow = (...ids) => ` ${ids.map((id) => metricStatus(value, id)).join("; ")}`;
    const sectionRow = (...ids) => ` ${ids.map((id) => sectionStatus(value, id)).join("; ")}`;
    const render = (rows) => rows.join("\n");

    if (isGate(value)) {
        return render([
            "Eval Suite",
            ` ${value.suiteId}`,
            "",
            "Replay Status",
            ` ${value.status} (${value.score}/${value.maxScore})`,
            "",
            "Regression Findings",
            ...findingRows(),
            "",
            "Final Verdict",
            ` ${value.verdict}`,
            "",
            "Next Action",
            ` ${value.nextAction}`
        ]);
    }

    if (isScore(value)) {
        return render([
            "Eval Suite",
            ` ${pathApi.basename(value.paths.suiteDir)}`,
            "",
            "Replay Status",
            ` ${value.status} (${value.score}/${value.maxScore})`,
            "",
            "Graph Comparison",
            metricRow("replay_completed", "graph_parity", "role_parity", "group_parity", "membership_parity", "fanout_parity", "fanin_parity", "dependency_parity", "failure_parity"),
            "",
            "Evidence Comparison",
            metricRow("blackboard_record_parity", "evidence_adoption_parity", "blackboard_provenance_parity"),
            "",
            "Trust / Policy / Audit Comparison",
            metricRow("trust_audit_parity", "role_policy_parity", "permission_decision_parity", "policy_violation_parity", "judge_rationale_parity", "panel_decision_parity"),
            "",
            "Candidate Score Comparison",
            metricRow("candidate_score_parity"),
            "",
            "Selection / Commit Gate",
            metricRow("selection_parity", "verifier_commit_gate_parity"),
            "",
            "State Explosion Summaries",
            metricRow("summary_freshness", "compact_graph_parity", "blackboard_digest_parity", "critical_path_parity", "evidence_digest_parity", "expansion_ref_integrity"),
            "",
            "Regression Findings",
            ...findingRows(),
            "",
            "Final Verdict",
            ` ${value.status}`,
            "",
            "Next Action",
            ` ${value.status === "pass" ? "Run eval gate or include report path as evidence." : "Review findings before release."}`
        ]);
    }

    if (isComparison(value)) {
        return render([
            "Eval Suite",
            ` ${pathApi.basename(value.paths.suiteDir)}`,
            "",
            "Replay Status",
            ` ${value.status}`,
            "",
            "Graph Comparison",
            sectionRow("workflow", "topologyShape", "roles", "groups", "memberships", "fanouts", "fanins", "dependencyEdges", "failures"),
            "",
            "Evidence Comparison",
            sectionRow("blackboardRecords", "evidenceAdoption", "messageProvenance"),
            "",
            "Trust / Policy / Audit Comparison",
            sectionRow("blackboardWriteAudit", "rolePolicies", "permissionDecisions", "policyViolations", "judgeRationales", "panelDecisions"),
            "",
            "Candidate Score Comparison",
            sectionRow("candidateScores"),
            "",
            "Selection / Commit Gate",
            sectionRow("selectedCandidates", "verifierCommitGate"),
            "",
            "Regression Findings",
            ...findingRows(),
            "",
            "Final Verdict",
            ` ${value.status}`,
            "",
            "Next Action",
            " Score the replay or run the eval gate."
        ]);
    }

    if (isReplay(value)) {
        return render([
            "Eval Suite",
            ` ${pathApi.basename(value.paths.suiteDir)}`,
            "",
            "Replay Status",
            ` ${value.status}`,
            ` replay=${value.paths.replayRunPath}`,
            "",
            "Next Action",
            ` node scripts/cw.js eval compare ${value.paths.snapshotPath} ${value.paths.replayRunPath}`
        ]);
    }

    if (isSnapshot(value)) {
        return render([
            "Eval Suite",
            ` ${value.id}`,
            "",
            "Replay Status",
            " snapshot captured",
            ` snapshot=${value.paths.snapshotPath}`,
            "",
            "Graph Comparison",
            ` topology records=${value.normalized.topologyShape.length}`,
            "",
            "Evidence Comparison",
            ` evidence records=${value.normalized.evidenceAdoption.length}`,
            "",
            "Trust / Policy / Audit Comparison",
            ` audit records=${value.normalized.blackboardWriteAudit.length + value.normalized.messageProvenance.length}`,
            "",
            "Candidate Score Comparison",
            ` score records=${value.normalized.candidateScores.length}`,
            "",
            "Selection / Commit Gate",
            ` selected=${value.normalized.selectedCandidates.length}; commit gates=${value.normalized.verifierCommitGate.length}`,
            "",
            "Regression Findings",
            " none",
            "",
            "Final Verdict",
            " snapshot-ready",
            "",
            "Next Action",
            ` node scripts/cw.js eval replay ${value.paths.snapshotPath}`
        ]);
    }

    if (isReport(value)) {
        return render([
            "Eval Suite",
            ` ${pathApi.dirname(value.reportPath)}`,
            "",
            "Replay Status",
            ` ${value.status} (${value.score}/${value.maxScore})`,
            "",
            "Final Verdict",
            ` report written: ${value.reportPath}`,
            "",
            "Next Action",
            " Run eval gate if this is release evidence."
        ]);
    }

    // Unknown shape: show the raw value rather than guessing a layout.
    return JSON.stringify(value, null, 2);
}
170
/**
 * Build an "id=status" token for one metric of a score result.
 *
 * @param {{ metrics: Array<{ id: string, status?: string }> }} score - Score result holding a metrics list.
 * @param {string} id - Metric id to look up.
 * @returns {string} `${id}=<status>`, with "missing" when the metric is absent or its status is falsy.
 */
function metricStatus(score, id) {
    const found = score.metrics.find((candidate) => candidate.id === id);
    let status = found?.status;
    if (!status) {
        status = "missing";
    }
    return `${id}=${status}`;
}
174
/**
 * Build an "id=status" token for one section of a comparison result.
 *
 * @param {{ sections: Record<string, { status?: string }> }} comparison - Comparison result keyed by section id.
 * @param {string} id - Section id to look up.
 * @returns {string} `${id}=<status>`, with "missing" when the section is absent or its status is falsy.
 */
function sectionStatus(comparison, id) {
    const section = comparison.sections[id];
    const status = section?.status;
    return status ? `${id}=${status}` : `${id}=missing`;
}
177
/**
 * Runtime guard: true when `value` is a non-null object whose `kind` is
 * "multi-agent-replay-snapshot".
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isSnapshot(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return value.kind === "multi-agent-replay-snapshot";
}
180
/**
 * Runtime guard: true when `value` is a non-null object whose `kind` is
 * "multi-agent-replay-run".
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isReplay(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return value.kind === "multi-agent-replay-run";
}
183
/**
 * Runtime guard: true when `value` is a non-null object that has both a
 * `sections` and a `findings` property (own or inherited).
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isComparison(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return "sections" in value && "findings" in value;
}
186
/**
 * Runtime guard: true when `value` is a non-null object that has both a
 * `metrics` and a `score` property (own or inherited).
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isScore(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return "metrics" in value && "score" in value;
}
189
/**
 * Runtime guard: true when `value` is a non-null object that has both a
 * `verdict` and a `requiredArtifacts` property (own or inherited).
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isGate(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return "verdict" in value && "requiredArtifacts" in value;
}
192
/**
 * Runtime guard: true when `value` is a non-null object that has a
 * `reportPath` property and does NOT have a `verdict` property.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isReport(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    if ("verdict" in value) {
        return false;
    }
    return "reportPath" in value;
}
@@ -0,0 +1,51 @@
1
+ "use strict";
2
+ // Pure, stateless normalization helpers for the multi-agent eval replay layer —
3
+ // timestamp/path scrubbing, recursive value normalization, and stable
4
+ // stringification. Carved out of multi-agent-eval.ts (FreeBSD-audit god-module
5
+ // split) so the eval router no longer bundles the deterministic-normalization
6
+ // primitives. Nothing here touches state; everything is a pure function of its
7
+ // arguments. Re-exported verbatim from multi-agent-eval.ts so every importer
8
+ // (including node-snapshot.ts and reclamation.ts) stays byte-unchanged.
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.normalizeValue = normalizeValue;
11
+ exports.lines = lines;
12
+ exports.replayStableStringify = replayStableStringify;
13
/**
 * Recursively normalize a value for deterministic comparison.
 *
 * - Arrays are normalized element by element.
 * - Strings are passed through normalizeString.
 * - Other primitives (and null/undefined) are returned unchanged.
 * - Objects are rebuilt with keys in sorted order; the timestamp keys listed
 *   below are dropped; path-like keys ("path", "cwd", "runDir", or any key
 *   ending in "Path" or "Dir") are coerced to a string and scrubbed with
 *   normalizeString; every other key is normalized recursively.
 *
 * @param {unknown} value - Any JSON-like value.
 * @returns {unknown} The normalized copy.
 */
function normalizeValue(value) {
    if (Array.isArray(value)) {
        return value.map(normalizeValue);
    }
    if (typeof value === "string") {
        return normalizeString(value);
    }
    if (!value || typeof value !== "object") {
        return value;
    }
    const droppedKeys = ["createdAt", "updatedAt", "recordedAt", "selectedAt", "replayedAt", "generatedAt"];
    const result = {};
    const keptKeys = Object.keys(value).sort().filter((key) => !droppedKeys.includes(key));
    for (const key of keptKeys) {
        const pathLike = key === "path" || key === "cwd" || key === "runDir" || key.endsWith("Path") || key.endsWith("Dir");
        // NOTE(review): String(...) also stringifies non-string values stored
        // under path-like keys (e.g. undefined -> "undefined"); kept as-is so
        // normalized output stays unchanged.
        result[key] = pathLike ? normalizeString(String(value[key])) : normalizeValue(value[key]);
    }
    return result;
}
35
/**
 * Scrub run-specific fragments out of a string so two runs compare equal.
 *
 * Rules are applied in order, each to the output of the previous one:
 *  1. compact timestamps (8 digits, "T", 6 digits, "Z") -> "<timestamp>"
 *  2. dashed date + "T" + time + "Z" timestamps         -> "<timestamp>"
 *  3. absolute paths through ".cw/runs/<name>"          -> "<run-dir>"
 *  4. absolute paths through ".cw/evals/<name>"         -> "<eval-dir>"
 *  5. /var/folders/..., /tmp/..., /private/tmp/...      -> "<tmp>"
 *
 * @param {string} value - Text to scrub.
 * @returns {string} The scrubbed text.
 */
function normalizeString(value) {
    const rules = [
        [/[0-9]{8}T[0-9]{6}Z/g, "<timestamp>"],
        [/[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.]+Z/g, "<timestamp>"],
        [/\/[^"\s]+\/\.cw\/runs\/[^"\s/]+/g, "<run-dir>"],
        [/\/[^"\s]+\/\.cw\/evals\/[^"\s/]+/g, "<eval-dir>"],
        [/\/var\/folders\/[^"\s]+|\/tmp\/[^"\s]+|\/private\/tmp\/[^"\s]+/g, "<tmp>"]
    ];
    let text = value;
    for (const [pattern, placeholder] of rules) {
        text = text.replace(pattern, placeholder);
    }
    return text;
}
43
/**
 * Turn a value into a sorted list of stable JSON strings.
 *
 * The value is normalized first. If the normalized result is an array, each
 * element is stringified separately; otherwise the single normalized value is
 * stringified. The resulting strings are returned in default sort order.
 *
 * @param {unknown} value - Any JSON-like value.
 * @returns {string[]} Sorted stable-stringified entries.
 */
function lines(value) {
    const normalized = normalizeValue(value);
    const entries = Array.isArray(normalized) ? normalized : [normalized];
    return entries.map((entry) => replayStableStringify(entry)).sort();
}
49
/**
 * Serialize a value to JSON after running it through normalizeValue, so the
 * output reflects the normalized (sorted-key, scrubbed) form of the input.
 *
 * @param {unknown} value - Any JSON-like value.
 * @returns {string} JSON text of the normalized value.
 */
function replayStableStringify(value) {
    const canonical = normalizeValue(value);
    return JSON.stringify(canonical);
}
@@ -3,16 +3,13 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.formatMultiAgentEval = exports.replayStableStringify = exports.normalizeValue = exports.lines = void 0;
6
7
  exports.createMultiAgentReplaySnapshot = createMultiAgentReplaySnapshot;
7
8
  exports.replayMultiAgentSnapshot = replayMultiAgentSnapshot;
8
9
  exports.compareMultiAgentReplay = compareMultiAgentReplay;
9
10
  exports.scoreMultiAgentReplay = scoreMultiAgentReplay;
10
11
  exports.gateMultiAgentEval = gateMultiAgentEval;
11
12
  exports.reportMultiAgentEval = reportMultiAgentEval;
12
- exports.formatMultiAgentEval = formatMultiAgentEval;
13
- exports.normalizeValue = normalizeValue;
14
- exports.lines = lines;
15
- exports.stableStringify = stableStringify;
16
13
  const node_fs_1 = __importDefault(require("node:fs"));
17
14
  const node_path_1 = __importDefault(require("node:path"));
18
15
  const multi_agent_1 = require("./multi-agent");
@@ -24,6 +21,16 @@ const trust_audit_1 = require("./trust-audit");
24
21
  const state_explosion_1 = require("./state-explosion");
25
22
  const evidence_reasoning_1 = require("./evidence-reasoning");
26
23
  const state_1 = require("./state");
24
+ const normalize_1 = require("./multi-agent-eval/normalize");
25
+ // Pure normalization primitives carved into ./multi-agent-eval/normalize.ts;
26
+ // re-exported verbatim so every external importer stays byte-unchanged.
27
+ var normalize_2 = require("./multi-agent-eval/normalize");
28
+ Object.defineProperty(exports, "lines", { enumerable: true, get: function () { return normalize_2.lines; } });
29
+ Object.defineProperty(exports, "normalizeValue", { enumerable: true, get: function () { return normalize_2.normalizeValue; } });
30
+ Object.defineProperty(exports, "replayStableStringify", { enumerable: true, get: function () { return normalize_2.replayStableStringify; } });
31
+ // Human formatter (CLI-only renderer) carved into ./multi-agent-eval/format.ts.
32
+ var format_1 = require("./multi-agent-eval/format");
33
+ Object.defineProperty(exports, "formatMultiAgentEval", { enumerable: true, get: function () { return format_1.formatMultiAgentEval; } });
27
34
  const METRIC_SECTIONS = [
28
35
  { metric: "replay_completed", section: "workflow", title: "Replay completed" },
29
36
  { metric: "graph_parity", section: "topologyShape", title: "Topology graph parity" },
@@ -84,7 +91,7 @@ function createMultiAgentReplaySnapshot(run, options = {}) {
84
91
  appVersion: run.workflow.app?.version,
85
92
  title: run.workflow.title
86
93
  },
87
- inputs: normalizeValue(run.inputs),
94
+ inputs: (0, normalize_1.normalizeValue)(run.inputs),
88
95
  paths: {
89
96
  suiteDir,
90
97
  snapshotPath,
@@ -152,7 +159,7 @@ function compareMultiAgentReplay(baselineTarget, replayTarget) {
152
159
  const findings = [];
153
160
  for (const spec of ALL_METRIC_SECTIONS) {
154
161
  const { baselineValue, replayValue } = comparisonValues(spec.metric, spec.section, baseline.normalized, replay);
155
- const equal = stableStringify(baselineValue) === stableStringify(replayValue);
162
+ const equal = (0, normalize_1.replayStableStringify)(baselineValue) === (0, normalize_1.replayStableStringify)(replayValue);
156
163
  const id = String(spec.section);
157
164
  sections[id] = {
158
165
  id,
@@ -391,158 +398,6 @@ function loadScoreForTarget(target, scorePath) {
391
398
  }
392
399
  return scoreMultiAgentReplay(target);
393
400
  }
394
- function formatMultiAgentEval(value) {
395
- if (isGate(value)) {
396
- return [
397
- "Eval Suite",
398
- ` ${value.suiteId}`,
399
- "",
400
- "Replay Status",
401
- ` ${value.status} (${value.score}/${value.maxScore})`,
402
- "",
403
- "Regression Findings",
404
- ...(value.findings.length ? value.findings.map((entry) => ` ${entry.severity} ${entry.category}: ${entry.reason}`) : [" none"]),
405
- "",
406
- "Final Verdict",
407
- ` ${value.verdict}`,
408
- "",
409
- "Next Action",
410
- ` ${value.nextAction}`
411
- ].join("\n");
412
- }
413
- if (isScore(value)) {
414
- return [
415
- "Eval Suite",
416
- ` ${node_path_1.default.basename(value.paths.suiteDir)}`,
417
- "",
418
- "Replay Status",
419
- ` ${value.status} (${value.score}/${value.maxScore})`,
420
- "",
421
- "Graph Comparison",
422
- ` ${metricStatus(value, "replay_completed")}; ${metricStatus(value, "graph_parity")}; ${metricStatus(value, "role_parity")}; ${metricStatus(value, "group_parity")}; ${metricStatus(value, "membership_parity")}; ${metricStatus(value, "fanout_parity")}; ${metricStatus(value, "fanin_parity")}; ${metricStatus(value, "dependency_parity")}; ${metricStatus(value, "failure_parity")}`,
423
- "",
424
- "Evidence Comparison",
425
- ` ${metricStatus(value, "blackboard_record_parity")}; ${metricStatus(value, "evidence_adoption_parity")}; ${metricStatus(value, "blackboard_provenance_parity")}`,
426
- "",
427
- "Trust / Policy / Audit Comparison",
428
- ` ${metricStatus(value, "trust_audit_parity")}; ${metricStatus(value, "role_policy_parity")}; ${metricStatus(value, "permission_decision_parity")}; ${metricStatus(value, "policy_violation_parity")}; ${metricStatus(value, "judge_rationale_parity")}; ${metricStatus(value, "panel_decision_parity")}`,
429
- "",
430
- "Candidate Score Comparison",
431
- ` ${metricStatus(value, "candidate_score_parity")}`,
432
- "",
433
- "Selection / Commit Gate",
434
- ` ${metricStatus(value, "selection_parity")}; ${metricStatus(value, "verifier_commit_gate_parity")}`,
435
- "",
436
- "State Explosion Summaries",
437
- ` ${metricStatus(value, "summary_freshness")}; ${metricStatus(value, "compact_graph_parity")}; ${metricStatus(value, "blackboard_digest_parity")}; ${metricStatus(value, "critical_path_parity")}; ${metricStatus(value, "evidence_digest_parity")}; ${metricStatus(value, "expansion_ref_integrity")}`,
438
- "",
439
- "Regression Findings",
440
- ...(value.findings.length ? value.findings.map((entry) => ` ${entry.severity} ${entry.category}: ${entry.reason}`) : [" none"]),
441
- "",
442
- "Final Verdict",
443
- ` ${value.status}`,
444
- "",
445
- "Next Action",
446
- ` ${value.status === "pass" ? "Run eval gate or include report path as evidence." : "Review findings before release."}`
447
- ].join("\n");
448
- }
449
- if (isComparison(value)) {
450
- return [
451
- "Eval Suite",
452
- ` ${node_path_1.default.basename(value.paths.suiteDir)}`,
453
- "",
454
- "Replay Status",
455
- ` ${value.status}`,
456
- "",
457
- "Graph Comparison",
458
- ` ${sectionStatus(value, "workflow")}; ${sectionStatus(value, "topologyShape")}; ${sectionStatus(value, "roles")}; ${sectionStatus(value, "groups")}; ${sectionStatus(value, "memberships")}; ${sectionStatus(value, "fanouts")}; ${sectionStatus(value, "fanins")}; ${sectionStatus(value, "dependencyEdges")}; ${sectionStatus(value, "failures")}`,
459
- "",
460
- "Evidence Comparison",
461
- ` ${sectionStatus(value, "blackboardRecords")}; ${sectionStatus(value, "evidenceAdoption")}; ${sectionStatus(value, "messageProvenance")}`,
462
- "",
463
- "Trust / Policy / Audit Comparison",
464
- ` ${sectionStatus(value, "blackboardWriteAudit")}; ${sectionStatus(value, "rolePolicies")}; ${sectionStatus(value, "permissionDecisions")}; ${sectionStatus(value, "policyViolations")}; ${sectionStatus(value, "judgeRationales")}; ${sectionStatus(value, "panelDecisions")}`,
465
- "",
466
- "Candidate Score Comparison",
467
- ` ${sectionStatus(value, "candidateScores")}`,
468
- "",
469
- "Selection / Commit Gate",
470
- ` ${sectionStatus(value, "selectedCandidates")}; ${sectionStatus(value, "verifierCommitGate")}`,
471
- "",
472
- "Regression Findings",
473
- ...(value.findings.length ? value.findings.map((entry) => ` ${entry.severity} ${entry.category}: ${entry.reason}`) : [" none"]),
474
- "",
475
- "Final Verdict",
476
- ` ${value.status}`,
477
- "",
478
- "Next Action",
479
- " Score the replay or run the eval gate."
480
- ].join("\n");
481
- }
482
- if (isReplay(value)) {
483
- return [
484
- "Eval Suite",
485
- ` ${node_path_1.default.basename(value.paths.suiteDir)}`,
486
- "",
487
- "Replay Status",
488
- ` ${value.status}`,
489
- ` replay=${value.paths.replayRunPath}`,
490
- "",
491
- "Next Action",
492
- ` node scripts/cw.js eval compare ${value.paths.snapshotPath} ${value.paths.replayRunPath}`
493
- ].join("\n");
494
- }
495
- if (isSnapshot(value)) {
496
- return [
497
- "Eval Suite",
498
- ` ${value.id}`,
499
- "",
500
- "Replay Status",
501
- " snapshot captured",
502
- ` snapshot=${value.paths.snapshotPath}`,
503
- "",
504
- "Graph Comparison",
505
- ` topology records=${value.normalized.topologyShape.length}`,
506
- "",
507
- "Evidence Comparison",
508
- ` evidence records=${value.normalized.evidenceAdoption.length}`,
509
- "",
510
- "Trust / Policy / Audit Comparison",
511
- ` audit records=${value.normalized.blackboardWriteAudit.length + value.normalized.messageProvenance.length}`,
512
- "",
513
- "Candidate Score Comparison",
514
- ` score records=${value.normalized.candidateScores.length}`,
515
- "",
516
- "Selection / Commit Gate",
517
- ` selected=${value.normalized.selectedCandidates.length}; commit gates=${value.normalized.verifierCommitGate.length}`,
518
- "",
519
- "Regression Findings",
520
- " none",
521
- "",
522
- "Final Verdict",
523
- " snapshot-ready",
524
- "",
525
- "Next Action",
526
- ` node scripts/cw.js eval replay ${value.paths.snapshotPath}`
527
- ].join("\n");
528
- }
529
- if (isReport(value)) {
530
- return [
531
- "Eval Suite",
532
- ` ${node_path_1.default.dirname(value.reportPath)}`,
533
- "",
534
- "Replay Status",
535
- ` ${value.status} (${value.score}/${value.maxScore})`,
536
- "",
537
- "Final Verdict",
538
- ` report written: ${value.reportPath}`,
539
- "",
540
- "Next Action",
541
- " Run eval gate if this is release evidence."
542
- ].join("\n");
543
- }
544
- return JSON.stringify(value, null, 2);
545
- }
546
401
  function captureRun(run) {
547
402
  return {
548
403
  topology: run.topologies || { schemaVersion: 1, runs: [] },
@@ -566,13 +421,13 @@ function normalizeRun(run) {
566
421
  const topologies = (0, topology_1.summarizeTopologies)(run);
567
422
  const multiAgent = (0, multi_agent_1.summarizeMultiAgent)(run);
568
423
  return {
569
- workflow: normalizeValue({
424
+ workflow: (0, normalize_1.normalizeValue)({
570
425
  id: run.workflow.id,
571
426
  appId: run.workflow.app?.id,
572
427
  appVersion: run.workflow.app?.version,
573
428
  taskCount: run.tasks.length
574
429
  }),
575
- topologyShape: lines([
430
+ topologyShape: (0, normalize_1.lines)([
576
431
  topologies.active.map((entry) => ({
577
432
  topologyId: entry.topologyId,
578
433
  status: entry.status,
@@ -583,22 +438,22 @@ function normalizeRun(run) {
583
438
  })),
584
439
  multiAgent.groupsDetail
585
440
  ]),
586
- roles: lines(run.multiAgent?.roles || []),
587
- groups: lines(run.multiAgent?.groups || []),
588
- memberships: lines(run.multiAgent?.memberships || []),
589
- fanouts: lines(run.multiAgent?.fanouts || []),
590
- fanins: lines(run.multiAgent?.fanins || []),
591
- dependencyEdges: lines(operator.dependencies.map((entry) => ({ from: entry.from, to: entry.to, label: entry.label, status: entry.status }))),
592
- failures: lines(operator.failures.map((entry) => ({ kind: entry.kind, status: entry.status, owner: entry.owner, reason: entry.reason }))),
593
- blackboardRecords: lines([blackboard.boards, blackboard.topics, blackboard.messages, blackboard.contexts, blackboard.artifacts, blackboard.snapshots, blackboard.decisions]),
594
- messageProvenance: lines(trust.messageProvenance || []),
595
- rolePolicies: lines(trust.rolePolicies || []),
596
- permissionDecisions: lines(trust.permissionDecisions || []),
597
- blackboardWriteAudit: lines(trust.blackboardWrites || []),
598
- judgeRationales: lines(trust.judgeRationales || []),
599
- panelDecisions: lines(trust.panelDecisions || []),
600
- policyViolations: lines(trust.policyViolations || []),
601
- evidenceAdoption: lines(operator.evidence.map((entry) => ({
441
+ roles: (0, normalize_1.lines)(run.multiAgent?.roles || []),
442
+ groups: (0, normalize_1.lines)(run.multiAgent?.groups || []),
443
+ memberships: (0, normalize_1.lines)(run.multiAgent?.memberships || []),
444
+ fanouts: (0, normalize_1.lines)(run.multiAgent?.fanouts || []),
445
+ fanins: (0, normalize_1.lines)(run.multiAgent?.fanins || []),
446
+ dependencyEdges: (0, normalize_1.lines)(operator.dependencies.map((entry) => ({ from: entry.from, to: entry.to, label: entry.label, status: entry.status }))),
447
+ failures: (0, normalize_1.lines)(operator.failures.map((entry) => ({ kind: entry.kind, status: entry.status, owner: entry.owner, reason: entry.reason }))),
448
+ blackboardRecords: (0, normalize_1.lines)([blackboard.boards, blackboard.topics, blackboard.messages, blackboard.contexts, blackboard.artifacts, blackboard.snapshots, blackboard.decisions]),
449
+ messageProvenance: (0, normalize_1.lines)(trust.messageProvenance || []),
450
+ rolePolicies: (0, normalize_1.lines)(trust.rolePolicies || []),
451
+ permissionDecisions: (0, normalize_1.lines)(trust.permissionDecisions || []),
452
+ blackboardWriteAudit: (0, normalize_1.lines)(trust.blackboardWrites || []),
453
+ judgeRationales: (0, normalize_1.lines)(trust.judgeRationales || []),
454
+ panelDecisions: (0, normalize_1.lines)(trust.panelDecisions || []),
455
+ policyViolations: (0, normalize_1.lines)(trust.policyViolations || []),
456
+ evidenceAdoption: (0, normalize_1.lines)(operator.evidence.map((entry) => ({
602
457
  ref: entry.ref || entry.id,
603
458
  status: entry.status,
604
459
  adoptedBy: entry.adoptedBy,
@@ -606,15 +461,15 @@ function normalizeRun(run) {
606
461
  selectionIds: entry.selectionIds,
607
462
  commitIds: entry.commitIds
608
463
  }))),
609
- candidateScores: lines(collectCandidateScores(run)),
610
- selectedCandidates: lines((run.candidateSelections || []).map((entry) => ({
464
+ candidateScores: (0, normalize_1.lines)(collectCandidateScores(run)),
465
+ selectedCandidates: (0, normalize_1.lines)((run.candidateSelections || []).map((entry) => ({
611
466
  candidateId: entry.candidateId,
612
467
  scoreId: entry.scoreId,
613
468
  verifierNodeId: entry.verifierNodeId,
614
469
  reason: entry.reason,
615
470
  evidenceCount: entry.evidence.length
616
471
  }))),
617
- verifierCommitGate: lines((run.commits || []).map((entry) => ({
472
+ verifierCommitGate: (0, normalize_1.lines)((run.commits || []).map((entry) => ({
618
473
  verifierGated: Boolean(entry.verifierGated),
619
474
  checkpoint: Boolean(entry.checkpoint),
620
475
  candidateId: entry.candidateId,
@@ -631,7 +486,11 @@ function collectCandidateScores(run) {
631
486
  const scores = [];
632
487
  for (const candidate of run.candidates || []) {
633
488
  for (const scoreId of candidate.scores || []) {
634
- const scorePath = node_path_1.default.join(run.paths.candidatesDir || node_path_1.default.join(run.paths.runDir, "candidates"), `${(0, state_1.safeFileName)(candidate.id)}.${(0, state_1.safeFileName)(scoreId)}.score.json`);
489
+ // Canonical nested score path MUST match the writers (candidate-scoring.ts
490
+ // persistScore, commit.ts): candidates/<candidateId>/scores/<scoreId>.json.
491
+ // The old flat `<id>.<scoreId>.score.json` path was written by nobody, so the
492
+ // candidate_score_parity eval metric silently scored empty placeholders.
493
+ const scorePath = node_path_1.default.join(run.paths.candidatesDir || node_path_1.default.join(run.paths.runDir, "candidates"), (0, state_1.safeFileName)(candidate.id), "scores", `${(0, state_1.safeFileName)(scoreId)}.json`);
635
494
  if (node_fs_1.default.existsSync(scorePath)) {
636
495
  const score = (0, state_1.readJson)(scorePath);
637
496
  scores.push({
@@ -784,45 +643,6 @@ function loadSuiteFromDir(suiteDir) {
784
643
  paths: { suiteDir, snapshotPath: node_path_1.default.join(suiteDir, "snapshot.json") }
785
644
  };
786
645
  }
787
- function normalizeValue(value) {
788
- if (Array.isArray(value))
789
- return value.map(normalizeValue);
790
- if (!value || typeof value !== "object") {
791
- if (typeof value === "string")
792
- return normalizeString(value);
793
- return value;
794
- }
795
- const record = value;
796
- const normalized = {};
797
- for (const key of Object.keys(record).sort()) {
798
- if (["createdAt", "updatedAt", "recordedAt", "selectedAt", "replayedAt", "generatedAt"].includes(key))
799
- continue;
800
- if (key.endsWith("Path") || key === "path" || key === "cwd" || key === "runDir" || key.endsWith("Dir")) {
801
- normalized[key] = normalizeString(String(record[key]));
802
- }
803
- else {
804
- normalized[key] = normalizeValue(record[key]);
805
- }
806
- }
807
- return normalized;
808
- }
809
- function normalizeString(value) {
810
- return value
811
- .replace(/[0-9]{8}T[0-9]{6}Z/g, "<timestamp>")
812
- .replace(/[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9:.]+Z/g, "<timestamp>")
813
- .replace(/\/[^"\s]+\/\.cw\/runs\/[^"\s/]+/g, "<run-dir>")
814
- .replace(/\/[^"\s]+\/\.cw\/evals\/[^"\s/]+/g, "<eval-dir>")
815
- .replace(/\/var\/folders\/[^"\s]+|\/tmp\/[^"\s]+|\/private\/tmp\/[^"\s]+/g, "<tmp>");
816
- }
817
- function lines(value) {
818
- const normalized = normalizeValue(value);
819
- if (Array.isArray(normalized))
820
- return normalized.map((entry) => stableStringify(entry)).sort();
821
- return [stableStringify(normalized)].sort();
822
- }
823
- function stableStringify(value) {
824
- return JSON.stringify(normalizeValue(value));
825
- }
826
646
  function now() {
827
647
  return new Date().toISOString();
828
648
  }
@@ -830,28 +650,3 @@ function metricLine(score, id) {
830
650
  const metric = score.metrics.find((entry) => entry.id === id);
831
651
  return `- ${id}: ${metric?.status || "missing"} - ${metric?.reason || "metric missing"}`;
832
652
  }
833
- function metricStatus(score, id) {
834
- const metric = score.metrics.find((entry) => entry.id === id);
835
- return `${id}=${metric?.status || "missing"}`;
836
- }
837
- function sectionStatus(comparison, id) {
838
- return `${id}=${comparison.sections[id]?.status || "missing"}`;
839
- }
840
- function isSnapshot(value) {
841
- return Boolean(value && typeof value === "object" && value.kind === "multi-agent-replay-snapshot");
842
- }
843
- function isReplay(value) {
844
- return Boolean(value && typeof value === "object" && value.kind === "multi-agent-replay-run");
845
- }
846
- function isComparison(value) {
847
- return Boolean(value && typeof value === "object" && "sections" in value && "findings" in value);
848
- }
849
- function isScore(value) {
850
- return Boolean(value && typeof value === "object" && "metrics" in value && "score" in value);
851
- }
852
- function isGate(value) {
853
- return Boolean(value && typeof value === "object" && "verdict" in value && "requiredArtifacts" in value);
854
- }
855
- function isReport(value) {
856
- return Boolean(value && typeof value === "object" && "reportPath" in value && !("verdict" in value));
857
- }
@@ -3,7 +3,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.hostRun = hostRun;
4
4
  exports.hostStatus = hostStatus;
5
5
  exports.hostStep = hostStep;
6
- exports.hostAutoStep = hostAutoStep;
7
6
  exports.hostBlackboard = hostBlackboard;
8
7
  exports.hostScore = hostScore;
9
8
  exports.hostSelect = hostSelect;
@@ -179,24 +178,6 @@ function hostStep(run, options = {}) {
179
178
  requiredHostAction: "No safe deterministic step is available. Use multi-agent status for the next explicit command."
180
179
  });
181
180
  }
182
- /** Auto-step: loop hostStep until blocked, complete, or max iterations reached
183
- * (v0.1.74). Each iteration performs one deterministic step. Returns the final
184
- * response and the number of steps taken. */
185
- function hostAutoStep(run, options = {}) {
186
- const maxSteps = Number(options.maxSteps || options["max-steps"] || 20);
187
- const steps = [];
188
- let response = envelope(run, "step", { performed: "none" });
189
- for (let i = 0; i < maxSteps; i++) {
190
- response = hostStep(run, options);
191
- const performed = response.data?.performed;
192
- steps.push({ step: i + 1, performed: String(performed || "none") });
193
- if (performed === "none" || performed === undefined)
194
- break;
195
- if (response.data?.requiredHostAction)
196
- break;
197
- }
198
- return { finalResponse: response, stepsTaken: steps.length, steps };
199
- }
200
181
  function hostBlackboard(run, action, options = {}) {
201
182
  const topology = optionalSingleActiveTopology(run);
202
183
  const blackboardId = resolveHostBlackboardId(run, topology, options);