cool-workflow 0.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +20 -0
- package/.codex-plugin/mcp.json +10 -0
- package/.codex-plugin/plugin.json +38 -0
- package/.mcp.json +10 -0
- package/LICENSE +24 -0
- package/README.md +638 -0
- package/apps/architecture-review/app.json +51 -0
- package/apps/architecture-review/workflow.js +116 -0
- package/apps/end-to-end-golden-path/app.json +30 -0
- package/apps/end-to-end-golden-path/workflow.js +33 -0
- package/apps/pr-review-fix-ci/app.json +59 -0
- package/apps/pr-review-fix-ci/workflow.js +90 -0
- package/apps/release-cut/app.json +54 -0
- package/apps/release-cut/workflow.js +82 -0
- package/apps/research-synthesis/app.json +50 -0
- package/apps/research-synthesis/workflow.js +76 -0
- package/apps/workflow-app-framework-demo/app.json +29 -0
- package/apps/workflow-app-framework-demo/workflow.js +44 -0
- package/dist/agent-config.js +223 -0
- package/dist/candidate-scoring.js +715 -0
- package/dist/capability-core.js +630 -0
- package/dist/capability-dispatcher.js +86 -0
- package/dist/capability-registry.js +523 -0
- package/dist/cli.js +1276 -0
- package/dist/collaboration.js +727 -0
- package/dist/commit.js +570 -0
- package/dist/contract-migration.js +234 -0
- package/dist/coordinator.js +1163 -0
- package/dist/daemon.js +44 -0
- package/dist/dispatch.js +201 -0
- package/dist/drive.js +503 -0
- package/dist/error-feedback.js +415 -0
- package/dist/evidence-grounding.js +179 -0
- package/dist/evidence-reasoning.js +733 -0
- package/dist/execution-backend.js +1279 -0
- package/dist/harness.js +61 -0
- package/dist/mcp-server.js +1615 -0
- package/dist/multi-agent-eval.js +857 -0
- package/dist/multi-agent-host.js +764 -0
- package/dist/multi-agent-operator-ux.js +537 -0
- package/dist/multi-agent-trust.js +366 -0
- package/dist/multi-agent.js +1173 -0
- package/dist/node-snapshot.js +270 -0
- package/dist/observability.js +922 -0
- package/dist/operator-ux.js +971 -0
- package/dist/orchestrator/audit-operations.js +182 -0
- package/dist/orchestrator/candidate-operations.js +117 -0
- package/dist/orchestrator/cli-options.js +288 -0
- package/dist/orchestrator/collaboration-operations.js +86 -0
- package/dist/orchestrator/feedback-operations.js +81 -0
- package/dist/orchestrator/host-operations.js +78 -0
- package/dist/orchestrator/lifecycle-operations.js +462 -0
- package/dist/orchestrator/migration-operations.js +44 -0
- package/dist/orchestrator/multi-agent-operations.js +362 -0
- package/dist/orchestrator/report.js +369 -0
- package/dist/orchestrator/topology-operations.js +84 -0
- package/dist/orchestrator.js +874 -0
- package/dist/pipeline-contract.js +92 -0
- package/dist/pipeline-runner.js +285 -0
- package/dist/reclamation.js +882 -0
- package/dist/result-normalize.js +194 -0
- package/dist/run-export.js +64 -0
- package/dist/run-registry.js +1347 -0
- package/dist/run-state-schema.js +67 -0
- package/dist/sandbox-profile.js +471 -0
- package/dist/scheduler.js +266 -0
- package/dist/scheduling.js +184 -0
- package/dist/schema-validate.js +98 -0
- package/dist/state-explosion.js +1213 -0
- package/dist/state-migrations.js +463 -0
- package/dist/state-node.js +301 -0
- package/dist/state.js +308 -0
- package/dist/telemetry-attestation.js +156 -0
- package/dist/telemetry-ledger.js +145 -0
- package/dist/topology.js +527 -0
- package/dist/triggers.js +159 -0
- package/dist/trust-audit.js +475 -0
- package/dist/types/blackboard.js +2 -0
- package/dist/types/boundary.js +29 -0
- package/dist/types/candidate.js +2 -0
- package/dist/types/collaboration.js +2 -0
- package/dist/types/core.js +2 -0
- package/dist/types/drive.js +10 -0
- package/dist/types/error-feedback.js +2 -0
- package/dist/types/evidence-reasoning.js +2 -0
- package/dist/types/execution-backend.js +2 -0
- package/dist/types/multi-agent.js +2 -0
- package/dist/types/observability.js +2 -0
- package/dist/types/pipeline.js +2 -0
- package/dist/types/reclamation.js +8 -0
- package/dist/types/result.js +2 -0
- package/dist/types/run-registry.js +2 -0
- package/dist/types/run.js +2 -0
- package/dist/types/sandbox.js +2 -0
- package/dist/types/schedule.js +2 -0
- package/dist/types/state-node.js +2 -0
- package/dist/types/topology.js +2 -0
- package/dist/types/trust.js +2 -0
- package/dist/types/workbench.js +2 -0
- package/dist/types/worker.js +2 -0
- package/dist/types/workflow-app.js +2 -0
- package/dist/types.js +43 -0
- package/dist/verifier-registry.js +46 -0
- package/dist/verifier.js +78 -0
- package/dist/version.js +8 -0
- package/dist/workbench-host.js +172 -0
- package/dist/workbench.js +190 -0
- package/dist/worker-isolation.js +1028 -0
- package/dist/workflow-api.js +98 -0
- package/dist/workflow-app-framework.js +626 -0
- package/docs/agent-delegation-drive.7.md +190 -0
- package/docs/agent-framework.md +176 -0
- package/docs/candidate-scoring.7.md +106 -0
- package/docs/canonical-workflow-apps.7.md +137 -0
- package/docs/capability-topology-registry.7.md +168 -0
- package/docs/cli-mcp-parity.7.md +373 -0
- package/docs/contract-migration-tooling.7.md +123 -0
- package/docs/control-plane-scheduling.7.md +110 -0
- package/docs/coordinator-blackboard.7.md +183 -0
- package/docs/dogfood/architecture-review-cool-workflow.md +16 -0
- package/docs/dogfood-one-real-repo.7.md +168 -0
- package/docs/durable-state-and-locking.7.md +107 -0
- package/docs/end-to-end-golden-path.7.md +117 -0
- package/docs/error-feedback.7.md +153 -0
- package/docs/evidence-adoption-reasoning-chain.7.md +270 -0
- package/docs/execution-backends.7.md +300 -0
- package/docs/getting-started.md +99 -0
- package/docs/index.md +41 -0
- package/docs/mcp-app-surface.7.md +235 -0
- package/docs/multi-agent-cli-mcp-surface.7.md +265 -0
- package/docs/multi-agent-eval-replay-harness.7.md +302 -0
- package/docs/multi-agent-operator-ux.7.md +314 -0
- package/docs/multi-agent-runtime-core.7.md +231 -0
- package/docs/multi-agent-topologies.7.md +103 -0
- package/docs/multi-agent-trust-policy-audit.7.md +154 -0
- package/docs/node-snapshot-diff-replay.7.md +135 -0
- package/docs/observability-cost-accounting.7.md +194 -0
- package/docs/operator-ux.7.md +180 -0
- package/docs/pipeline-runner.7.md +136 -0
- package/docs/project-index.md +261 -0
- package/docs/real-execution-backends.7.md +142 -0
- package/docs/release-and-migration.7.md +280 -0
- package/docs/release-tooling.7.md +159 -0
- package/docs/routines.md +48 -0
- package/docs/run-registry-control-plane.7.md +312 -0
- package/docs/run-retention-reclamation.7.md +191 -0
- package/docs/sandbox-profiles.7.md +137 -0
- package/docs/scheduled-tasks.md +80 -0
- package/docs/security-trust-hardening.7.md +117 -0
- package/docs/state-explosion-management.7.md +264 -0
- package/docs/state-node.7.md +96 -0
- package/docs/team-collaboration.7.md +207 -0
- package/docs/unix-principles.md +192 -0
- package/docs/verifier-gated-commit.7.md +140 -0
- package/docs/web-desktop-workbench.7.md +215 -0
- package/docs/worker-isolation.7.md +167 -0
- package/docs/workflow-app-framework.7.md +274 -0
- package/manifest/README.md +43 -0
- package/manifest/plugin.manifest.json +316 -0
- package/manifest/pricing.policy.json +14 -0
- package/package.json +79 -0
- package/scripts/agents/claude-p-agent.js +104 -0
- package/scripts/agents/claude-p-agent.sh +9 -0
- package/scripts/agents/cw-attest-keygen.js +55 -0
- package/scripts/agents/cw-attest-wrap.js +143 -0
- package/scripts/block-unapproved-tag.sh +39 -0
- package/scripts/bump-version.js +249 -0
- package/scripts/canonical-apps.js +171 -0
- package/scripts/cw.js +4 -0
- package/scripts/dist-drift-check.js +79 -0
- package/scripts/dogfood-architecture-review.js +237 -0
- package/scripts/dogfood-release.js +624 -0
- package/scripts/forward-ref-docs.js +73 -0
- package/scripts/gen-manifests.js +232 -0
- package/scripts/golden-path.js +300 -0
- package/scripts/mcp-server.js +4 -0
- package/scripts/new-feature.js +121 -0
- package/scripts/parity-check.js +213 -0
- package/scripts/release-check.js +118 -0
- package/scripts/release-flow.js +272 -0
- package/scripts/release-gate.sh +85 -0
- package/scripts/sync-project-index.js +387 -0
- package/scripts/validate-run-state-schema.js +126 -0
- package/scripts/verify-container-selfref.js +64 -0
- package/scripts/version-sync-check.js +237 -0
- package/skills/cool-workflow/SKILL.md +162 -0
- package/skills/cool-workflow/references/commands.md +282 -0
- package/tsconfig.json +16 -0
- package/ui/workbench/app.css +76 -0
- package/ui/workbench/app.js +159 -0
- package/ui/workbench/index.html +32 -0
- package/workflows/architecture-review.workflow.js +84 -0
- package/workflows/research-synthesis.workflow.js +47 -0
|
@@ -0,0 +1,922 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Observability + Cost Accounting (v0.1.31) — DERIVED metrics, ATTESTED cost.
|
|
3
|
+
//
|
|
4
|
+
// BSD / Unix discipline (each non-trivial choice cites its tenet):
|
|
5
|
+
//
|
|
6
|
+
// - DERIVED, NOT A TELEMETRY PIPELINE. Every number here is a PROJECTION of one
|
|
7
|
+
// run's durable `.cw/runs/<id>/state.json`: timestamps → durations, verifier
|
|
8
|
+
// nodes → pass rate, candidates → acceptance rate, failed workers/feedback →
|
|
9
|
+
// failure rate. No metrics database, no collector daemon, no hidden counter.
|
|
10
|
+
// deriveMetricsReport() is a PURE function of (run, now, policy) and NEVER
|
|
11
|
+
// mutates source records.
|
|
12
|
+
//
|
|
13
|
+
// - COST IS ATTESTED, NEVER MEASURED OR FABRICATED. CW does not call the model;
|
|
14
|
+
// the host/worker does. Token usage is read from the host-attested UsageRecord
|
|
15
|
+
// on the task/worker record. Absent usage is `unreported` — never 0. Cost is
|
|
16
|
+
// `attested` only when attested usage is priced by an EXACT policy match;
|
|
17
|
+
// assumed pricing is a separate `estimated` figure; the two never conflate.
|
|
18
|
+
//
|
|
19
|
+
// - MECHANISM VS POLICY. This module is MECHANISM. The pricing table is POLICY,
|
|
20
|
+
// supplied as DATA (CostPolicy) and kept out of the kernel: the same attested
|
|
21
|
+
// usage yields different cost under different policies without touching code.
|
|
22
|
+
//
|
|
23
|
+
// - A COUNTER YOU CANNOT TRUST IS WORSE THAN NONE. A rate over zero samples is
|
|
24
|
+
// `n/a`, never 0%/100%. Every RateMetric carries count + total + buckets.
|
|
25
|
+
//
|
|
26
|
+
// - DETERMINISTIC & REPLAYABLE. Wall-clock "now" is INJECTED (the only
|
|
27
|
+
// now-derived field is `generatedAt`); all durations come from recorded
|
|
28
|
+
// timestamps. A report over a fixed snapshot is byte-reproducible.
|
|
29
|
+
//
|
|
30
|
+
// - FAIL CLOSED ON DRIFT. A fingerprinted, rebuildable per-run snapshot reports
|
|
31
|
+
// `valid|stale|absent` against current source — same ethos as the v0.1.25
|
|
32
|
+
// state-explosion summaries and the v0.1.28 registry.
|
|
33
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
34
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
35
|
+
};
|
|
36
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
37
|
+
exports.METRICS_SCHEMA_VERSION = void 0;
|
|
38
|
+
exports.fingerprintMetricsSource = fingerprintMetricsSource;
|
|
39
|
+
exports.deriveUsageTotals = deriveUsageTotals;
|
|
40
|
+
exports.deriveAttestationCoverage = deriveAttestationCoverage;
|
|
41
|
+
exports.deriveCost = deriveCost;
|
|
42
|
+
exports.deriveFailureRate = deriveFailureRate;
|
|
43
|
+
exports.deriveVerifierPassRate = deriveVerifierPassRate;
|
|
44
|
+
exports.deriveCandidateAcceptanceRate = deriveCandidateAcceptanceRate;
|
|
45
|
+
exports.deriveMetricsReport = deriveMetricsReport;
|
|
46
|
+
exports.deriveCollaborationMetrics = deriveCollaborationMetrics;
|
|
47
|
+
exports.metricsDir = metricsDir;
|
|
48
|
+
exports.loadPersistedMetricsFingerprint = loadPersistedMetricsFingerprint;
|
|
49
|
+
exports.loadPersistedMetricsReport = loadPersistedMetricsReport;
|
|
50
|
+
exports.showMetricsReport = showMetricsReport;
|
|
51
|
+
exports.deriveMetricsSummary = deriveMetricsSummary;
|
|
52
|
+
exports.loadCostPolicy = loadCostPolicy;
|
|
53
|
+
exports.parseUsageFromArgs = parseUsageFromArgs;
|
|
54
|
+
exports.formatMetricsReport = formatMetricsReport;
|
|
55
|
+
exports.formatMetricsSummary = formatMetricsSummary;
|
|
56
|
+
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
57
|
+
const node_fs_1 = __importDefault(require("node:fs"));
|
|
58
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
59
|
+
const state_1 = require("./state");
|
|
60
|
+
const telemetry_ledger_1 = require("./telemetry-ledger");
|
|
61
|
+
exports.METRICS_SCHEMA_VERSION = 1;
|
|
62
|
+
// Verifier node statuses that are tallied as a passing gate decision.
const VERIFIER_PASS_STATUSES = new Set([
    "verified",
    "completed",
    "committed"
]);
// Verifier node statuses that are tallied as a failing gate decision.
const VERIFIER_FAIL_STATUSES = new Set([
    "failed",
    "rejected",
    "blocked"
]);
// Candidate statuses that are tallied as accepted.
const CANDIDATE_ACCEPTED_STATUSES = new Set([
    "selected",
    "verified"
]);
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
// Deterministic helpers
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
/**
 * Hash a collection of strings into a short fingerprint that does not depend
 * on the order the strings were supplied in.
 *
 * The values are copied, sorted with the default array ordering, serialized as
 * a JSON array and SHA-256 hashed; the first 32 hex characters of the digest
 * are returned behind a `sha256:` prefix.
 */
function fingerprintStrings(values) {
    const ordered = [...values];
    ordered.sort();
    const hexDigest = node_crypto_1.default
        .createHash("sha256")
        .update(JSON.stringify(ordered))
        .digest("hex");
    const shortDigest = hexDigest.slice(0, 32);
    return `sha256:${shortDigest}`;
}
|
|
75
|
+
/**
 * Round `value` to `decimals` fractional digits: scale by a power of ten,
 * apply Math.round, then scale back down. No locale-dependent formatting is
 * involved.
 */
function round(value, decimals) {
    const scale = Math.pow(10, decimals);
    const scaled = Math.round(value * scale);
    return scaled / scale;
}
|
|
81
|
+
/**
 * Milliseconds elapsed between two timestamp strings.
 *
 * Returns null when either timestamp is missing, when Date.parse cannot turn
 * either one into a finite number, or when the end precedes the start (a
 * negative span is reported as unknown rather than as a number).
 */
function durationMs(startedAt, endedAt) {
    if (startedAt && endedAt) {
        const from = Date.parse(startedAt);
        const to = Date.parse(endedAt);
        if (Number.isFinite(from) && Number.isFinite(to)) {
            const elapsed = to - from;
            if (elapsed >= 0) {
                return elapsed;
            }
        }
    }
    return null;
}
|
|
93
|
+
/**
 * Build a duration record for a start/end timestamp pair.
 *
 * `wallClockMs` comes from durationMs() (null when it cannot be computed) and
 * `inFlight` is true whenever no end timestamp was supplied.
 */
function duration(startedAt, endedAt) {
    const stillRunning = !endedAt;
    return {
        startedAt,
        endedAt,
        wallClockMs: durationMs(startedAt, endedAt),
        inFlight: stillRunning
    };
}
|
|
102
|
+
/**
 * Assemble a rate metric record.
 *
 * With no samples (`total <= 0`) the record is marked `n/a`: count and rate
 * are null and total is reported as 0. Otherwise the record is `ok` and the
 * rate is count / total rounded to six decimals. `buckets` is passed through
 * unchanged in both cases.
 */
function rate(metric, count, total, buckets) {
    return total <= 0
        ? { metric, state: "n/a", count: null, total: 0, rate: null, buckets }
        : { metric, state: "ok", count, total, rate: round(count / total, 6), buckets };
}
|
|
109
|
+
// ---------------------------------------------------------------------------
|
|
110
|
+
// Source fingerprint (structural, not mtime — a tampered status trips `stale`).
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
/**
 * Fingerprint the parts of a run record that the metrics are derived from.
 *
 * One descriptor string is produced for the run header and for every task,
 * worker, verifier node, candidate, feedback record and multi-agent
 * membership; the descriptors are then hashed by fingerprintStrings(). The
 * input arrays are copied before sorting, so the run is not reordered.
 */
function fingerprintMetricsSource(run) {
    const byId = (left, right) => left.id.localeCompare(right.id);
    const sortedById = (records) => [...(records || [])].sort(byId);
    const descriptors = [
        `id:${run.id}`,
        `createdAt:${run.createdAt}`,
        `updatedAt:${run.updatedAt}`,
        `app:${run.workflow.app?.id || run.workflow.id}`
    ];
    sortedById(run.tasks).forEach((task) => {
        descriptors.push(`task:${task.id}:${task.status}:${task.dispatchedAt || "-"}:${task.completedAt || "-"}:${usageKey(task.usage)}:${task.backendId || "-"}`);
    });
    sortedById(run.workers).forEach((worker) => {
        descriptors.push(`worker:${worker.id}:${worker.status}:${worker.output?.recordedAt || "-"}:${usageKey(worker.usage)}:${worker.backendId || "-"}`);
    });
    // Only verifier nodes take part in the fingerprint.
    const verifierNodes = (run.nodes || []).filter((node) => node.kind === "verifier");
    sortedById(verifierNodes).forEach((node) => {
        descriptors.push(`verifier:${node.id}:${node.status}`);
    });
    sortedById(run.candidates).forEach((candidate) => {
        descriptors.push(`candidate:${candidate.id}:${candidate.status}`);
    });
    sortedById(run.feedback).forEach((entry) => {
        descriptors.push(`feedback:${entry.id}:${entry.status}`);
    });
    sortedById(run.multiAgent?.memberships).forEach((membership) => {
        descriptors.push(`membership:${membership.id}:${membership.status}`);
    });
    return fingerprintStrings(descriptors);
}
|
|
139
|
+
/**
 * Compact string form of a usage record for fingerprinting.
 *
 * A missing record yields "unreported"; otherwise source, model, input tokens
 * and output tokens are joined with ":" (a missing model or token count is
 * written as "-").
 */
function usageKey(usage) {
    if (usage) {
        const fields = [
            usage.source,
            usage.model || "-",
            usage.inputTokens ?? "-",
            usage.outputTokens ?? "-"
        ];
        // Stringify each field with template semantics before joining, so an
        // undefined source is rendered as "undefined" rather than dropped.
        return fields.map((field) => `${field}`).join(":");
    }
    return "unreported";
}
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
// Worker / unit helpers
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
/**
 * Timestamp at which a worker finished, or undefined when none is known.
 *
 * The output's `recordedAt` wins when present; otherwise a worker whose status
 * is completed, verified, failed or rejected falls back to its `updatedAt`.
 */
function workerEndAt(worker) {
    const recordedAt = worker.output?.recordedAt;
    if (recordedAt) {
        return recordedAt;
    }
    switch (worker.status) {
        case "completed":
        case "verified":
        case "failed":
        case "rejected":
            return worker.updatedAt;
        default:
            return undefined;
    }
}
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
// Usage + cost
|
|
157
|
+
// ---------------------------------------------------------------------------
|
|
158
|
+
/**
 * List the work units that may carry a usage record.
 *
 * Every worker that has an `output` is a unit. Every task whose status is
 * "completed" is also a unit, unless a worker with output already points at
 * that task (via `output.taskId`, falling back to the worker's `taskId`) — so
 * the same piece of work is not listed twice. Units are returned sorted by
 * their id.
 */
function usageUnits(run) {
    const workersWithOutput = (run.workers || []).filter((worker) => worker.output);
    const coveredTaskIds = new Set(workersWithOutput.map((worker) => worker.output.taskId || worker.taskId));
    const workerUnits = workersWithOutput.map((worker) => ({
        unit: worker.id,
        kind: "worker",
        usage: worker.usage
    }));
    const taskUnits = (run.tasks || [])
        .filter((task) => task.status === "completed" && !coveredTaskIds.has(task.id))
        .map((task) => ({ unit: task.id, kind: "task", usage: task.usage }));
    return [...workerUnits, ...taskUnits].sort((left, right) => left.unit.localeCompare(right.unit));
}
|
|
179
|
+
/**
 * Total token count for a usage record: the recorded `totalTokens` when it is
 * a number, otherwise input tokens plus output tokens (each defaulting to 0).
 */
function tokenTotal(usage) {
    const { totalTokens } = usage;
    return typeof totalTokens === "number"
        ? totalTokens
        : (usage.inputTokens || 0) + (usage.outputTokens || 0);
}
|
|
184
|
+
/**
 * Sum the usage records found on a run's work units.
 *
 * Returns `{ totals, rows }`. `rows` holds one `{ unit, kind, usage }` entry
 * per unit that carries a usage record. `totals` reports how many units exist,
 * how many carry usage, how many do not, the carrying fraction (null when
 * there are no units), the summed input/output/total tokens and the sorted
 * list of distinct model names seen.
 */
function deriveUsageTotals(run) {
    const units = usageUnits(run);
    const rows = [];
    const seenModels = new Set();
    const sums = { input: 0, output: 0, total: 0 };
    units.forEach(({ unit, kind, usage }) => {
        // Units without a usage record contribute nothing to the sums.
        if (!usage) {
            return;
        }
        sums.input += usage.inputTokens || 0;
        sums.output += usage.outputTokens || 0;
        sums.total += tokenTotal(usage);
        if (usage.model) {
            seenModels.add(usage.model);
        }
        rows.push({ unit, kind, usage });
    });
    const unitCount = units.length;
    const attestedUnits = rows.length;
    const coverage = unitCount > 0 ? round(attestedUnits / unitCount, 6) : null;
    return {
        totals: {
            units: unitCount,
            attestedUnits,
            unreportedUnits: unitCount - attestedUnits,
            coverage,
            inputTokens: sums.input,
            outputTokens: sums.output,
            totalTokens: sums.total,
            models: [...seenModels].sort()
        },
        rows
    };
}
|
|
216
|
+
/**
 * Tally the recorded attestation verdicts across a run's work units and
 * summarize the telemetry ledger.
 *
 * Units with a usage record are bucketed by `usage.attestation`: "attested",
 * "unattested" or "absent"; any other value (including a missing verdict) is
 * counted as `unverified`. Units without a usage record land in no bucket but
 * still count toward `units`. `verifiedCoverage` is attested / units rounded
 * to six decimals, or null when there are no units. The `ledger` summary is
 * taken from verifyTelemetryLedger(run).
 */
function deriveAttestationCoverage(run) {
    const units = usageUnits(run);
    const tally = { attested: 0, unattested: 0, absent: 0, unverified: 0 };
    for (const { usage } of units) {
        if (!usage) {
            continue;
        }
        const verdict = usage.attestation;
        if (verdict === "attested" || verdict === "unattested" || verdict === "absent") {
            tally[verdict] += 1;
        }
        else {
            // Usage present but carrying no recognized verdict.
            tally.unverified += 1;
        }
    }
    const unitCount = units.length;
    const ledger = (0, telemetry_ledger_1.verifyTelemetryLedger)(run);
    const verifiedCoverage = unitCount > 0 ? round(tally.attested / unitCount, 6) : null;
    return {
        units: unitCount,
        attested: tally.attested,
        unattested: tally.unattested,
        absent: tally.absent,
        unverified: tally.unverified,
        verifiedCoverage,
        ledger: {
            present: ledger.present,
            verified: ledger.verified,
            records: ledger.records.length
        }
    };
}
|
|
258
|
+
/**
 * Price usage rows against an optional pricing policy.
 *
 * Outcomes:
 *  - no rows: state `unreported` (cost fields null, never zero);
 *  - rows but no policy: state `unpriced`;
 *  - otherwise every row is priced with the policy entry whose `model` equals
 *    the row's `usage.model`, falling back to `policy.defaultPrice`. Exact
 *    matches accumulate into `attestedUsd`; default-priced rows accumulate
 *    into `estimatedUsd` and their model is listed in `unpricedModels`. Rows
 *    with neither an exact entry nor a default are skipped (their model, if
 *    any, is also listed in `unpricedModels`).
 *
 * The resulting state is `estimated` if any row used the default price,
 * `attested` if rows were priced and all of them matched exactly, and
 * `unpriced` if no row could be priced. `pricedCoverage` is the priced share
 * of all row tokens, or null when the rows total zero tokens.
 *
 * @param {Array<{usage: object}>} rows Rows as produced by deriveUsageTotals().
 * @param {object} [policy] Pricing policy (`id`, `currency`, `models`, `defaultPrice`).
 * @returns {object} Cost summary record.
 */
function deriveCost(rows, policy) {
    const currency = policy?.currency || "USD";
    if (rows.length === 0) {
        return {
            state: "unreported",
            currency,
            attestedUsd: null,
            estimatedUsd: null,
            policyId: policy?.id,
            unpricedModels: [],
            pricedCoverage: null,
            notes: ["No attested usage on this run; cost is unreported, not zero."]
        };
    }
    if (!policy) {
        return {
            state: "unpriced",
            currency,
            attestedUsd: null,
            estimatedUsd: null,
            unpricedModels: [],
            pricedCoverage: null,
            notes: ["Attested usage present but no pricing policy supplied; pass --pricing <path> to price it."]
        };
    }
    const byModel = new Map();
    for (const m of policy.models || [])
        byModel.set(m.model, m);
    let attestedUsd = 0;
    let estimatedUsd = 0;
    let pricedTokens = 0;
    let attestedTokens = 0;
    let usedDefault = false;
    let usedExact = false;
    const unpriced = new Set();
    for (const row of rows) {
        const usage = row.usage;
        const tokens = tokenTotal(usage);
        attestedTokens += tokens;
        const model = usage.model;
        const exact = model ? byModel.get(model) : undefined;
        const price = exact || policy.defaultPrice;
        if (!price) {
            if (model)
                unpriced.add(model);
            continue; // usage we cannot price under this policy
        }
        const cost = priceUsage(usage, price);
        pricedTokens += tokens;
        if (exact) {
            attestedUsd += cost;
            usedExact = true;
        }
        else {
            estimatedUsd += cost;
            usedDefault = true;
            if (model)
                unpriced.add(model);
        }
    }
    // Decide the state from what was actually priced, not from the priced
    // token count: a row can be priced while its token total is 0 (for example
    // totalTokens: 0 alongside cache tokens). Keying on the token count would
    // label such a run `unpriced` and emit the "nothing priced it" note even
    // though attestedUsd / estimatedUsd is reported.
    let state;
    if (usedDefault)
        state = "estimated";
    else if (usedExact)
        state = "attested";
    else
        state = "unpriced";
    const notes = [];
    if (usedDefault)
        notes.push("Some models lacked an exact policy entry and were priced with the policy default; that portion is `estimated`, not `attested`.");
    if (state === "unpriced")
        notes.push("Attested usage present but no policy entry (and no default) priced it; cost is unpriced.");
    return {
        state,
        currency,
        attestedUsd: usedExact ? round(attestedUsd, 6) : null,
        estimatedUsd: usedDefault ? round(estimatedUsd, 6) : null,
        policyId: policy.id,
        unpricedModels: [...unpriced].sort(),
        pricedCoverage: attestedTokens > 0 ? round(pricedTokens / attestedTokens, 6) : null,
        notes
    };
}
|
|
346
|
+
/**
 * Cost of one usage record under one price entry.
 *
 * Each token class (input, output, cache read, cache write) is converted to
 * millions of tokens and multiplied by its per-million price. Missing token
 * counts count as 0, and missing cache prices count as 0.
 */
function priceUsage(usage, price) {
    const costOf = (tokens, perMillion) => ((tokens || 0) / 1_000_000) * perMillion;
    return costOf(usage.inputTokens, price.inputPerMillion)
        + costOf(usage.outputTokens, price.outputPerMillion)
        + costOf(usage.cacheReadTokens, price.cacheReadPerMillion || 0)
        + costOf(usage.cacheWriteTokens, price.cacheWritePerMillion || 0);
}
|
|
353
|
+
// ---------------------------------------------------------------------------
|
|
354
|
+
// Rates (each a fail-closed RateMetric with explicit sample counts)
|
|
355
|
+
// ---------------------------------------------------------------------------
|
|
356
|
+
/**
 * Failure rate across a run's workers, directly-dispatched tasks, multi-agent
 * memberships and feedback records.
 *
 * Only tasks that have no `workerId` and do have a `dispatchedAt` are sampled.
 * Failures are: workers that are failed or rejected, sampled tasks that are
 * failed, memberships that are failed, and feedback still open or tasked. The
 * per-category sample and failure counts are returned as the metric's buckets.
 */
function deriveFailureRate(run) {
    const countWhere = (records, predicate) => records.filter(predicate).length;
    const workers = run.workers || [];
    const memberships = run.multiAgent?.memberships || [];
    const feedback = run.feedback || [];
    const directTasks = (run.tasks || []).filter((task) => !task.workerId && task.dispatchedAt);
    const buckets = {
        workers: workers.length,
        workersFailed: countWhere(workers, (worker) => worker.status === "failed" || worker.status === "rejected"),
        tasks: directTasks.length,
        tasksFailed: countWhere(directTasks, (task) => task.status === "failed"),
        memberships: memberships.length,
        membershipsFailed: countWhere(memberships, (membership) => membership.status === "failed"),
        feedback: feedback.length,
        feedbackUnresolved: countWhere(feedback, (entry) => entry.status === "open" || entry.status === "tasked")
    };
    const sampleCount = buckets.workers + buckets.tasks + buckets.memberships + buckets.feedback;
    const failureCount = buckets.workersFailed + buckets.tasksFailed + buckets.membershipsFailed + buckets.feedbackUnresolved;
    return rate("failure", failureCount, sampleCount, buckets);
}
|
|
380
|
+
/**
 * Pass rate over the run's verifier nodes.
 *
 * Every verifier node is counted into `buckets` by status. Only nodes whose
 * status is in the pass or fail sets are treated as decided and enter the
 * denominator; any other status is left out of the rate.
 */
function deriveVerifierPassRate(run) {
    const buckets = {};
    let passCount = 0;
    let failCount = 0;
    for (const node of run.nodes || []) {
        if (node.kind !== "verifier") {
            continue;
        }
        const status = node.status;
        buckets[status] = (buckets[status] || 0) + 1;
        if (VERIFIER_PASS_STATUSES.has(status)) {
            passCount += 1;
        }
        else if (VERIFIER_FAIL_STATUSES.has(status)) {
            failCount += 1;
        }
    }
    return rate("verifier-pass", passCount, passCount + failCount, buckets);
}
|
|
398
|
+
/**
 * Acceptance rate over the run's candidates.
 *
 * Every candidate is counted into `buckets` by status; those whose status is
 * in the accepted set form the numerator, and all candidates form the
 * denominator.
 */
function deriveCandidateAcceptanceRate(run) {
    const candidates = run.candidates || [];
    const buckets = {};
    let acceptedCount = 0;
    candidates.forEach(({ status }) => {
        buckets[status] = (buckets[status] || 0) + 1;
        if (CANDIDATE_ACCEPTED_STATUSES.has(status)) {
            acceptedCount += 1;
        }
    });
    return rate("candidate-acceptance", acceptedCount, candidates.length, buckets);
}
|
|
409
|
+
// ---------------------------------------------------------------------------
|
|
410
|
+
// Time / duration
|
|
411
|
+
// ---------------------------------------------------------------------------
|
|
412
|
+
/**
 * One timing row per task, sorted by task id. The duration spans
 * `dispatchedAt` to `completedAt`.
 */
function taskRows(run) {
    const rows = [];
    for (const task of run.tasks || []) {
        rows.push({
            id: task.id,
            kind: "task",
            status: task.status,
            duration: duration(task.dispatchedAt, task.completedAt)
        });
    }
    rows.sort((left, right) => left.id.localeCompare(right.id));
    return rows;
}
|
|
422
|
+
/**
 * One timing row per worker, sorted by worker id. The duration spans the
 * worker's `createdAt` to the end timestamp reported by workerEndAt().
 */
function workerRows(run) {
    const rows = [];
    for (const worker of run.workers || []) {
        rows.push({
            id: worker.id,
            kind: "worker",
            status: worker.status,
            duration: duration(worker.createdAt, workerEndAt(worker))
        });
    }
    rows.sort((left, right) => left.id.localeCompare(right.id));
    return rows;
}
|
|
432
|
+
/**
 * Assemble the full metrics report for one run.
 *
 * Reads from `options`:
 *  - `now`: copied verbatim into `generatedAt`;
 *  - `policy`: optional pricing policy forwarded to deriveCost();
 *  - `persistedFingerprint`: previously stored source fingerprint, compared
 *    with the freshly computed one to report freshness as `absent` (none
 *    supplied), `valid` (equal) or `stale` (different).
 *
 * The key order of the returned object is kept fixed so its serialized form
 * does not change.
 */
function deriveMetricsReport(run, options) {
    const tasks = taskRows(run);
    const workers = workerRows(run);
    // Sum of known task durations; rows with unknown duration add 0.
    let activeTaskMs = 0;
    for (const row of tasks) {
        activeTaskMs += row.duration.wallClockMs || 0;
    }
    const inFlight = [...tasks, ...workers].filter((row) => row.duration.inFlight).length;
    const hasOpenTasks = (run.tasks || []).some((task) => task.status === "pending" || task.status === "running");
    const { totals, rows } = deriveUsageTotals(run);
    const cost = deriveCost(rows, options.policy);
    const currentFingerprint = fingerprintMetricsSource(run);
    const persistedFingerprint = options.persistedFingerprint;
    const status = !persistedFingerprint
        ? "absent"
        : persistedFingerprint === currentFingerprint
            ? "valid"
            : "stale";
    // Distinct backend ids across tasks and workers.
    const backendIds = new Set();
    [...(run.tasks || []), ...(run.workers || [])].forEach((record) => {
        if (record.backendId) {
            backendIds.add(record.backendId);
        }
    });
    const needsUsageHint = totals.unreportedUnits > 0 && totals.attestedUnits === 0;
    return {
        schemaVersion: exports.METRICS_SCHEMA_VERSION,
        surface: "metrics",
        runId: run.id,
        generatedAt: options.now,
        sourceFingerprint: currentFingerprint,
        freshness: {
            status,
            persistedFingerprint: options.persistedFingerprint,
            currentFingerprint
        },
        scope: {
            app: run.workflow.app?.id || run.workflow.id,
            backendIds: [...backendIds].sort()
        },
        time: {
            run: {
                startedAt: run.createdAt,
                endedAt: run.updatedAt,
                wallClockMs: durationMs(run.createdAt, run.updatedAt),
                inFlight: hasOpenTasks
            },
            activeTaskMs,
            inFlight,
            tasks,
            workers
        },
        rates: {
            failure: deriveFailureRate(run),
            verifierPass: deriveVerifierPassRate(run),
            candidateAcceptance: deriveCandidateAcceptanceRate(run)
        },
        usage: totals,
        cost,
        attestedUsage: rows,
        attestation: deriveAttestationCoverage(run),
        collaboration: deriveCollaborationMetrics(run),
        nextAction: needsUsageHint
            ? "No attested usage yet — record host usage on result/worker intake (cw result ... --usage-input-tokens N --usage-output-tokens M --usage-model ID)."
            : `node scripts/cw.js metrics show ${run.id} --json`
    };
}
|
|
499
|
+
/**
 * v0.1.32 collaboration metrics — counts, approval rate, and time-to-approval,
 * computed only from the run's collaboration records and their recorded
 * timestamps (nothing here reads the clock).
 *
 * Time-to-approval is measured per "approve" record, from the creation
 * timestamp of the approved target to the approval's own `createdAt`. Records
 * whose target timestamp cannot be resolved (durationMs yields null) are left
 * out of the samples.
 *
 * @param {object} run - Run record; `run.collaboration` may be absent.
 * @returns {object} Counts, `approvalRate`, and `timeToApproval`
 *   ({ samples, meanMs, maxMs }; mean/max are null when there are no samples).
 */
function deriveCollaborationMetrics(run) {
    const collab = run.collaboration;
    const decisions = collab?.approvals || [];
    const approvalRecords = decisions.filter((record) => record.decision === "approve");
    const rejectionRecords = decisions.filter((record) => record.decision === "reject");
    // Distinct approving actors; the "unattributed" placeholder id is not a reviewer.
    const reviewers = new Set(approvalRecords.map((record) => record.actor?.id).filter((id) => id && id !== "unattributed"));

    // Accumulate sum and max in one pass. Spreading the samples into
    // Math.max(...) would pass one argument per sample and can exceed the
    // engine's argument limit on very large runs.
    let sampleCount = 0;
    let totalMs = 0;
    let maxMs = null;
    for (const record of approvalRecords) {
        // A record without a target simply has no resolvable start timestamp.
        const startedAt = record.target ? targetCreatedAt(run, record.target) : undefined;
        const ms = durationMs(startedAt, record.createdAt);
        if (ms === null) {
            continue;
        }
        sampleCount += 1;
        totalMs += ms;
        if (maxMs === null || ms > maxMs) {
            maxMs = ms;
        }
    }
    const meanMs = sampleCount ? Math.round(totalMs / sampleCount) : null;

    return {
        approvals: approvalRecords.length,
        rejections: rejectionRecords.length,
        comments: (collab?.comments || []).length,
        handoffs: (collab?.handoffs || []).length,
        reviewers: reviewers.size,
        approvalRate: rate("approval", approvalRecords.length, approvalRecords.length + rejectionRecords.length, {
            approve: approvalRecords.length,
            reject: rejectionRecords.length
        }),
        timeToApproval: { samples: sampleCount, meanMs, maxMs }
    };
}
|
|
529
|
+
/**
 * Recorded creation timestamp of an approval's target (for time-to-approval).
 *
 * @param {object} run - Run record holding `candidates`, `commits` and
 *   `candidateSelections` (each may be absent).
 * @param {{kind: string, id: string}|null|undefined} target - Approval target.
 * @returns {*} The target's `createdAt` (`selectedAt` for selections), or
 *   `undefined` when the target is missing, of an unknown kind, or not found.
 */
function targetCreatedAt(run, target) {
    // A missing target is treated like an unknown one instead of throwing.
    const byId = (entries) => (entries || []).find((entry) => entry.id === target.id);
    switch (target?.kind) {
        case "candidate":
            return byId(run.candidates)?.createdAt;
        case "commit":
            return byId(run.commits)?.createdAt;
        case "selection":
            // Selections record when they were made rather than a createdAt.
            return byId(run.candidateSelections)?.selectedAt;
        default:
            return undefined;
    }
}
|
|
539
|
+
// ---------------------------------------------------------------------------
|
|
540
|
+
// Persistence — a rebuildable, fingerprinted snapshot (fail-closed freshness).
|
|
541
|
+
// ---------------------------------------------------------------------------
|
|
542
|
+
/**
 * Directory that holds a run's metrics artifacts.
 *
 * @param {object} run - Run record exposing `paths.runDir`.
 * @returns {string} `<runDir>/metrics`.
 */
function metricsDir(run) {
    const { runDir } = run.paths;
    return node_path_1.default.join(runDir, "metrics");
}
|
|
545
|
+
/**
 * Path of the persisted per-run metrics report.
 *
 * @param {object} run - Run record (resolved through `metricsDir`).
 * @returns {string} `<metrics dir>/metrics-report.json`.
 */
function metricsReportPath(run) {
    const dir = metricsDir(run);
    return node_path_1.default.join(dir, "metrics-report.json");
}
|
|
548
|
+
/**
 * Read the source fingerprint stored in this run's persisted metrics report.
 *
 * A missing file and any failure while reading it both yield `undefined`;
 * read errors are deliberately swallowed.
 *
 * @param {object} run - Run record used to locate the report file.
 * @returns {*} The persisted `sourceFingerprint`, or `undefined`.
 */
function loadPersistedMetricsFingerprint(run) {
    const file = metricsReportPath(run);
    let fingerprint;
    if (node_fs_1.default.existsSync(file)) {
        try {
            fingerprint = (0, state_1.readJson)(file).sourceFingerprint;
        }
        catch {
            // Best-effort read: an unreadable snapshot counts as "no fingerprint".
            fingerprint = undefined;
        }
    }
    return fingerprint;
}
|
|
561
|
+
/**
 * Read the full persisted per-run metrics report.
 *
 * A missing file and any failure while reading it both yield `undefined`;
 * read errors are deliberately swallowed.
 *
 * @param {object} run - Run record used to locate the report file.
 * @returns {*} The parsed report, or `undefined`.
 */
function loadPersistedMetricsReport(run) {
    const file = metricsReportPath(run);
    let persisted;
    if (node_fs_1.default.existsSync(file)) {
        try {
            persisted = (0, state_1.readJson)(file);
        }
        catch {
            // Best-effort read: an unreadable snapshot counts as "no report".
            persisted = undefined;
        }
    }
    return persisted;
}
|
|
573
|
+
/**
 * Derive the per-run report and persist it under the run's metrics directory.
 *
 * The returned payload always carries `freshness.status === "valid"` with the
 * persisted fingerprint equal to its own source fingerprint, so it does not
 * depend on whatever snapshot was on disk before the call. That is what keeps
 * `cw metrics show --json` and `cw_metrics_show` byte-identical. The written
 * file is the one the cross-repo summary and Workbench read back.
 *
 * @param {object} run - Run record.
 * @param {{now: *, policy?: object}} options - Injected clock value and pricing policy.
 * @returns {object} The report that was written.
 */
function showMetricsReport(run, options) {
    const { now, policy } = options;
    const live = deriveMetricsReport(run, { now, policy });
    const fingerprint = live.sourceFingerprint;
    // Overwrite the derived freshness: what we are about to persist is, by construction, current.
    const freshness = {
        status: "valid",
        persistedFingerprint: fingerprint,
        currentFingerprint: fingerprint
    };
    const report = { ...live, freshness };
    node_fs_1.default.mkdirSync(metricsDir(run), { recursive: true });
    (0, state_1.writeJson)(metricsReportPath(run), report);
    return report;
}
|
|
591
|
+
// ---------------------------------------------------------------------------
|
|
592
|
+
// Cross-repo rollup (pool samples; sum attested usage/cost with coverage).
|
|
593
|
+
// ---------------------------------------------------------------------------
|
|
594
|
+
/**
 * Pool a list of RateMetrics into one by summing counts, totals and buckets,
 * then rebuilding the metric through `rate`.
 *
 * @param {string} metric - Metric name passed through to `rate`.
 * @param {Array<object>} rates - Rate metrics exposing `count`, `total`, `buckets`.
 * @returns {object} The pooled rate metric as produced by `rate`.
 */
function poolRates(metric, rates) {
    let count = 0;
    let total = 0;
    const buckets = {};
    for (const r of rates) {
        // Guard both operands: a metric lacking `total` must not turn the
        // pooled total into NaN (count was already guarded this way).
        total += r.total || 0;
        count += r.count || 0;
        for (const [k, v] of Object.entries(r.buckets || {})) {
            buckets[k] = (buckets[k] || 0) + v;
        }
    }
    return rate(metric, count, total, buckets);
}
|
|
607
|
+
/**
 * Sum a list of UsageTotals into one.
 *
 * Unit and token counters are added up, model ids are unioned and sorted, and
 * coverage is recomputed over the pooled units (null when there are no units).
 *
 * @param {Array<object>} list - Usage totals to pool.
 * @returns {object} Pooled usage totals.
 */
function poolUsage(list) {
    const sums = { units: 0, attestedUnits: 0, inputTokens: 0, outputTokens: 0, totalTokens: 0 };
    const modelIds = new Set();
    for (const entry of list) {
        sums.units += entry.units;
        sums.attestedUnits += entry.attestedUnits;
        sums.inputTokens += entry.inputTokens;
        sums.outputTokens += entry.outputTokens;
        sums.totalTokens += entry.totalTokens;
        for (const id of entry.models) {
            modelIds.add(id);
        }
    }
    // Coverage is a ratio of pooled units, not an average of per-run coverages.
    let coverage = null;
    if (sums.units > 0) {
        coverage = round(sums.attestedUnits / sums.units, 6);
    }
    return {
        units: sums.units,
        attestedUnits: sums.attestedUnits,
        unreportedUnits: sums.units - sums.attestedUnits,
        coverage,
        inputTokens: sums.inputTokens,
        outputTokens: sums.outputTokens,
        totalTokens: sums.totalTokens,
        models: [...modelIds].sort()
    };
}
|
|
635
|
+
/**
 * Sum a list of CostMetrics into one.
 *
 * Attested and estimated amounts are totalled separately (each stays null
 * until some entry contributes to it), so the two figures are never collapsed
 * into one. The pooled state is resolved conservatively: "unreported" when no
 * entry reported anything, otherwise "estimated" if any estimate is present,
 * then "attested", then "unpriced".
 *
 * @param {Array<object>} list - Cost metrics to pool.
 * @returns {object} Pooled cost metric (`pricedCoverage` is always null here).
 */
function poolCost(list) {
    const currency = list.find((entry) => entry.currency)?.currency || "USD";
    const unpricedModels = new Set();
    const seen = { attested: false, estimated: false, unpriced: false, reported: false };
    let attestedUsd = null;
    let estimatedUsd = null;
    let policyId;

    for (const entry of list) {
        if (entry.attestedUsd !== null) {
            seen.attested = true;
            attestedUsd = round((attestedUsd || 0) + entry.attestedUsd, 6);
        }
        if (entry.estimatedUsd !== null) {
            seen.estimated = true;
            estimatedUsd = round((estimatedUsd || 0) + entry.estimatedUsd, 6);
        }
        for (const model of entry.unpricedModels) {
            unpricedModels.add(model);
        }
        seen.unpriced = seen.unpriced || entry.state === "unpriced";
        seen.reported = seen.reported || entry.state !== "unreported";
        // The last entry that names a policy wins.
        policyId = entry.policyId || policyId;
    }

    const resolveState = () => {
        if (!seen.reported) {
            return "unreported";
        }
        if (seen.estimated) {
            return "estimated";
        }
        if (seen.attested) {
            return "attested";
        }
        return seen.unpriced ? "unpriced" : "unreported";
    };

    const notes = seen.attested && seen.estimated
        ? ["Totals mix exact-priced (attested) and default-priced (estimated) runs; the two USD figures are kept separate."]
        : [];

    return {
        state: resolveState(),
        currency,
        attestedUsd,
        estimatedUsd,
        policyId,
        unpricedModels: [...unpricedModels].sort(),
        pricedCoverage: null,
        notes
    };
}
|
|
690
|
+
/**
 * Build the cross-repo rollup from already-loaded runs. PURE over its inputs
 * plus the injected `now`.
 *
 * Each input is turned into a per-run report; rates, usage and cost are then
 * pooled overall, per app, and per backend. `unreadableRuns` is the
 * caller-supplied count of runs whose source could not be loaded; it is
 * surfaced in the payload rather than silently dropped.
 *
 * @param {Array<{run: object, repo: *, persistedFingerprint?: string}>} inputs
 * @param {{now: *, policy?: object, scope: *, unreadableRuns?: number}} options
 * @returns {object} The summary payload (all lists in a deterministic order).
 */
function deriveMetricsSummary(inputs, options) {
    const perRun = inputs.map((input) => {
        const report = deriveMetricsReport(input.run, {
            now: options.now,
            policy: options.policy,
            persistedFingerprint: input.persistedFingerprint
        });
        return {
            report,
            ref: {
                runId: report.runId,
                repo: input.repo,
                app: report.scope.app,
                backendIds: report.scope.backendIds,
                freshness: report.freshness.status,
                rates: report.rates,
                usage: report.usage,
                cost: report.cost
            }
        };
    });
    perRun.sort((a, b) => a.report.runId.localeCompare(b.report.runId));

    // Group reports under every key `keyOf` yields, then pool each group.
    const groupBy = (keyOf) => {
        const map = new Map();
        for (const { report } of perRun) {
            for (const key of keyOf(report)) {
                const list = map.get(key) || [];
                list.push(report);
                map.set(key, list);
            }
        }
        return [...map.entries()]
            .sort((a, b) => a[0].localeCompare(b[0]))
            .map(([key, reports]) => ({
            key,
            runCount: reports.length,
            rates: {
                failure: poolRates("failure", reports.map((r) => r.rates.failure)),
                verifierPass: poolRates("verifier-pass", reports.map((r) => r.rates.verifierPass)),
                candidateAcceptance: poolRates("candidate-acceptance", reports.map((r) => r.rates.candidateAcceptance))
            },
            usage: poolUsage(reports.map((r) => r.usage)),
            cost: poolCost(reports.map((r) => r.cost))
        }));
    };

    const allReports = perRun.map((p) => p.report);
    const totalOutputBytes = inputs.reduce((sum, input) => sum + (input.run.workers || []).reduce((ws, w) => ws + (w.outputSizeBytes || 0), 0), 0);

    // Per-backend output volume. Bytes are attributed to the backend of the
    // worker that produced them, so a run that used several backends is not
    // counted in full under each of them, and the per-backend figures add up
    // to totalOutputBytes. Workers without a backendId fall under "node".
    const computePerBackendCost = (inputList) => {
        const map = new Map();
        for (const input of inputList) {
            const bytesByBackend = new Map();
            for (const worker of input.run.workers || []) {
                const backendId = worker.backendId || "node";
                bytesByBackend.set(backendId, (bytesByBackend.get(backendId) || 0) + (worker.outputSizeBytes || 0));
            }
            for (const [backendId, bytes] of bytesByBackend) {
                const entry = map.get(backendId) || { runCount: 0, outputBytes: 0 };
                entry.runCount += 1;
                entry.outputBytes += bytes;
                map.set(backendId, entry);
            }
        }
        // Sort so the payload does not depend on the order the caller loaded runs in.
        return [...map.entries()]
            .sort((a, b) => (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0))
            .map(([backendId, data]) => ({ backendId, ...data }));
    };

    return {
        schemaVersion: exports.METRICS_SCHEMA_VERSION,
        surface: "metrics",
        scope: options.scope,
        generatedAt: options.now,
        runCount: perRun.length,
        unreadableRuns: options.unreadableRuns || 0,
        rates: {
            failure: poolRates("failure", allReports.map((r) => r.rates.failure)),
            verifierPass: poolRates("verifier-pass", allReports.map((r) => r.rates.verifierPass)),
            candidateAcceptance: poolRates("candidate-acceptance", allReports.map((r) => r.rates.candidateAcceptance))
        },
        usage: poolUsage(allReports.map((r) => r.usage)),
        cost: poolCost(allReports.map((r) => r.cost)),
        totalOutputBytes,
        byBackendCost: computePerBackendCost(inputs),
        byApp: groupBy((r) => [r.scope.app || "unknown"]),
        byBackend: groupBy((r) => (r.scope.backendIds.length ? r.scope.backendIds : ["unreported"])),
        runs: perRun.map((p) => p.ref),
        nextAction: perRun.length === 0 ? "No indexed runs; run a workflow, then `cw metrics summary`." : "Per-run detail: cw metrics show <run-id> --json"
    };
}
|
|
777
|
+
// ---------------------------------------------------------------------------
|
|
778
|
+
// Pricing policy loader (POLICY as DATA — kept out of the kernel).
|
|
779
|
+
// ---------------------------------------------------------------------------
|
|
780
|
+
/**
 * Resolve a CostPolicy from CLI/MCP args.
 *
 * The policy reference is taken from `args.pricing`, then `args.pricingPolicy`,
 * then `args.policy`. The values `default` and `bundled` select the bundled
 * example at `<pluginRoot>/manifest/pricing.policy.json`; anything else is
 * resolved as a file path. With no reference the result is `undefined`, which
 * leaves cost `unpriced`/`unreported` rather than guessed.
 *
 * @param {object} args - Parsed CLI/MCP arguments.
 * @param {string} pluginRoot - Root directory containing `manifest/`.
 * @returns {object|undefined} The parsed policy, or `undefined` when none was requested.
 * @throws {Error} When the file does not exist, or it is not a
 *   `schemaVersion: 1` document with a `models` array.
 */
function loadCostPolicy(args, pluginRoot) {
    const requested = args.pricing ?? args.pricingPolicy ?? args.policy;
    if (requested === undefined || requested === null || requested === "") {
        return undefined;
    }
    const reference = String(requested);
    const wantsBundled = ["default", "bundled"].includes(reference);
    let file;
    if (wantsBundled) {
        file = node_path_1.default.join(pluginRoot, "manifest", "pricing.policy.json");
    }
    else {
        file = node_path_1.default.resolve(reference);
    }
    if (!node_fs_1.default.existsSync(file)) {
        throw new Error(`Pricing policy file not found: ${file}`);
    }
    const policy = (0, state_1.readJson)(file);
    const looksValid = Boolean(policy) && policy.schemaVersion === 1 && Array.isArray(policy.models);
    if (!looksValid) {
        throw new Error(`Invalid pricing policy (expected schemaVersion 1 + models[]): ${file}`);
    }
    return policy;
}
|
|
800
|
+
/**
 * Parse a host-attested UsageRecord from CLI/MCP intake args.
 *
 * An inline `args.usage` object takes precedence and is normalized as-is.
 * Otherwise the record is assembled from the individual `usage*` arguments,
 * each accepted in camelCase or kebab-case. Returns `undefined` when NO usage
 * was provided (=> `unreported`): CW never fabricates usage, so a caller that
 * passes nothing gets nothing.
 *
 * @param {object} args - Parsed CLI/MCP arguments.
 * @param {*} now - Fallback `attestedAt` value.
 * @returns {object|undefined} Normalized usage record, or `undefined`.
 */
function parseUsageFromArgs(args, now) {
    const inline = args.usage;
    if (inline && typeof inline === "object" && !Array.isArray(inline)) {
        return normalizeUsage(inline, now);
    }
    const flag = (camel, kebab) => args[camel] ?? args[kebab];
    const model = flag("usageModel", "usage-model");
    const counts = {
        inputTokens: numeric(flag("usageInputTokens", "usage-input-tokens")),
        outputTokens: numeric(flag("usageOutputTokens", "usage-output-tokens")),
        totalTokens: numeric(flag("usageTotalTokens", "usage-total-tokens")),
        cacheReadTokens: numeric(flag("usageCacheReadTokens", "usage-cache-read-tokens")),
        cacheWriteTokens: numeric(flag("usageCacheWriteTokens", "usage-cache-write-tokens"))
    };
    // Cache token counts are usage too: a caller that reports only those must
    // not have the record silently dropped as "nothing provided".
    const nothingProvided = model === undefined && Object.values(counts).every((value) => value === undefined);
    if (nothingProvided) {
        return undefined;
    }
    return normalizeUsage({
        source: flag("usageSource", "usage-source"),
        model,
        ...counts,
        attestedAt: flag("usageAttestedAt", "usage-attested-at"),
        note: flag("usageNote", "usage-note")
    }, now);
}
|
|
829
|
+
/**
 * Normalize a raw usage object into a schemaVersion-1 usage record.
 *
 * `source` is "operator-recorded" only when given exactly that; anything else
 * becomes "host-attested". `attestedAt` falls back to `now` unless a non-empty
 * string was supplied. Optional fields (`model`, the token counters, `note`)
 * are copied only when present; counters must pass `numeric`.
 *
 * @param {object} raw - Raw usage fields.
 * @param {*} now - Fallback `attestedAt` value.
 * @returns {object} The normalized usage record.
 */
function normalizeUsage(raw, now) {
    const isPresent = (value) => value !== undefined && value !== null && value !== "";
    const hasAttestedAt = typeof raw.attestedAt === "string" && raw.attestedAt;
    const usage = {
        schemaVersion: 1,
        source: raw.source === "operator-recorded" ? "operator-recorded" : "host-attested",
        attestedAt: hasAttestedAt ? raw.attestedAt : now
    };
    if (isPresent(raw.model)) {
        usage.model = String(raw.model);
    }
    // Copy counters in a fixed order so the record's key order stays stable.
    const counterFields = ["inputTokens", "outputTokens", "totalTokens", "cacheReadTokens", "cacheWriteTokens"];
    for (const field of counterFields) {
        const count = numeric(raw[field]);
        if (count !== undefined) {
            usage[field] = count;
        }
    }
    if (isPresent(raw.note)) {
        usage.note = String(raw.note);
    }
    return usage;
}
|
|
857
|
+
/**
 * Parse an optional numeric argument.
 *
 * Only finite numbers and non-blank numeric strings are accepted. Everything
 * else (absent values, blank strings, booleans, arrays, objects) yields
 * `undefined`, so a flag passed without a value (commonly parsed as `true`) or
 * a whitespace-only string is never coerced into a count of 1 or 0.
 *
 * @param {*} value - Raw argument value.
 * @returns {number|undefined} The finite number, or `undefined`.
 */
function numeric(value) {
    if (typeof value === "number") {
        return Number.isFinite(value) ? value : undefined;
    }
    if (typeof value !== "string") {
        return undefined;
    }
    const text = value.trim();
    if (text === "") {
        return undefined;
    }
    const n = Number(text);
    return Number.isFinite(n) ? n : undefined;
}
|
|
863
|
+
// ---------------------------------------------------------------------------
|
|
864
|
+
// Human formatters (CLI default text; --json emits the canonical payload).
|
|
865
|
+
// ---------------------------------------------------------------------------
|
|
866
|
+
/**
 * Human-readable form of a rate metric.
 *
 * @param {{state: string, rate: number|null, count: number, total: number}} r
 * @returns {string} `"n/a (0 samples)"` when the metric has no usable rate,
 *   otherwise `"<pct>% (<count>/<total>)"` with one decimal place.
 */
function formatRate(r) {
    // Also treat a missing or non-finite rate as "no data", whatever the state
    // label is; multiplying null/undefined would print "0.0%" or "NaN%".
    const hasRate = typeof r.rate === "number" && Number.isFinite(r.rate);
    if (r.state === "n/a" || !hasRate) {
        return `n/a (0 samples)`;
    }
    return `${((r.rate * 100)).toFixed(1)}% (${r.count}/${r.total})`;
}
|
|
871
|
+
/**
 * Human-readable form of a millisecond duration.
 *
 * @param {number|null|undefined} ms - Duration in milliseconds.
 * @returns {string} `"—"` when there is no usable duration, `"<n>ms"` below
 *   one second, otherwise seconds with one decimal place.
 */
function formatMs(ms) {
    // Missing or non-finite values render as a dash rather than "NaNs".
    if (ms === null || ms === undefined || !Number.isFinite(Number(ms))) {
        return "—";
    }
    if (ms < 1000) {
        return `${ms}ms`;
    }
    return `${(ms / 1000).toFixed(1)}s`;
}
|
|
878
|
+
/**
 * One-line human-readable form of a cost metric.
 *
 * Always starts with the state; the attested amount, estimated amount and
 * unpriced model list are appended only when present.
 *
 * @param {object} c - Cost metric (`state`, `currency`, `attestedUsd`,
 *   `estimatedUsd`, `unpricedModels`).
 * @returns {string} Space-separated `key=value` segments.
 */
function formatCost(c) {
    const { state, currency, attestedUsd, estimatedUsd, unpricedModels } = c;
    const segments = [
        `state=${state}`,
        attestedUsd !== null ? `attested=${currency} ${attestedUsd}` : null,
        estimatedUsd !== null ? `estimated=${currency} ${estimatedUsd}` : null,
        unpricedModels.length ? `unpriced-models=${unpricedModels.join(",")}` : null
    ];
    return segments.filter((segment) => segment !== null).join(" ");
}
|
|
888
|
+
/**
 * Render a per-run metrics report as the CLI's default multi-line text.
 *
 * @param {object} report - Report as produced by `deriveMetricsReport`.
 * @returns {string} Newline-joined lines; the models line appears only when
 *   at least one model was reported.
 */
function formatMetricsReport(report) {
    const { runId, freshness, scope, time, rates, usage, cost, nextAction } = report;
    const collab = report.collaboration;
    const approval = collab.timeToApproval;

    const inFlightTag = time.run.inFlight ? " (in-flight)" : "";
    // Time-to-approval is shown in whole seconds, or "n/a" without samples.
    const approvalTime = approval.meanMs === null ? "n/a" : `${Math.round(approval.meanMs / 1000)}s`;
    const cov = usage.coverage === null ? "n/a" : `${(usage.coverage * 100).toFixed(0)}%`;

    const lines = [
        `metrics ${runId} [${freshness.status}] app=${scope.app || "-"}`,
        ` time: run=${formatMs(time.run.wallClockMs)}${inFlightTag} active-task=${formatMs(time.activeTaskMs)} in-flight-items=${time.inFlight}`,
        ` failure-rate: ${formatRate(rates.failure)}`,
        ` verifier-pass: ${formatRate(rates.verifierPass)}`,
        ` cand-acceptance: ${formatRate(rates.candidateAcceptance)}`,
        ` collaboration: approvals=${collab.approvals} rejections=${collab.rejections} comments=${collab.comments} handoffs=${collab.handoffs} reviewers=${collab.reviewers} approval-rate=${formatRate(collab.approvalRate)} time-to-approval=${approvalTime} (${approval.samples} samples)`,
        ` usage: attested=${usage.attestedUnits}/${usage.units} units (coverage ${cov}), unreported=${usage.unreportedUnits}; tokens in=${usage.inputTokens} out=${usage.outputTokens} total=${usage.totalTokens}`,
        ` cost: ${formatCost(cost)}`
    ];
    if (usage.models.length) {
        lines.push(` models: ${usage.models.join(", ")}`);
    }
    lines.push(` next: ${nextAction}`);
    return lines.join("\n");
}
|
|
905
|
+
/**
 * Render a cross-repo metrics summary as the CLI's default multi-line text.
 *
 * @param {object} summary - Summary as produced by `deriveMetricsSummary`.
 * @returns {string} Newline-joined lines: headline, pooled rates, usage, cost,
 *   one line per app, one line per backend, then the next-action hint.
 */
function formatMetricsSummary(summary) {
    const { rates, usage } = summary;
    // Unreadable runs are mentioned only when there are any.
    const unreadableTag = summary.unreadableRuns ? ` (+${summary.unreadableRuns} unreadable)` : "";
    const cov = usage.coverage === null ? "n/a" : `${(usage.coverage * 100).toFixed(0)}%`;

    const appLines = [];
    for (const app of summary.byApp) {
        appLines.push(` app ${app.key}: runs=${app.runCount} verifier=${formatRate(app.rates.verifierPass)} cost=${formatCost(app.cost)}`);
    }
    const backendLines = [];
    for (const backend of summary.byBackend) {
        backendLines.push(` backend ${backend.key}: runs=${backend.runCount} failure=${formatRate(backend.rates.failure)}`);
    }

    return [
        `metrics summary scope=${summary.scope} runs=${summary.runCount}${unreadableTag}`,
        ` failure-rate: ${formatRate(rates.failure)}`,
        ` verifier-pass: ${formatRate(rates.verifierPass)}`,
        ` cand-acceptance: ${formatRate(rates.candidateAcceptance)}`,
        ` usage: attested=${usage.attestedUnits}/${usage.units} units (coverage ${cov}); tokens total=${usage.totalTokens}`,
        ` cost: ${formatCost(summary.cost)}`,
        ...appLines,
        ...backendLines,
        ` next: ${summary.nextAction}`
    ].join("\n");
}
|