@tangle-network/agent-eval 0.71.0 → 0.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,186 @@
1
+ import {
2
+ canonicalize
3
+ } from "./chunk-VSMTAMNK.js";
4
+ import {
5
+ ValidationError
6
+ } from "./chunk-3BFEG2F6.js";
7
+
8
+ // src/metrics.ts
9
/**
 * Exact-match price table, keyed by model id.
 *
 * Each entry carries an `input` and an `output` rate. `estimateCost` applies
 * them per 1,000 tokens (it divides the token counts by 1e3 before
 * multiplying). Ids that are not listed here fall through to the family
 * patterns in `FAMILY_PRICING`.
 */
var MODEL_PRICING = {
  "gpt-4o": { input: 0.0025, output: 0.01 },
  "gpt-4o-mini": { input: 0.00015, output: 0.0006 },
  "gpt-4-turbo": { input: 0.01, output: 0.03 },
  "claude-sonnet-4-20250514": { input: 0.003, output: 0.015 },
  "claude-opus-4-20250514": { input: 0.015, output: 0.075 },
  "claude-3-haiku-20240307": { input: 0.00025, output: 0.00125 }
};
17
/**
 * Family-level fallback prices as ordered `[pattern, price]` pairs.
 *
 * `resolveModelPricing` tests the normalized (lower-cased) model id against
 * each pattern in turn and returns the price of the FIRST match, so order is
 * significant: narrower patterns (e.g. `gpt-4o-mini`, `gemini.*flash`) must
 * stay ahead of the broader ones that would also match (`gpt-4o|gpt-4`,
 * `gemini`). Rates use the same per-1,000-token unit as `MODEL_PRICING`.
 */
var FAMILY_PRICING = [
  // Anthropic
  [/claude.*opus/, { input: 0.015, output: 0.075 }],
  [/claude.*haiku/, { input: 0.0008, output: 0.004 }],
  [/claude.*sonnet|claude-code|claude-sonnet/, { input: 0.003, output: 0.015 }],
  // OpenAI — most specific family first
  [/gpt-4o-mini/, { input: 0.00015, output: 0.0006 }],
  [/gpt-5|gpt-4\.1|o[134]\b/, { input: 0.00125, output: 0.01 }],
  [/gpt-4o|gpt-4/, { input: 0.0025, output: 0.01 }],
  // Other providers
  [/deepseek/, { input: 0.0003, output: 0.0011 }],
  [/glm|zhipu|zai/, { input: 0.0006, output: 0.0022 }],
  [/kimi|moonshot/, { input: 0.0006, output: 0.0025 }],
  [/qwen/, { input: 0.0004, output: 0.0012 }],
  [/gemini.*flash/, { input: 0.0001, output: 0.0004 }],
  [/gemini/, { input: 0.00125, output: 0.005 }],
  [/llama/, { input: 0.0002, output: 0.0006 }]
];
32
/**
 * Reduce a model id to the form used for pricing lookups.
 *
 * Everything from the first `@` onwards is dropped, surrounding whitespace is
 * trimmed and the result is lower-cased. Any `provider/` or `harness/` prefix
 * is left in place.
 *
 * @param {string} model - Raw model id.
 * @returns {string} The normalized id.
 */
function normalizeModelId(model) {
  const atIndex = model.indexOf("@");
  const withoutSnapshot = atIndex === -1 ? model : model.slice(0, atIndex);
  return withoutSnapshot.trim().toLowerCase();
}
35
/**
 * Resolve the price entry for a model id.
 *
 * Lookup order:
 *   1. exact match of the raw id in `MODEL_PRICING`;
 *   2. exact match of the normalized id (see `normalizeModelId`);
 *   3. first matching pattern in `FAMILY_PRICING`.
 *
 * Only OWN properties of `MODEL_PRICING` count as exact matches. A plain
 * `MODEL_PRICING[model]` lookup also finds inherited members, so ids such as
 * "constructor", "toString" or "__proto__" would come back as a truthy
 * non-price value and make `estimateCost` compute NaN.
 *
 * @param {string} model - Model id; anything that is not a string is treated
 *   as unpriced instead of throwing during normalization.
 * @returns {{ input: number, output: number } | null} The price entry, or
 *   `null` when nothing matches.
 */
function resolveModelPricing(model) {
  if (typeof model !== "string") return null;

  // Own-property lookup; falsy entries are treated as "no exact price".
  const ownPrice = (key) =>
    Object.prototype.hasOwnProperty.call(MODEL_PRICING, key) ? MODEL_PRICING[key] : void 0;

  const id = normalizeModelId(model);
  const exact = ownPrice(model) || ownPrice(id);
  if (exact) return exact;

  // Ordered fallback: the first family pattern that matches wins.
  for (const [pattern, price] of FAMILY_PRICING) {
    if (pattern.test(id)) return price;
  }
  return null;
}
44
/**
 * Report whether a model id resolves to a price entry.
 *
 * @param {string} model - Model id to check.
 * @returns {boolean} `true` unless `resolveModelPricing` returns `null`.
 */
function isModelPriced(model) {
  const pricing = resolveModelPricing(model);
  return pricing !== null;
}
47
+ var warnedUnpricedModels = /* @__PURE__ */ new Set(); // model ids estimateCost has already warned about, so each unpriced id is logged only once
48
/**
 * Approximate a token count from text length: one token per four characters
 * (`String.prototype.length` units), rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count; 0 for an empty string.
 */
function estimateTokens(text) {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
51
/**
 * Estimate the cost of a call from its token counts.
 *
 * The price entry comes from `resolveModelPricing`; its `input` and `output`
 * rates are applied per 1,000 tokens. When the model has no price entry the
 * function returns 0 and logs a `console.warn` the first time that model id
 * is seen (tracked in `warnedUnpricedModels`).
 *
 * @param {number} inputTokens - Prompt-side token count.
 * @param {number} outputTokens - Completion-side token count.
 * @param {string} model - Model id used to look up pricing.
 * @returns {number} Estimated cost, or 0 for an unpriced model.
 */
function estimateCost(inputTokens, outputTokens, model) {
  const pricing = resolveModelPricing(model);
  if (pricing) {
    const inputCost = inputTokens / 1e3 * pricing.input;
    const outputCost = outputTokens / 1e3 * pricing.output;
    return inputCost + outputCost;
  }

  // Unpriced model: warn once per id, then report a zero cost.
  const alreadyWarned = warnedUnpricedModels.has(model);
  if (!alreadyWarned) {
    warnedUnpricedModels.add(model);
    console.warn(
      `estimateCost: no pricing for model "${model}" \u2014 returning 0; add it to MODEL_PRICING/FAMILY_PRICING (cost/Pareto axes will be blank until then)`
    );
  }
  return 0;
}
64
/**
 * Running tally of token usage and estimated cost for a single model.
 */
var TokenCounter = class {
  // Cumulative totals across every `record` call.
  totalInput = 0;
  totalOutput = 0;
  totalCost = 0;
  // Model id handed to `estimateCost` for each recorded turn.
  model;

  /**
   * @param {string} [model="gpt-4o"] - Model id used for pricing.
   */
  constructor(model = "gpt-4o") {
    this.model = model;
  }

  /**
   * Add one turn's token counts to the totals.
   *
   * @param {number} inputTokens - Input tokens for the turn.
   * @param {number} outputTokens - Output tokens for the turn.
   * @returns {number} The cost of this turn alone, as given by `estimateCost`.
   */
  record(inputTokens, outputTokens) {
    this.totalInput = this.totalInput + inputTokens;
    this.totalOutput = this.totalOutput + outputTokens;
    const turnCost = estimateCost(inputTokens, outputTokens, this.model);
    this.totalCost = this.totalCost + turnCost;
    return turnCost;
  }

  /**
   * Estimate token counts from raw text with `estimateTokens`, then record them.
   *
   * @param {string} inputText - Text sent to the model.
   * @param {string} outputText - Text the model produced.
   * @returns {{ inputTokens: number, outputTokens: number, cost: number }}
   *   The estimated counts and the cost of this turn.
   */
  recordFromText(inputText, outputText) {
    const usage = {
      inputTokens: estimateTokens(inputText),
      outputTokens: estimateTokens(outputText)
    };
    return { ...usage, cost: this.record(usage.inputTokens, usage.outputTokens) };
  }

  /** @returns {number} Total input tokens recorded so far. */
  getTotalInput() {
    return this.totalInput;
  }

  /** @returns {number} Total output tokens recorded so far. */
  getTotalOutput() {
    return this.totalOutput;
  }

  /** @returns {number} Sum of all per-turn costs recorded so far. */
  getTotalCost() {
    return this.totalCost;
  }
};
97
/**
 * Builds one metrics record per turn by combining caller-supplied numbers
 * with a snapshot of workspace state read through `client`.
 */
var MetricsCollector = class {
  // Object exposing getTasks / getEvents / getApprovals / getVaultTree.
  client;
  // Workspace id passed to every client call.
  workspaceId;
  // Records produced by `collect`, in call order.
  metrics = [];

  /**
   * @param {object} client - Client used to query workspace state.
   * @param {string} workspaceId - Workspace to query.
   */
  constructor(client, workspaceId) {
    this.client = client;
    this.workspaceId = workspaceId;
  }

  /**
   * Snapshot the workspace, build the record for this turn, store it and
   * return it.
   *
   * `completionPercent` is `completionCriteriaMet / completionCriteriaTotal`
   * as a percentage, or 0 when the total is not greater than zero.
   *
   * @returns {Promise<object>} The record that was appended to the collector.
   */
  async collect(turn, responseLatencyMs, responseChars, codeBlocksProduced, blocksExtracted, completionCriteriaMet, completionCriteriaTotal, qualityScore, inputTokens = 0, outputTokens = 0, estimatedCostUsd = 0) {
    const { tasks, events, proposals, vaultFiles } = await this.getState();

    let completionPercent = 0;
    if (completionCriteriaTotal > 0) {
      completionPercent = completionCriteriaMet / completionCriteriaTotal * 100;
    }

    const record = {
      turn,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      tasks,
      events,
      proposals,
      vaultFiles: vaultFiles.length,
      responseLatencyMs,
      responseChars,
      codeBlocksProduced,
      blocksExtracted,
      qualityScore,
      inputTokens,
      outputTokens,
      estimatedCostUsd,
      // NOTE(review): this mirrors the per-turn cost rather than a running
      // total — confirm whether consumers accumulate it themselves.
      totalCostUsd: estimatedCostUsd,
      completionPercent
    };
    this.metrics.push(record);
    return record;
  }

  /**
   * Query tasks, events, approvals and the vault tree concurrently and
   * summarize them.
   *
   * @returns {Promise<object>} Task and event counts, approval counts per
   *   status, the vault tree as returned by the client, and zeroed
   *   `codeBlocks` / `generations` fields.
   */
  async getState() {
    const { client, workspaceId } = this;
    const [tasks, events, approvals, vaultFiles] = await Promise.all([
      client.getTasks(workspaceId),
      client.getEvents(workspaceId),
      client.getApprovals(workspaceId),
      client.getVaultTree(workspaceId)
    ]);

    // Approvals with any other status are not counted anywhere.
    const countWithStatus = (status) => approvals.filter((a) => a.status === status).length;

    return {
      tasks: tasks.length,
      events: events.length,
      proposals: {
        pending: countWithStatus("pending"),
        approved: countWithStatus("approved"),
        rejected: countWithStatus("rejected")
      },
      vaultFiles,
      codeBlocks: 0,
      generations: 0
    };
  }

  /** @returns {object[]} A shallow copy of the collected records. */
  getMetrics() {
    return this.metrics.slice();
  }

  /** @returns {number[]} `completionPercent` of each record, in turn order. */
  getConvergenceCurve() {
    return this.metrics.map(({ completionPercent }) => completionPercent);
  }
};
159
+
160
+ // src/agent-profile.ts
161
+ import { createHash } from "crypto";
162
/**
 * Compute a deterministic SHA-256 fingerprint of a profile's
 * behaviour-bearing fields.
 *
 * The hash covers the trimmed `model`, sorted copies of `skills` and `tools`
 * (so their order does not matter), `promptVersion` (null when absent) and
 * `metadata` (an empty object when absent). `profile.id` is used only in the
 * error message and does not feed the hash.
 *
 * @param {object} profile - Profile to fingerprint.
 * @returns {string} Hex-encoded SHA-256 digest.
 * @throws {ValidationError} When `model` is not a string or is blank.
 */
function agentProfileHash(profile) {
  const { model } = profile;
  const hasModel = typeof model === "string" && model.trim().length !== 0;
  if (!hasModel) {
    throw new ValidationError(`AgentProfile "${profile.id}" has no model \u2014 cannot hash`);
  }

  // Sort a copy so the caller's array is never reordered.
  const sortedCopy = (list) => [...list ?? []].sort();

  const behaviour = {
    model: model.trim(),
    skills: sortedCopy(profile.skills),
    promptVersion: profile.promptVersion ?? null,
    tools: sortedCopy(profile.tools),
    metadata: profile.metadata ?? {}
  };

  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(canonicalize(behaviour)));
  return hasher.digest("hex");
}
175
+
176
+ export {
177
+ MODEL_PRICING,
178
+ resolveModelPricing,
179
+ isModelPriced,
180
+ estimateTokens,
181
+ estimateCost,
182
+ TokenCounter,
183
+ MetricsCollector,
184
+ agentProfileHash
185
+ };
186
+ //# sourceMappingURL=chunk-SL55X4VN.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/metrics.ts","../src/agent-profile.ts"],"sourcesContent":["import type { ProductClient } from './client'\nimport type { DriverState, TurnMetrics } from './types'\n\ninterface TokenPrice {\n input: number\n output: number\n}\n\n/** Per-1K token pricing for exact model ids. */\nexport const MODEL_PRICING: Record<string, TokenPrice> = {\n 'gpt-4o': { input: 0.0025, output: 0.01 },\n 'gpt-4o-mini': { input: 0.00015, output: 0.0006 },\n 'gpt-4-turbo': { input: 0.01, output: 0.03 },\n 'claude-sonnet-4-20250514': { input: 0.003, output: 0.015 },\n 'claude-opus-4-20250514': { input: 0.015, output: 0.075 },\n 'claude-3-haiku-20240307': { input: 0.00025, output: 0.00125 },\n}\n\n/** Family-level pricing fallbacks (per-1K), matched against a normalized id\n * after exact lookup misses. Ordered — first match wins. Covers the model\n * ids actually used through the Tangle router + cli-bridge harnesses\n * (`claude-code/sonnet`, `opencode/zai-coding-plan/glm-5.1`,\n * `kimi-code/kimi-k2.6`, `deepseek-v4-pro`, `anthropic/claude-sonnet-4-6`, …),\n * none of which appear in the exact table above — without this they priced\n * to a silent $0, blanking every cost/Pareto axis downstream. 
*/\nconst FAMILY_PRICING: Array<[RegExp, TokenPrice]> = [\n [/claude.*opus/, { input: 0.015, output: 0.075 }],\n [/claude.*haiku/, { input: 0.0008, output: 0.004 }],\n [/claude.*sonnet|claude-code|claude-sonnet/, { input: 0.003, output: 0.015 }],\n [/gpt-4o-mini/, { input: 0.00015, output: 0.0006 }],\n [/gpt-5|gpt-4\\.1|o[134]\\b/, { input: 0.00125, output: 0.01 }],\n [/gpt-4o|gpt-4/, { input: 0.0025, output: 0.01 }],\n [/deepseek/, { input: 0.0003, output: 0.0011 }],\n [/glm|zhipu|zai/, { input: 0.0006, output: 0.0022 }],\n [/kimi|moonshot/, { input: 0.0006, output: 0.0025 }],\n [/qwen/, { input: 0.0004, output: 0.0012 }],\n [/gemini.*flash/, { input: 0.0001, output: 0.0004 }],\n [/gemini/, { input: 0.00125, output: 0.005 }],\n [/llama/, { input: 0.0002, output: 0.0006 }],\n]\n\n/** Normalize a model id for pricing: drop a `@snapshot` suffix, lowercase,\n * and keep the final harness/provider-prefixed segment so family regexes\n * match (`opencode/zai-coding-plan/glm-5.1` → `glm-5.1`). */\nfunction normalizeModelId(model: string): string {\n return (model.split('@')[0] ?? model).trim().toLowerCase()\n}\n\n/** Resolve pricing for a model id: exact table, then family fallback.\n * Returns null when the id matches nothing (caller decides — never a\n * silent-zero masquerading as a real $0 cost). */\nexport function resolveModelPricing(model: string): TokenPrice | null {\n if (MODEL_PRICING[model]) return MODEL_PRICING[model]\n const id = normalizeModelId(model)\n if (MODEL_PRICING[id]) return MODEL_PRICING[id]\n for (const [pattern, price] of FAMILY_PRICING) {\n if (pattern.test(id)) return price\n }\n return null\n}\n\n/** True when `model` has known pricing (exact or family). Lets cost-aware\n * callers distinguish a real $0 from an unpriced model. 
*/\nexport function isModelPriced(model: string): boolean {\n return resolveModelPricing(model) !== null\n}\n\nconst warnedUnpricedModels = new Set<string>()\n\n/** Estimate token count from string length (chars / 4 approximation) */\nexport function estimateTokens(text: string): number {\n return Math.ceil(text.length / 4)\n}\n\n/** Calculate cost in USD from token counts and model. Unknown models warn\n * once (not a silent zero) and return 0 so callers that ignore pricing keep\n * working; cost-sensitive callers should gate on {@link isModelPriced}. */\nexport function estimateCost(inputTokens: number, outputTokens: number, model: string): number {\n const pricing = resolveModelPricing(model)\n if (!pricing) {\n if (!warnedUnpricedModels.has(model)) {\n warnedUnpricedModels.add(model)\n console.warn(\n `estimateCost: no pricing for model \"${model}\" — returning 0; add it to ` +\n 'MODEL_PRICING/FAMILY_PRICING (cost/Pareto axes will be blank until then)',\n )\n }\n return 0\n }\n return (inputTokens / 1000) * pricing.input + (outputTokens / 1000) * pricing.output\n}\n\n/**\n * TokenCounter — accumulates token usage and cost across turns.\n */\nexport class TokenCounter {\n private totalInput = 0\n private totalOutput = 0\n private totalCost = 0\n private model: string\n\n constructor(model = 'gpt-4o') {\n this.model = model\n }\n\n /** Record tokens for a turn, returns per-turn cost */\n record(inputTokens: number, outputTokens: number): number {\n this.totalInput += inputTokens\n this.totalOutput += outputTokens\n const cost = estimateCost(inputTokens, outputTokens, this.model)\n this.totalCost += cost\n return cost\n }\n\n /** Estimate and record from raw text */\n recordFromText(\n inputText: string,\n outputText: string,\n ): { inputTokens: number; outputTokens: number; cost: number } {\n const inputTokens = estimateTokens(inputText)\n const outputTokens = estimateTokens(outputText)\n const cost = this.record(inputTokens, outputTokens)\n return { 
inputTokens, outputTokens, cost }\n }\n\n getTotalInput(): number {\n return this.totalInput\n }\n getTotalOutput(): number {\n return this.totalOutput\n }\n getTotalCost(): number {\n return this.totalCost\n }\n}\n\n/**\n * MetricsCollector — collects per-turn metrics from the product.\n *\n * After each turn, queries the product's APIs to measure state changes.\n */\nexport class MetricsCollector {\n private client: ProductClient\n private workspaceId: string\n private metrics: TurnMetrics[] = []\n constructor(client: ProductClient, workspaceId: string) {\n this.client = client\n this.workspaceId = workspaceId\n }\n\n /** Collect metrics after a turn completes */\n async collect(\n turn: number,\n responseLatencyMs: number,\n responseChars: number,\n codeBlocksProduced: number,\n blocksExtracted: number,\n completionCriteriaMet: number,\n completionCriteriaTotal: number,\n qualityScore?: number,\n inputTokens = 0,\n outputTokens = 0,\n estimatedCostUsd = 0,\n ): Promise<TurnMetrics> {\n const state = await this.getState()\n\n const m: TurnMetrics = {\n turn,\n timestamp: new Date().toISOString(),\n tasks: state.tasks,\n events: state.events,\n proposals: state.proposals,\n vaultFiles: state.vaultFiles.length,\n responseLatencyMs,\n responseChars,\n codeBlocksProduced,\n blocksExtracted,\n qualityScore,\n inputTokens,\n outputTokens,\n estimatedCostUsd,\n totalCostUsd: estimatedCostUsd,\n completionPercent:\n completionCriteriaTotal > 0 ? 
(completionCriteriaMet / completionCriteriaTotal) * 100 : 0,\n }\n\n this.metrics.push(m)\n return m\n }\n\n /** Get current product state */\n async getState(): Promise<DriverState> {\n const [tasks, events, approvals, vaultFiles] = await Promise.all([\n this.client.getTasks(this.workspaceId),\n this.client.getEvents(this.workspaceId),\n this.client.getApprovals(this.workspaceId),\n this.client.getVaultTree(this.workspaceId),\n ])\n\n return {\n tasks: tasks.length,\n events: events.length,\n proposals: {\n pending: approvals.filter((a) => a.status === 'pending').length,\n approved: approvals.filter((a) => a.status === 'approved').length,\n rejected: approvals.filter((a) => a.status === 'rejected').length,\n },\n vaultFiles,\n codeBlocks: 0,\n generations: 0,\n }\n }\n\n /** Get all collected metrics */\n getMetrics(): TurnMetrics[] {\n return [...this.metrics]\n }\n\n /** Get convergence curve (completion% over turns) */\n getConvergenceCurve(): number[] {\n return this.metrics.map((m) => m.completionPercent)\n }\n}\n","/**\n * @stable\n *\n * AgentProfile — the eval harness's unit of variation.\n *\n * A profile pins everything that changes agent behaviour for a benchmark\n * cell: the model, the active skills, the prompt version, the available\n * tools. Vary the profile — swap a model, add a skill — and re-run the suite\n * to benchmark the change. The scorecard keys a cell on\n * `(scenarioId, profileHash)`, so the model is not a separate axis: it lives\n * inside the profile, and two profiles with the same model but different\n * skills are different cells.\n *\n * `agentProfileHash` is the profile's behaviour identity. 
Two profiles that\n * produce the same agent behaviour share a hash (and a scorecard cell);\n * reordering `skills` or `tools` does not change it; the human-facing `id`\n * label does not affect it.\n */\n\nimport { createHash } from 'node:crypto'\nimport { ValidationError } from './errors'\nimport { canonicalize } from './pre-registration'\n\nexport interface AgentProfile {\n /** Human-facing label, e.g. `sonnet-legal-skills-v3`. Not part of the hash. */\n id: string\n /** Model snapshot id this profile pins, e.g. `claude-sonnet-4-6@2025-04-15`. */\n model: string\n /** Skill ids/versions active in this profile — the primary behaviour lever. */\n skills?: string[]\n /** Prompt version identifier. */\n promptVersion?: string\n /** Tool ids available to the agent. */\n tools?: string[]\n /** Any other behaviour-bearing knobs that should fingerprint into the hash. */\n metadata?: Record<string, string | number | boolean>\n}\n\n/**\n * Deterministic behaviour identity of a profile — a sha256 over the\n * behaviour-bearing fields. `skills` and `tools` are order-insensitive; the\n * `id` label is excluded. Throws on a profile with no `model` — an unkeyable\n * profile must fail loud rather than collapse into a blank-model cell.\n */\nexport function agentProfileHash(profile: AgentProfile): string {\n if (typeof profile.model !== 'string' || profile.model.trim().length === 0) {\n throw new ValidationError(`AgentProfile \"${profile.id}\" has no model — cannot hash`)\n }\n const behaviour = {\n model: profile.model.trim(),\n skills: [...(profile.skills ?? [])].sort(),\n promptVersion: profile.promptVersion ?? null,\n tools: [...(profile.tools ?? [])].sort(),\n metadata: profile.metadata ?? 
{},\n }\n return createHash('sha256')\n .update(JSON.stringify(canonicalize(behaviour)))\n .digest('hex')\n}\n"],"mappings":";;;;;;;;AASO,IAAM,gBAA4C;AAAA,EACvD,UAAU,EAAE,OAAO,OAAQ,QAAQ,KAAK;AAAA,EACxC,eAAe,EAAE,OAAO,OAAS,QAAQ,KAAO;AAAA,EAChD,eAAe,EAAE,OAAO,MAAM,QAAQ,KAAK;AAAA,EAC3C,4BAA4B,EAAE,OAAO,MAAO,QAAQ,MAAM;AAAA,EAC1D,0BAA0B,EAAE,OAAO,OAAO,QAAQ,MAAM;AAAA,EACxD,2BAA2B,EAAE,OAAO,OAAS,QAAQ,OAAQ;AAC/D;AASA,IAAM,iBAA8C;AAAA,EAClD,CAAC,gBAAgB,EAAE,OAAO,OAAO,QAAQ,MAAM,CAAC;AAAA,EAChD,CAAC,iBAAiB,EAAE,OAAO,MAAQ,QAAQ,KAAM,CAAC;AAAA,EAClD,CAAC,4CAA4C,EAAE,OAAO,MAAO,QAAQ,MAAM,CAAC;AAAA,EAC5E,CAAC,eAAe,EAAE,OAAO,OAAS,QAAQ,KAAO,CAAC;AAAA,EAClD,CAAC,2BAA2B,EAAE,OAAO,QAAS,QAAQ,KAAK,CAAC;AAAA,EAC5D,CAAC,gBAAgB,EAAE,OAAO,OAAQ,QAAQ,KAAK,CAAC;AAAA,EAChD,CAAC,YAAY,EAAE,OAAO,MAAQ,QAAQ,MAAO,CAAC;AAAA,EAC9C,CAAC,iBAAiB,EAAE,OAAO,MAAQ,QAAQ,MAAO,CAAC;AAAA,EACnD,CAAC,iBAAiB,EAAE,OAAO,MAAQ,QAAQ,MAAO,CAAC;AAAA,EACnD,CAAC,QAAQ,EAAE,OAAO,MAAQ,QAAQ,MAAO,CAAC;AAAA,EAC1C,CAAC,iBAAiB,EAAE,OAAO,MAAQ,QAAQ,KAAO,CAAC;AAAA,EACnD,CAAC,UAAU,EAAE,OAAO,QAAS,QAAQ,KAAM,CAAC;AAAA,EAC5C,CAAC,SAAS,EAAE,OAAO,MAAQ,QAAQ,KAAO,CAAC;AAC7C;AAKA,SAAS,iBAAiB,OAAuB;AAC/C,UAAQ,MAAM,MAAM,GAAG,EAAE,CAAC,KAAK,OAAO,KAAK,EAAE,YAAY;AAC3D;AAKO,SAAS,oBAAoB,OAAkC;AACpE,MAAI,cAAc,KAAK,EAAG,QAAO,cAAc,KAAK;AACpD,QAAM,KAAK,iBAAiB,KAAK;AACjC,MAAI,cAAc,EAAE,EAAG,QAAO,cAAc,EAAE;AAC9C,aAAW,CAAC,SAAS,KAAK,KAAK,gBAAgB;AAC7C,QAAI,QAAQ,KAAK,EAAE,EAAG,QAAO;AAAA,EAC/B;AACA,SAAO;AACT;AAIO,SAAS,cAAc,OAAwB;AACpD,SAAO,oBAAoB,KAAK,MAAM;AACxC;AAEA,IAAM,uBAAuB,oBAAI,IAAY;AAGtC,SAAS,eAAe,MAAsB;AACnD,SAAO,KAAK,KAAK,KAAK,SAAS,CAAC;AAClC;AAKO,SAAS,aAAa,aAAqB,cAAsB,OAAuB;AAC7F,QAAM,UAAU,oBAAoB,KAAK;AACzC,MAAI,CAAC,SAAS;AACZ,QAAI,CAAC,qBAAqB,IAAI,KAAK,GAAG;AACpC,2BAAqB,IAAI,KAAK;AAC9B,cAAQ;AAAA,QACN,uCAAuC,KAAK;AAAA,MAE9C;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACA,SAAQ,cAAc,MAAQ,QAAQ,QAAS,eAAe,MAAQ,QAAQ;AAChF;AAKO,IAAM,eAAN,MAAmB;AAAA,EAChB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,YAAY;AAAA,EACZ;AAAA,EAER,YAAY,QAAQ,UAAU;AAC5B,SAAK,QAAQ;AAAA,EACf;AAAA;AAAA,EAGA,OAAO,aAAqB
,cAA8B;AACxD,SAAK,cAAc;AACnB,SAAK,eAAe;AACpB,UAAM,OAAO,aAAa,aAAa,cAAc,KAAK,KAAK;AAC/D,SAAK,aAAa;AAClB,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,eACE,WACA,YAC6D;AAC7D,UAAM,cAAc,eAAe,SAAS;AAC5C,UAAM,eAAe,eAAe,UAAU;AAC9C,UAAM,OAAO,KAAK,OAAO,aAAa,YAAY;AAClD,WAAO,EAAE,aAAa,cAAc,KAAK;AAAA,EAC3C;AAAA,EAEA,gBAAwB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EACA,iBAAyB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EACA,eAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AACF;AAOO,IAAM,mBAAN,MAAuB;AAAA,EACpB;AAAA,EACA;AAAA,EACA,UAAyB,CAAC;AAAA,EAClC,YAAY,QAAuB,aAAqB;AACtD,SAAK,SAAS;AACd,SAAK,cAAc;AAAA,EACrB;AAAA;AAAA,EAGA,MAAM,QACJ,MACA,mBACA,eACA,oBACA,iBACA,uBACA,yBACA,cACA,cAAc,GACd,eAAe,GACf,mBAAmB,GACG;AACtB,UAAM,QAAQ,MAAM,KAAK,SAAS;AAElC,UAAM,IAAiB;AAAA,MACrB;AAAA,MACA,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,MAClC,OAAO,MAAM;AAAA,MACb,QAAQ,MAAM;AAAA,MACd,WAAW,MAAM;AAAA,MACjB,YAAY,MAAM,WAAW;AAAA,MAC7B;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,cAAc;AAAA,MACd,mBACE,0BAA0B,IAAK,wBAAwB,0BAA2B,MAAM;AAAA,IAC5F;AAEA,SAAK,QAAQ,KAAK,CAAC;AACnB,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,MAAM,WAAiC;AACrC,UAAM,CAAC,OAAO,QAAQ,WAAW,UAAU,IAAI,MAAM,QAAQ,IAAI;AAAA,MAC/D,KAAK,OAAO,SAAS,KAAK,WAAW;AAAA,MACrC,KAAK,OAAO,UAAU,KAAK,WAAW;AAAA,MACtC,KAAK,OAAO,aAAa,KAAK,WAAW;AAAA,MACzC,KAAK,OAAO,aAAa,KAAK,WAAW;AAAA,IAC3C,CAAC;AAED,WAAO;AAAA,MACL,OAAO,MAAM;AAAA,MACb,QAAQ,OAAO;AAAA,MACf,WAAW;AAAA,QACT,SAAS,UAAU,OAAO,CAAC,MAAM,EAAE,WAAW,SAAS,EAAE;AAAA,QACzD,UAAU,UAAU,OAAO,CAAC,MAAM,EAAE,WAAW,UAAU,EAAE;AAAA,QAC3D,UAAU,UAAU,OAAO,CAAC,MAAM,EAAE,WAAW,UAAU,EAAE;AAAA,MAC7D;AAAA,MACA;AAAA,MACA,YAAY;AAAA,MACZ,aAAa;AAAA,IACf;AAAA,EACF;AAAA;AAAA,EAGA,aAA4B;AAC1B,WAAO,CAAC,GAAG,KAAK,OAAO;AAAA,EACzB;AAAA;AAAA,EAGA,sBAAgC;AAC9B,WAAO,KAAK,QAAQ,IAAI,CAAC,MAAM,EAAE,iBAAiB;AAAA,EACpD;AACF;;;AC3MA,SAAS,kBAAkB;AAyBpB,SAAS,iBAAiB,SAA+B;AAC9D,MAAI,OAAO,QAAQ,UAAU,YAAY,QAAQ,MAAM,KAAK,EAAE,WAAW,GAAG;AAC1E,UAAM,IAAI,gBAAgB,iBAAiB,QAAQ,EAAE,mCAA8B;AAAA,EACrF;AACA,QAAM,YAAY;AAAA,IAChB,OAAO,QAAQ,MAAM,KAAK;AAAA,IAC1B,QAAQ,CAAC,GAAI,QAAQ,UAAU,CAAC,CAAE,EAAE,KAAK;AAAA,IACzC,
eAAe,QAAQ,iBAAiB;AAAA,IACxC,OAAO,CAAC,GAAI,QAAQ,SAAS,CAAC,CAAE,EAAE,KAAK;AAAA,IACvC,UAAU,QAAQ,YAAY,CAAC;AAAA,EACjC;AACA,SAAO,WAAW,QAAQ,EACvB,OAAO,KAAK,UAAU,aAAa,SAAS,CAAC,CAAC,EAC9C,OAAO,KAAK;AACjB;","names":[]}
@@ -1,10 +1,10 @@
1
1
  import {
2
2
  runCanaries,
3
3
  scoreRedTeamOutput
4
- } from "./chunk-VMAYE3LM.js";
4
+ } from "./chunk-4QJN7RDX.js";
5
5
  import {
6
6
  runCampaign
7
- } from "./chunk-6XQIEUQ2.js";
7
+ } from "./chunk-ZPSKPT3V.js";
8
8
  import {
9
9
  detectRewardHacking
10
10
  } from "./chunk-YV7J7X5N.js";
@@ -315,4 +315,4 @@ export {
315
315
  defaultProductionGate,
316
316
  runEval
317
317
  };
318
- //# sourceMappingURL=chunk-6QZUCFKM.js.map
318
+ //# sourceMappingURL=chunk-UD6EF73X.js.map
@@ -86,8 +86,10 @@ function buildDiagnosis(r) {
86
86
  const pct = (r.uncostedRecords / r.totalRecords * 100).toFixed(0);
87
87
  return [
88
88
  `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens).`,
89
- `${r.uncostedRecords} (${pct}%) have output tokens but costUsd=0 \u2014 cost ledger is mis-wired (no input-token`,
90
- "propagation from the runtime stream into RunRecord)."
89
+ `${r.uncostedRecords} (${pct}%) have output tokens but costUsd=0. Two distinct roots:`,
90
+ "(a) cost ledger mis-wired \u2014 no usage propagation from the runtime stream into RunRecord; or",
91
+ "(b) the model is unpriced at the source (sandbox/router returned $0 despite real tokens).",
92
+ "For (b), price the measured tokens against the substrate table (estimateCost) instead of leaving $0."
91
93
  ].join(" ");
92
94
  }
93
95
  return `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens, $${r.totalCostUsd.toFixed(4)}).`;
@@ -533,4 +535,4 @@ export {
533
535
  inMemoryCampaignStorage,
534
536
  runCampaign
535
537
  };
536
- //# sourceMappingURL=chunk-6XQIEUQ2.js.map
538
+ //# sourceMappingURL=chunk-ZPSKPT3V.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/campaign/run-campaign.ts","../src/integrity/backend-integrity.ts","../src/campaign/storage.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `runCampaign` — Pass A substrate primitive. ONE function that orchestrates\n * scenarios → dispatch → artifacts → judges → aggregates, with full\n * reproducibility (seed + manifest hash), cell-level resumability, bootstrap\n * CIs, and the `LabeledScenarioStore` capture flywheel.\n *\n * Improvement loops (optimizer / gate / autoOnPromote) ride on top of this\n * primitive but live in `presets/run-improvement-loop.ts`. This file keeps\n * the core orchestrator minimal — Phase 1 of the Pass A track.\n */\n\nimport { createHash } from 'node:crypto'\nimport { join } from 'node:path'\nimport { BackendIntegrityError, type BackendIntegrityReport } from '../integrity/backend-integrity'\nimport { confidenceInterval } from '../statistics'\nimport { type CampaignStorage, fsCampaignStorage } from './storage'\nimport type {\n CampaignAggregates,\n CampaignArtifactWriter,\n CampaignCellResult,\n CampaignCostMeter,\n CampaignResult,\n CampaignTokenUsage,\n CampaignTraceWriter,\n DispatchContext,\n DispatchFn,\n JudgeAggregate,\n JudgeConfig,\n JudgeScore,\n LabeledScenarioStore,\n Scenario,\n ScenarioAggregate,\n TraceSpan,\n} from './types'\n\nexport interface RunCampaignOptions<TScenario extends Scenario, TArtifact> {\n scenarios: TScenario[]\n dispatch: DispatchFn<TScenario, TArtifact>\n judges?: JudgeConfig<TArtifact, TScenario>[]\n /** Required for reproducibility. Default 42. */\n seed?: number\n /** Per-scenario replicates for CI bands. Default 1; raise to 5+ for\n * bootstrap-tight intervals on critical eval. */\n reps?: number\n /** When true (default), completed cells are cached by\n * (manifestHash, scenarioId, rep, generation). Re-runs skip cached cells. 
*/\n resumable?: boolean\n /** Optional store — when present, every artifact + judge score is captured\n * with the configured `captureSource`. Capture is default ON; pass `'off'`\n * to disable. */\n labeledStore?: LabeledScenarioStore | 'off'\n captureSource?: 'production-trace' | 'eval-run' | 'manual' | 'red-team' | 'synthetic'\n captureSourceVersionHash?: string\n /** Wall-clock cost cap across all cells. Cells beyond ceiling are skipped. */\n costCeiling?: number\n /** Max concurrent cells. Default 2. */\n maxConcurrency?: number\n /**\n * Per-cell dispatch deadline in ms. A `dispatch` that neither resolves nor\n * rejects within this window is a hang (a stalled model request, an\n * exhausted runtime resource, a backend that never closes its stream). When\n * set, the cell's `ctx.signal` is aborted and the cell is recorded as a LOUD\n * error (`dispatch exceeded <N>ms`) so the campaign proceeds and the failure\n * is visible — instead of one wedged cell silently hanging the whole run (and\n * every loop/CI job above it) forever. `undefined`/`0` = unbounded (legacy).\n */\n dispatchTimeoutMs?: number\n /** Required: where artifacts + traces land. */\n runDir: string\n /** Tracing posture. Default is the substrate's `FileSystemTraceStore` rooted\n * at `<runDir>/traces/`. `'off'` disables capture entirely — substrate\n * refuses this when the caller wires `autoOnPromote !== 'none'`. */\n tracing?: 'on' | 'off'\n /**\n * Per-cell usage expectation — the early, fine-grained sibling of the\n * batch `assertRealBackend` guard. A cell that produced an artifact (no\n * error) but reported `costUsd === 0` AND zero tokens is a stub: the\n * dispatch never reported LLM activity via `ctx.cost`. 
Modes:\n * - `'warn'` (default) — log the offending cell loudly, keep going.\n * - `'assert'` — throw `BackendIntegrityError` on the first such cell\n * (fail-fast; recommended for CI campaigns expecting real LLM calls).\n * - `'off'` — no check (replay / deterministic-only / offline analysis).\n */\n expectUsage?: 'assert' | 'warn' | 'off'\n /** Test seam — override the wall clock for deterministic tests. */\n now?: () => Date\n /** Test seam — override per-cell trace writer factory. */\n buildTraceWriter?: (cellId: string, dir: string) => CampaignTraceWriter\n /** Storage backend for run/cell dirs, the resumability cache, artifacts,\n * and trace spans. Default: the Node filesystem (`fsCampaignStorage`).\n * Pass `inMemoryCampaignStorage()` to run in a filesystem-less runtime\n * (Cloudflare Workers, Deno, edge) — the `CampaignResult` is still\n * produced; artifacts/traces just aren't persisted to disk. */\n storage?: CampaignStorage\n /**\n * Optional per-cell placement strategy. Returns an opaque string the\n * substrate forwards as `ctx.placement` to the Dispatch — placement-aware\n * Dispatches (e.g. `httpDispatch` from `/adapters/http`) use it to route\n * each cell to the right worker, region, or sandbox. When unset, every\n * cell receives `ctx.placement = undefined` and behaves identically to\n * the in-process case.\n *\n * @example\n * cellPlacement: ({ scenario }) => scenario.tags?.includes('eu') ? 'eu-west' : 'us-east'\n */\n cellPlacement?: (input: {\n scenario: TScenario\n rep: number\n generation?: number\n }) => string | undefined\n}\n\nexport async function runCampaign<TScenario extends Scenario, TArtifact>(\n opts: RunCampaignOptions<TScenario, TArtifact>,\n): Promise<CampaignResult<TArtifact, TScenario>> {\n const seed = opts.seed ?? 42\n const reps = opts.reps ?? 1\n const resumable = opts.resumable ?? true\n const maxConcurrency = opts.maxConcurrency ?? 2\n const now = opts.now ?? (() => new Date())\n const judges = opts.judges ?? 
[]\n const storage = opts.storage ?? fsCampaignStorage()\n\n storage.ensureDir(opts.runDir)\n\n const manifestHash = computeManifestHash({\n scenarios: opts.scenarios,\n judges: judges as unknown as JudgeConfig<unknown>[],\n dispatchRef: opts.dispatch.name || 'anonymous',\n seed,\n reps,\n })\n\n const startedAt = now()\n const cells: CampaignCellResult<TArtifact>[] = []\n const artifactsByPath: Record<string, string> = {}\n\n // Build the cell schedule (scenario × rep).\n const schedule: Array<{ scenario: TScenario; rep: number; cellId: string; cellSeed: number }> = []\n let cellIndex = 0\n for (const scenario of opts.scenarios) {\n for (let rep = 0; rep < reps; rep++) {\n const cellId = `${scenario.id}:${rep}`\n const cellSeed = seed + cellIndex\n schedule.push({ scenario, rep, cellId, cellSeed })\n cellIndex += 1\n }\n }\n\n // Concurrency-limited execution.\n let totalCostUsd = 0\n let costCeilingReached = false\n const abortController = new AbortController()\n // Concurrency lanes that drain the cell schedule. Named \"lanes\" — not\n // \"workers\" — to avoid clashing with the taxonomy's worker (= the agent\n // harness in a sandbox, invoked behind `dispatch`). See loop-taxonomy.md.\n const lanes: Promise<void>[] = []\n let nextIdx = 0\n const cellsRef = cells\n\n for (let i = 0; i < maxConcurrency; i++) {\n lanes.push(\n (async () => {\n while (true) {\n const myIdx = nextIdx++\n if (myIdx >= schedule.length) return\n const slot = schedule[myIdx]!\n if (costCeilingReached) {\n cellsRef.push(skippedCell(slot, 'cost_ceiling_reached'))\n continue\n }\n const result = await executeCell({\n slot,\n opts,\n manifestHash,\n resumable,\n now,\n storage,\n buildTraceWriter: opts.buildTraceWriter ?? defaultBuildTraceWriter(storage),\n signal: abortController.signal,\n dispatchTimeoutMs: opts.dispatchTimeoutMs,\n })\n cellsRef.push(result.cell)\n enforceCellUsage(result.cell, opts.expectUsage ?? 
'warn')\n totalCostUsd += result.cell.costUsd\n Object.assign(artifactsByPath, result.artifactsByPath)\n if (opts.costCeiling !== undefined && totalCostUsd >= opts.costCeiling) {\n costCeilingReached = true\n }\n // Capture into LabeledScenarioStore unless explicitly disabled.\n if (opts.labeledStore && opts.labeledStore !== 'off' && !result.cell.error) {\n await captureToStore({\n store: opts.labeledStore,\n cell: result.cell,\n scenario: slot.scenario,\n opts,\n now,\n }).catch((err) => {\n // Capture failures are non-fatal — log but don't crash the campaign.\n // (Trace would normally land here.)\n console.warn(\n `[runCampaign] capture failed for ${result.cell.cellId}: ${err instanceof Error ? err.message : String(err)}`,\n )\n })\n }\n }\n })(),\n )\n }\n await Promise.all(lanes)\n\n const endedAt = now()\n cellsRef.sort((a, b) => a.cellId.localeCompare(b.cellId))\n\n const aggregates = computeAggregates(\n cellsRef,\n judges as unknown as JudgeConfig<TArtifact>[],\n seed,\n )\n\n return {\n manifestHash,\n seed,\n startedAt: startedAt.toISOString(),\n endedAt: endedAt.toISOString(),\n durationMs: endedAt.getTime() - startedAt.getTime(),\n cells: cellsRef,\n aggregates,\n runDir: opts.runDir,\n artifactsByPath,\n scenarios: opts.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n }\n}\n\n// ── Internals ─────────────────────────────────────────────────────────\n\ninterface ExecuteCellArgs<TScenario extends Scenario, TArtifact> {\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number }\n opts: RunCampaignOptions<TScenario, TArtifact>\n manifestHash: string\n resumable: boolean\n now: () => Date\n storage: CampaignStorage\n buildTraceWriter: (cellId: string, dir: string) => CampaignTraceWriter\n signal: AbortSignal\n dispatchTimeoutMs?: number\n}\n\nasync function executeCell<TScenario extends Scenario, TArtifact>(\n args: ExecuteCellArgs<TScenario, TArtifact>,\n): Promise<{ cell: CampaignCellResult<TArtifact>; artifactsByPath: 
Record<string, string> }> {\n const storage = args.storage\n const cellDir = join(args.opts.runDir, args.slot.cellId.replace(/[^a-zA-Z0-9_-]/g, '_'))\n storage.ensureDir(cellDir)\n\n // Resumability: cache key = (manifestHash, scenarioId, rep)\n const cachePath = join(cellDir, 'cached-result.json')\n if (args.resumable) {\n const raw = storage.read(cachePath)\n if (raw !== undefined) {\n try {\n const cached = JSON.parse(raw) as CampaignCellResult<TArtifact>\n if (cached.cellId === args.slot.cellId) {\n return { cell: { ...cached, cached: true }, artifactsByPath: {} }\n }\n } catch {\n // Corrupt cache — fall through to re-run.\n }\n }\n }\n\n const startMs = Date.now()\n const trace = args.buildTraceWriter(args.slot.cellId, cellDir)\n const artifactsByPath: Record<string, string> = {}\n const artifacts: CampaignArtifactWriter = {\n async write(path, content) {\n const fullPath = join(cellDir, path)\n storage.ensureDir(join(fullPath, '..'))\n storage.write(fullPath, content)\n artifactsByPath[`${args.slot.cellId}/${path}`] = fullPath\n return fullPath\n },\n async writeJson(path, value) {\n return artifacts.write(path, JSON.stringify(value, null, 2))\n },\n }\n let costSoFar = 0\n const tokensSoFar: CampaignTokenUsage = { input: 0, output: 0 }\n const cost: CampaignCostMeter = {\n observe(amount, source) {\n costSoFar += amount\n trace.span(`cost.${source}`, { amountUsd: amount }).end()\n },\n observeTokens(usage) {\n tokensSoFar.input += usage.input\n tokensSoFar.output += usage.output\n if (usage.cached) tokensSoFar.cached = (tokensSoFar.cached ?? 0) + usage.cached\n },\n current() {\n return costSoFar\n },\n tokens() {\n return { ...tokensSoFar }\n },\n }\n\n const placement = args.opts.cellPlacement?.({\n scenario: args.slot.scenario,\n rep: args.slot.rep,\n })\n\n // Per-cell abort signal, chained to the campaign signal. 
The dispatch sees\n // THIS signal so a timeout (below) can abort just this cell's in-flight work\n // without tearing down sibling cells — and a signal-honoring dispatch\n // releases its open request instead of leaking it past the deadline.\n const cellAbort = new AbortController()\n const onCampaignAbort = () => cellAbort.abort((args.signal as { reason?: unknown }).reason)\n if (args.signal.aborted) cellAbort.abort((args.signal as { reason?: unknown }).reason)\n else args.signal.addEventListener('abort', onCampaignAbort, { once: true })\n\n const ctx: DispatchContext = {\n cellId: args.slot.cellId,\n rep: args.slot.rep,\n seed: args.slot.cellSeed,\n signal: cellAbort.signal,\n trace,\n artifacts,\n cost,\n placement,\n }\n\n let artifact: TArtifact | undefined\n let errorMessage: string | undefined\n const timeoutMs = args.dispatchTimeoutMs\n let timeoutTimer: ReturnType<typeof setTimeout> | undefined\n try {\n const dispatched = args.opts.dispatch(args.slot.scenario, ctx)\n if (timeoutMs !== undefined && timeoutMs > 0) {\n // A dispatch that never settles (stalled model request, exhausted runtime\n // resource, a stream that never closes) must NOT hang the cell — and with\n // it the lane, the campaign, the loop, the CI job — forever. Race it\n // against the deadline; on timeout, abort the cell and fail it LOUD.\n artifact = await Promise.race([\n dispatched,\n new Promise<never>((_, reject) => {\n timeoutTimer = setTimeout(() => {\n cellAbort.abort(new Error('dispatch timeout'))\n reject(\n new Error(\n `dispatch exceeded ${timeoutMs}ms for cell '${args.slot.cellId}' — aborted and failed loud (no silent hang)`,\n ),\n )\n }, timeoutMs)\n if (typeof (timeoutTimer as { unref?: () => void }).unref === 'function')\n (timeoutTimer as { unref: () => void }).unref()\n }),\n ])\n } else {\n artifact = await dispatched\n }\n } catch (err) {\n errorMessage = err instanceof Error ? 
err.message : String(err)\n } finally {\n if (timeoutTimer) clearTimeout(timeoutTimer)\n args.signal.removeEventListener('abort', onCampaignAbort)\n }\n\n // Run judges (only if we have an artifact). A judge that throws invalidates\n // the cell — recorded as `error`, NOT folded into a fake composite:0 (a fake\n // zero is indistinguishable from a real zero and poisons every aggregate).\n const judgeScores: Record<string, JudgeScore> = {}\n if (artifact !== undefined) {\n for (const judge of args.opts.judges ?? []) {\n if (judge.appliesTo && !judge.appliesTo(args.slot.scenario)) continue\n try {\n judgeScores[judge.name] = await runJudgeCell(judge, {\n artifact,\n scenario: args.slot.scenario,\n signal: args.signal,\n })\n } catch (err) {\n errorMessage = `judge '${judge.name}' failed: ${err instanceof Error ? err.message : String(err)}`\n break\n }\n }\n }\n\n await trace.flush()\n\n const cell: CampaignCellResult<TArtifact> = {\n cellId: args.slot.cellId,\n scenarioId: args.slot.scenario.id,\n rep: args.slot.rep,\n artifact: (artifact ?? null) as TArtifact,\n judgeScores,\n costUsd: costSoFar,\n tokenUsage: { ...tokensSoFar },\n durationMs: Date.now() - startMs,\n seed: args.slot.cellSeed,\n cached: false,\n error: errorMessage,\n }\n\n if (!errorMessage && args.resumable) {\n storage.write(cachePath, JSON.stringify(cell))\n }\n\n return { cell, artifactsByPath }\n}\n\n/**\n * Per-cell stub guard. A cell that produced an artifact (no error) but reported\n * `costUsd === 0` AND zero tokens means the dispatch never called `ctx.cost` —\n * i.e. it ran against a stub or silently dropped its usage. `'warn'` logs it,\n * `'assert'` throws (fail-fast), `'off'` skips. 
An errored/skipped cell or a\n * deterministic judge-only run that genuinely made no LLM call is not flagged.\n */\nfunction enforceCellUsage<TArtifact>(\n cell: CampaignCellResult<TArtifact>,\n mode: 'assert' | 'warn' | 'off',\n): void {\n if (mode === 'off' || cell.error) return\n if (cell.artifact === null || cell.artifact === undefined) return\n const zeroTokens = cell.tokenUsage.input === 0 && cell.tokenUsage.output === 0\n if (cell.costUsd !== 0 || !zeroTokens) return\n const msg = `cell '${cell.cellId}' produced an artifact but reported zero cost and zero tokens — the dispatch never reported LLM usage via ctx.cost.observe/observeTokens (a stub cell)`\n if (mode === 'assert') {\n const report: BackendIntegrityReport = {\n totalRecords: 1,\n stubRecords: 1,\n realRecords: 0,\n uncostedRecords: 0,\n totalInputTokens: 0,\n totalOutputTokens: 0,\n totalCostUsd: 0,\n verdict: 'stub',\n diagnosis: msg,\n }\n throw new BackendIntegrityError(`expectUsage: ${msg}`, report)\n }\n // eslint-disable-next-line no-console\n console.warn(`[runCampaign] expectUsage: ${msg}`)\n}\n\nasync function runJudgeCell<TArtifact, TScenario extends Scenario>(\n judge: JudgeConfig<TArtifact, TScenario>,\n input: { artifact: TArtifact; scenario: TScenario; signal: AbortSignal },\n): Promise<JudgeScore> {\n return judge.score(input)\n}\n\nfunction defaultBuildTraceWriter(\n storage: CampaignStorage,\n): (cellId: string, dir: string) => CampaignTraceWriter {\n return (cellId, dir) => {\n const spans: Array<Record<string, unknown>> = []\n return {\n span(name, attributes) {\n const startMs = Date.now()\n const record: Record<string, unknown> = { name, cellId, startMs, ...(attributes ?? 
{}) }\n const finish: TraceSpan = {\n end(endAttrs) {\n record.durationMs = Date.now() - startMs\n if (endAttrs) Object.assign(record, endAttrs)\n spans.push(record)\n },\n setAttribute(key, value) {\n record[key] = value\n },\n }\n return finish\n },\n async flush() {\n storage.write(join(dir, 'spans.jsonl'), spans.map((s) => JSON.stringify(s)).join('\\n'))\n },\n }\n }\n}\n\nfunction skippedCell<TScenario extends Scenario, TArtifact>(\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number },\n reason: string,\n): CampaignCellResult<TArtifact> {\n return {\n cellId: slot.cellId,\n scenarioId: slot.scenario.id,\n rep: slot.rep,\n artifact: null as unknown as TArtifact,\n judgeScores: {},\n costUsd: 0,\n tokenUsage: { input: 0, output: 0 },\n durationMs: 0,\n seed: slot.cellSeed,\n cached: false,\n error: `skipped: ${reason}`,\n }\n}\n\ninterface CaptureArgs<TScenario extends Scenario, TArtifact> {\n store: LabeledScenarioStore\n cell: CampaignCellResult<TArtifact>\n scenario: TScenario\n opts: RunCampaignOptions<TScenario, TArtifact>\n now: () => Date\n}\n\nasync function captureToStore<TScenario extends Scenario, TArtifact>(\n args: CaptureArgs<TScenario, TArtifact>,\n): Promise<void> {\n await args.store.observe({\n scenario: args.scenario,\n artifact: args.cell.artifact,\n judgeScores: args.cell.judgeScores,\n source: args.opts.captureSource ?? 'eval-run',\n sourceVersionHash: args.opts.captureSourceVersionHash ?? 
'unknown',\n capturedAt: args.now().toISOString(),\n redactionStatus: 'raw',\n })\n}\n\n// ── Aggregates + manifest hash ────────────────────────────────────────\n\nfunction computeManifestHash(input: {\n scenarios: Scenario[]\n judges: JudgeConfig<unknown>[]\n dispatchRef: string\n seed: number\n reps: number\n}): string {\n const canonical = {\n scenarios: input.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n judges: input.judges.map((j) => ({ name: j.name, dims: j.dimensions.map((d) => d.key) })),\n dispatch: input.dispatchRef,\n seed: input.seed,\n reps: input.reps,\n }\n return createHash('sha256').update(JSON.stringify(canonical)).digest('hex')\n}\n\nfunction computeAggregates<TArtifact>(\n cells: CampaignCellResult<TArtifact>[],\n judges: JudgeConfig<TArtifact>[],\n seed: number,\n): CampaignAggregates {\n const byJudge: Record<string, JudgeAggregate> = {}\n for (const judge of judges) {\n const scores: number[] = []\n for (const cell of cells) {\n const s = cell.judgeScores[judge.name]\n if (s !== undefined) scores.push(s.composite)\n }\n byJudge[judge.name] = aggregate(scores, seed)\n }\n const byScenario: Record<string, ScenarioAggregate> = {}\n const scenarioGroups = new Map<string, number[]>()\n for (const cell of cells) {\n const composites = Object.values(cell.judgeScores).map((s) => s.composite)\n if (composites.length === 0) continue\n const mean = composites.reduce((a, b) => a + b, 0) / composites.length\n const arr = scenarioGroups.get(cell.scenarioId) ?? 
[]\n arr.push(mean)\n scenarioGroups.set(cell.scenarioId, arr)\n }\n for (const [scenarioId, samples] of scenarioGroups) {\n const ag = aggregate(samples, seed)\n byScenario[scenarioId] = { meanComposite: ag.mean, ci95: ag.ci95, n: ag.n }\n }\n return {\n byJudge,\n byScenario,\n totalCostUsd: cells.reduce((a, c) => a + c.costUsd, 0),\n cellsExecuted: cells.filter((c) => !c.error).length,\n cellsSkipped: cells.filter((c) => c.error?.startsWith('skipped:')).length,\n cellsCached: cells.filter((c) => c.cached).length,\n cellsFailed: cells.filter((c) => c.error && !c.error.startsWith('skipped:')).length,\n }\n}\n\n// Percentile bootstrap CI95 via seeded resampling. Deterministic for a given\n// seed — same campaign re-run produces identical CI bands. Falls back to\n// degenerate intervals at n<=1 (the bootstrap is undefined there).\nfunction aggregate(samples: number[], seed: number): JudgeAggregate {\n const n = samples.length\n if (n === 0) return { mean: 0, stdev: 0, ci95: [0, 0], n: 0 }\n const mean = samples.reduce((a, b) => a + b, 0) / n\n const variance = samples.reduce((a, b) => a + (b - mean) ** 2, 0) / Math.max(1, n - 1)\n const stdev = Math.sqrt(variance)\n const ci = confidenceInterval(samples, 0.95, { seed, resamples: 1000 })\n return { mean, stdev, ci95: [ci.lower, ci.upper], n }\n}\n","/**\n * Backend-integrity guard: distinguish \"agent failed\" from \"eval ran against\n * a stub / unconfigured backend.\" Without this guard a canonical eval can\n * silently report `0/N passed` and look like an agent-quality problem when\n * the LLM was never actually called — the failure mode we just hit running\n * the 4-vertical parallel eval (legal-sandbox-stub returned hard-coded 33-104\n * char strings; gtm/creative defaulted to a cli-bridge that wasn't running).\n *\n * The shape:\n *\n * const report = summarizeBackendIntegrity(records)\n * assertRealBackend(records) // throws BackendIntegrityError if 100% stub\n *\n * A record is \"stub-mode\" if its 
`tokenUsage.input === 0 && tokenUsage.output === 0`.\n * (`costUsd` alone is unreliable — some backends successfully call LLMs but\n * don't propagate pricing, producing real tokens with $0 cost.)\n *\n * Verdicts:\n * - `real` — at least one record has nonzero token usage\n * - `stub` — every record is stub-mode (eval ran blind)\n * - `mixed` — some records real, some stub (partial backend failure;\n * often the 429-cascade or auth-half-failed case)\n */\n\nimport { AgentEvalError } from '../errors'\nimport type { RunRecord } from '../run-record'\n\nexport interface BackendIntegrityReport {\n /** Total records inspected. */\n totalRecords: number\n /** Records with input=0 AND output=0 (a stub fingerprint). */\n stubRecords: number\n /** Records with nonzero token usage (real LLM activity). */\n realRecords: number\n /** Records where output>0 but costUsd=0 (real LLM, broken cost ledger). */\n uncostedRecords: number\n /** Sum of input tokens across all records. */\n totalInputTokens: number\n /** Sum of output tokens across all records. */\n totalOutputTokens: number\n /** Sum of costUsd across all records. */\n totalCostUsd: number\n /** Worst-case integrity verdict. */\n verdict: 'real' | 'mixed' | 'stub'\n /** Human-readable diagnosis suitable for terminal output. */\n diagnosis: string\n}\n\n/**\n * Error thrown when an integrity assertion fails. Caller can pattern-match\n * by `code === 'AGENT_EVAL_BACKEND_STUB'` to differentiate from other\n * errors.\n */\nexport class BackendIntegrityError extends AgentEvalError {\n constructor(\n message: string,\n public readonly report: BackendIntegrityReport,\n ) {\n super('backend_integrity', message)\n }\n}\n\nfunction isStubRecord(rec: RunRecord): boolean {\n return rec.tokenUsage.input === 0 && rec.tokenUsage.output === 0\n}\n\nfunction isUncostedRecord(rec: RunRecord): boolean {\n return rec.tokenUsage.output > 0 && rec.costUsd === 0\n}\n\n/**\n * Inspect a batch of RunRecords and return an integrity report. 
Pure\n * function — no I/O, no logging. The caller decides what to do with the\n * verdict (print warning, throw, gate CI, etc.).\n */\nexport function summarizeBackendIntegrity(\n records: ReadonlyArray<RunRecord>,\n): BackendIntegrityReport {\n const totalRecords = records.length\n let stubRecords = 0\n let realRecords = 0\n let uncostedRecords = 0\n let totalInputTokens = 0\n let totalOutputTokens = 0\n let totalCostUsd = 0\n for (const rec of records) {\n totalInputTokens += rec.tokenUsage.input\n totalOutputTokens += rec.tokenUsage.output\n totalCostUsd += rec.costUsd\n if (isStubRecord(rec)) stubRecords++\n else realRecords++\n if (isUncostedRecord(rec)) uncostedRecords++\n }\n const verdict: BackendIntegrityReport['verdict'] =\n totalRecords === 0\n ? 'stub'\n : stubRecords === totalRecords\n ? 'stub'\n : stubRecords === 0\n ? 'real'\n : 'mixed'\n const diagnosis = buildDiagnosis({\n totalRecords,\n stubRecords,\n realRecords,\n uncostedRecords,\n totalInputTokens,\n totalOutputTokens,\n totalCostUsd,\n verdict,\n })\n return {\n totalRecords,\n stubRecords,\n realRecords,\n uncostedRecords,\n totalInputTokens,\n totalOutputTokens,\n totalCostUsd,\n verdict,\n diagnosis,\n }\n}\n\nfunction buildDiagnosis(r: Omit<BackendIntegrityReport, 'diagnosis'>): string {\n if (r.totalRecords === 0) {\n return 'no records — eval produced zero runs; backend likely failed before first turn'\n }\n if (r.verdict === 'stub') {\n return [\n `all ${r.totalRecords} records have zero token usage — the LLM backend was never called.`,\n 'common causes: --backend sandbox without a sandbox bridge running; stub model returning hard-coded strings;',\n 'auth misconfigured so requests were silently dropped before the LLM. 
Re-run with --backend tcloud and TANGLE_API_KEY set,',\n 'or boot the cli-bridge / sandbox before invoking the eval.',\n ].join(' ')\n }\n if (r.verdict === 'mixed') {\n const pct = ((r.stubRecords / r.totalRecords) * 100).toFixed(0)\n return [\n `${r.stubRecords}/${r.totalRecords} records (${pct}%) have zero token usage — the backend partially failed.`,\n 'common causes: rate-limit cascade (429s after the first N personas);',\n 'transient auth expiry mid-run; provider outage. Treat the affected records as missing data, not agent failures.',\n ].join(' ')\n }\n // verdict === 'real'\n if (r.uncostedRecords > 0) {\n const pct = ((r.uncostedRecords / r.totalRecords) * 100).toFixed(0)\n return [\n `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens).`,\n `${r.uncostedRecords} (${pct}%) have output tokens but costUsd=0 — cost ledger is mis-wired (no input-token`,\n 'propagation from the runtime stream into RunRecord).',\n ].join(' ')\n }\n return `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens, $${r.totalCostUsd.toFixed(4)}).`\n}\n\n/**\n * Throw BackendIntegrityError if the verdict is 'stub' — i.e. every record\n * shows zero LLM activity. Non-strict callers can pass `{ allowMixed: false }`\n * to also reject mixed verdicts (recommended for CI gates).\n *\n * Real backends pass through silently.\n */\nexport function assertRealBackend(\n records: ReadonlyArray<RunRecord>,\n opts: { allowMixed?: boolean } = {},\n): BackendIntegrityReport {\n const report = summarizeBackendIntegrity(records)\n const allowMixed = opts.allowMixed ?? 
true\n if (report.verdict === 'stub') {\n throw new BackendIntegrityError(\n `backend-integrity: ran against a stub or unconfigured backend — ${report.diagnosis}`,\n report,\n )\n }\n if (!allowMixed && report.verdict === 'mixed') {\n throw new BackendIntegrityError(\n `backend-integrity: partial backend failure rejected — ${report.diagnosis}`,\n report,\n )\n }\n return report\n}\n","import { createRequire } from 'node:module'\n\n/**\n * @experimental\n *\n * `CampaignStorage` — the filesystem seam `runCampaign` writes through\n * (run/cell dirs, the resumability cache, per-cell artifacts, trace spans).\n *\n * The default (`fsCampaignStorage`) is the Node filesystem — identical\n * behavior to the inline `node:fs` calls it replaces, so existing CLI\n * consumers are unaffected. `inMemoryCampaignStorage` keeps everything in a\n * `Map`, so the substrate runs in environments WITHOUT a filesystem\n * (Cloudflare Workers, Deno Deploy, other edge runtimes) — the campaign\n * still produces its `CampaignResult` (cells + aggregates) in memory;\n * artifacts/traces simply aren't persisted to disk.\n *\n * Paths are opaque keys to the in-memory adapter — it does not parse them,\n * so the same `join(...)`-built paths work unchanged across both adapters.\n */\nexport interface CampaignStorage {\n /** Ensure a directory exists (recursive). No-op for in-memory. */\n ensureDir(dir: string): void\n /** Does this path exist (as a written file or an ensured dir)? */\n exists(path: string): boolean\n /** Read a UTF-8 file; `undefined` when missing or unreadable. */\n read(path: string): string | undefined\n /** Write a file (string or bytes). Parent dir is assumed ensured. */\n write(path: string, content: string | Uint8Array): void\n}\n\n/** Node-filesystem storage — the default. 
Lazily requires `node:fs` so the\n * module imports cleanly in non-Node runtimes (where the caller passes\n * `inMemoryCampaignStorage` instead and never constructs this).\n *\n * `createRequire(import.meta.url)` is the ESM-native lazy require — a bare\n * `require` is a ReferenceError under `\"type\": \"module\"`, which is exactly\n * the shape this package publishes. */\nexport function fsCampaignStorage(): CampaignStorage {\n const nodeRequire = createRequire(import.meta.url)\n const { existsSync, mkdirSync, readFileSync, writeFileSync } = nodeRequire(\n 'node:fs',\n ) as typeof import('node:fs')\n return {\n ensureDir(dir) {\n if (!existsSync(dir)) mkdirSync(dir, { recursive: true })\n },\n exists(path) {\n return existsSync(path)\n },\n read(path) {\n try {\n return readFileSync(path, 'utf8')\n } catch {\n return undefined\n }\n },\n write(path, content) {\n writeFileSync(path, content as Uint8Array)\n },\n }\n}\n\n/** In-memory storage for filesystem-less runtimes. Artifacts + trace spans\n * live in a `Map` for the duration of the run; the `CampaignResult` is\n * fully populated, but nothing is persisted to disk. */\nexport function inMemoryCampaignStorage(): CampaignStorage {\n const files = new Map<string, string | Uint8Array>()\n const dirs = new Set<string>()\n return {\n ensureDir(dir) {\n dirs.add(dir)\n },\n exists(path) {\n return files.has(path) || dirs.has(path)\n },\n read(path) {\n const value = files.get(path)\n if (value === undefined) return undefined\n return typeof value === 'string' ? 
value : new TextDecoder().decode(value)\n },\n write(path, content) {\n files.set(path, content)\n },\n }\n}\n"],"mappings":";;;;;;;;AAaA,SAAS,kBAAkB;AAC3B,SAAS,YAAY;;;ACuCd,IAAM,wBAAN,cAAoC,eAAe;AAAA,EACxD,YACE,SACgB,QAChB;AACA,UAAM,qBAAqB,OAAO;AAFlB;AAAA,EAGlB;AAAA,EAHkB;AAIpB;AAEA,SAAS,aAAa,KAAyB;AAC7C,SAAO,IAAI,WAAW,UAAU,KAAK,IAAI,WAAW,WAAW;AACjE;AAEA,SAAS,iBAAiB,KAAyB;AACjD,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,YAAY;AACtD;AAOO,SAAS,0BACd,SACwB;AACxB,QAAM,eAAe,QAAQ;AAC7B,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,MAAI,kBAAkB;AACtB,MAAI,mBAAmB;AACvB,MAAI,oBAAoB;AACxB,MAAI,eAAe;AACnB,aAAW,OAAO,SAAS;AACzB,wBAAoB,IAAI,WAAW;AACnC,yBAAqB,IAAI,WAAW;AACpC,oBAAgB,IAAI;AACpB,QAAI,aAAa,GAAG,EAAG;AAAA,QAClB;AACL,QAAI,iBAAiB,GAAG,EAAG;AAAA,EAC7B;AACA,QAAM,UACJ,iBAAiB,IACb,SACA,gBAAgB,eACd,SACA,gBAAgB,IACd,SACA;AACV,QAAM,YAAY,eAAe;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAAS,eAAe,GAAsD;AAC5E,MAAI,EAAE,iBAAiB,GAAG;AACxB,WAAO;AAAA,EACT;AACA,MAAI,EAAE,YAAY,QAAQ;AACxB,WAAO;AAAA,MACL,OAAO,EAAE,YAAY;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AACA,MAAI,EAAE,YAAY,SAAS;AACzB,UAAM,OAAQ,EAAE,cAAc,EAAE,eAAgB,KAAK,QAAQ,CAAC;AAC9D,WAAO;AAAA,MACL,GAAG,EAAE,WAAW,IAAI,EAAE,YAAY,aAAa,GAAG;AAAA,MAClD;AAAA,MACA;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AAEA,MAAI,EAAE,kBAAkB,GAAG;AACzB,UAAM,OAAQ,EAAE,kBAAkB,EAAE,eAAgB,KAAK,QAAQ,CAAC;AAClE,WAAO;AAAA,MACL,GAAG,EAAE,YAAY,uCAAuC,EAAE,gBAAgB,SAAS,EAAE,iBAAiB;AAAA,MACtG,GAAG,EAAE,eAAe,KAAK,GAAG;AAAA,MAC5B;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AACA,SAAO,GAAG,EAAE,YAAY,uCAAuC,EAAE,gBAAgB,SAAS,EAAE,iBAAiB,aAAa,EAAE,aAAa,QAAQ,CAAC,CAAC;AACrJ;AASO,SAAS,kBACd,SACA,OAAiC,CAAC,GACV;AACxB,QAAM,SAAS,0BAA0B,OAAO;AAChD,QAAM,aAAa,KAAK,cAAc;AACtC,MAAI,OAAO,YAAY,QAAQ;AAC7B,UAAM,IAAI;AAAA,MACR,wEAAmE,OAAO,SAAS;AAAA,MACnF;AAAA,IACF;AAAA,EACF;AACA,MAAI,CAAC,cAAc,OAAO,YAAY,SAAS;AAC7C,UAAM,IAAI;AAAA,MACR,
8DAAyD,OAAO,SAAS;AAAA,MACzE;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;;;ACtLA,SAAS,qBAAqB;AAqCvB,SAAS,oBAAqC;AACnD,QAAM,cAAc,cAAc,YAAY,GAAG;AACjD,QAAM,EAAE,YAAY,WAAW,cAAc,cAAc,IAAI;AAAA,IAC7D;AAAA,EACF;AACA,SAAO;AAAA,IACL,UAAU,KAAK;AACb,UAAI,CAAC,WAAW,GAAG,EAAG,WAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AAAA,IAC1D;AAAA,IACA,OAAO,MAAM;AACX,aAAO,WAAW,IAAI;AAAA,IACxB;AAAA,IACA,KAAK,MAAM;AACT,UAAI;AACF,eAAO,aAAa,MAAM,MAAM;AAAA,MAClC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,oBAAc,MAAM,OAAqB;AAAA,IAC3C;AAAA,EACF;AACF;AAKO,SAAS,0BAA2C;AACzD,QAAM,QAAQ,oBAAI,IAAiC;AACnD,QAAM,OAAO,oBAAI,IAAY;AAC7B,SAAO;AAAA,IACL,UAAU,KAAK;AACb,WAAK,IAAI,GAAG;AAAA,IACd;AAAA,IACA,OAAO,MAAM;AACX,aAAO,MAAM,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI;AAAA,IACzC;AAAA,IACA,KAAK,MAAM;AACT,YAAM,QAAQ,MAAM,IAAI,IAAI;AAC5B,UAAI,UAAU,OAAW,QAAO;AAChC,aAAO,OAAO,UAAU,WAAW,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK;AAAA,IAC3E;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,YAAM,IAAI,MAAM,OAAO;AAAA,IACzB;AAAA,EACF;AACF;;;AF8BA,eAAsB,YACpB,MAC+C;AAC/C,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,QAAM,MAAM,KAAK,QAAQ,MAAM,oBAAI,KAAK;AACxC,QAAM,SAAS,KAAK,UAAU,CAAC;AAC/B,QAAM,UAAU,KAAK,WAAW,kBAAkB;AAElD,UAAQ,UAAU,KAAK,MAAM;AAE7B,QAAM,eAAe,oBAAoB;AAAA,IACvC,WAAW,KAAK;AAAA,IAChB;AAAA,IACA,aAAa,KAAK,SAAS,QAAQ;AAAA,IACnC;AAAA,IACA;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI;AACtB,QAAM,QAAyC,CAAC;AAChD,QAAM,kBAA0C,CAAC;AAGjD,QAAM,WAA0F,CAAC;AACjG,MAAI,YAAY;AAChB,aAAW,YAAY,KAAK,WAAW;AACrC,aAAS,MAAM,GAAG,MAAM,MAAM,OAAO;AACnC,YAAM,SAAS,GAAG,SAAS,EAAE,IAAI,GAAG;AACpC,YAAM,WAAW,OAAO;AACxB,eAAS,KAAK,EAAE,UAAU,KAAK,QAAQ,SAAS,CAAC;AACjD,mBAAa;AAAA,IACf;AAAA,EACF;AAGA,MAAI,eAAe;AACnB,MAAI,qBAAqB;AACzB,QAAM,kBAAkB,IAAI,gBAAgB;AAI5C,QAAM,QAAyB,CAAC;AAChC,MAAI,UAAU;AACd,QAAM,WAAW;AAEjB,WAAS,IAAI,GAAG,IAAI,gBAAgB,KAAK;AACvC,UAAM;AAAA,OACH,YAAY;AACX,eAAO,MAAM;AACX,gBAAM,QAAQ;AACd,cAAI,SAAS,SAAS,OAAQ;AAC9B,gBAAM,OAAO,SAAS,KAAK;AAC3B,cAAI,oBAAoB;AACtB,qBAAS,KAAK,YAAY,MAAM,sBAAsB,CAAC;AACvD;AAAA,UACF;AACA,gBAAM,SAAS,MAAM,
YAAY;AAAA,YAC/B;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,kBAAkB,KAAK,oBAAoB,wBAAwB,OAAO;AAAA,YAC1E,QAAQ,gBAAgB;AAAA,YACxB,mBAAmB,KAAK;AAAA,UAC1B,CAAC;AACD,mBAAS,KAAK,OAAO,IAAI;AACzB,2BAAiB,OAAO,MAAM,KAAK,eAAe,MAAM;AACxD,0BAAgB,OAAO,KAAK;AAC5B,iBAAO,OAAO,iBAAiB,OAAO,eAAe;AACrD,cAAI,KAAK,gBAAgB,UAAa,gBAAgB,KAAK,aAAa;AACtE,iCAAqB;AAAA,UACvB;AAEA,cAAI,KAAK,gBAAgB,KAAK,iBAAiB,SAAS,CAAC,OAAO,KAAK,OAAO;AAC1E,kBAAM,eAAe;AAAA,cACnB,OAAO,KAAK;AAAA,cACZ,MAAM,OAAO;AAAA,cACb,UAAU,KAAK;AAAA,cACf;AAAA,cACA;AAAA,YACF,CAAC,EAAE,MAAM,CAAC,QAAQ;AAGhB,sBAAQ;AAAA,gBACN,oCAAoC,OAAO,KAAK,MAAM,KAAK,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,cAC7G;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF,GAAG;AAAA,IACL;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,KAAK;AAEvB,QAAM,UAAU,IAAI;AACpB,WAAS,KAAK,CAAC,GAAG,MAAM,EAAE,OAAO,cAAc,EAAE,MAAM,CAAC;AAExD,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,WAAW,UAAU,YAAY;AAAA,IACjC,SAAS,QAAQ,YAAY;AAAA,IAC7B,YAAY,QAAQ,QAAQ,IAAI,UAAU,QAAQ;AAAA,IAClD,OAAO;AAAA,IACP;AAAA,IACA,QAAQ,KAAK;AAAA,IACb;AAAA,IACA,WAAW,KAAK,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,EACnE;AACF;AAgBA,eAAe,YACb,MAC2F;AAC3F,QAAM,UAAU,KAAK;AACrB,QAAM,UAAU,KAAK,KAAK,KAAK,QAAQ,KAAK,KAAK,OAAO,QAAQ,mBAAmB,GAAG,CAAC;AACvF,UAAQ,UAAU,OAAO;AAGzB,QAAM,YAAY,KAAK,SAAS,oBAAoB;AACpD,MAAI,KAAK,WAAW;AAClB,UAAM,MAAM,QAAQ,KAAK,SAAS;AAClC,QAAI,QAAQ,QAAW;AACrB,UAAI;AACF,cAAM,SAAS,KAAK,MAAM,GAAG;AAC7B,YAAI,OAAO,WAAW,KAAK,KAAK,QAAQ;AACtC,iBAAO,EAAE,MAAM,EAAE,GAAG,QAAQ,QAAQ,KAAK,GAAG,iBAAiB,CAAC,EAAE;AAAA,QAClE;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,KAAK,IAAI;AACzB,QAAM,QAAQ,KAAK,iBAAiB,KAAK,KAAK,QAAQ,OAAO;AAC7D,QAAM,kBAA0C,CAAC;AACjD,QAAM,YAAoC;AAAA,IACxC,MAAM,MAAM,MAAM,SAAS;AACzB,YAAM,WAAW,KAAK,SAAS,IAAI;AACnC,cAAQ,UAAU,KAAK,UAAU,IAAI,CAAC;AACtC,cAAQ,MAAM,UAAU,OAAO;AAC/B,sBAAgB,GAAG,KAAK,KAAK,MAAM,IAAI,IAAI,EAAE,IAAI;AACjD,aAAO;AAAA,IACT;AAAA,IACA,MAAM,UAAU,MAAM,OAAO;AAC3B,aAAO,UAAU,MAAM,MAAM,KAAK,UAAU,OAAO,MAAM,CAAC,CAAC;AAA
A,IAC7D;AAAA,EACF;AACA,MAAI,YAAY;AAChB,QAAM,cAAkC,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC9D,QAAM,OAA0B;AAAA,IAC9B,QAAQ,QAAQ,QAAQ;AACtB,mBAAa;AACb,YAAM,KAAK,QAAQ,MAAM,IAAI,EAAE,WAAW,OAAO,CAAC,EAAE,IAAI;AAAA,IAC1D;AAAA,IACA,cAAc,OAAO;AACnB,kBAAY,SAAS,MAAM;AAC3B,kBAAY,UAAU,MAAM;AAC5B,UAAI,MAAM,OAAQ,aAAY,UAAU,YAAY,UAAU,KAAK,MAAM;AAAA,IAC3E;AAAA,IACA,UAAU;AACR,aAAO;AAAA,IACT;AAAA,IACA,SAAS;AACP,aAAO,EAAE,GAAG,YAAY;AAAA,IAC1B;AAAA,EACF;AAEA,QAAM,YAAY,KAAK,KAAK,gBAAgB;AAAA,IAC1C,UAAU,KAAK,KAAK;AAAA,IACpB,KAAK,KAAK,KAAK;AAAA,EACjB,CAAC;AAMD,QAAM,YAAY,IAAI,gBAAgB;AACtC,QAAM,kBAAkB,MAAM,UAAU,MAAO,KAAK,OAAgC,MAAM;AAC1F,MAAI,KAAK,OAAO,QAAS,WAAU,MAAO,KAAK,OAAgC,MAAM;AAAA,MAChF,MAAK,OAAO,iBAAiB,SAAS,iBAAiB,EAAE,MAAM,KAAK,CAAC;AAE1E,QAAM,MAAuB;AAAA,IAC3B,QAAQ,KAAK,KAAK;AAAA,IAClB,KAAK,KAAK,KAAK;AAAA,IACf,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ,UAAU;AAAA,IAClB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,MAAI;AACJ,MAAI;AACJ,QAAM,YAAY,KAAK;AACvB,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,GAAG;AAC7D,QAAI,cAAc,UAAa,YAAY,GAAG;AAK5C,iBAAW,MAAM,QAAQ,KAAK;AAAA,QAC5B;AAAA,QACA,IAAI,QAAe,CAAC,GAAG,WAAW;AAChC,yBAAe,WAAW,MAAM;AAC9B,sBAAU,MAAM,IAAI,MAAM,kBAAkB,CAAC;AAC7C;AAAA,cACE,IAAI;AAAA,gBACF,qBAAqB,SAAS,gBAAgB,KAAK,KAAK,MAAM;AAAA,cAChE;AAAA,YACF;AAAA,UACF,GAAG,SAAS;AACZ,cAAI,OAAQ,aAAwC,UAAU;AAC5D,YAAC,aAAuC,MAAM;AAAA,QAClD,CAAC;AAAA,MACH,CAAC;AAAA,IACH,OAAO;AACL,iBAAW,MAAM;AAAA,IACnB;AAAA,EACF,SAAS,KAAK;AACZ,mBAAe,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,EAChE,UAAE;AACA,QAAI,aAAc,cAAa,YAAY;AAC3C,SAAK,OAAO,oBAAoB,SAAS,eAAe;AAAA,EAC1D;AAKA,QAAM,cAA0C,CAAC;AACjD,MAAI,aAAa,QAAW;AAC1B,eAAW,SAAS,KAAK,KAAK,UAAU,CAAC,GAAG;AAC1C,UAAI,MAAM,aAAa,CAAC,MAAM,UAAU,KAAK,KAAK,QAAQ,EAAG;AAC7D,UAAI;AACF,oBAAY,MAAM,IAAI,IAAI,MAAM,aAAa,OAAO;AAAA,UAClD;AAAA,UACA,UAAU,KAAK,KAAK;AAAA,UACpB,QAAQ,KAAK;AAAA,QACf,CAAC;AAAA,MACH,SAAS,KAAK;AACZ,uBAAe,UAAU,MAAM,IAAI,aAAa,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAChG;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,MAAM,MAAM;AAElB,QAAM,OAAsC;AAAA,IAC1C,QAAQ,KAAK,KAAK;AAAA,IAClB,YAAY,KAAK,KAAK
,SAAS;AAAA,IAC/B,KAAK,KAAK,KAAK;AAAA,IACf,UAAW,YAAY;AAAA,IACvB;AAAA,IACA,SAAS;AAAA,IACT,YAAY,EAAE,GAAG,YAAY;AAAA,IAC7B,YAAY,KAAK,IAAI,IAAI;AAAA,IACzB,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ;AAAA,IACR,OAAO;AAAA,EACT;AAEA,MAAI,CAAC,gBAAgB,KAAK,WAAW;AACnC,YAAQ,MAAM,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,EAC/C;AAEA,SAAO,EAAE,MAAM,gBAAgB;AACjC;AASA,SAAS,iBACP,MACA,MACM;AACN,MAAI,SAAS,SAAS,KAAK,MAAO;AAClC,MAAI,KAAK,aAAa,QAAQ,KAAK,aAAa,OAAW;AAC3D,QAAM,aAAa,KAAK,WAAW,UAAU,KAAK,KAAK,WAAW,WAAW;AAC7E,MAAI,KAAK,YAAY,KAAK,CAAC,WAAY;AACvC,QAAM,MAAM,SAAS,KAAK,MAAM;AAChC,MAAI,SAAS,UAAU;AACrB,UAAM,SAAiC;AAAA,MACrC,cAAc;AAAA,MACd,aAAa;AAAA,MACb,aAAa;AAAA,MACb,iBAAiB;AAAA,MACjB,kBAAkB;AAAA,MAClB,mBAAmB;AAAA,MACnB,cAAc;AAAA,MACd,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AACA,UAAM,IAAI,sBAAsB,gBAAgB,GAAG,IAAI,MAAM;AAAA,EAC/D;AAEA,UAAQ,KAAK,8BAA8B,GAAG,EAAE;AAClD;AAEA,eAAe,aACb,OACA,OACqB;AACrB,SAAO,MAAM,MAAM,KAAK;AAC1B;AAEA,SAAS,wBACP,SACsD;AACtD,SAAO,CAAC,QAAQ,QAAQ;AACtB,UAAM,QAAwC,CAAC;AAC/C,WAAO;AAAA,MACL,KAAK,MAAM,YAAY;AACrB,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAkC,EAAE,MAAM,QAAQ,SAAS,GAAI,cAAc,CAAC,EAAG;AACvF,cAAM,SAAoB;AAAA,UACxB,IAAI,UAAU;AACZ,mBAAO,aAAa,KAAK,IAAI,IAAI;AACjC,gBAAI,SAAU,QAAO,OAAO,QAAQ,QAAQ;AAC5C,kBAAM,KAAK,MAAM;AAAA,UACnB;AAAA,UACA,aAAa,KAAK,OAAO;AACvB,mBAAO,GAAG,IAAI;AAAA,UAChB;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,MACA,MAAM,QAAQ;AACZ,gBAAQ,MAAM,KAAK,KAAK,aAAa,GAAG,MAAM,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,YACP,MACA,QAC+B;AAC/B,SAAO;AAAA,IACL,QAAQ,KAAK;AAAA,IACb,YAAY,KAAK,SAAS;AAAA,IAC1B,KAAK,KAAK;AAAA,IACV,UAAU;AAAA,IACV,aAAa,CAAC;AAAA,IACd,SAAS;AAAA,IACT,YAAY,EAAE,OAAO,GAAG,QAAQ,EAAE;AAAA,IAClC,YAAY;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,QAAQ;AAAA,IACR,OAAO,YAAY,MAAM;AAAA,EAC3B;AACF;AAUA,eAAe,eACb,MACe;AACf,QAAM,KAAK,MAAM,QAAQ;AAAA,IACvB,UAAU,KAAK;AAAA,IACf,UAAU,KAAK,KAAK;AAAA,IACpB,aAAa,KAAK,KAAK;AAAA,IACvB,QAAQ,KAAK,KAAK,iBAAiB;AAAA,IACnC,mBAAmB,KAAK,KAAK,4BAA4B;AAAA,IACzD,YAAY,KAAK,IAAI,EAAE,YAAY;AAAA,IACnC,iBAAiB;AAAA,EACnB,CAAC;AACH;AAIA,
SAAS,oBAAoB,OAMlB;AACT,QAAM,YAAY;AAAA,IAChB,WAAW,MAAM,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,IAClE,QAAQ,MAAM,OAAO,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,EAAE,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;AAAA,IACxF,UAAU,MAAM;AAAA,IAChB,MAAM,MAAM;AAAA,IACZ,MAAM,MAAM;AAAA,EACd;AACA,SAAO,WAAW,QAAQ,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC,EAAE,OAAO,KAAK;AAC5E;AAEA,SAAS,kBACP,OACA,QACA,MACoB;AACpB,QAAM,UAA0C,CAAC;AACjD,aAAW,SAAS,QAAQ;AAC1B,UAAM,SAAmB,CAAC;AAC1B,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,YAAY,MAAM,IAAI;AACrC,UAAI,MAAM,OAAW,QAAO,KAAK,EAAE,SAAS;AAAA,IAC9C;AACA,YAAQ,MAAM,IAAI,IAAI,UAAU,QAAQ,IAAI;AAAA,EAC9C;AACA,QAAM,aAAgD,CAAC;AACvD,QAAM,iBAAiB,oBAAI,IAAsB;AACjD,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACzE,QAAI,WAAW,WAAW,EAAG;AAC7B,UAAM,OAAO,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAChE,UAAM,MAAM,eAAe,IAAI,KAAK,UAAU,KAAK,CAAC;AACpD,QAAI,KAAK,IAAI;AACb,mBAAe,IAAI,KAAK,YAAY,GAAG;AAAA,EACzC;AACA,aAAW,CAAC,YAAY,OAAO,KAAK,gBAAgB;AAClD,UAAM,KAAK,UAAU,SAAS,IAAI;AAClC,eAAW,UAAU,IAAI,EAAE,eAAe,GAAG,MAAM,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE;AAAA,EAC5E;AACA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,cAAc,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,SAAS,CAAC;AAAA,IACrD,eAAe,MAAM,OAAO,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE;AAAA,IAC7C,cAAc,MAAM,OAAO,CAAC,MAAM,EAAE,OAAO,WAAW,UAAU,CAAC,EAAE;AAAA,IACnE,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,IAC3C,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,WAAW,UAAU,CAAC,EAAE;AAAA,EAC/E;AACF;AAKA,SAAS,UAAU,SAAmB,MAA8B;AAClE,QAAM,IAAI,QAAQ;AAClB,MAAI,MAAM,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,EAAE;AAC5D,QAAM,OAAO,QAAQ,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAClD,QAAM,WAAW,QAAQ,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,SAAS,GAAG,CAAC,IAAI,KAAK,IAAI,GAAG,IAAI,CAAC;AACrF,QAAM,QAAQ,KAAK,KAAK,QAAQ;AAChC,QAAM,KAAK,mBAAmB,SAAS,MAAM,EAAE,MAAM,WAAW,IAAK,CAAC;AACtE,SAAO,EAAE,MAAM,OAAO,MAAM,CAAC,GAAG,OAAO,GAAG,KAAK,GAAG,EAAE;AACtD;","names":[]}
1
+ {"version":3,"sources":["../src/campaign/run-campaign.ts","../src/integrity/backend-integrity.ts","../src/campaign/storage.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `runCampaign` — Pass A substrate primitive. ONE function that orchestrates\n * scenarios → dispatch → artifacts → judges → aggregates, with full\n * reproducibility (seed + manifest hash), cell-level resumability, bootstrap\n * CIs, and the `LabeledScenarioStore` capture flywheel.\n *\n * Improvement loops (optimizer / gate / autoOnPromote) ride on top of this\n * primitive but live in `presets/run-improvement-loop.ts`. This file keeps\n * the core orchestrator minimal — Phase 1 of the Pass A track.\n */\n\nimport { createHash } from 'node:crypto'\nimport { join } from 'node:path'\nimport { BackendIntegrityError, type BackendIntegrityReport } from '../integrity/backend-integrity'\nimport { confidenceInterval } from '../statistics'\nimport { type CampaignStorage, fsCampaignStorage } from './storage'\nimport type {\n CampaignAggregates,\n CampaignArtifactWriter,\n CampaignCellResult,\n CampaignCostMeter,\n CampaignResult,\n CampaignTokenUsage,\n CampaignTraceWriter,\n DispatchContext,\n DispatchFn,\n JudgeAggregate,\n JudgeConfig,\n JudgeScore,\n LabeledScenarioStore,\n Scenario,\n ScenarioAggregate,\n TraceSpan,\n} from './types'\n\nexport interface RunCampaignOptions<TScenario extends Scenario, TArtifact> {\n scenarios: TScenario[]\n dispatch: DispatchFn<TScenario, TArtifact>\n judges?: JudgeConfig<TArtifact, TScenario>[]\n /** Required for reproducibility. Default 42. */\n seed?: number\n /** Per-scenario replicates for CI bands. Default 1; raise to 5+ for\n * bootstrap-tight intervals on critical eval. */\n reps?: number\n /** When true (default), completed cells are cached by\n * (manifestHash, scenarioId, rep, generation). Re-runs skip cached cells. 
*/\n resumable?: boolean\n /** Optional store — when present, every artifact + judge score is captured\n * with the configured `captureSource`. Capture is default ON; pass `'off'`\n * to disable. */\n labeledStore?: LabeledScenarioStore | 'off'\n captureSource?: 'production-trace' | 'eval-run' | 'manual' | 'red-team' | 'synthetic'\n captureSourceVersionHash?: string\n /** Wall-clock cost cap across all cells. Cells beyond ceiling are skipped. */\n costCeiling?: number\n /** Max concurrent cells. Default 2. */\n maxConcurrency?: number\n /**\n * Per-cell dispatch deadline in ms. A `dispatch` that neither resolves nor\n * rejects within this window is a hang (a stalled model request, an\n * exhausted runtime resource, a backend that never closes its stream). When\n * set, the cell's `ctx.signal` is aborted and the cell is recorded as a LOUD\n * error (`dispatch exceeded <N>ms`) so the campaign proceeds and the failure\n * is visible — instead of one wedged cell silently hanging the whole run (and\n * every loop/CI job above it) forever. `undefined`/`0` = unbounded (legacy).\n */\n dispatchTimeoutMs?: number\n /** Required: where artifacts + traces land. */\n runDir: string\n /** Tracing posture. Default is the substrate's `FileSystemTraceStore` rooted\n * at `<runDir>/traces/`. `'off'` disables capture entirely — substrate\n * refuses this when the caller wires `autoOnPromote !== 'none'`. */\n tracing?: 'on' | 'off'\n /**\n * Per-cell usage expectation — the early, fine-grained sibling of the\n * batch `assertRealBackend` guard. A cell that produced an artifact (no\n * error) but reported `costUsd === 0` AND zero tokens is a stub: the\n * dispatch never reported LLM activity via `ctx.cost`. 
Modes:\n * - `'warn'` (default) — log the offending cell loudly, keep going.\n * - `'assert'` — throw `BackendIntegrityError` on the first such cell\n * (fail-fast; recommended for CI campaigns expecting real LLM calls).\n * - `'off'` — no check (replay / deterministic-only / offline analysis).\n */\n expectUsage?: 'assert' | 'warn' | 'off'\n /** Test seam — override the wall clock for deterministic tests. */\n now?: () => Date\n /** Test seam — override per-cell trace writer factory. */\n buildTraceWriter?: (cellId: string, dir: string) => CampaignTraceWriter\n /** Storage backend for run/cell dirs, the resumability cache, artifacts,\n * and trace spans. Default: the Node filesystem (`fsCampaignStorage`).\n * Pass `inMemoryCampaignStorage()` to run in a filesystem-less runtime\n * (Cloudflare Workers, Deno, edge) — the `CampaignResult` is still\n * produced; artifacts/traces just aren't persisted to disk. */\n storage?: CampaignStorage\n /**\n * Optional per-cell placement strategy. Returns an opaque string the\n * substrate forwards as `ctx.placement` to the Dispatch — placement-aware\n * Dispatches (e.g. `httpDispatch` from `/adapters/http`) use it to route\n * each cell to the right worker, region, or sandbox. When unset, every\n * cell receives `ctx.placement = undefined` and behaves identically to\n * the in-process case.\n *\n * @example\n * cellPlacement: ({ scenario }) => scenario.tags?.includes('eu') ? 'eu-west' : 'us-east'\n */\n cellPlacement?: (input: {\n scenario: TScenario\n rep: number\n generation?: number\n }) => string | undefined\n}\n\nexport async function runCampaign<TScenario extends Scenario, TArtifact>(\n opts: RunCampaignOptions<TScenario, TArtifact>,\n): Promise<CampaignResult<TArtifact, TScenario>> {\n const seed = opts.seed ?? 42\n const reps = opts.reps ?? 1\n const resumable = opts.resumable ?? true\n const maxConcurrency = opts.maxConcurrency ?? 2\n const now = opts.now ?? (() => new Date())\n const judges = opts.judges ?? 
[]\n const storage = opts.storage ?? fsCampaignStorage()\n\n storage.ensureDir(opts.runDir)\n\n const manifestHash = computeManifestHash({\n scenarios: opts.scenarios,\n judges: judges as unknown as JudgeConfig<unknown>[],\n dispatchRef: opts.dispatch.name || 'anonymous',\n seed,\n reps,\n })\n\n const startedAt = now()\n const cells: CampaignCellResult<TArtifact>[] = []\n const artifactsByPath: Record<string, string> = {}\n\n // Build the cell schedule (scenario × rep).\n const schedule: Array<{ scenario: TScenario; rep: number; cellId: string; cellSeed: number }> = []\n let cellIndex = 0\n for (const scenario of opts.scenarios) {\n for (let rep = 0; rep < reps; rep++) {\n const cellId = `${scenario.id}:${rep}`\n const cellSeed = seed + cellIndex\n schedule.push({ scenario, rep, cellId, cellSeed })\n cellIndex += 1\n }\n }\n\n // Concurrency-limited execution.\n let totalCostUsd = 0\n let costCeilingReached = false\n const abortController = new AbortController()\n // Concurrency lanes that drain the cell schedule. Named \"lanes\" — not\n // \"workers\" — to avoid clashing with the taxonomy's worker (= the agent\n // harness in a sandbox, invoked behind `dispatch`). See loop-taxonomy.md.\n const lanes: Promise<void>[] = []\n let nextIdx = 0\n const cellsRef = cells\n\n for (let i = 0; i < maxConcurrency; i++) {\n lanes.push(\n (async () => {\n while (true) {\n const myIdx = nextIdx++\n if (myIdx >= schedule.length) return\n const slot = schedule[myIdx]!\n if (costCeilingReached) {\n cellsRef.push(skippedCell(slot, 'cost_ceiling_reached'))\n continue\n }\n const result = await executeCell({\n slot,\n opts,\n manifestHash,\n resumable,\n now,\n storage,\n buildTraceWriter: opts.buildTraceWriter ?? defaultBuildTraceWriter(storage),\n signal: abortController.signal,\n dispatchTimeoutMs: opts.dispatchTimeoutMs,\n })\n cellsRef.push(result.cell)\n enforceCellUsage(result.cell, opts.expectUsage ?? 
'warn')\n totalCostUsd += result.cell.costUsd\n Object.assign(artifactsByPath, result.artifactsByPath)\n if (opts.costCeiling !== undefined && totalCostUsd >= opts.costCeiling) {\n costCeilingReached = true\n }\n // Capture into LabeledScenarioStore unless explicitly disabled.\n if (opts.labeledStore && opts.labeledStore !== 'off' && !result.cell.error) {\n await captureToStore({\n store: opts.labeledStore,\n cell: result.cell,\n scenario: slot.scenario,\n opts,\n now,\n }).catch((err) => {\n // Capture failures are non-fatal — log but don't crash the campaign.\n // (Trace would normally land here.)\n console.warn(\n `[runCampaign] capture failed for ${result.cell.cellId}: ${err instanceof Error ? err.message : String(err)}`,\n )\n })\n }\n }\n })(),\n )\n }\n await Promise.all(lanes)\n\n const endedAt = now()\n cellsRef.sort((a, b) => a.cellId.localeCompare(b.cellId))\n\n const aggregates = computeAggregates(\n cellsRef,\n judges as unknown as JudgeConfig<TArtifact>[],\n seed,\n )\n\n return {\n manifestHash,\n seed,\n startedAt: startedAt.toISOString(),\n endedAt: endedAt.toISOString(),\n durationMs: endedAt.getTime() - startedAt.getTime(),\n cells: cellsRef,\n aggregates,\n runDir: opts.runDir,\n artifactsByPath,\n scenarios: opts.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n }\n}\n\n// ── Internals ─────────────────────────────────────────────────────────\n\ninterface ExecuteCellArgs<TScenario extends Scenario, TArtifact> {\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number }\n opts: RunCampaignOptions<TScenario, TArtifact>\n manifestHash: string\n resumable: boolean\n now: () => Date\n storage: CampaignStorage\n buildTraceWriter: (cellId: string, dir: string) => CampaignTraceWriter\n signal: AbortSignal\n dispatchTimeoutMs?: number\n}\n\nasync function executeCell<TScenario extends Scenario, TArtifact>(\n args: ExecuteCellArgs<TScenario, TArtifact>,\n): Promise<{ cell: CampaignCellResult<TArtifact>; artifactsByPath: 
Record<string, string> }> {\n const storage = args.storage\n const cellDir = join(args.opts.runDir, args.slot.cellId.replace(/[^a-zA-Z0-9_-]/g, '_'))\n storage.ensureDir(cellDir)\n\n // Resumability: cache key = (manifestHash, scenarioId, rep)\n const cachePath = join(cellDir, 'cached-result.json')\n if (args.resumable) {\n const raw = storage.read(cachePath)\n if (raw !== undefined) {\n try {\n const cached = JSON.parse(raw) as CampaignCellResult<TArtifact>\n if (cached.cellId === args.slot.cellId) {\n return { cell: { ...cached, cached: true }, artifactsByPath: {} }\n }\n } catch {\n // Corrupt cache — fall through to re-run.\n }\n }\n }\n\n const startMs = Date.now()\n const trace = args.buildTraceWriter(args.slot.cellId, cellDir)\n const artifactsByPath: Record<string, string> = {}\n const artifacts: CampaignArtifactWriter = {\n async write(path, content) {\n const fullPath = join(cellDir, path)\n storage.ensureDir(join(fullPath, '..'))\n storage.write(fullPath, content)\n artifactsByPath[`${args.slot.cellId}/${path}`] = fullPath\n return fullPath\n },\n async writeJson(path, value) {\n return artifacts.write(path, JSON.stringify(value, null, 2))\n },\n }\n let costSoFar = 0\n const tokensSoFar: CampaignTokenUsage = { input: 0, output: 0 }\n const cost: CampaignCostMeter = {\n observe(amount, source) {\n costSoFar += amount\n trace.span(`cost.${source}`, { amountUsd: amount }).end()\n },\n observeTokens(usage) {\n tokensSoFar.input += usage.input\n tokensSoFar.output += usage.output\n if (usage.cached) tokensSoFar.cached = (tokensSoFar.cached ?? 0) + usage.cached\n },\n current() {\n return costSoFar\n },\n tokens() {\n return { ...tokensSoFar }\n },\n }\n\n const placement = args.opts.cellPlacement?.({\n scenario: args.slot.scenario,\n rep: args.slot.rep,\n })\n\n // Per-cell abort signal, chained to the campaign signal. 
The dispatch sees\n // THIS signal so a timeout (below) can abort just this cell's in-flight work\n // without tearing down sibling cells — and a signal-honoring dispatch\n // releases its open request instead of leaking it past the deadline.\n const cellAbort = new AbortController()\n const onCampaignAbort = () => cellAbort.abort((args.signal as { reason?: unknown }).reason)\n if (args.signal.aborted) cellAbort.abort((args.signal as { reason?: unknown }).reason)\n else args.signal.addEventListener('abort', onCampaignAbort, { once: true })\n\n const ctx: DispatchContext = {\n cellId: args.slot.cellId,\n rep: args.slot.rep,\n seed: args.slot.cellSeed,\n signal: cellAbort.signal,\n trace,\n artifacts,\n cost,\n placement,\n }\n\n let artifact: TArtifact | undefined\n let errorMessage: string | undefined\n const timeoutMs = args.dispatchTimeoutMs\n let timeoutTimer: ReturnType<typeof setTimeout> | undefined\n try {\n const dispatched = args.opts.dispatch(args.slot.scenario, ctx)\n if (timeoutMs !== undefined && timeoutMs > 0) {\n // A dispatch that never settles (stalled model request, exhausted runtime\n // resource, a stream that never closes) must NOT hang the cell — and with\n // it the lane, the campaign, the loop, the CI job — forever. Race it\n // against the deadline; on timeout, abort the cell and fail it LOUD.\n artifact = await Promise.race([\n dispatched,\n new Promise<never>((_, reject) => {\n timeoutTimer = setTimeout(() => {\n cellAbort.abort(new Error('dispatch timeout'))\n reject(\n new Error(\n `dispatch exceeded ${timeoutMs}ms for cell '${args.slot.cellId}' — aborted and failed loud (no silent hang)`,\n ),\n )\n }, timeoutMs)\n if (typeof (timeoutTimer as { unref?: () => void }).unref === 'function')\n (timeoutTimer as { unref: () => void }).unref()\n }),\n ])\n } else {\n artifact = await dispatched\n }\n } catch (err) {\n errorMessage = err instanceof Error ? 
err.message : String(err)\n } finally {\n if (timeoutTimer) clearTimeout(timeoutTimer)\n args.signal.removeEventListener('abort', onCampaignAbort)\n }\n\n // Run judges (only if we have an artifact). A judge that throws invalidates\n // the cell — recorded as `error`, NOT folded into a fake composite:0 (a fake\n // zero is indistinguishable from a real zero and poisons every aggregate).\n const judgeScores: Record<string, JudgeScore> = {}\n if (artifact !== undefined) {\n for (const judge of args.opts.judges ?? []) {\n if (judge.appliesTo && !judge.appliesTo(args.slot.scenario)) continue\n try {\n judgeScores[judge.name] = await runJudgeCell(judge, {\n artifact,\n scenario: args.slot.scenario,\n signal: args.signal,\n })\n } catch (err) {\n errorMessage = `judge '${judge.name}' failed: ${err instanceof Error ? err.message : String(err)}`\n break\n }\n }\n }\n\n await trace.flush()\n\n const cell: CampaignCellResult<TArtifact> = {\n cellId: args.slot.cellId,\n scenarioId: args.slot.scenario.id,\n rep: args.slot.rep,\n artifact: (artifact ?? null) as TArtifact,\n judgeScores,\n costUsd: costSoFar,\n tokenUsage: { ...tokensSoFar },\n durationMs: Date.now() - startMs,\n seed: args.slot.cellSeed,\n cached: false,\n error: errorMessage,\n }\n\n if (!errorMessage && args.resumable) {\n storage.write(cachePath, JSON.stringify(cell))\n }\n\n return { cell, artifactsByPath }\n}\n\n/**\n * Per-cell stub guard. A cell that produced an artifact (no error) but reported\n * `costUsd === 0` AND zero tokens means the dispatch never called `ctx.cost` —\n * i.e. it ran against a stub or silently dropped its usage. `'warn'` logs it,\n * `'assert'` throws (fail-fast), `'off'` skips. 
An errored/skipped cell or a\n * deterministic judge-only run that genuinely made no LLM call is not flagged.\n */\nfunction enforceCellUsage<TArtifact>(\n cell: CampaignCellResult<TArtifact>,\n mode: 'assert' | 'warn' | 'off',\n): void {\n if (mode === 'off' || cell.error) return\n if (cell.artifact === null || cell.artifact === undefined) return\n const zeroTokens = cell.tokenUsage.input === 0 && cell.tokenUsage.output === 0\n if (cell.costUsd !== 0 || !zeroTokens) return\n const msg = `cell '${cell.cellId}' produced an artifact but reported zero cost and zero tokens — the dispatch never reported LLM usage via ctx.cost.observe/observeTokens (a stub cell)`\n if (mode === 'assert') {\n const report: BackendIntegrityReport = {\n totalRecords: 1,\n stubRecords: 1,\n realRecords: 0,\n uncostedRecords: 0,\n totalInputTokens: 0,\n totalOutputTokens: 0,\n totalCostUsd: 0,\n verdict: 'stub',\n diagnosis: msg,\n }\n throw new BackendIntegrityError(`expectUsage: ${msg}`, report)\n }\n // eslint-disable-next-line no-console\n console.warn(`[runCampaign] expectUsage: ${msg}`)\n}\n\nasync function runJudgeCell<TArtifact, TScenario extends Scenario>(\n judge: JudgeConfig<TArtifact, TScenario>,\n input: { artifact: TArtifact; scenario: TScenario; signal: AbortSignal },\n): Promise<JudgeScore> {\n return judge.score(input)\n}\n\nfunction defaultBuildTraceWriter(\n storage: CampaignStorage,\n): (cellId: string, dir: string) => CampaignTraceWriter {\n return (cellId, dir) => {\n const spans: Array<Record<string, unknown>> = []\n return {\n span(name, attributes) {\n const startMs = Date.now()\n const record: Record<string, unknown> = { name, cellId, startMs, ...(attributes ?? 
{}) }\n const finish: TraceSpan = {\n end(endAttrs) {\n record.durationMs = Date.now() - startMs\n if (endAttrs) Object.assign(record, endAttrs)\n spans.push(record)\n },\n setAttribute(key, value) {\n record[key] = value\n },\n }\n return finish\n },\n async flush() {\n storage.write(join(dir, 'spans.jsonl'), spans.map((s) => JSON.stringify(s)).join('\\n'))\n },\n }\n }\n}\n\nfunction skippedCell<TScenario extends Scenario, TArtifact>(\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number },\n reason: string,\n): CampaignCellResult<TArtifact> {\n return {\n cellId: slot.cellId,\n scenarioId: slot.scenario.id,\n rep: slot.rep,\n artifact: null as unknown as TArtifact,\n judgeScores: {},\n costUsd: 0,\n tokenUsage: { input: 0, output: 0 },\n durationMs: 0,\n seed: slot.cellSeed,\n cached: false,\n error: `skipped: ${reason}`,\n }\n}\n\ninterface CaptureArgs<TScenario extends Scenario, TArtifact> {\n store: LabeledScenarioStore\n cell: CampaignCellResult<TArtifact>\n scenario: TScenario\n opts: RunCampaignOptions<TScenario, TArtifact>\n now: () => Date\n}\n\nasync function captureToStore<TScenario extends Scenario, TArtifact>(\n args: CaptureArgs<TScenario, TArtifact>,\n): Promise<void> {\n await args.store.observe({\n scenario: args.scenario,\n artifact: args.cell.artifact,\n judgeScores: args.cell.judgeScores,\n source: args.opts.captureSource ?? 'eval-run',\n sourceVersionHash: args.opts.captureSourceVersionHash ?? 
'unknown',\n capturedAt: args.now().toISOString(),\n redactionStatus: 'raw',\n })\n}\n\n// ── Aggregates + manifest hash ────────────────────────────────────────\n\nfunction computeManifestHash(input: {\n scenarios: Scenario[]\n judges: JudgeConfig<unknown>[]\n dispatchRef: string\n seed: number\n reps: number\n}): string {\n const canonical = {\n scenarios: input.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n judges: input.judges.map((j) => ({ name: j.name, dims: j.dimensions.map((d) => d.key) })),\n dispatch: input.dispatchRef,\n seed: input.seed,\n reps: input.reps,\n }\n return createHash('sha256').update(JSON.stringify(canonical)).digest('hex')\n}\n\nfunction computeAggregates<TArtifact>(\n cells: CampaignCellResult<TArtifact>[],\n judges: JudgeConfig<TArtifact>[],\n seed: number,\n): CampaignAggregates {\n const byJudge: Record<string, JudgeAggregate> = {}\n for (const judge of judges) {\n const scores: number[] = []\n for (const cell of cells) {\n const s = cell.judgeScores[judge.name]\n if (s !== undefined) scores.push(s.composite)\n }\n byJudge[judge.name] = aggregate(scores, seed)\n }\n const byScenario: Record<string, ScenarioAggregate> = {}\n const scenarioGroups = new Map<string, number[]>()\n for (const cell of cells) {\n const composites = Object.values(cell.judgeScores).map((s) => s.composite)\n if (composites.length === 0) continue\n const mean = composites.reduce((a, b) => a + b, 0) / composites.length\n const arr = scenarioGroups.get(cell.scenarioId) ?? 
[]\n arr.push(mean)\n scenarioGroups.set(cell.scenarioId, arr)\n }\n for (const [scenarioId, samples] of scenarioGroups) {\n const ag = aggregate(samples, seed)\n byScenario[scenarioId] = { meanComposite: ag.mean, ci95: ag.ci95, n: ag.n }\n }\n return {\n byJudge,\n byScenario,\n totalCostUsd: cells.reduce((a, c) => a + c.costUsd, 0),\n cellsExecuted: cells.filter((c) => !c.error).length,\n cellsSkipped: cells.filter((c) => c.error?.startsWith('skipped:')).length,\n cellsCached: cells.filter((c) => c.cached).length,\n cellsFailed: cells.filter((c) => c.error && !c.error.startsWith('skipped:')).length,\n }\n}\n\n// Percentile bootstrap CI95 via seeded resampling. Deterministic for a given\n// seed — same campaign re-run produces identical CI bands. Falls back to\n// degenerate intervals at n<=1 (the bootstrap is undefined there).\nfunction aggregate(samples: number[], seed: number): JudgeAggregate {\n const n = samples.length\n if (n === 0) return { mean: 0, stdev: 0, ci95: [0, 0], n: 0 }\n const mean = samples.reduce((a, b) => a + b, 0) / n\n const variance = samples.reduce((a, b) => a + (b - mean) ** 2, 0) / Math.max(1, n - 1)\n const stdev = Math.sqrt(variance)\n const ci = confidenceInterval(samples, 0.95, { seed, resamples: 1000 })\n return { mean, stdev, ci95: [ci.lower, ci.upper], n }\n}\n","/**\n * Backend-integrity guard: distinguish \"agent failed\" from \"eval ran against\n * a stub / unconfigured backend.\" Without this guard a canonical eval can\n * silently report `0/N passed` and look like an agent-quality problem when\n * the LLM was never actually called — the failure mode we just hit running\n * the 4-vertical parallel eval (legal-sandbox-stub returned hard-coded 33-104\n * char strings; gtm/creative defaulted to a cli-bridge that wasn't running).\n *\n * The shape:\n *\n * const report = summarizeBackendIntegrity(records)\n * assertRealBackend(records) // throws BackendIntegrityError if 100% stub\n *\n * A record is \"stub-mode\" if its 
`tokenUsage.input === 0 && tokenUsage.output === 0`.\n * (`costUsd` alone is unreliable — some backends successfully call LLMs but\n * don't propagate pricing, producing real tokens with $0 cost.)\n *\n * Verdicts:\n * - `real` — at least one record has nonzero token usage\n * - `stub` — every record is stub-mode (eval ran blind)\n * - `mixed` — some records real, some stub (partial backend failure;\n * often the 429-cascade or auth-half-failed case)\n */\n\nimport { AgentEvalError } from '../errors'\nimport type { RunRecord } from '../run-record'\n\nexport interface BackendIntegrityReport {\n /** Total records inspected. */\n totalRecords: number\n /** Records with input=0 AND output=0 (a stub fingerprint). */\n stubRecords: number\n /** Records with nonzero token usage (real LLM activity). */\n realRecords: number\n /** Records where output>0 but costUsd=0 (real LLM, broken cost ledger). */\n uncostedRecords: number\n /** Sum of input tokens across all records. */\n totalInputTokens: number\n /** Sum of output tokens across all records. */\n totalOutputTokens: number\n /** Sum of costUsd across all records. */\n totalCostUsd: number\n /** Worst-case integrity verdict. */\n verdict: 'real' | 'mixed' | 'stub'\n /** Human-readable diagnosis suitable for terminal output. */\n diagnosis: string\n}\n\n/**\n * Error thrown when an integrity assertion fails. Caller can pattern-match\n * by `code === 'AGENT_EVAL_BACKEND_STUB'` to differentiate from other\n * errors.\n */\nexport class BackendIntegrityError extends AgentEvalError {\n constructor(\n message: string,\n public readonly report: BackendIntegrityReport,\n ) {\n super('backend_integrity', message)\n }\n}\n\nfunction isStubRecord(rec: RunRecord): boolean {\n return rec.tokenUsage.input === 0 && rec.tokenUsage.output === 0\n}\n\nfunction isUncostedRecord(rec: RunRecord): boolean {\n return rec.tokenUsage.output > 0 && rec.costUsd === 0\n}\n\n/**\n * Inspect a batch of RunRecords and return an integrity report. 
Pure\n * function — no I/O, no logging. The caller decides what to do with the\n * verdict (print warning, throw, gate CI, etc.).\n */\nexport function summarizeBackendIntegrity(\n records: ReadonlyArray<RunRecord>,\n): BackendIntegrityReport {\n const totalRecords = records.length\n let stubRecords = 0\n let realRecords = 0\n let uncostedRecords = 0\n let totalInputTokens = 0\n let totalOutputTokens = 0\n let totalCostUsd = 0\n for (const rec of records) {\n totalInputTokens += rec.tokenUsage.input\n totalOutputTokens += rec.tokenUsage.output\n totalCostUsd += rec.costUsd\n if (isStubRecord(rec)) stubRecords++\n else realRecords++\n if (isUncostedRecord(rec)) uncostedRecords++\n }\n const verdict: BackendIntegrityReport['verdict'] =\n totalRecords === 0\n ? 'stub'\n : stubRecords === totalRecords\n ? 'stub'\n : stubRecords === 0\n ? 'real'\n : 'mixed'\n const diagnosis = buildDiagnosis({\n totalRecords,\n stubRecords,\n realRecords,\n uncostedRecords,\n totalInputTokens,\n totalOutputTokens,\n totalCostUsd,\n verdict,\n })\n return {\n totalRecords,\n stubRecords,\n realRecords,\n uncostedRecords,\n totalInputTokens,\n totalOutputTokens,\n totalCostUsd,\n verdict,\n diagnosis,\n }\n}\n\nfunction buildDiagnosis(r: Omit<BackendIntegrityReport, 'diagnosis'>): string {\n if (r.totalRecords === 0) {\n return 'no records — eval produced zero runs; backend likely failed before first turn'\n }\n if (r.verdict === 'stub') {\n return [\n `all ${r.totalRecords} records have zero token usage — the LLM backend was never called.`,\n 'common causes: --backend sandbox without a sandbox bridge running; stub model returning hard-coded strings;',\n 'auth misconfigured so requests were silently dropped before the LLM. 
Re-run with --backend tcloud and TANGLE_API_KEY set,',\n 'or boot the cli-bridge / sandbox before invoking the eval.',\n ].join(' ')\n }\n if (r.verdict === 'mixed') {\n const pct = ((r.stubRecords / r.totalRecords) * 100).toFixed(0)\n return [\n `${r.stubRecords}/${r.totalRecords} records (${pct}%) have zero token usage — the backend partially failed.`,\n 'common causes: rate-limit cascade (429s after the first N personas);',\n 'transient auth expiry mid-run; provider outage. Treat the affected records as missing data, not agent failures.',\n ].join(' ')\n }\n // verdict === 'real'\n if (r.uncostedRecords > 0) {\n const pct = ((r.uncostedRecords / r.totalRecords) * 100).toFixed(0)\n return [\n `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens).`,\n `${r.uncostedRecords} (${pct}%) have output tokens but costUsd=0. Two distinct roots:`,\n '(a) cost ledger mis-wired — no usage propagation from the runtime stream into RunRecord; or',\n '(b) the model is unpriced at the source (sandbox/router returned $0 despite real tokens).',\n 'For (b), price the measured tokens against the substrate table (estimateCost) instead of leaving $0.',\n ].join(' ')\n }\n return `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens, $${r.totalCostUsd.toFixed(4)}).`\n}\n\n/**\n * Throw BackendIntegrityError if the verdict is 'stub' — i.e. every record\n * shows zero LLM activity. Non-strict callers can pass `{ allowMixed: false }`\n * to also reject mixed verdicts (recommended for CI gates).\n *\n * Real backends pass through silently.\n */\nexport function assertRealBackend(\n records: ReadonlyArray<RunRecord>,\n opts: { allowMixed?: boolean } = {},\n): BackendIntegrityReport {\n const report = summarizeBackendIntegrity(records)\n const allowMixed = opts.allowMixed ?? 
true\n if (report.verdict === 'stub') {\n throw new BackendIntegrityError(\n `backend-integrity: ran against a stub or unconfigured backend — ${report.diagnosis}`,\n report,\n )\n }\n if (!allowMixed && report.verdict === 'mixed') {\n throw new BackendIntegrityError(\n `backend-integrity: partial backend failure rejected — ${report.diagnosis}`,\n report,\n )\n }\n return report\n}\n","import { createRequire } from 'node:module'\n\n/**\n * @experimental\n *\n * `CampaignStorage` — the filesystem seam `runCampaign` writes through\n * (run/cell dirs, the resumability cache, per-cell artifacts, trace spans).\n *\n * The default (`fsCampaignStorage`) is the Node filesystem — identical\n * behavior to the inline `node:fs` calls it replaces, so existing CLI\n * consumers are unaffected. `inMemoryCampaignStorage` keeps everything in a\n * `Map`, so the substrate runs in environments WITHOUT a filesystem\n * (Cloudflare Workers, Deno Deploy, other edge runtimes) — the campaign\n * still produces its `CampaignResult` (cells + aggregates) in memory;\n * artifacts/traces simply aren't persisted to disk.\n *\n * Paths are opaque keys to the in-memory adapter — it does not parse them,\n * so the same `join(...)`-built paths work unchanged across both adapters.\n */\nexport interface CampaignStorage {\n /** Ensure a directory exists (recursive). No-op for in-memory. */\n ensureDir(dir: string): void\n /** Does this path exist (as a written file or an ensured dir)? */\n exists(path: string): boolean\n /** Read a UTF-8 file; `undefined` when missing or unreadable. */\n read(path: string): string | undefined\n /** Write a file (string or bytes). Parent dir is assumed ensured. */\n write(path: string, content: string | Uint8Array): void\n}\n\n/** Node-filesystem storage — the default. 
Lazily requires `node:fs` so the\n * module imports cleanly in non-Node runtimes (where the caller passes\n * `inMemoryCampaignStorage` instead and never constructs this).\n *\n * `createRequire(import.meta.url)` is the ESM-native lazy require — a bare\n * `require` is a ReferenceError under `\"type\": \"module\"`, which is exactly\n * the shape this package publishes. */\nexport function fsCampaignStorage(): CampaignStorage {\n const nodeRequire = createRequire(import.meta.url)\n const { existsSync, mkdirSync, readFileSync, writeFileSync } = nodeRequire(\n 'node:fs',\n ) as typeof import('node:fs')\n return {\n ensureDir(dir) {\n if (!existsSync(dir)) mkdirSync(dir, { recursive: true })\n },\n exists(path) {\n return existsSync(path)\n },\n read(path) {\n try {\n return readFileSync(path, 'utf8')\n } catch {\n return undefined\n }\n },\n write(path, content) {\n writeFileSync(path, content as Uint8Array)\n },\n }\n}\n\n/** In-memory storage for filesystem-less runtimes. Artifacts + trace spans\n * live in a `Map` for the duration of the run; the `CampaignResult` is\n * fully populated, but nothing is persisted to disk. */\nexport function inMemoryCampaignStorage(): CampaignStorage {\n const files = new Map<string, string | Uint8Array>()\n const dirs = new Set<string>()\n return {\n ensureDir(dir) {\n dirs.add(dir)\n },\n exists(path) {\n return files.has(path) || dirs.has(path)\n },\n read(path) {\n const value = files.get(path)\n if (value === undefined) return undefined\n return typeof value === 'string' ? 
value : new TextDecoder().decode(value)\n },\n write(path, content) {\n files.set(path, content)\n },\n }\n}\n"],"mappings":";;;;;;;;AAaA,SAAS,kBAAkB;AAC3B,SAAS,YAAY;;;ACuCd,IAAM,wBAAN,cAAoC,eAAe;AAAA,EACxD,YACE,SACgB,QAChB;AACA,UAAM,qBAAqB,OAAO;AAFlB;AAAA,EAGlB;AAAA,EAHkB;AAIpB;AAEA,SAAS,aAAa,KAAyB;AAC7C,SAAO,IAAI,WAAW,UAAU,KAAK,IAAI,WAAW,WAAW;AACjE;AAEA,SAAS,iBAAiB,KAAyB;AACjD,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,YAAY;AACtD;AAOO,SAAS,0BACd,SACwB;AACxB,QAAM,eAAe,QAAQ;AAC7B,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,MAAI,kBAAkB;AACtB,MAAI,mBAAmB;AACvB,MAAI,oBAAoB;AACxB,MAAI,eAAe;AACnB,aAAW,OAAO,SAAS;AACzB,wBAAoB,IAAI,WAAW;AACnC,yBAAqB,IAAI,WAAW;AACpC,oBAAgB,IAAI;AACpB,QAAI,aAAa,GAAG,EAAG;AAAA,QAClB;AACL,QAAI,iBAAiB,GAAG,EAAG;AAAA,EAC7B;AACA,QAAM,UACJ,iBAAiB,IACb,SACA,gBAAgB,eACd,SACA,gBAAgB,IACd,SACA;AACV,QAAM,YAAY,eAAe;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAAS,eAAe,GAAsD;AAC5E,MAAI,EAAE,iBAAiB,GAAG;AACxB,WAAO;AAAA,EACT;AACA,MAAI,EAAE,YAAY,QAAQ;AACxB,WAAO;AAAA,MACL,OAAO,EAAE,YAAY;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AACA,MAAI,EAAE,YAAY,SAAS;AACzB,UAAM,OAAQ,EAAE,cAAc,EAAE,eAAgB,KAAK,QAAQ,CAAC;AAC9D,WAAO;AAAA,MACL,GAAG,EAAE,WAAW,IAAI,EAAE,YAAY,aAAa,GAAG;AAAA,MAClD;AAAA,MACA;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AAEA,MAAI,EAAE,kBAAkB,GAAG;AACzB,UAAM,OAAQ,EAAE,kBAAkB,EAAE,eAAgB,KAAK,QAAQ,CAAC;AAClE,WAAO;AAAA,MACL,GAAG,EAAE,YAAY,uCAAuC,EAAE,gBAAgB,SAAS,EAAE,iBAAiB;AAAA,MACtG,GAAG,EAAE,eAAe,KAAK,GAAG;AAAA,MAC5B;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,KAAK,GAAG;AAAA,EACZ;AACA,SAAO,GAAG,EAAE,YAAY,uCAAuC,EAAE,gBAAgB,SAAS,EAAE,iBAAiB,aAAa,EAAE,aAAa,QAAQ,CAAC,CAAC;AACrJ;AASO,SAAS,kBACd,SACA,OAAiC,CAAC,GACV;AACxB,QAAM,SAAS,0BAA0B,OAAO;AAChD,QAAM,aAAa,KAAK,cAAc;AACtC,MAAI,OAAO,YAAY,QAAQ;AAC7B,UAAM,IAAI;AAAA,MACR,wEAAmE,OAAO,SAAS;AAAA,MACnF;AAAA,IACF;AAAA,EACF;AACA,MAAI,CAAC,cAAc,OAAO,YAAY,SAAS;AAC7C,
UAAM,IAAI;AAAA,MACR,8DAAyD,OAAO,SAAS;AAAA,MACzE;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;;;ACxLA,SAAS,qBAAqB;AAqCvB,SAAS,oBAAqC;AACnD,QAAM,cAAc,cAAc,YAAY,GAAG;AACjD,QAAM,EAAE,YAAY,WAAW,cAAc,cAAc,IAAI;AAAA,IAC7D;AAAA,EACF;AACA,SAAO;AAAA,IACL,UAAU,KAAK;AACb,UAAI,CAAC,WAAW,GAAG,EAAG,WAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AAAA,IAC1D;AAAA,IACA,OAAO,MAAM;AACX,aAAO,WAAW,IAAI;AAAA,IACxB;AAAA,IACA,KAAK,MAAM;AACT,UAAI;AACF,eAAO,aAAa,MAAM,MAAM;AAAA,MAClC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,oBAAc,MAAM,OAAqB;AAAA,IAC3C;AAAA,EACF;AACF;AAKO,SAAS,0BAA2C;AACzD,QAAM,QAAQ,oBAAI,IAAiC;AACnD,QAAM,OAAO,oBAAI,IAAY;AAC7B,SAAO;AAAA,IACL,UAAU,KAAK;AACb,WAAK,IAAI,GAAG;AAAA,IACd;AAAA,IACA,OAAO,MAAM;AACX,aAAO,MAAM,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI;AAAA,IACzC;AAAA,IACA,KAAK,MAAM;AACT,YAAM,QAAQ,MAAM,IAAI,IAAI;AAC5B,UAAI,UAAU,OAAW,QAAO;AAChC,aAAO,OAAO,UAAU,WAAW,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK;AAAA,IAC3E;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,YAAM,IAAI,MAAM,OAAO;AAAA,IACzB;AAAA,EACF;AACF;;;AF8BA,eAAsB,YACpB,MAC+C;AAC/C,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,QAAM,MAAM,KAAK,QAAQ,MAAM,oBAAI,KAAK;AACxC,QAAM,SAAS,KAAK,UAAU,CAAC;AAC/B,QAAM,UAAU,KAAK,WAAW,kBAAkB;AAElD,UAAQ,UAAU,KAAK,MAAM;AAE7B,QAAM,eAAe,oBAAoB;AAAA,IACvC,WAAW,KAAK;AAAA,IAChB;AAAA,IACA,aAAa,KAAK,SAAS,QAAQ;AAAA,IACnC;AAAA,IACA;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI;AACtB,QAAM,QAAyC,CAAC;AAChD,QAAM,kBAA0C,CAAC;AAGjD,QAAM,WAA0F,CAAC;AACjG,MAAI,YAAY;AAChB,aAAW,YAAY,KAAK,WAAW;AACrC,aAAS,MAAM,GAAG,MAAM,MAAM,OAAO;AACnC,YAAM,SAAS,GAAG,SAAS,EAAE,IAAI,GAAG;AACpC,YAAM,WAAW,OAAO;AACxB,eAAS,KAAK,EAAE,UAAU,KAAK,QAAQ,SAAS,CAAC;AACjD,mBAAa;AAAA,IACf;AAAA,EACF;AAGA,MAAI,eAAe;AACnB,MAAI,qBAAqB;AACzB,QAAM,kBAAkB,IAAI,gBAAgB;AAI5C,QAAM,QAAyB,CAAC;AAChC,MAAI,UAAU;AACd,QAAM,WAAW;AAEjB,WAAS,IAAI,GAAG,IAAI,gBAAgB,KAAK;AACvC,UAAM;AAAA,OACH,YAAY;AACX,eAAO,MAAM;AACX,gBAAM,QAAQ;AACd,cAAI,SAAS,SAAS,OAAQ;AAC9B,gBAAM,OAAO,SAAS,KAAK;AAC3B,cAAI,oBAAoB;AACtB,qBAAS,KAAK,YAAY,MAAM,sBAAsB,CAAC;AACvD;AAAA,UACF;A
ACA,gBAAM,SAAS,MAAM,YAAY;AAAA,YAC/B;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,kBAAkB,KAAK,oBAAoB,wBAAwB,OAAO;AAAA,YAC1E,QAAQ,gBAAgB;AAAA,YACxB,mBAAmB,KAAK;AAAA,UAC1B,CAAC;AACD,mBAAS,KAAK,OAAO,IAAI;AACzB,2BAAiB,OAAO,MAAM,KAAK,eAAe,MAAM;AACxD,0BAAgB,OAAO,KAAK;AAC5B,iBAAO,OAAO,iBAAiB,OAAO,eAAe;AACrD,cAAI,KAAK,gBAAgB,UAAa,gBAAgB,KAAK,aAAa;AACtE,iCAAqB;AAAA,UACvB;AAEA,cAAI,KAAK,gBAAgB,KAAK,iBAAiB,SAAS,CAAC,OAAO,KAAK,OAAO;AAC1E,kBAAM,eAAe;AAAA,cACnB,OAAO,KAAK;AAAA,cACZ,MAAM,OAAO;AAAA,cACb,UAAU,KAAK;AAAA,cACf;AAAA,cACA;AAAA,YACF,CAAC,EAAE,MAAM,CAAC,QAAQ;AAGhB,sBAAQ;AAAA,gBACN,oCAAoC,OAAO,KAAK,MAAM,KAAK,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,cAC7G;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF,GAAG;AAAA,IACL;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,KAAK;AAEvB,QAAM,UAAU,IAAI;AACpB,WAAS,KAAK,CAAC,GAAG,MAAM,EAAE,OAAO,cAAc,EAAE,MAAM,CAAC;AAExD,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,WAAW,UAAU,YAAY;AAAA,IACjC,SAAS,QAAQ,YAAY;AAAA,IAC7B,YAAY,QAAQ,QAAQ,IAAI,UAAU,QAAQ;AAAA,IAClD,OAAO;AAAA,IACP;AAAA,IACA,QAAQ,KAAK;AAAA,IACb;AAAA,IACA,WAAW,KAAK,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,EACnE;AACF;AAgBA,eAAe,YACb,MAC2F;AAC3F,QAAM,UAAU,KAAK;AACrB,QAAM,UAAU,KAAK,KAAK,KAAK,QAAQ,KAAK,KAAK,OAAO,QAAQ,mBAAmB,GAAG,CAAC;AACvF,UAAQ,UAAU,OAAO;AAGzB,QAAM,YAAY,KAAK,SAAS,oBAAoB;AACpD,MAAI,KAAK,WAAW;AAClB,UAAM,MAAM,QAAQ,KAAK,SAAS;AAClC,QAAI,QAAQ,QAAW;AACrB,UAAI;AACF,cAAM,SAAS,KAAK,MAAM,GAAG;AAC7B,YAAI,OAAO,WAAW,KAAK,KAAK,QAAQ;AACtC,iBAAO,EAAE,MAAM,EAAE,GAAG,QAAQ,QAAQ,KAAK,GAAG,iBAAiB,CAAC,EAAE;AAAA,QAClE;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,KAAK,IAAI;AACzB,QAAM,QAAQ,KAAK,iBAAiB,KAAK,KAAK,QAAQ,OAAO;AAC7D,QAAM,kBAA0C,CAAC;AACjD,QAAM,YAAoC;AAAA,IACxC,MAAM,MAAM,MAAM,SAAS;AACzB,YAAM,WAAW,KAAK,SAAS,IAAI;AACnC,cAAQ,UAAU,KAAK,UAAU,IAAI,CAAC;AACtC,cAAQ,MAAM,UAAU,OAAO;AAC/B,sBAAgB,GAAG,KAAK,KAAK,MAAM,IAAI,IAAI,EAAE,IAAI;AACjD,aAAO;AAAA,IACT;AAAA,IACA,MAAM,UAAU,MAAM,OAAO;AAC3B,aAAO,UAAU,MAAM,MAAM,KAAK,UAAU,OAA
O,MAAM,CAAC,CAAC;AAAA,IAC7D;AAAA,EACF;AACA,MAAI,YAAY;AAChB,QAAM,cAAkC,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC9D,QAAM,OAA0B;AAAA,IAC9B,QAAQ,QAAQ,QAAQ;AACtB,mBAAa;AACb,YAAM,KAAK,QAAQ,MAAM,IAAI,EAAE,WAAW,OAAO,CAAC,EAAE,IAAI;AAAA,IAC1D;AAAA,IACA,cAAc,OAAO;AACnB,kBAAY,SAAS,MAAM;AAC3B,kBAAY,UAAU,MAAM;AAC5B,UAAI,MAAM,OAAQ,aAAY,UAAU,YAAY,UAAU,KAAK,MAAM;AAAA,IAC3E;AAAA,IACA,UAAU;AACR,aAAO;AAAA,IACT;AAAA,IACA,SAAS;AACP,aAAO,EAAE,GAAG,YAAY;AAAA,IAC1B;AAAA,EACF;AAEA,QAAM,YAAY,KAAK,KAAK,gBAAgB;AAAA,IAC1C,UAAU,KAAK,KAAK;AAAA,IACpB,KAAK,KAAK,KAAK;AAAA,EACjB,CAAC;AAMD,QAAM,YAAY,IAAI,gBAAgB;AACtC,QAAM,kBAAkB,MAAM,UAAU,MAAO,KAAK,OAAgC,MAAM;AAC1F,MAAI,KAAK,OAAO,QAAS,WAAU,MAAO,KAAK,OAAgC,MAAM;AAAA,MAChF,MAAK,OAAO,iBAAiB,SAAS,iBAAiB,EAAE,MAAM,KAAK,CAAC;AAE1E,QAAM,MAAuB;AAAA,IAC3B,QAAQ,KAAK,KAAK;AAAA,IAClB,KAAK,KAAK,KAAK;AAAA,IACf,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ,UAAU;AAAA,IAClB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,MAAI;AACJ,MAAI;AACJ,QAAM,YAAY,KAAK;AACvB,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,GAAG;AAC7D,QAAI,cAAc,UAAa,YAAY,GAAG;AAK5C,iBAAW,MAAM,QAAQ,KAAK;AAAA,QAC5B;AAAA,QACA,IAAI,QAAe,CAAC,GAAG,WAAW;AAChC,yBAAe,WAAW,MAAM;AAC9B,sBAAU,MAAM,IAAI,MAAM,kBAAkB,CAAC;AAC7C;AAAA,cACE,IAAI;AAAA,gBACF,qBAAqB,SAAS,gBAAgB,KAAK,KAAK,MAAM;AAAA,cAChE;AAAA,YACF;AAAA,UACF,GAAG,SAAS;AACZ,cAAI,OAAQ,aAAwC,UAAU;AAC5D,YAAC,aAAuC,MAAM;AAAA,QAClD,CAAC;AAAA,MACH,CAAC;AAAA,IACH,OAAO;AACL,iBAAW,MAAM;AAAA,IACnB;AAAA,EACF,SAAS,KAAK;AACZ,mBAAe,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,EAChE,UAAE;AACA,QAAI,aAAc,cAAa,YAAY;AAC3C,SAAK,OAAO,oBAAoB,SAAS,eAAe;AAAA,EAC1D;AAKA,QAAM,cAA0C,CAAC;AACjD,MAAI,aAAa,QAAW;AAC1B,eAAW,SAAS,KAAK,KAAK,UAAU,CAAC,GAAG;AAC1C,UAAI,MAAM,aAAa,CAAC,MAAM,UAAU,KAAK,KAAK,QAAQ,EAAG;AAC7D,UAAI;AACF,oBAAY,MAAM,IAAI,IAAI,MAAM,aAAa,OAAO;AAAA,UAClD;AAAA,UACA,UAAU,KAAK,KAAK;AAAA,UACpB,QAAQ,KAAK;AAAA,QACf,CAAC;AAAA,MACH,SAAS,KAAK;AACZ,uBAAe,UAAU,MAAM,IAAI,aAAa,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAChG;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,MAAM,MAAM;AAElB,QAAM,OAAsC;AAAA,IAC1C,QAAQ,KAAK,KAAK;AAAA,
IAClB,YAAY,KAAK,KAAK,SAAS;AAAA,IAC/B,KAAK,KAAK,KAAK;AAAA,IACf,UAAW,YAAY;AAAA,IACvB;AAAA,IACA,SAAS;AAAA,IACT,YAAY,EAAE,GAAG,YAAY;AAAA,IAC7B,YAAY,KAAK,IAAI,IAAI;AAAA,IACzB,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ;AAAA,IACR,OAAO;AAAA,EACT;AAEA,MAAI,CAAC,gBAAgB,KAAK,WAAW;AACnC,YAAQ,MAAM,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,EAC/C;AAEA,SAAO,EAAE,MAAM,gBAAgB;AACjC;AASA,SAAS,iBACP,MACA,MACM;AACN,MAAI,SAAS,SAAS,KAAK,MAAO;AAClC,MAAI,KAAK,aAAa,QAAQ,KAAK,aAAa,OAAW;AAC3D,QAAM,aAAa,KAAK,WAAW,UAAU,KAAK,KAAK,WAAW,WAAW;AAC7E,MAAI,KAAK,YAAY,KAAK,CAAC,WAAY;AACvC,QAAM,MAAM,SAAS,KAAK,MAAM;AAChC,MAAI,SAAS,UAAU;AACrB,UAAM,SAAiC;AAAA,MACrC,cAAc;AAAA,MACd,aAAa;AAAA,MACb,aAAa;AAAA,MACb,iBAAiB;AAAA,MACjB,kBAAkB;AAAA,MAClB,mBAAmB;AAAA,MACnB,cAAc;AAAA,MACd,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AACA,UAAM,IAAI,sBAAsB,gBAAgB,GAAG,IAAI,MAAM;AAAA,EAC/D;AAEA,UAAQ,KAAK,8BAA8B,GAAG,EAAE;AAClD;AAEA,eAAe,aACb,OACA,OACqB;AACrB,SAAO,MAAM,MAAM,KAAK;AAC1B;AAEA,SAAS,wBACP,SACsD;AACtD,SAAO,CAAC,QAAQ,QAAQ;AACtB,UAAM,QAAwC,CAAC;AAC/C,WAAO;AAAA,MACL,KAAK,MAAM,YAAY;AACrB,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAkC,EAAE,MAAM,QAAQ,SAAS,GAAI,cAAc,CAAC,EAAG;AACvF,cAAM,SAAoB;AAAA,UACxB,IAAI,UAAU;AACZ,mBAAO,aAAa,KAAK,IAAI,IAAI;AACjC,gBAAI,SAAU,QAAO,OAAO,QAAQ,QAAQ;AAC5C,kBAAM,KAAK,MAAM;AAAA,UACnB;AAAA,UACA,aAAa,KAAK,OAAO;AACvB,mBAAO,GAAG,IAAI;AAAA,UAChB;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,MACA,MAAM,QAAQ;AACZ,gBAAQ,MAAM,KAAK,KAAK,aAAa,GAAG,MAAM,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,YACP,MACA,QAC+B;AAC/B,SAAO;AAAA,IACL,QAAQ,KAAK;AAAA,IACb,YAAY,KAAK,SAAS;AAAA,IAC1B,KAAK,KAAK;AAAA,IACV,UAAU;AAAA,IACV,aAAa,CAAC;AAAA,IACd,SAAS;AAAA,IACT,YAAY,EAAE,OAAO,GAAG,QAAQ,EAAE;AAAA,IAClC,YAAY;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,QAAQ;AAAA,IACR,OAAO,YAAY,MAAM;AAAA,EAC3B;AACF;AAUA,eAAe,eACb,MACe;AACf,QAAM,KAAK,MAAM,QAAQ;AAAA,IACvB,UAAU,KAAK;AAAA,IACf,UAAU,KAAK,KAAK;AAAA,IACpB,aAAa,KAAK,KAAK;AAAA,IACvB,QAAQ,KAAK,KAAK,iBAAiB;AAAA,IACnC,mBAAmB,KAAK,KAAK,4BAA4B;AAAA,IACzD,YAAY,KAAK,IAAI,EAAE,YAAY;AAAA,IACnC,iBAAiB;AAAA,E
ACnB,CAAC;AACH;AAIA,SAAS,oBAAoB,OAMlB;AACT,QAAM,YAAY;AAAA,IAChB,WAAW,MAAM,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,IAClE,QAAQ,MAAM,OAAO,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,EAAE,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;AAAA,IACxF,UAAU,MAAM;AAAA,IAChB,MAAM,MAAM;AAAA,IACZ,MAAM,MAAM;AAAA,EACd;AACA,SAAO,WAAW,QAAQ,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC,EAAE,OAAO,KAAK;AAC5E;AAEA,SAAS,kBACP,OACA,QACA,MACoB;AACpB,QAAM,UAA0C,CAAC;AACjD,aAAW,SAAS,QAAQ;AAC1B,UAAM,SAAmB,CAAC;AAC1B,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,YAAY,MAAM,IAAI;AACrC,UAAI,MAAM,OAAW,QAAO,KAAK,EAAE,SAAS;AAAA,IAC9C;AACA,YAAQ,MAAM,IAAI,IAAI,UAAU,QAAQ,IAAI;AAAA,EAC9C;AACA,QAAM,aAAgD,CAAC;AACvD,QAAM,iBAAiB,oBAAI,IAAsB;AACjD,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACzE,QAAI,WAAW,WAAW,EAAG;AAC7B,UAAM,OAAO,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAChE,UAAM,MAAM,eAAe,IAAI,KAAK,UAAU,KAAK,CAAC;AACpD,QAAI,KAAK,IAAI;AACb,mBAAe,IAAI,KAAK,YAAY,GAAG;AAAA,EACzC;AACA,aAAW,CAAC,YAAY,OAAO,KAAK,gBAAgB;AAClD,UAAM,KAAK,UAAU,SAAS,IAAI;AAClC,eAAW,UAAU,IAAI,EAAE,eAAe,GAAG,MAAM,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE;AAAA,EAC5E;AACA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,cAAc,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,SAAS,CAAC;AAAA,IACrD,eAAe,MAAM,OAAO,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE;AAAA,IAC7C,cAAc,MAAM,OAAO,CAAC,MAAM,EAAE,OAAO,WAAW,UAAU,CAAC,EAAE;AAAA,IACnE,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,IAC3C,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,WAAW,UAAU,CAAC,EAAE;AAAA,EAC/E;AACF;AAKA,SAAS,UAAU,SAAmB,MAA8B;AAClE,QAAM,IAAI,QAAQ;AAClB,MAAI,MAAM,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,EAAE;AAC5D,QAAM,OAAO,QAAQ,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAClD,QAAM,WAAW,QAAQ,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,SAAS,GAAG,CAAC,IAAI,KAAK,IAAI,GAAG,IAAI,CAAC;AACrF,QAAM,QAAQ,KAAK,KAAK,QAAQ;AAChC,QAAM,KAAK,mBAAmB,SAAS,MAAM,EAAE,MAAM,WAAW,IAAK,CAAC;AACtE,SAAO,EAAE,MAAM,OAAO,MAAM,CAAC,GAAG,OAAO,GAAG,KAAK,GAAG,EAAE;AACtD;","names":[]}
@@ -3,7 +3,7 @@ import {
3
3
  defaultProductionGate,
4
4
  evolutionaryDriver,
5
5
  runEval
6
- } from "../chunk-6QZUCFKM.js";
6
+ } from "../chunk-UD6EF73X.js";
7
7
  import {
8
8
  createHostedClient
9
9
  } from "../chunk-DFS3FEXO.js";
@@ -16,13 +16,13 @@ import {
16
16
  heldOutGate,
17
17
  runImprovementLoop,
18
18
  surfaceContentHash
19
- } from "../chunk-VMAYE3LM.js";
19
+ } from "../chunk-4QJN7RDX.js";
20
20
  import {
21
21
  fsCampaignStorage,
22
22
  inMemoryCampaignStorage,
23
23
  runCampaign,
24
24
  summarizeBackendIntegrity
25
- } from "../chunk-6XQIEUQ2.js";
25
+ } from "../chunk-ZPSKPT3V.js";
26
26
  import "../chunk-YV7J7X5N.js";
27
27
  import {
28
28
  FileSystemOutcomeStore,