@tangle-network/agent-eval 0.44.1 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/http.d.ts +138 -0
- package/dist/adapters/http.js +196 -0
- package/dist/adapters/http.js.map +1 -0
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/adapters/langchain.js.map +1 -1
- package/dist/campaign/index.d.ts +3 -3
- package/dist/campaign/index.js +2 -2
- package/dist/{chunk-H5BGRSN4.js → chunk-HRKOCLQA.js} +3 -3
- package/dist/{chunk-RXK7FXLV.js → chunk-J3EIOI3O.js} +7 -2
- package/dist/chunk-J3EIOI3O.js.map +1 -0
- package/dist/contract/index.d.ts +2 -2
- package/dist/contract/index.js +2 -2
- package/dist/openapi.json +1 -1
- package/dist/rl.d.ts +1 -1
- package/dist/{run-campaign-GNDO66B4.js → run-campaign-6UEVBPP3.js} +2 -2
- package/dist/{run-improvement-loop-CbilHQAb.d.ts → run-improvement-loop-pJ4yrx4X.d.ts} +17 -1
- package/dist/{types-DToGONFA.d.ts → types-BURGZ8Ug.d.ts} +8 -0
- package/docs/adapters-observability.md +121 -0
- package/docs/distributed-driver.md +173 -0
- package/package.json +6 -1
- package/dist/chunk-RXK7FXLV.js.map +0 -1
- /package/dist/{chunk-H5BGRSN4.js.map → chunk-HRKOCLQA.js.map} +0 -0
- /package/dist/{run-campaign-GNDO66B4.js.map → run-campaign-6UEVBPP3.js.map} +0 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { S as Scenario, D as DispatchFn, g as DispatchContext } from '../types-BURGZ8Ug.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* # `@tangle-network/agent-eval/adapters/http` — distributed Dispatch over HTTP.
|
|
5
|
+
*
|
|
6
|
+
* Decouples driver and worker. The driver (running `runImprovementLoop` or
|
|
7
|
+
* `runCampaign`) can live anywhere — your VPC, a dev laptop, a cron VM. The
|
|
8
|
+
* workers (running the actual agent) can live anywhere else — different
|
|
9
|
+
* regions, different clouds, different boxes — as long as they speak HTTP.
|
|
10
|
+
*
|
|
11
|
+
* Both sides:
|
|
12
|
+
*
|
|
13
|
+
* - **`httpDispatch({ url | resolveUrl, ... })`** — client. Returns a
|
|
14
|
+
* `Dispatch` that POSTs an `HttpDispatchRequestBody` (the scenario plus the serializable ctx fields) to a worker URL and parses
|
|
15
|
+
* the artifact back. AbortSignal-aware, retries on idempotent errors,
|
|
16
|
+
* bounded timeout per call.
|
|
17
|
+
* - **`runDispatchServer({ dispatch, port, ... })`** — server. Wraps your
|
|
18
|
+
* local `Dispatch` as an HTTP endpoint. Handles auth, JSON parsing,
|
|
19
|
+
* error mapping, and cancellation when the client aborts.
|
|
20
|
+
*
|
|
21
|
+
* # Topology examples
|
|
22
|
+
*
|
|
23
|
+
* **Single-worker:** driver on box A, worker on box B. Set
|
|
24
|
+
* `httpDispatch({ url: 'https://box-b/dispatch' })`.
|
|
25
|
+
*
|
|
26
|
+
* **Multi-region:** N workers across regions. Use `httpDispatch({ resolveUrl })`
|
|
27
|
+
* with a function that picks the URL per cell from `ctx.placement`. Combined
|
|
28
|
+
* with `cellPlacement` on `RunCampaignOptions`, the substrate fans cells
|
|
29
|
+
* across geographies in parallel.
|
|
30
|
+
*
|
|
31
|
+
* **Driver-as-a-service:** driver runs as a long-lived process or service
|
|
32
|
+
* (holds optimization state across generations); workers are stateless
|
|
33
|
+
* HTTP services that can scale horizontally per cell.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
/**
 * Options accepted by `httpDispatch`: where each cell is sent, how the request
 * is authenticated, and the timeout / retry policy. `_TArtifact` is not
 * referenced by any member of this interface.
 */
interface HttpDispatchOptions<TScenario extends Scenario, _TArtifact> {
|
|
37
|
+
/** Static endpoint URL. Mutually exclusive with `resolveUrl`; `httpDispatch` throws unless exactly one of the two is set. */
|
|
38
|
+
url?: string;
|
|
39
|
+
/**
|
|
40
|
+
* Dynamic per-cell URL resolver. Receives the scenario + the substrate
|
|
41
|
+
* placement key (from `RunCampaignOptions.cellPlacement`) and returns the
|
|
42
|
+
* worker URL to invoke. Mutually exclusive with `url`.
|
|
43
|
+
*/
|
|
44
|
+
resolveUrl?: (input: {
|
|
45
|
+
scenario: TScenario;
|
|
46
|
+
placement?: string;
|
|
47
|
+
cellId: string;
|
|
48
|
+
}) => string;
|
|
49
|
+
/** Value sent as the `Authorization` header; `Bearer ` is prepended unless the value already starts with it. A function form (sync or async) is resolved once per dispatched cell. */
|
|
50
|
+
auth?: string | (() => string | Promise<string>);
|
|
51
|
+
/** Extra headers merged into every request. They are spread after the defaults, so they can override `Content-Type` and `Authorization`. */
|
|
52
|
+
headers?: Record<string, string>;
|
|
53
|
+
/** Timeout in ms applied to each attempt (a fresh timer is created per attempt). Default 5 minutes. */
|
|
54
|
+
timeoutMs?: number;
|
|
55
|
+
/** Maximum number of retries after the first attempt. Responses with status >= 500, 408, or 429 are classified as retryable. Default 2. */
|
|
56
|
+
retries?: number;
|
|
57
|
+
/** Optional fetch override (auth wrappers, custom agent, mocks). Defaults to the global `fetch`. */
|
|
58
|
+
fetchImpl?: typeof fetch;
|
|
59
|
+
}
|
|
60
|
+
/**
 * JSON body POSTed by `httpDispatch`: the scenario plus the plain fields copied
 * from the dispatch context. The context's `signal` is not part of the body.
 */
interface HttpDispatchRequestBody<TScenario extends Scenario> {
|
|
61
|
+
scenario: TScenario;
|
|
62
|
+
cellId: string;
|
|
63
|
+
rep: number;
|
|
64
|
+
generation?: number;
|
|
65
|
+
seed: number;
|
|
66
|
+
placement?: string;
|
|
67
|
+
cycleId?: string;
|
|
68
|
+
}
|
|
69
|
+
/** JSON body of a 200 response: the value returned by the worker's `dispatch`, wrapped under `artifact`. */
interface HttpDispatchResponseBody<TArtifact> {
|
|
70
|
+
artifact: TArtifact;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Wrap a remote HTTP endpoint as a `Dispatch`. The remote side should run
|
|
74
|
+
* `runDispatchServer` (or any service that speaks the same wire shape).
|
|
75
|
+
*
|
|
76
|
+
* Cancellation: the substrate's per-cell `AbortSignal` is forwarded; the
|
|
77
|
+
* server's `runDispatchServer` translates the resulting `AbortError` into
|
|
78
|
+
* a 499 (client-closed) so the client doesn't retry.
|
|
79
|
+
*
 * @returns A dispatch function that resolves to the `artifact` field of the
 *   worker's JSON response.
 * @throws Error synchronously when neither or both of `url` / `resolveUrl`
 *   are supplied.
*/
|
|
80
|
+
declare function httpDispatch<TScenario extends Scenario, TArtifact>(opts: HttpDispatchOptions<TScenario, TArtifact>): DispatchFn<TScenario, TArtifact>;
|
|
81
|
+
/** Options accepted by `runDispatchServer`: the dispatch to expose, where to bind, and how requests are authenticated and observed. */
interface RunDispatchServerOptions<TScenario extends Scenario, TArtifact> {
|
|
82
|
+
/** The Dispatch this server exposes — what runs when a request lands. */
|
|
83
|
+
dispatch: DispatchFn<TScenario, TArtifact>;
|
|
84
|
+
/** TCP port to bind. The port actually bound is reported on the returned handle. */
|
|
85
|
+
port: number;
|
|
86
|
+
/** Optional bind host; defaults to 0.0.0.0. */
|
|
87
|
+
host?: string;
|
|
88
|
+
/** Required for any non-test deployment: the bearer token clients must
|
|
89
|
+
* send. The substrate refuses to start without auth unless `auth: false`
|
|
90
|
+
* is set explicitly (intended ONLY for closed-network/internal testing). */
|
|
91
|
+
auth: string | false;
|
|
92
|
+
/** Path the server listens on. Default `/dispatch`. Compared exactly against the request URL with any query string removed; other paths (and non-POST methods) get 404. */
|
|
93
|
+
path?: string;
|
|
94
|
+
/**
|
|
95
|
+
* Per-request handler that wraps `dispatch` with whatever context the
|
|
96
|
+
* worker side needs to construct a `DispatchContext` — typically the
|
|
97
|
+
* trace writer, artifact writer, and cost meter. The substrate provides
|
|
98
|
+
* synthetic-but-typed defaults if not supplied; production deployments
|
|
99
|
+
* should wire real ones (e.g. ship traces to your OTel collector).
|
|
100
|
+
*/
|
|
101
|
+
contextFactory?: (req: HttpDispatchRequestBody<TScenario>, signal: AbortSignal) => Promise<DispatchContext>;
|
|
102
|
+
/** Optional max payload size for the request body (bytes). Default 10 MB. Larger bodies are answered with 413. */
|
|
103
|
+
maxBodyBytes?: number;
|
|
104
|
+
/** Hook for observability — called once for every request the handler processes, including ones rejected with 404/401/413. `cellId` is `"unknown"` until the body has been parsed. */
|
|
105
|
+
onRequest?: (event: {
|
|
106
|
+
cellId: string;
|
|
107
|
+
durationMs: number;
|
|
108
|
+
success: boolean;
|
|
109
|
+
error?: unknown;
|
|
110
|
+
}) => void;
|
|
111
|
+
}
|
|
112
|
+
/** Handle returned by `runDispatchServer` for inspecting and shutting down the listening server. */
interface DispatchServerHandle {
|
|
113
|
+
/** The actual bound port (useful when `port: 0` requests an ephemeral port). */
|
|
114
|
+
port: number;
|
|
115
|
+
/** Stop accepting new connections and drain existing ones. Resolves when the underlying `server.close` completes and rejects with the error it reports. */
|
|
116
|
+
close: () => Promise<void>;
|
|
117
|
+
}
|
|
118
|
+
/**
|
|
119
|
+
* Start an HTTP server exposing a local `Dispatch` over the wire. Pair with
|
|
120
|
+
* `httpDispatch` on the driver side.
|
|
121
|
+
*
|
|
122
|
+
* Wire shape:
|
|
123
|
+
*
|
|
124
|
+
* POST /dispatch
|
|
125
|
+
* Authorization: Bearer <token>
|
|
126
|
+
* Body: HttpDispatchRequestBody
|
|
127
|
+
* 200 OK: HttpDispatchResponseBody
|
|
128
|
+
* 401: missing/invalid auth
* 404: method is not POST, or the path does not match `path`
* 413: request body larger than `maxBodyBytes`
|
|
129
|
+
* 408: per-request timeout exceeded (NOTE(review): no code path in the shipped http.js sets 408 — confirm)
|
|
130
|
+
* 499: client aborted before completion
|
|
131
|
+
* 500: dispatch threw (body parsing or `contextFactory` failures also land here)
|
|
132
|
+
*
|
|
133
|
+
* The server is `node:http`-based to keep the runtime dependency surface
|
|
134
|
+
* minimal — works in plain Node, sandbox, or any container.
|
|
135
|
+
*
 * @returns A handle exposing the bound `port` and a `close()` method.
 * @throws Rejects with an Error when `auth` is `undefined`.
*/
|
|
136
|
+
declare function runDispatchServer<TScenario extends Scenario, TArtifact>(opts: RunDispatchServerOptions<TScenario, TArtifact>): Promise<DispatchServerHandle>;
|
|
137
|
+
|
|
138
|
+
export { type DispatchServerHandle, type HttpDispatchOptions, type HttpDispatchRequestBody, type HttpDispatchResponseBody, type RunDispatchServerOptions, httpDispatch, runDispatchServer };
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import "../chunk-NSBPE2FW.js";
|
|
2
|
+
|
|
3
|
+
// src/adapters/http.ts
|
|
4
|
+
function resolveAuth(auth) {
|
|
5
|
+
if (!auth) return Promise.resolve(null);
|
|
6
|
+
if (typeof auth === "string") return Promise.resolve(auth);
|
|
7
|
+
return Promise.resolve(auth());
|
|
8
|
+
}
|
|
9
|
+
function httpDispatch(opts) {
|
|
10
|
+
if (!opts.url && !opts.resolveUrl) {
|
|
11
|
+
throw new Error("httpDispatch: pass exactly one of `url` or `resolveUrl`.");
|
|
12
|
+
}
|
|
13
|
+
if (opts.url && opts.resolveUrl) {
|
|
14
|
+
throw new Error("httpDispatch: pass exactly one of `url` or `resolveUrl`, not both.");
|
|
15
|
+
}
|
|
16
|
+
const timeoutMs = opts.timeoutMs ?? 5 * 60 * 1e3;
|
|
17
|
+
const maxRetries = opts.retries ?? 2;
|
|
18
|
+
const f = opts.fetchImpl ?? ((...args) => fetch(...args));
|
|
19
|
+
return async (scenario, ctx) => {
|
|
20
|
+
const url = opts.url ?? opts.resolveUrl({ scenario, placement: ctx.placement, cellId: ctx.cellId });
|
|
21
|
+
const authValue = await resolveAuth(opts.auth);
|
|
22
|
+
const body = {
|
|
23
|
+
scenario,
|
|
24
|
+
cellId: ctx.cellId,
|
|
25
|
+
rep: ctx.rep,
|
|
26
|
+
generation: ctx.generation,
|
|
27
|
+
seed: ctx.seed,
|
|
28
|
+
placement: ctx.placement,
|
|
29
|
+
cycleId: ctx.cycleId
|
|
30
|
+
};
|
|
31
|
+
let lastError;
|
|
32
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
33
|
+
const ourTimeout = AbortSignal.timeout(timeoutMs);
|
|
34
|
+
const combinedSignal = AbortSignal.any([ctx.signal, ourTimeout]);
|
|
35
|
+
try {
|
|
36
|
+
const res = await f(url, {
|
|
37
|
+
method: "POST",
|
|
38
|
+
headers: {
|
|
39
|
+
"Content-Type": "application/json",
|
|
40
|
+
...authValue ? { Authorization: authValue.startsWith("Bearer ") ? authValue : `Bearer ${authValue}` } : {},
|
|
41
|
+
...opts.headers
|
|
42
|
+
},
|
|
43
|
+
body: JSON.stringify(body),
|
|
44
|
+
signal: combinedSignal
|
|
45
|
+
});
|
|
46
|
+
if (!res.ok) {
|
|
47
|
+
const retryable = res.status >= 500 || res.status === 408 || res.status === 429;
|
|
48
|
+
if (!retryable || attempt === maxRetries) {
|
|
49
|
+
const text = await res.text().catch(() => "");
|
|
50
|
+
throw new Error(`httpDispatch ${url} failed (${res.status}): ${text.slice(0, 500)}`);
|
|
51
|
+
}
|
|
52
|
+
await sleep(2 ** attempt * 200 + Math.random() * 200);
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
const parsed = await res.json();
|
|
56
|
+
return parsed.artifact;
|
|
57
|
+
} catch (err) {
|
|
58
|
+
if (ctx.signal.aborted) throw err;
|
|
59
|
+
lastError = err;
|
|
60
|
+
if (attempt === maxRetries) throw err;
|
|
61
|
+
await sleep(2 ** attempt * 200 + Math.random() * 200);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
throw lastError ?? new Error("httpDispatch exhausted retries");
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
function sleep(ms) {
|
|
68
|
+
return new Promise((resolve) => {
|
|
69
|
+
const t = setTimeout(resolve, ms);
|
|
70
|
+
if (typeof t.unref === "function") t.unref();
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
async function runDispatchServer(opts) {
|
|
74
|
+
if (opts.auth === void 0) {
|
|
75
|
+
throw new Error("runDispatchServer: 'auth' is required (pass a bearer-token string, or `auth: false` explicitly for a closed-network test deployment).");
|
|
76
|
+
}
|
|
77
|
+
const path = opts.path ?? "/dispatch";
|
|
78
|
+
const maxBytes = opts.maxBodyBytes ?? 10 * 1024 * 1024;
|
|
79
|
+
const expectedAuth = typeof opts.auth === "string" ? `Bearer ${opts.auth.replace(/^Bearer\s+/, "")}` : null;
|
|
80
|
+
const { createServer } = await import("http");
|
|
81
|
+
const server = createServer(async (req, res) => {
|
|
82
|
+
const start = Date.now();
|
|
83
|
+
let cellId = "unknown";
|
|
84
|
+
let success = false;
|
|
85
|
+
let errCaught;
|
|
86
|
+
try {
|
|
87
|
+
if (req.method !== "POST" || req.url?.split("?")[0] !== path) {
|
|
88
|
+
res.statusCode = 404;
|
|
89
|
+
res.end("not found");
|
|
90
|
+
return;
|
|
91
|
+
}
|
|
92
|
+
if (expectedAuth) {
|
|
93
|
+
const got = req.headers["authorization"];
|
|
94
|
+
if (got !== expectedAuth) {
|
|
95
|
+
res.statusCode = 401;
|
|
96
|
+
res.end("unauthorized");
|
|
97
|
+
return;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
const chunks = [];
|
|
101
|
+
let totalBytes = 0;
|
|
102
|
+
const aborter = new AbortController();
|
|
103
|
+
req.on("close", () => {
|
|
104
|
+
if (!res.writableEnded) aborter.abort();
|
|
105
|
+
});
|
|
106
|
+
for await (const chunk of req) {
|
|
107
|
+
const buf = chunk;
|
|
108
|
+
totalBytes += buf.length;
|
|
109
|
+
if (totalBytes > maxBytes) {
|
|
110
|
+
res.statusCode = 413;
|
|
111
|
+
res.end("payload too large");
|
|
112
|
+
return;
|
|
113
|
+
}
|
|
114
|
+
chunks.push(buf);
|
|
115
|
+
}
|
|
116
|
+
const body = JSON.parse(Buffer.concat(chunks).toString("utf8"));
|
|
117
|
+
cellId = body.cellId;
|
|
118
|
+
const ctx = opts.contextFactory ? await opts.contextFactory(body, aborter.signal) : {
|
|
119
|
+
cellId: body.cellId,
|
|
120
|
+
rep: body.rep,
|
|
121
|
+
generation: body.generation,
|
|
122
|
+
seed: body.seed,
|
|
123
|
+
signal: aborter.signal,
|
|
124
|
+
placement: body.placement,
|
|
125
|
+
cycleId: body.cycleId,
|
|
126
|
+
trace: NOOP_TRACE,
|
|
127
|
+
artifacts: NOOP_ARTIFACTS,
|
|
128
|
+
cost: NOOP_COST
|
|
129
|
+
};
|
|
130
|
+
const artifact = await opts.dispatch(body.scenario, ctx);
|
|
131
|
+
const responseBody = { artifact };
|
|
132
|
+
res.statusCode = 200;
|
|
133
|
+
res.setHeader("content-type", "application/json");
|
|
134
|
+
res.end(JSON.stringify(responseBody));
|
|
135
|
+
success = true;
|
|
136
|
+
} catch (err) {
|
|
137
|
+
errCaught = err;
|
|
138
|
+
if (err?.name === "AbortError") {
|
|
139
|
+
res.statusCode = 499;
|
|
140
|
+
res.end("client aborted");
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
res.statusCode = 500;
|
|
144
|
+
res.setHeader("content-type", "application/json");
|
|
145
|
+
res.end(JSON.stringify({ error: err instanceof Error ? err.message : String(err) }));
|
|
146
|
+
} finally {
|
|
147
|
+
opts.onRequest?.({
|
|
148
|
+
cellId,
|
|
149
|
+
durationMs: Date.now() - start,
|
|
150
|
+
success,
|
|
151
|
+
error: errCaught
|
|
152
|
+
});
|
|
153
|
+
}
|
|
154
|
+
});
|
|
155
|
+
await new Promise((resolve, reject) => {
|
|
156
|
+
server.once("error", reject);
|
|
157
|
+
server.listen(opts.port, opts.host ?? "0.0.0.0", () => resolve());
|
|
158
|
+
});
|
|
159
|
+
const addr = server.address();
|
|
160
|
+
const boundPort = typeof addr === "object" && addr ? addr.port : opts.port;
|
|
161
|
+
return {
|
|
162
|
+
port: boundPort,
|
|
163
|
+
close: () => new Promise((resolve, reject) => {
|
|
164
|
+
server.close((err) => err ? reject(err) : resolve());
|
|
165
|
+
})
|
|
166
|
+
};
|
|
167
|
+
}
|
|
168
|
+
/**
 * Default `trace` for the context built when no `contextFactory` is supplied:
 * `span()` returns an object whose methods all do nothing.
 */
var NOOP_TRACE = {
|
|
169
|
+
span: () => ({
|
|
170
|
+
end: () => {
|
|
171
|
+
},
|
|
172
|
+
setAttribute: () => {
|
|
173
|
+
},
|
|
174
|
+
setStatus: () => {
|
|
175
|
+
},
|
|
176
|
+
recordException: () => {
|
|
177
|
+
},
|
|
178
|
+
addEvent: () => {
|
|
179
|
+
}
|
|
180
|
+
})
|
|
181
|
+
};
|
|
182
|
+
/**
 * Default `artifacts` for the context built when no `contextFactory` is
 * supplied: `write` and `read` resolve to `undefined`, `list` resolves to `[]`.
 */
var NOOP_ARTIFACTS = {
|
|
183
|
+
write: async () => void 0,
|
|
184
|
+
read: async () => void 0,
|
|
185
|
+
list: async () => []
|
|
186
|
+
};
|
|
187
|
+
/**
 * Default `cost` for the context built when no `contextFactory` is supplied:
 * `record` does nothing and `total` always returns 0.
 */
var NOOP_COST = {
|
|
188
|
+
record: () => {
|
|
189
|
+
},
|
|
190
|
+
total: () => 0
|
|
191
|
+
};
|
|
192
|
+
export {
|
|
193
|
+
httpDispatch,
|
|
194
|
+
runDispatchServer
|
|
195
|
+
};
|
|
196
|
+
//# sourceMappingURL=http.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/adapters/http.ts"],"sourcesContent":["/**\n * # `@tangle-network/agent-eval/adapters/http` — distributed Dispatch over HTTP.\n *\n * Decouples driver and worker. The driver (running `runImprovementLoop` or\n * `runCampaign`) can live anywhere — your VPC, a dev laptop, a cron VM. The\n * workers (running the actual agent) can live anywhere else — different\n * regions, different clouds, different boxes — as long as they speak HTTP.\n *\n * Both sides:\n *\n * - **`httpDispatch({ url | resolveUrl, ... })`** — client. Returns a\n * `Dispatch` that POSTs `{ scenario, ctx }` to a worker URL and parses\n * the artifact back. AbortSignal-aware, retries on idempotent errors,\n * bounded timeout per call.\n * - **`runDispatchServer({ dispatch, port, ... })`** — server. Wraps your\n * local `Dispatch` as an HTTP endpoint. Handles auth, JSON parsing,\n * error mapping, and cancellation when the client aborts.\n *\n * # Topology examples\n *\n * **Single-worker:** driver on box A, worker on box B. Set\n * `httpDispatch({ url: 'https://box-b/dispatch' })`.\n *\n * **Multi-region:** N workers across regions. Use `httpDispatch({ resolveUrl })`\n * with a function that picks the URL per cell from `ctx.placement`. 
Combined\n * with `cellPlacement` on `RunCampaignOptions`, the substrate fans cells\n * across geographies in parallel.\n *\n * **Driver-as-a-service:** driver runs as a long-lived process or service\n * (holds optimization state across generations); workers are stateless\n * HTTP services that can scale horizontally per cell.\n */\n\nimport type { Dispatch, DispatchContext, Scenario } from '../contract'\n\n// ── Client ───────────────────────────────────────────────────────────\n\n// eslint-disable-next-line @typescript-eslint/no-unused-vars -- TArtifact is unused\n// in this options interface but kept as a parameter so callers can write\n// `HttpDispatchOptions<MyScenario, MyArtifact>` symmetrically with\n// `Dispatch<MyScenario, MyArtifact>`. Marking it unused at the position\n// where it bites.\nexport interface HttpDispatchOptions<TScenario extends Scenario, _TArtifact> {\n /** Static endpoint URL. Mutually exclusive with `resolveUrl`. */\n url?: string\n /**\n * Dynamic per-cell URL resolver. Receives the scenario + the substrate\n * placement key (from `RunCampaignOptions.cellPlacement`) and returns the\n * worker URL to invoke. Mutually exclusive with `url`.\n */\n resolveUrl?: (input: { scenario: TScenario; placement?: string; cellId: string }) => string\n /** Bearer token or static auth string set as `Authorization`. */\n auth?: string | (() => string | Promise<string>)\n /** Extra headers merged into every request. */\n headers?: Record<string, string>\n /** Per-call timeout in ms. Default 5 minutes. */\n timeoutMs?: number\n /** How many idempotent retries on 5xx / network errors. Default 2. */\n retries?: number\n /** Optional fetch override (auth wrappers, custom agent, mocks). 
*/\n fetchImpl?: typeof fetch\n}\n\nexport interface HttpDispatchRequestBody<TScenario extends Scenario> {\n scenario: TScenario\n cellId: string\n rep: number\n generation?: number\n seed: number\n placement?: string\n cycleId?: string\n}\n\nexport interface HttpDispatchResponseBody<TArtifact> {\n artifact: TArtifact\n}\n\nfunction resolveAuth(auth: HttpDispatchOptions<Scenario, unknown>['auth']): Promise<string | null> {\n if (!auth) return Promise.resolve(null)\n if (typeof auth === 'string') return Promise.resolve(auth)\n return Promise.resolve(auth())\n}\n\n/**\n * Wrap a remote HTTP endpoint as a `Dispatch`. The remote side should run\n * `runDispatchServer` (or any service that speaks the same wire shape).\n *\n * Cancellation: the substrate's per-cell `AbortSignal` is forwarded; the\n * server's `runDispatchServer` translates the resulting `AbortError` into\n * a 499 (client-closed) so the client doesn't retry.\n */\nexport function httpDispatch<TScenario extends Scenario, TArtifact>(\n opts: HttpDispatchOptions<TScenario, TArtifact>,\n): Dispatch<TScenario, TArtifact> {\n if (!opts.url && !opts.resolveUrl) {\n throw new Error('httpDispatch: pass exactly one of `url` or `resolveUrl`.')\n }\n if (opts.url && opts.resolveUrl) {\n throw new Error('httpDispatch: pass exactly one of `url` or `resolveUrl`, not both.')\n }\n const timeoutMs = opts.timeoutMs ?? 5 * 60 * 1000\n const maxRetries = opts.retries ?? 2\n const f: typeof fetch = opts.fetchImpl ?? ((...args) => fetch(...args))\n\n return async (scenario, ctx) => {\n const url = opts.url ?? 
opts.resolveUrl!({ scenario, placement: ctx.placement, cellId: ctx.cellId })\n const authValue = await resolveAuth(opts.auth)\n const body: HttpDispatchRequestBody<TScenario> = {\n scenario,\n cellId: ctx.cellId,\n rep: ctx.rep,\n generation: ctx.generation,\n seed: ctx.seed,\n placement: ctx.placement,\n cycleId: ctx.cycleId,\n }\n\n let lastError: unknown\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n // Compose the request signal: caller's signal OR our timeout.\n const ourTimeout = AbortSignal.timeout(timeoutMs)\n const combinedSignal = AbortSignal.any([ctx.signal, ourTimeout])\n try {\n const res = await f(url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n ...(authValue ? { Authorization: authValue.startsWith('Bearer ') ? authValue : `Bearer ${authValue}` } : {}),\n ...opts.headers,\n },\n body: JSON.stringify(body),\n signal: combinedSignal,\n })\n if (!res.ok) {\n // 4xx is non-retryable (caller error, auth, bad scenario shape).\n // 5xx / 408 / 429 / 502 / 503 / 504 are retryable.\n const retryable = res.status >= 500 || res.status === 408 || res.status === 429\n if (!retryable || attempt === maxRetries) {\n const text = await res.text().catch(() => '')\n throw new Error(`httpDispatch ${url} failed (${res.status}): ${text.slice(0, 500)}`)\n }\n // exponential backoff with jitter\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n continue\n }\n const parsed = (await res.json()) as HttpDispatchResponseBody<TArtifact>\n return parsed.artifact\n } catch (err) {\n // Caller-driven abort is terminal — never retry.\n if (ctx.signal.aborted) throw err\n lastError = err\n if (attempt === maxRetries) throw err\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n }\n }\n throw lastError ?? 
new Error('httpDispatch exhausted retries')\n }\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => {\n const t = setTimeout(resolve, ms)\n // Don't keep node process alive purely for backoff sleeps.\n if (typeof (t as { unref?: () => void }).unref === 'function') (t as { unref: () => void }).unref()\n })\n}\n\n// ── Server ───────────────────────────────────────────────────────────\n\nexport interface RunDispatchServerOptions<TScenario extends Scenario, TArtifact> {\n /** The Dispatch this server exposes — what runs when a request lands. */\n dispatch: Dispatch<TScenario, TArtifact>\n /** TCP port to bind. */\n port: number\n /** Optional bind host; defaults to 0.0.0.0. */\n host?: string\n /** Required for any non-test deployment: the bearer token clients must\n * send. The substrate refuses to start without auth unless `auth: false`\n * is set explicitly (intended ONLY for closed-network/internal testing). */\n auth: string | false\n /** Path the server listens on. Default `/dispatch`. */\n path?: string\n /**\n * Per-request handler that wraps `dispatch` with whatever context the\n * worker side needs to construct a `DispatchContext` — typically the\n * trace writer, artifact writer, and cost meter. The substrate provides\n * synthetic-but-typed defaults if not supplied; production deployments\n * should wire real ones (e.g. ship traces to your OTel collector).\n */\n contextFactory?: (req: HttpDispatchRequestBody<TScenario>, signal: AbortSignal) => Promise<DispatchContext>\n /** Optional max payload size for the request body (bytes). Default 10 MB. */\n maxBodyBytes?: number\n /** Hook for observability — called on every successful or failed turn. */\n onRequest?: (event: {\n cellId: string\n durationMs: number\n success: boolean\n error?: unknown\n }) => void\n}\n\nexport interface DispatchServerHandle {\n /** The actual bound port (useful when `port: 0` requests an ephemeral port). 
*/\n port: number\n /** Stop accepting new connections and drain existing ones. */\n close: () => Promise<void>\n}\n\n/**\n * Start an HTTP server exposing a local `Dispatch` over the wire. Pair with\n * `httpDispatch` on the driver side.\n *\n * Wire shape:\n *\n * POST /dispatch\n * Authorization: Bearer <token>\n * Body: HttpDispatchRequestBody\n * 200 OK: HttpDispatchResponseBody\n * 401: missing/invalid auth\n * 408: per-request timeout exceeded\n * 499: client aborted before completion\n * 500: dispatch threw\n *\n * The server is `node:http`-based to keep the runtime dependency surface\n * minimal — works in plain Node, sandbox, or any container.\n */\nexport async function runDispatchServer<TScenario extends Scenario, TArtifact>(\n opts: RunDispatchServerOptions<TScenario, TArtifact>,\n): Promise<DispatchServerHandle> {\n if (opts.auth === undefined) {\n throw new Error(\"runDispatchServer: 'auth' is required (pass a bearer-token string, or `auth: false` explicitly for a closed-network test deployment).\")\n }\n const path = opts.path ?? '/dispatch'\n const maxBytes = opts.maxBodyBytes ?? 10 * 1024 * 1024\n const expectedAuth = typeof opts.auth === 'string' ? `Bearer ${opts.auth.replace(/^Bearer\\s+/, '')}` : null\n\n // Lazy-import node:http so the file is usable from non-Node bundlers\n // that import the client side only (e.g. an edge driver shipping\n // httpDispatch alone). 
Server side is opt-in by calling this function.\n const { createServer } = await import('node:http')\n\n const server = createServer(async (req, res) => {\n const start = Date.now()\n let cellId = 'unknown'\n let success = false\n let errCaught: unknown\n\n try {\n if (req.method !== 'POST' || req.url?.split('?')[0] !== path) {\n res.statusCode = 404\n res.end('not found')\n return\n }\n if (expectedAuth) {\n const got = req.headers['authorization']\n if (got !== expectedAuth) {\n res.statusCode = 401\n res.end('unauthorized')\n return\n }\n }\n\n // Read body up to maxBytes\n const chunks: Buffer[] = []\n let totalBytes = 0\n const aborter = new AbortController()\n req.on('close', () => {\n if (!res.writableEnded) aborter.abort()\n })\n\n for await (const chunk of req) {\n const buf = chunk as Buffer\n totalBytes += buf.length\n if (totalBytes > maxBytes) {\n res.statusCode = 413\n res.end('payload too large')\n return\n }\n chunks.push(buf)\n }\n\n const body = JSON.parse(Buffer.concat(chunks).toString('utf8')) as HttpDispatchRequestBody<TScenario>\n cellId = body.cellId\n\n const ctx: DispatchContext = opts.contextFactory\n ? 
await opts.contextFactory(body, aborter.signal)\n : {\n cellId: body.cellId,\n rep: body.rep,\n generation: body.generation,\n seed: body.seed,\n signal: aborter.signal,\n placement: body.placement,\n cycleId: body.cycleId,\n trace: NOOP_TRACE,\n artifacts: NOOP_ARTIFACTS,\n cost: NOOP_COST,\n }\n\n const artifact = await opts.dispatch(body.scenario, ctx)\n const responseBody: HttpDispatchResponseBody<TArtifact> = { artifact }\n\n res.statusCode = 200\n res.setHeader('content-type', 'application/json')\n res.end(JSON.stringify(responseBody))\n success = true\n } catch (err) {\n errCaught = err\n // Client-cancelled — they don't care about the result.\n if ((err as Error)?.name === 'AbortError') {\n res.statusCode = 499\n res.end('client aborted')\n return\n }\n res.statusCode = 500\n res.setHeader('content-type', 'application/json')\n res.end(JSON.stringify({ error: err instanceof Error ? err.message : String(err) }))\n } finally {\n opts.onRequest?.({\n cellId,\n durationMs: Date.now() - start,\n success,\n error: errCaught,\n })\n }\n })\n\n await new Promise<void>((resolve, reject) => {\n server.once('error', reject)\n server.listen(opts.port, opts.host ?? '0.0.0.0', () => resolve())\n })\n\n const addr = server.address()\n const boundPort = typeof addr === 'object' && addr ? addr.port : opts.port\n\n return {\n port: boundPort,\n close: () =>\n new Promise<void>((resolve, reject) => {\n server.close((err) => (err ? 
reject(err) : resolve()))\n }),\n }\n}\n\n// ── No-op default ctx machinery (worker can replace via contextFactory) ──\n\nconst NOOP_TRACE = {\n span: () => ({\n end: () => {},\n setAttribute: () => {},\n setStatus: () => {},\n recordException: () => {},\n addEvent: () => {},\n }),\n} as unknown as DispatchContext['trace']\n\nconst NOOP_ARTIFACTS = {\n write: async () => undefined,\n read: async () => undefined,\n list: async () => [],\n} as unknown as DispatchContext['artifacts']\n\nconst NOOP_COST = {\n record: () => {},\n total: () => 0,\n} as unknown as DispatchContext['cost']\n"],"mappings":";;;AA6EA,SAAS,YAAY,MAA8E;AACjG,MAAI,CAAC,KAAM,QAAO,QAAQ,QAAQ,IAAI;AACtC,MAAI,OAAO,SAAS,SAAU,QAAO,QAAQ,QAAQ,IAAI;AACzD,SAAO,QAAQ,QAAQ,KAAK,CAAC;AAC/B;AAUO,SAAS,aACd,MACgC;AAChC,MAAI,CAAC,KAAK,OAAO,CAAC,KAAK,YAAY;AACjC,UAAM,IAAI,MAAM,0DAA0D;AAAA,EAC5E;AACA,MAAI,KAAK,OAAO,KAAK,YAAY;AAC/B,UAAM,IAAI,MAAM,oEAAoE;AAAA,EACtF;AACA,QAAM,YAAY,KAAK,aAAa,IAAI,KAAK;AAC7C,QAAM,aAAa,KAAK,WAAW;AACnC,QAAM,IAAkB,KAAK,cAAc,IAAI,SAAS,MAAM,GAAG,IAAI;AAErE,SAAO,OAAO,UAAU,QAAQ;AAC9B,UAAM,MAAM,KAAK,OAAO,KAAK,WAAY,EAAE,UAAU,WAAW,IAAI,WAAW,QAAQ,IAAI,OAAO,CAAC;AACnG,UAAM,YAAY,MAAM,YAAY,KAAK,IAAI;AAC7C,UAAM,OAA2C;AAAA,MAC/C;AAAA,MACA,QAAQ,IAAI;AAAA,MACZ,KAAK,IAAI;AAAA,MACT,YAAY,IAAI;AAAA,MAChB,MAAM,IAAI;AAAA,MACV,WAAW,IAAI;AAAA,MACf,SAAS,IAAI;AAAA,IACf;AAEA,QAAI;AACJ,aAAS,UAAU,GAAG,WAAW,YAAY,WAAW;AAEtD,YAAM,aAAa,YAAY,QAAQ,SAAS;AAChD,YAAM,iBAAiB,YAAY,IAAI,CAAC,IAAI,QAAQ,UAAU,CAAC;AAC/D,UAAI;AACF,cAAM,MAAM,MAAM,EAAE,KAAK;AAAA,UACvB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,gBAAgB;AAAA,YAChB,GAAI,YAAY,EAAE,eAAe,UAAU,WAAW,SAAS,IAAI,YAAY,UAAU,SAAS,GAAG,IAAI,CAAC;AAAA,YAC1G,GAAG,KAAK;AAAA,UACV;AAAA,UACA,MAAM,KAAK,UAAU,IAAI;AAAA,UACzB,QAAQ;AAAA,QACV,CAAC;AACD,YAAI,CAAC,IAAI,IAAI;AAGX,gBAAM,YAAY,IAAI,UAAU,OAAO,IAAI,WAAW,OAAO,IAAI,WAAW;AAC5E,cAAI,CAAC,aAAa,YAAY,YAAY;AACxC,kBAAM,OAAO,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,EAAE;AAC5C,kBAAM,IAAI,MAAM,gBAAgB,GAAG,YAAY,IAAI,MAAM,MAAM,KAAK,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,UACrF;AAEA,gBAAM,M
AAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AACpD;AAAA,QACF;AACA,cAAM,SAAU,MAAM,IAAI,KAAK;AAC/B,eAAO,OAAO;AAAA,MAChB,SAAS,KAAK;AAEZ,YAAI,IAAI,OAAO,QAAS,OAAM;AAC9B,oBAAY;AACZ,YAAI,YAAY,WAAY,OAAM;AAClC,cAAM,MAAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,MACtD;AAAA,IACF;AACA,UAAM,aAAa,IAAI,MAAM,gCAAgC;AAAA,EAC/D;AACF;AAEA,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAM,IAAI,WAAW,SAAS,EAAE;AAEhC,QAAI,OAAQ,EAA6B,UAAU,WAAY,CAAC,EAA4B,MAAM;AAAA,EACpG,CAAC;AACH;AA6DA,eAAsB,kBACpB,MAC+B;AAC/B,MAAI,KAAK,SAAS,QAAW;AAC3B,UAAM,IAAI,MAAM,uIAAuI;AAAA,EACzJ;AACA,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,WAAW,KAAK,gBAAgB,KAAK,OAAO;AAClD,QAAM,eAAe,OAAO,KAAK,SAAS,WAAW,UAAU,KAAK,KAAK,QAAQ,cAAc,EAAE,CAAC,KAAK;AAKvG,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,MAAW;AAEjD,QAAM,SAAS,aAAa,OAAO,KAAK,QAAQ;AAC9C,UAAM,QAAQ,KAAK,IAAI;AACvB,QAAI,SAAS;AACb,QAAI,UAAU;AACd,QAAI;AAEJ,QAAI;AACF,UAAI,IAAI,WAAW,UAAU,IAAI,KAAK,MAAM,GAAG,EAAE,CAAC,MAAM,MAAM;AAC5D,YAAI,aAAa;AACjB,YAAI,IAAI,WAAW;AACnB;AAAA,MACF;AACA,UAAI,cAAc;AAChB,cAAM,MAAM,IAAI,QAAQ,eAAe;AACvC,YAAI,QAAQ,cAAc;AACxB,cAAI,aAAa;AACjB,cAAI,IAAI,cAAc;AACtB;AAAA,QACF;AAAA,MACF;AAGA,YAAM,SAAmB,CAAC;AAC1B,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB;AACpC,UAAI,GAAG,SAAS,MAAM;AACpB,YAAI,CAAC,IAAI,cAAe,SAAQ,MAAM;AAAA,MACxC,CAAC;AAED,uBAAiB,SAAS,KAAK;AAC7B,cAAM,MAAM;AACZ,sBAAc,IAAI;AAClB,YAAI,aAAa,UAAU;AACzB,cAAI,aAAa;AACjB,cAAI,IAAI,mBAAmB;AAC3B;AAAA,QACF;AACA,eAAO,KAAK,GAAG;AAAA,MACjB;AAEA,YAAM,OAAO,KAAK,MAAM,OAAO,OAAO,MAAM,EAAE,SAAS,MAAM,CAAC;AAC9D,eAAS,KAAK;AAEd,YAAM,MAAuB,KAAK,iBAC9B,MAAM,KAAK,eAAe,MAAM,QAAQ,MAAM,IAC9C;AAAA,QACE,QAAQ,KAAK;AAAA,QACb,KAAK,KAAK;AAAA,QACV,YAAY,KAAK;AAAA,QACjB,MAAM,KAAK;AAAA,QACX,QAAQ,QAAQ;AAAA,QAChB,WAAW,KAAK;AAAA,QAChB,SAAS,KAAK;AAAA,QACd,OAAO;AAAA,QACP,WAAW;AAAA,QACX,MAAM;AAAA,MACR;AAEJ,YAAM,WAAW,MAAM,KAAK,SAAS,KAAK,UAAU,GAAG;AACvD,YAAM,eAAoD,EAAE,SAAS;AAErE,UAAI,aAAa;AACjB,UAAI,UAAU,gBAAgB,kBAAkB;AAChD,UAAI,IAAI,KAAK,UAAU,YAAY,CAAC;AACpC,gBAAU;AAAA,IACZ,SAAS,KAAK;AACZ,kBAAY;AAEZ,UAAK,KAAe,SAAS,cAAc;AACzC,YAAI,aAAa;AACjB,YAAI,IAAI,gBAAgB;AACxB
;AAAA,MACF;AACA,UAAI,aAAa;AACjB,UAAI,UAAU,gBAAgB,kBAAkB;AAChD,UAAI,IAAI,KAAK,UAAU,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC,CAAC;AAAA,IACrF,UAAE;AACA,WAAK,YAAY;AAAA,QACf;AAAA,QACA,YAAY,KAAK,IAAI,IAAI;AAAA,QACzB;AAAA,QACA,OAAO;AAAA,MACT,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AAED,QAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,WAAO,KAAK,SAAS,MAAM;AAC3B,WAAO,OAAO,KAAK,MAAM,KAAK,QAAQ,WAAW,MAAM,QAAQ,CAAC;AAAA,EAClE,CAAC;AAED,QAAM,OAAO,OAAO,QAAQ;AAC5B,QAAM,YAAY,OAAO,SAAS,YAAY,OAAO,KAAK,OAAO,KAAK;AAEtE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,OAAO,MACL,IAAI,QAAc,CAAC,SAAS,WAAW;AACrC,aAAO,MAAM,CAAC,QAAS,MAAM,OAAO,GAAG,IAAI,QAAQ,CAAE;AAAA,IACvD,CAAC;AAAA,EACL;AACF;AAIA,IAAM,aAAa;AAAA,EACjB,MAAM,OAAO;AAAA,IACX,KAAK,MAAM;AAAA,IAAC;AAAA,IACZ,cAAc,MAAM;AAAA,IAAC;AAAA,IACrB,WAAW,MAAM;AAAA,IAAC;AAAA,IAClB,iBAAiB,MAAM;AAAA,IAAC;AAAA,IACxB,UAAU,MAAM;AAAA,IAAC;AAAA,EACnB;AACF;AAEA,IAAM,iBAAiB;AAAA,EACrB,OAAO,YAAY;AAAA,EACnB,MAAM,YAAY;AAAA,EAClB,MAAM,YAAY,CAAC;AACrB;AAEA,IAAM,YAAY;AAAA,EAChB,QAAQ,MAAM;AAAA,EAAC;AAAA,EACf,OAAO,MAAM;AACf;","names":[]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { S as Scenario, n as JudgeScore, D as DispatchFn, J as JudgeConfig } from '../types-
|
|
1
|
+
import { S as Scenario, n as JudgeScore, D as DispatchFn, J as JudgeConfig } from '../types-BURGZ8Ug.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* # `@tangle-network/agent-eval/adapters/langchain` — wrap any LangChain
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/adapters/langchain.ts"],"sourcesContent":["/**\n * # `@tangle-network/agent-eval/adapters/langchain` — wrap any LangChain\n * Runnable as a `Dispatch` (or `JudgeConfig`).\n *\n * **Why structural, not pinned**: we don't depend on `@langchain/core` at\n * install time. The adapter accepts anything with the canonical LangChain\n * Runnable shape (`invoke(input, config?)`), so it works with their\n * `Runnable`, `RunnableSequence`, `RunnableMap`, `RunnablePassthrough`,\n * and any custom Runnable-shaped object. No version pin, no peer dep,\n * no bundle-bloat risk.\n *\n * **Why this exists**: the most-asked question from foreign agent\n * builders is \"I'm already on LangChain — how do I plug in?\". The answer\n * is one function. Wrap your existing Runnable, pass the Dispatch into\n * `runEval` / `runImprovementLoop`, ship.\n */\n\nimport type { Dispatch, JudgeConfig, JudgeScore, Scenario } from '../contract'\n\n// ── Minimal structural type ──────────────────────────────────────────\n//\n// Whatever has `invoke(input, config?)` qualifies. We accept any\n// config shape (LangChain's RunnableConfig has many optional fields)\n// — the only thing we need is the AbortSignal seam, which LangChain's\n// RunnableConfig already supports as `signal?: AbortSignal`.\n\nexport interface RunnableLike<TInput, TOutput> {\n invoke(input: TInput, config?: { signal?: AbortSignal; [key: string]: unknown }): Promise<TOutput>\n}\n\n// ── Dispatch wrapper ────────────────────────────────────────────────\n\nexport interface LangchainDispatchOptions<TScenario extends Scenario, TArtifact> {\n /** The Runnable (or RunnableSequence, or anything `.invoke`able). */\n runnable: RunnableLike<TScenario, TArtifact>\n /**\n * Optional config merged into every `invoke` call — tags, metadata,\n * callbacks, runName. 
The substrate's per-cell `AbortSignal` is\n * always merged in last (and so wins).\n */\n config?: Record<string, unknown>\n}\n\n/**\n * Wrap a LangChain Runnable as a `Dispatch`. The Runnable's input must\n * accept the scenario (typically you'll shape it via\n * `RunnableMap`/`RunnableLambda` upstream); its output is the artifact\n * the engine + judges see.\n *\n * @example\n * const chain = prompt.pipe(model).pipe(parser)\n * const dispatch = langchainDispatch({ runnable: chain })\n * await runEval({ scenarios, dispatch, judges: [...], storage, runDir })\n */\nexport function langchainDispatch<TScenario extends Scenario, TArtifact>(\n opts: LangchainDispatchOptions<TScenario, TArtifact>,\n): Dispatch<TScenario, TArtifact> {\n return async (scenario, ctx) => {\n return opts.runnable.invoke(scenario, {\n ...opts.config,\n signal: ctx.signal,\n })\n }\n}\n\n// ── Judge wrapper ───────────────────────────────────────────────────\n\nexport interface LangchainJudgeOptions<TArtifact, TScenario extends Scenario> {\n /** Judge name; appears in `CampaignResult.aggregates.byJudge`. */\n name: string\n /**\n * Dimensions the judge scores. Used both for the judge's own prompt\n * (if it reads them) and for the aggregator's `byJudge` rollup.\n */\n dimensions: { key: string; description: string }[]\n /**\n * A Runnable that takes `{ artifact, scenario }` and returns a\n * partial `JudgeScore` — the dimensions map at minimum. `composite`\n * is computed by averaging `dimensions` when the Runnable doesn't\n * provide it; `notes` defaults to an empty string.\n */\n runnable: RunnableLike<{ artifact: TArtifact; scenario: TScenario }, Partial<JudgeScore>>\n appliesTo?: (scenario: TScenario) => boolean\n}\n\n/**\n * Wrap a LangChain Runnable as a `JudgeConfig`. The Runnable can be any\n * structured-output chain (e.g. 
`prompt.pipe(model).pipe(StructuredOutputParser)`)\n * that returns a `Partial<JudgeScore>`.\n *\n * The substrate's invariant — throw on judge failure, never silently\n * fold errors into a zero — is preserved: any error from the Runnable\n * propagates and the substrate records a failed cell.\n *\n * @example\n * const scorePrompt = ChatPromptTemplate.fromTemplate(`...`)\n * const judgeChain = scorePrompt.pipe(judgeModel).pipe(jsonParser)\n * const judge = langchainJudge({\n * name: 'marketing-quality',\n * dimensions: [{ key: 'hook_strength', description: '...' }, ...],\n * runnable: judgeChain,\n * })\n */\nexport function langchainJudge<TArtifact, TScenario extends Scenario>(\n opts: LangchainJudgeOptions<TArtifact, TScenario>,\n): JudgeConfig<TArtifact, TScenario> {\n return {\n name: opts.name,\n dimensions: opts.dimensions,\n appliesTo: opts.appliesTo,\n async score({ artifact, scenario, signal }) {\n const result = await opts.runnable.invoke({ artifact, scenario }, { signal })\n const dims = (result.dimensions ?? {}) as Record<string, number>\n const dimValues = Object.values(dims)\n const composite
|
|
1
|
+
{"version":3,"sources":["../../src/adapters/langchain.ts"],"sourcesContent":["/**\n * # `@tangle-network/agent-eval/adapters/langchain` — wrap any LangChain\n * Runnable as a `Dispatch` (or `JudgeConfig`).\n *\n * **Why structural, not pinned**: we don't depend on `@langchain/core` at\n * install time. The adapter accepts anything with the canonical LangChain\n * Runnable shape (`invoke(input, config?)`), so it works with their\n * `Runnable`, `RunnableSequence`, `RunnableMap`, `RunnablePassthrough`,\n * and any custom Runnable-shaped object. No version pin, no peer dep,\n * no bundle-bloat risk.\n *\n * **Why this exists**: the most-asked question from foreign agent\n * builders is \"I'm already on LangChain — how do I plug in?\". The answer\n * is one function. Wrap your existing Runnable, pass the Dispatch into\n * `runEval` / `runImprovementLoop`, ship.\n */\n\nimport type { Dispatch, JudgeConfig, JudgeScore, Scenario } from '../contract'\n\n// ── Minimal structural type ──────────────────────────────────────────\n//\n// Whatever has `invoke(input, config?)` qualifies. We accept any\n// config shape (LangChain's RunnableConfig has many optional fields)\n// — the only thing we need is the AbortSignal seam, which LangChain's\n// RunnableConfig already supports as `signal?: AbortSignal`.\n\nexport interface RunnableLike<TInput, TOutput> {\n invoke(input: TInput, config?: { signal?: AbortSignal; [key: string]: unknown }): Promise<TOutput>\n}\n\n// ── Dispatch wrapper ────────────────────────────────────────────────\n\nexport interface LangchainDispatchOptions<TScenario extends Scenario, TArtifact> {\n /** The Runnable (or RunnableSequence, or anything `.invoke`able). */\n runnable: RunnableLike<TScenario, TArtifact>\n /**\n * Optional config merged into every `invoke` call — tags, metadata,\n * callbacks, runName. 
The substrate's per-cell `AbortSignal` is\n * always merged in last (and so wins).\n */\n config?: Record<string, unknown>\n}\n\n/**\n * Wrap a LangChain Runnable as a `Dispatch`. The Runnable's input must\n * accept the scenario (typically you'll shape it via\n * `RunnableMap`/`RunnableLambda` upstream); its output is the artifact\n * the engine + judges see.\n *\n * @example\n * const chain = prompt.pipe(model).pipe(parser)\n * const dispatch = langchainDispatch({ runnable: chain })\n * await runEval({ scenarios, dispatch, judges: [...], storage, runDir })\n */\nexport function langchainDispatch<TScenario extends Scenario, TArtifact>(\n opts: LangchainDispatchOptions<TScenario, TArtifact>,\n): Dispatch<TScenario, TArtifact> {\n return async (scenario, ctx) => {\n return opts.runnable.invoke(scenario, {\n ...opts.config,\n signal: ctx.signal,\n })\n }\n}\n\n// ── Judge wrapper ───────────────────────────────────────────────────\n\nexport interface LangchainJudgeOptions<TArtifact, TScenario extends Scenario> {\n /** Judge name; appears in `CampaignResult.aggregates.byJudge`. */\n name: string\n /**\n * Dimensions the judge scores. Used both for the judge's own prompt\n * (if it reads them) and for the aggregator's `byJudge` rollup.\n */\n dimensions: { key: string; description: string }[]\n /**\n * A Runnable that takes `{ artifact, scenario }` and returns a\n * partial `JudgeScore` — the dimensions map at minimum. `composite`\n * is computed by averaging `dimensions` when the Runnable doesn't\n * provide it; `notes` defaults to an empty string.\n */\n runnable: RunnableLike<{ artifact: TArtifact; scenario: TScenario }, Partial<JudgeScore>>\n appliesTo?: (scenario: TScenario) => boolean\n}\n\n/**\n * Wrap a LangChain Runnable as a `JudgeConfig`. The Runnable can be any\n * structured-output chain (e.g. 
`prompt.pipe(model).pipe(StructuredOutputParser)`)\n * that returns a `Partial<JudgeScore>`.\n *\n * The substrate's invariant — throw on judge failure, never silently\n * fold errors into a zero — is preserved: any error from the Runnable\n * propagates and the substrate records a failed cell.\n *\n * @example\n * const scorePrompt = ChatPromptTemplate.fromTemplate(`...`)\n * const judgeChain = scorePrompt.pipe(judgeModel).pipe(jsonParser)\n * const judge = langchainJudge({\n * name: 'marketing-quality',\n * dimensions: [{ key: 'hook_strength', description: '...' }, ...],\n * runnable: judgeChain,\n * })\n */\nexport function langchainJudge<TArtifact, TScenario extends Scenario>(\n opts: LangchainJudgeOptions<TArtifact, TScenario>,\n): JudgeConfig<TArtifact, TScenario> {\n return {\n name: opts.name,\n dimensions: opts.dimensions,\n appliesTo: opts.appliesTo,\n async score({ artifact, scenario, signal }) {\n const result = await opts.runnable.invoke({ artifact, scenario }, { signal })\n const dims = (result.dimensions ?? {}) as Record<string, number>\n const dimValues = Object.values(dims)\n const composite =\n result.composite ??\n (dimValues.length > 0 ? dimValues.reduce((a, b) => a + b, 0) / dimValues.length : 0)\n return {\n dimensions: dims,\n composite,\n notes: result.notes ?? '',\n }\n },\n }\n}\n"],"mappings":";;;AAsDO,SAAS,kBACd,MACgC;AAChC,SAAO,OAAO,UAAU,QAAQ;AAC9B,WAAO,KAAK,SAAS,OAAO,UAAU;AAAA,MACpC,GAAG,KAAK;AAAA,MACR,QAAQ,IAAI;AAAA,IACd,CAAC;AAAA,EACH;AACF;AAwCO,SAAS,eACd,MACmC;AACnC,SAAO;AAAA,IACL,MAAM,KAAK;AAAA,IACX,YAAY,KAAK;AAAA,IACjB,WAAW,KAAK;AAAA,IAChB,MAAM,MAAM,EAAE,UAAU,UAAU,OAAO,GAAG;AAC1C,YAAM,SAAS,MAAM,KAAK,SAAS,OAAO,EAAE,UAAU,SAAS,GAAG,EAAE,OAAO,CAAC;AAC5E,YAAM,OAAQ,OAAO,cAAc,CAAC;AACpC,YAAM,YAAY,OAAO,OAAO,IAAI;AACpC,YAAM,YACJ,OAAO,cACN,UAAU,SAAS,IAAI,UAAU,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,UAAU,SAAS;AACpF,aAAO;AAAA,QACL,YAAY;AAAA,QACZ;AAAA,QACA,OAAO,OAAO,SAAS;AAAA,MACzB;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
|
package/dist/campaign/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export { C as CampaignStorage, D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, O as OpenAutoPrOptions, m as OpenAutoPrResult, R as RunCampaignOptions, a as RunEvalOptions, b as RunImprovementLoopOptions, c as RunImprovementLoopResult, n as RunOptimizationOptions, o as RunOptimizationResult, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, p as openAutoPr, r as runCampaign, k as runEval, l as runImprovementLoop, q as runOptimization, s as surfaceHash } from '../run-improvement-loop-
|
|
2
|
-
import { L as LabeledScenarioStore, q as LabeledScenarioWrite, r as LabeledScenarioSampleArgs, s as LabeledScenarioRecord, f as CodeSurface } from '../types-
|
|
3
|
-
export { C as CampaignAggregates, a as CampaignArtifactWriter, b as CampaignCellResult, c as CampaignCostMeter, d as CampaignResult, e as CampaignTraceWriter, g as DispatchContext, D as DispatchFn, G as Gate, h as GateContext, i as GateDecision, j as GateResult, k as GenerationCandidate, l as GenerationRecord, I as ImprovementDriver, t as JudgeAggregate, J as JudgeConfig, m as JudgeDimension, n as JudgeScore, u as LabeledScenarioSource, M as MutableSurface, o as Mutator, O as OptimizerConfig, P as ProposeContext, R as RedactionStatus, S as Scenario, v as ScenarioAggregate, p as SessionScript, T as TraceSpan } from '../types-
|
|
1
|
+
export { C as CampaignStorage, D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, O as OpenAutoPrOptions, m as OpenAutoPrResult, R as RunCampaignOptions, a as RunEvalOptions, b as RunImprovementLoopOptions, c as RunImprovementLoopResult, n as RunOptimizationOptions, o as RunOptimizationResult, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, p as openAutoPr, r as runCampaign, k as runEval, l as runImprovementLoop, q as runOptimization, s as surfaceHash } from '../run-improvement-loop-pJ4yrx4X.js';
|
|
2
|
+
import { L as LabeledScenarioStore, q as LabeledScenarioWrite, r as LabeledScenarioSampleArgs, s as LabeledScenarioRecord, f as CodeSurface } from '../types-BURGZ8Ug.js';
|
|
3
|
+
export { C as CampaignAggregates, a as CampaignArtifactWriter, b as CampaignCellResult, c as CampaignCostMeter, d as CampaignResult, e as CampaignTraceWriter, g as DispatchContext, D as DispatchFn, G as Gate, h as GateContext, i as GateDecision, j as GateResult, k as GenerationCandidate, l as GenerationRecord, I as ImprovementDriver, t as JudgeAggregate, J as JudgeConfig, m as JudgeDimension, n as JudgeScore, u as LabeledScenarioSource, M as MutableSurface, o as Mutator, O as OptimizerConfig, P as ProposeContext, R as RedactionStatus, S as Scenario, v as ScenarioAggregate, p as SessionScript, T as TraceSpan } from '../types-BURGZ8Ug.js';
|
|
4
4
|
import '../llm-client-BXVRUZyX.js';
|
|
5
5
|
import '../errors-mje_cKOs.js';
|
|
6
6
|
import '../raw-provider-sink-C46HDghv.js';
|
package/dist/campaign/index.js
CHANGED
|
@@ -9,12 +9,12 @@ import {
|
|
|
9
9
|
runImprovementLoop,
|
|
10
10
|
runOptimization,
|
|
11
11
|
surfaceHash
|
|
12
|
-
} from "../chunk-
|
|
12
|
+
} from "../chunk-HRKOCLQA.js";
|
|
13
13
|
import {
|
|
14
14
|
fsCampaignStorage,
|
|
15
15
|
inMemoryCampaignStorage,
|
|
16
16
|
runCampaign
|
|
17
|
-
} from "../chunk-
|
|
17
|
+
} from "../chunk-J3EIOI3O.js";
|
|
18
18
|
import "../chunk-N4SBKEPJ.js";
|
|
19
19
|
import "../chunk-YV7J7X5N.js";
|
|
20
20
|
import "../chunk-WP7SY7AI.js";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runCampaign
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-J3EIOI3O.js";
|
|
4
4
|
import {
|
|
5
5
|
buildReflectionPrompt,
|
|
6
6
|
parseReflectionResponse,
|
|
@@ -553,7 +553,7 @@ async function runImprovementLoop(opts) {
|
|
|
553
553
|
throw new Error("runImprovementLoop: autoOnPromote='pr' requires ghOwner + ghRepo.");
|
|
554
554
|
}
|
|
555
555
|
const optimization = await runOptimization(opts);
|
|
556
|
-
const { runCampaign: runCampaign2 } = await import("./run-campaign-
|
|
556
|
+
const { runCampaign: runCampaign2 } = await import("./run-campaign-6UEVBPP3.js");
|
|
557
557
|
const baselineOnHoldout = await runCampaign2({
|
|
558
558
|
...opts,
|
|
559
559
|
scenarios: opts.holdoutScenarios,
|
|
@@ -639,4 +639,4 @@ export {
|
|
|
639
639
|
surfaceHash,
|
|
640
640
|
runImprovementLoop
|
|
641
641
|
};
|
|
642
|
-
//# sourceMappingURL=chunk-
|
|
642
|
+
//# sourceMappingURL=chunk-HRKOCLQA.js.map
|
|
@@ -195,6 +195,10 @@ async function executeCell(args) {
|
|
|
195
195
|
return costSoFar;
|
|
196
196
|
}
|
|
197
197
|
};
|
|
198
|
+
const placement = args.opts.cellPlacement?.({
|
|
199
|
+
scenario: args.slot.scenario,
|
|
200
|
+
rep: args.slot.rep
|
|
201
|
+
});
|
|
198
202
|
const ctx = {
|
|
199
203
|
cellId: args.slot.cellId,
|
|
200
204
|
rep: args.slot.rep,
|
|
@@ -202,7 +206,8 @@ async function executeCell(args) {
|
|
|
202
206
|
signal: args.signal,
|
|
203
207
|
trace,
|
|
204
208
|
artifacts,
|
|
205
|
-
cost
|
|
209
|
+
cost,
|
|
210
|
+
placement
|
|
206
211
|
};
|
|
207
212
|
let artifact;
|
|
208
213
|
let errorMessage;
|
|
@@ -357,4 +362,4 @@ export {
|
|
|
357
362
|
inMemoryCampaignStorage,
|
|
358
363
|
runCampaign
|
|
359
364
|
};
|
|
360
|
-
//# sourceMappingURL=chunk-
|
|
365
|
+
//# sourceMappingURL=chunk-J3EIOI3O.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/campaign/run-campaign.ts","../src/campaign/storage.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `runCampaign` — Pass A substrate primitive. ONE function that orchestrates\n * scenarios → dispatch → artifacts → judges → aggregates, with full\n * reproducibility (seed + manifest hash), cell-level resumability, bootstrap\n * CIs, and the `LabeledScenarioStore` capture flywheel.\n *\n * Improvement loops (optimizer / gate / autoOnPromote) ride on top of this\n * primitive but live in `presets/run-improvement-loop.ts`. This file keeps\n * the core orchestrator minimal — Phase 1 of the Pass A track.\n */\n\nimport { createHash } from 'node:crypto'\nimport { join } from 'node:path'\nimport { confidenceInterval } from '../statistics'\nimport { type CampaignStorage, fsCampaignStorage } from './storage'\nimport type {\n CampaignAggregates,\n CampaignArtifactWriter,\n CampaignCellResult,\n CampaignCostMeter,\n CampaignResult,\n CampaignTraceWriter,\n DispatchContext,\n DispatchFn,\n JudgeAggregate,\n JudgeConfig,\n JudgeScore,\n LabeledScenarioStore,\n Scenario,\n ScenarioAggregate,\n TraceSpan,\n} from './types'\n\nexport interface RunCampaignOptions<TScenario extends Scenario, TArtifact> {\n scenarios: TScenario[]\n dispatch: DispatchFn<TScenario, TArtifact>\n judges?: JudgeConfig<TArtifact, TScenario>[]\n /** Required for reproducibility. Default 42. */\n seed?: number\n /** Per-scenario replicates for CI bands. Default 1; raise to 5+ for\n * bootstrap-tight intervals on critical eval. */\n reps?: number\n /** When true (default), completed cells are cached by\n * (manifestHash, scenarioId, rep, generation). Re-runs skip cached cells. */\n resumable?: boolean\n /** Optional store — when present, every artifact + judge score is captured\n * with the configured `captureSource`. Capture is default ON; pass `'off'`\n * to disable. 
*/\n labeledStore?: LabeledScenarioStore | 'off'\n captureSource?: 'production-trace' | 'eval-run' | 'manual' | 'red-team' | 'synthetic'\n captureSourceVersionHash?: string\n /** Wall-clock cost cap across all cells. Cells beyond ceiling are skipped. */\n costCeiling?: number\n /** Max concurrent cells. Default 2. */\n maxConcurrency?: number\n /** Required: where artifacts + traces land. */\n runDir: string\n /** Tracing posture. Default is the substrate's `FileSystemTraceStore` rooted\n * at `<runDir>/traces/`. `'off'` disables capture entirely — substrate\n * refuses this when the caller wires `autoOnPromote !== 'none'`. */\n tracing?: 'on' | 'off'\n /** Test seam — override the wall clock for deterministic tests. */\n now?: () => Date\n /** Test seam — override per-cell trace writer factory. */\n buildTraceWriter?: (cellId: string, dir: string) => CampaignTraceWriter\n /** Storage backend for run/cell dirs, the resumability cache, artifacts,\n * and trace spans. Default: the Node filesystem (`fsCampaignStorage`).\n * Pass `inMemoryCampaignStorage()` to run in a filesystem-less runtime\n * (Cloudflare Workers, Deno, edge) — the `CampaignResult` is still\n * produced; artifacts/traces just aren't persisted to disk. */\n storage?: CampaignStorage\n /**\n * Optional per-cell placement strategy. Returns an opaque string the\n * substrate forwards as `ctx.placement` to the Dispatch — placement-aware\n * Dispatches (e.g. `httpDispatch` from `/adapters/http`) use it to route\n * each cell to the right worker, region, or sandbox. When unset, every\n * cell receives `ctx.placement = undefined` and behaves identically to\n * the in-process case.\n *\n * @example\n * cellPlacement: ({ scenario }) => scenario.tags?.includes('eu') ? 
'eu-west' : 'us-east'\n */\n cellPlacement?: (input: {\n scenario: TScenario\n rep: number\n generation?: number\n }) => string | undefined\n}\n\nexport async function runCampaign<TScenario extends Scenario, TArtifact>(\n opts: RunCampaignOptions<TScenario, TArtifact>,\n): Promise<CampaignResult<TArtifact, TScenario>> {\n const seed = opts.seed ?? 42\n const reps = opts.reps ?? 1\n const resumable = opts.resumable ?? true\n const maxConcurrency = opts.maxConcurrency ?? 2\n const now = opts.now ?? (() => new Date())\n const judges = opts.judges ?? []\n const storage = opts.storage ?? fsCampaignStorage()\n\n storage.ensureDir(opts.runDir)\n\n const manifestHash = computeManifestHash({\n scenarios: opts.scenarios,\n judges: judges as unknown as JudgeConfig<unknown>[],\n dispatchRef: opts.dispatch.name || 'anonymous',\n seed,\n reps,\n })\n\n const startedAt = now()\n const cells: CampaignCellResult<TArtifact>[] = []\n const artifactsByPath: Record<string, string> = {}\n\n // Build the cell schedule (scenario × rep).\n const schedule: Array<{ scenario: TScenario; rep: number; cellId: string; cellSeed: number }> = []\n let cellIndex = 0\n for (const scenario of opts.scenarios) {\n for (let rep = 0; rep < reps; rep++) {\n const cellId = `${scenario.id}:${rep}`\n const cellSeed = seed + cellIndex\n schedule.push({ scenario, rep, cellId, cellSeed })\n cellIndex += 1\n }\n }\n\n // Concurrency-limited execution.\n let totalCostUsd = 0\n let costCeilingReached = false\n const abortController = new AbortController()\n // Concurrency lanes that drain the cell schedule. Named \"lanes\" — not\n // \"workers\" — to avoid clashing with the taxonomy's worker (= the agent\n // harness in a sandbox, invoked behind `dispatch`). 
See loop-taxonomy.md.\n const lanes: Promise<void>[] = []\n let nextIdx = 0\n const cellsRef = cells\n\n for (let i = 0; i < maxConcurrency; i++) {\n lanes.push(\n (async () => {\n while (true) {\n const myIdx = nextIdx++\n if (myIdx >= schedule.length) return\n const slot = schedule[myIdx]!\n if (costCeilingReached) {\n cellsRef.push(skippedCell(slot, 'cost_ceiling_reached'))\n continue\n }\n const result = await executeCell({\n slot,\n opts,\n manifestHash,\n resumable,\n now,\n storage,\n buildTraceWriter: opts.buildTraceWriter ?? defaultBuildTraceWriter(storage),\n signal: abortController.signal,\n })\n cellsRef.push(result.cell)\n totalCostUsd += result.cell.costUsd\n Object.assign(artifactsByPath, result.artifactsByPath)\n if (opts.costCeiling !== undefined && totalCostUsd >= opts.costCeiling) {\n costCeilingReached = true\n }\n // Capture into LabeledScenarioStore unless explicitly disabled.\n if (opts.labeledStore && opts.labeledStore !== 'off' && !result.cell.error) {\n await captureToStore({\n store: opts.labeledStore,\n cell: result.cell,\n scenario: slot.scenario,\n opts,\n now,\n }).catch((err) => {\n // Capture failures are non-fatal — log but don't crash the campaign.\n // (Trace would normally land here.)\n console.warn(\n `[runCampaign] capture failed for ${result.cell.cellId}: ${err instanceof Error ? 
err.message : String(err)}`,\n )\n })\n }\n }\n })(),\n )\n }\n await Promise.all(lanes)\n\n const endedAt = now()\n cellsRef.sort((a, b) => a.cellId.localeCompare(b.cellId))\n\n const aggregates = computeAggregates(\n cellsRef,\n judges as unknown as JudgeConfig<TArtifact>[],\n seed,\n )\n\n return {\n manifestHash,\n seed,\n startedAt: startedAt.toISOString(),\n endedAt: endedAt.toISOString(),\n durationMs: endedAt.getTime() - startedAt.getTime(),\n cells: cellsRef,\n aggregates,\n runDir: opts.runDir,\n artifactsByPath,\n scenarios: opts.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n }\n}\n\n// ── Internals ─────────────────────────────────────────────────────────\n\ninterface ExecuteCellArgs<TScenario extends Scenario, TArtifact> {\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number }\n opts: RunCampaignOptions<TScenario, TArtifact>\n manifestHash: string\n resumable: boolean\n now: () => Date\n storage: CampaignStorage\n buildTraceWriter: (cellId: string, dir: string) => CampaignTraceWriter\n signal: AbortSignal\n}\n\nasync function executeCell<TScenario extends Scenario, TArtifact>(\n args: ExecuteCellArgs<TScenario, TArtifact>,\n): Promise<{ cell: CampaignCellResult<TArtifact>; artifactsByPath: Record<string, string> }> {\n const storage = args.storage\n const cellDir = join(args.opts.runDir, args.slot.cellId.replace(/[^a-zA-Z0-9_-]/g, '_'))\n storage.ensureDir(cellDir)\n\n // Resumability: cache key = (manifestHash, scenarioId, rep)\n const cachePath = join(cellDir, 'cached-result.json')\n if (args.resumable) {\n const raw = storage.read(cachePath)\n if (raw !== undefined) {\n try {\n const cached = JSON.parse(raw) as CampaignCellResult<TArtifact>\n if (cached.cellId === args.slot.cellId) {\n return { cell: { ...cached, cached: true }, artifactsByPath: {} }\n }\n } catch {\n // Corrupt cache — fall through to re-run.\n }\n }\n }\n\n const startMs = Date.now()\n const trace = args.buildTraceWriter(args.slot.cellId, cellDir)\n 
const artifactsByPath: Record<string, string> = {}\n const artifacts: CampaignArtifactWriter = {\n async write(path, content) {\n const fullPath = join(cellDir, path)\n storage.ensureDir(join(fullPath, '..'))\n storage.write(fullPath, content)\n artifactsByPath[`${args.slot.cellId}/${path}`] = fullPath\n return fullPath\n },\n async writeJson(path, value) {\n return artifacts.write(path, JSON.stringify(value, null, 2))\n },\n }\n let costSoFar = 0\n const cost: CampaignCostMeter = {\n observe(amount, source) {\n costSoFar += amount\n trace.span(`cost.${source}`, { amountUsd: amount }).end()\n },\n current() {\n return costSoFar\n },\n }\n\n const placement = args.opts.cellPlacement?.({\n scenario: args.slot.scenario,\n rep: args.slot.rep,\n })\n\n const ctx: DispatchContext = {\n cellId: args.slot.cellId,\n rep: args.slot.rep,\n seed: args.slot.cellSeed,\n signal: args.signal,\n trace,\n artifacts,\n cost,\n placement,\n }\n\n let artifact: TArtifact | undefined\n let errorMessage: string | undefined\n try {\n artifact = await args.opts.dispatch(args.slot.scenario, ctx)\n } catch (err) {\n errorMessage = err instanceof Error ? err.message : String(err)\n }\n\n // Run judges (only if we have an artifact). A judge that throws invalidates\n // the cell — recorded as `error`, NOT folded into a fake composite:0 (a fake\n // zero is indistinguishable from a real zero and poisons every aggregate).\n const judgeScores: Record<string, JudgeScore> = {}\n if (artifact !== undefined) {\n for (const judge of args.opts.judges ?? []) {\n if (judge.appliesTo && !judge.appliesTo(args.slot.scenario)) continue\n try {\n judgeScores[judge.name] = await runJudgeCell(judge, {\n artifact,\n scenario: args.slot.scenario,\n signal: args.signal,\n })\n } catch (err) {\n errorMessage = `judge '${judge.name}' failed: ${err instanceof Error ? 
err.message : String(err)}`\n break\n }\n }\n }\n\n await trace.flush()\n\n const cell: CampaignCellResult<TArtifact> = {\n cellId: args.slot.cellId,\n scenarioId: args.slot.scenario.id,\n rep: args.slot.rep,\n artifact: (artifact ?? null) as TArtifact,\n judgeScores,\n costUsd: costSoFar,\n durationMs: Date.now() - startMs,\n seed: args.slot.cellSeed,\n cached: false,\n error: errorMessage,\n }\n\n if (!errorMessage && args.resumable) {\n storage.write(cachePath, JSON.stringify(cell))\n }\n\n return { cell, artifactsByPath }\n}\n\nasync function runJudgeCell<TArtifact, TScenario extends Scenario>(\n judge: JudgeConfig<TArtifact, TScenario>,\n input: { artifact: TArtifact; scenario: TScenario; signal: AbortSignal },\n): Promise<JudgeScore> {\n return judge.score(input)\n}\n\nfunction defaultBuildTraceWriter(\n storage: CampaignStorage,\n): (cellId: string, dir: string) => CampaignTraceWriter {\n return (cellId, dir) => {\n const spans: Array<Record<string, unknown>> = []\n return {\n span(name, attributes) {\n const startMs = Date.now()\n const record: Record<string, unknown> = { name, cellId, startMs, ...(attributes ?? 
{}) }\n const finish: TraceSpan = {\n end(endAttrs) {\n record.durationMs = Date.now() - startMs\n if (endAttrs) Object.assign(record, endAttrs)\n spans.push(record)\n },\n setAttribute(key, value) {\n record[key] = value\n },\n }\n return finish\n },\n async flush() {\n storage.write(join(dir, 'spans.jsonl'), spans.map((s) => JSON.stringify(s)).join('\\n'))\n },\n }\n }\n}\n\nfunction skippedCell<TScenario extends Scenario, TArtifact>(\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number },\n reason: string,\n): CampaignCellResult<TArtifact> {\n return {\n cellId: slot.cellId,\n scenarioId: slot.scenario.id,\n rep: slot.rep,\n artifact: null as unknown as TArtifact,\n judgeScores: {},\n costUsd: 0,\n durationMs: 0,\n seed: slot.cellSeed,\n cached: false,\n error: `skipped: ${reason}`,\n }\n}\n\ninterface CaptureArgs<TScenario extends Scenario, TArtifact> {\n store: LabeledScenarioStore\n cell: CampaignCellResult<TArtifact>\n scenario: TScenario\n opts: RunCampaignOptions<TScenario, TArtifact>\n now: () => Date\n}\n\nasync function captureToStore<TScenario extends Scenario, TArtifact>(\n args: CaptureArgs<TScenario, TArtifact>,\n): Promise<void> {\n await args.store.observe({\n scenario: args.scenario,\n artifact: args.cell.artifact,\n judgeScores: args.cell.judgeScores,\n source: args.opts.captureSource ?? 'eval-run',\n sourceVersionHash: args.opts.captureSourceVersionHash ?? 
'unknown',\n capturedAt: args.now().toISOString(),\n redactionStatus: 'raw',\n })\n}\n\n// ── Aggregates + manifest hash ────────────────────────────────────────\n\nfunction computeManifestHash(input: {\n scenarios: Scenario[]\n judges: JudgeConfig<unknown>[]\n dispatchRef: string\n seed: number\n reps: number\n}): string {\n const canonical = {\n scenarios: input.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n judges: input.judges.map((j) => ({ name: j.name, dims: j.dimensions.map((d) => d.key) })),\n dispatch: input.dispatchRef,\n seed: input.seed,\n reps: input.reps,\n }\n return createHash('sha256').update(JSON.stringify(canonical)).digest('hex')\n}\n\nfunction computeAggregates<TArtifact>(\n cells: CampaignCellResult<TArtifact>[],\n judges: JudgeConfig<TArtifact>[],\n seed: number,\n): CampaignAggregates {\n const byJudge: Record<string, JudgeAggregate> = {}\n for (const judge of judges) {\n const scores: number[] = []\n for (const cell of cells) {\n const s = cell.judgeScores[judge.name]\n if (s !== undefined) scores.push(s.composite)\n }\n byJudge[judge.name] = aggregate(scores, seed)\n }\n const byScenario: Record<string, ScenarioAggregate> = {}\n const scenarioGroups = new Map<string, number[]>()\n for (const cell of cells) {\n const composites = Object.values(cell.judgeScores).map((s) => s.composite)\n if (composites.length === 0) continue\n const mean = composites.reduce((a, b) => a + b, 0) / composites.length\n const arr = scenarioGroups.get(cell.scenarioId) ?? 
[]\n arr.push(mean)\n scenarioGroups.set(cell.scenarioId, arr)\n }\n for (const [scenarioId, samples] of scenarioGroups) {\n const ag = aggregate(samples, seed)\n byScenario[scenarioId] = { meanComposite: ag.mean, ci95: ag.ci95, n: ag.n }\n }\n return {\n byJudge,\n byScenario,\n totalCostUsd: cells.reduce((a, c) => a + c.costUsd, 0),\n cellsExecuted: cells.filter((c) => !c.error).length,\n cellsSkipped: cells.filter((c) => c.error?.startsWith('skipped:')).length,\n cellsCached: cells.filter((c) => c.cached).length,\n cellsFailed: cells.filter((c) => c.error && !c.error.startsWith('skipped:')).length,\n }\n}\n\n// Percentile bootstrap CI95 via seeded resampling. Deterministic for a given\n// seed — same campaign re-run produces identical CI bands. Falls back to\n// degenerate intervals at n<=1 (the bootstrap is undefined there).\nfunction aggregate(samples: number[], seed: number): JudgeAggregate {\n const n = samples.length\n if (n === 0) return { mean: 0, stdev: 0, ci95: [0, 0], n: 0 }\n const mean = samples.reduce((a, b) => a + b, 0) / n\n const variance = samples.reduce((a, b) => a + (b - mean) ** 2, 0) / Math.max(1, n - 1)\n const stdev = Math.sqrt(variance)\n const ci = confidenceInterval(samples, 0.95, { seed, resamples: 1000 })\n return { mean, stdev, ci95: [ci.lower, ci.upper], n }\n}\n","/**\n * @experimental\n *\n * `CampaignStorage` — the filesystem seam `runCampaign` writes through\n * (run/cell dirs, the resumability cache, per-cell artifacts, trace spans).\n *\n * The default (`fsCampaignStorage`) is the Node filesystem — identical\n * behavior to the inline `node:fs` calls it replaces, so existing CLI\n * consumers are unaffected. 
`inMemoryCampaignStorage` keeps everything in a\n * `Map`, so the substrate runs in environments WITHOUT a filesystem\n * (Cloudflare Workers, Deno Deploy, other edge runtimes) — the campaign\n * still produces its `CampaignResult` (cells + aggregates) in memory;\n * artifacts/traces simply aren't persisted to disk.\n *\n * Paths are opaque keys to the in-memory adapter — it does not parse them,\n * so the same `join(...)`-built paths work unchanged across both adapters.\n */\nexport interface CampaignStorage {\n /** Ensure a directory exists (recursive). No-op for in-memory. */\n ensureDir(dir: string): void\n /** Does this path exist (as a written file or an ensured dir)? */\n exists(path: string): boolean\n /** Read a UTF-8 file; `undefined` when missing or unreadable. */\n read(path: string): string | undefined\n /** Write a file (string or bytes). Parent dir is assumed ensured. */\n write(path: string, content: string | Uint8Array): void\n}\n\n/** Node-filesystem storage — the default. Lazily requires `node:fs` so the\n * module imports cleanly in non-Node runtimes (where the caller passes\n * `inMemoryCampaignStorage` instead and never constructs this). */\nexport function fsCampaignStorage(): CampaignStorage {\n const { existsSync, mkdirSync, readFileSync, writeFileSync } =\n require('node:fs') as typeof import('node:fs')\n return {\n ensureDir(dir) {\n if (!existsSync(dir)) mkdirSync(dir, { recursive: true })\n },\n exists(path) {\n return existsSync(path)\n },\n read(path) {\n try {\n return readFileSync(path, 'utf8')\n } catch {\n return undefined\n }\n },\n write(path, content) {\n writeFileSync(path, content as Uint8Array)\n },\n }\n}\n\n/** In-memory storage for filesystem-less runtimes. Artifacts + trace spans\n * live in a `Map` for the duration of the run; the `CampaignResult` is\n * fully populated, but nothing is persisted to disk. 
*/\nexport function inMemoryCampaignStorage(): CampaignStorage {\n const files = new Map<string, string | Uint8Array>()\n const dirs = new Set<string>()\n return {\n ensureDir(dir) {\n dirs.add(dir)\n },\n exists(path) {\n return files.has(path) || dirs.has(path)\n },\n read(path) {\n const value = files.get(path)\n if (value === undefined) return undefined\n return typeof value === 'string' ? value : new TextDecoder().decode(value)\n },\n write(path, content) {\n files.set(path, content)\n },\n }\n}\n"],"mappings":";;;;;;;;AAaA,SAAS,kBAAkB;AAC3B,SAAS,YAAY;;;ACiBd,SAAS,oBAAqC;AACnD,QAAM,EAAE,YAAY,WAAW,cAAc,cAAc,IACzD,UAAQ,IAAS;AACnB,SAAO;AAAA,IACL,UAAU,KAAK;AACb,UAAI,CAAC,WAAW,GAAG,EAAG,WAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AAAA,IAC1D;AAAA,IACA,OAAO,MAAM;AACX,aAAO,WAAW,IAAI;AAAA,IACxB;AAAA,IACA,KAAK,MAAM;AACT,UAAI;AACF,eAAO,aAAa,MAAM,MAAM;AAAA,MAClC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,oBAAc,MAAM,OAAqB;AAAA,IAC3C;AAAA,EACF;AACF;AAKO,SAAS,0BAA2C;AACzD,QAAM,QAAQ,oBAAI,IAAiC;AACnD,QAAM,OAAO,oBAAI,IAAY;AAC7B,SAAO;AAAA,IACL,UAAU,KAAK;AACb,WAAK,IAAI,GAAG;AAAA,IACd;AAAA,IACA,OAAO,MAAM;AACX,aAAO,MAAM,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI;AAAA,IACzC;AAAA,IACA,KAAK,MAAM;AACT,YAAM,QAAQ,MAAM,IAAI,IAAI;AAC5B,UAAI,UAAU,OAAW,QAAO;AAChC,aAAO,OAAO,UAAU,WAAW,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK;AAAA,IAC3E;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,YAAM,IAAI,MAAM,OAAO;AAAA,IACzB;AAAA,EACF;AACF;;;ADeA,eAAsB,YACpB,MAC+C;AAC/C,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,QAAM,MAAM,KAAK,QAAQ,MAAM,oBAAI,KAAK;AACxC,QAAM,SAAS,KAAK,UAAU,CAAC;AAC/B,QAAM,UAAU,KAAK,WAAW,kBAAkB;AAElD,UAAQ,UAAU,KAAK,MAAM;AAE7B,QAAM,eAAe,oBAAoB;AAAA,IACvC,WAAW,KAAK;AAAA,IAChB;AAAA,IACA,aAAa,KAAK,SAAS,QAAQ;AAAA,IACnC;AAAA,IACA;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI;AACtB,QAAM,QAAyC,CAAC;AAChD,QAAM,kBAA0C,CAAC;AAGjD,QAAM,WAA0F,CAAC;AACjG,MAAI,YAAY;AAChB,aAAW,YAAY,KAAK,WAAW;AACrC,aAAS,MAAM,GAAG,MAAM,MAAM,OAAO;AACnC,YAAM,SAAS,GAAG,SAAS,EAAE,IAAI,GAAG;AACpC,YAAM,WAAW,
OAAO;AACxB,eAAS,KAAK,EAAE,UAAU,KAAK,QAAQ,SAAS,CAAC;AACjD,mBAAa;AAAA,IACf;AAAA,EACF;AAGA,MAAI,eAAe;AACnB,MAAI,qBAAqB;AACzB,QAAM,kBAAkB,IAAI,gBAAgB;AAI5C,QAAM,QAAyB,CAAC;AAChC,MAAI,UAAU;AACd,QAAM,WAAW;AAEjB,WAAS,IAAI,GAAG,IAAI,gBAAgB,KAAK;AACvC,UAAM;AAAA,OACH,YAAY;AACX,eAAO,MAAM;AACX,gBAAM,QAAQ;AACd,cAAI,SAAS,SAAS,OAAQ;AAC9B,gBAAM,OAAO,SAAS,KAAK;AAC3B,cAAI,oBAAoB;AACtB,qBAAS,KAAK,YAAY,MAAM,sBAAsB,CAAC;AACvD;AAAA,UACF;AACA,gBAAM,SAAS,MAAM,YAAY;AAAA,YAC/B;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,kBAAkB,KAAK,oBAAoB,wBAAwB,OAAO;AAAA,YAC1E,QAAQ,gBAAgB;AAAA,UAC1B,CAAC;AACD,mBAAS,KAAK,OAAO,IAAI;AACzB,0BAAgB,OAAO,KAAK;AAC5B,iBAAO,OAAO,iBAAiB,OAAO,eAAe;AACrD,cAAI,KAAK,gBAAgB,UAAa,gBAAgB,KAAK,aAAa;AACtE,iCAAqB;AAAA,UACvB;AAEA,cAAI,KAAK,gBAAgB,KAAK,iBAAiB,SAAS,CAAC,OAAO,KAAK,OAAO;AAC1E,kBAAM,eAAe;AAAA,cACnB,OAAO,KAAK;AAAA,cACZ,MAAM,OAAO;AAAA,cACb,UAAU,KAAK;AAAA,cACf;AAAA,cACA;AAAA,YACF,CAAC,EAAE,MAAM,CAAC,QAAQ;AAGhB,sBAAQ;AAAA,gBACN,oCAAoC,OAAO,KAAK,MAAM,KAAK,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,cAC7G;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF,GAAG;AAAA,IACL;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,KAAK;AAEvB,QAAM,UAAU,IAAI;AACpB,WAAS,KAAK,CAAC,GAAG,MAAM,EAAE,OAAO,cAAc,EAAE,MAAM,CAAC;AAExD,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,WAAW,UAAU,YAAY;AAAA,IACjC,SAAS,QAAQ,YAAY;AAAA,IAC7B,YAAY,QAAQ,QAAQ,IAAI,UAAU,QAAQ;AAAA,IAClD,OAAO;AAAA,IACP;AAAA,IACA,QAAQ,KAAK;AAAA,IACb;AAAA,IACA,WAAW,KAAK,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,EACnE;AACF;AAeA,eAAe,YACb,MAC2F;AAC3F,QAAM,UAAU,KAAK;AACrB,QAAM,UAAU,KAAK,KAAK,KAAK,QAAQ,KAAK,KAAK,OAAO,QAAQ,mBAAmB,GAAG,CAAC;AACvF,UAAQ,UAAU,OAAO;AAGzB,QAAM,YAAY,KAAK,SAAS,oBAAoB;AACpD,MAAI,KAAK,WAAW;AAClB,UAAM,MAAM,QAAQ,KAAK,SAAS;AAClC,QAAI,QAAQ,QAAW;AACrB,UAAI;AACF,cAAM,SAAS,KAAK,MAAM,GAAG;AAC7B,YAAI,OAAO,WAAW,KAAK,KAAK,QAAQ;AACtC,iBAAO,EAAE,MAAM,EAAE,GAAG,QAAQ,QAAQ,KAAK,GAAG,iBAAiB,CAAC,EAAE;AAAA,QAClE;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,KAAK,IA
AI;AACzB,QAAM,QAAQ,KAAK,iBAAiB,KAAK,KAAK,QAAQ,OAAO;AAC7D,QAAM,kBAA0C,CAAC;AACjD,QAAM,YAAoC;AAAA,IACxC,MAAM,MAAM,MAAM,SAAS;AACzB,YAAM,WAAW,KAAK,SAAS,IAAI;AACnC,cAAQ,UAAU,KAAK,UAAU,IAAI,CAAC;AACtC,cAAQ,MAAM,UAAU,OAAO;AAC/B,sBAAgB,GAAG,KAAK,KAAK,MAAM,IAAI,IAAI,EAAE,IAAI;AACjD,aAAO;AAAA,IACT;AAAA,IACA,MAAM,UAAU,MAAM,OAAO;AAC3B,aAAO,UAAU,MAAM,MAAM,KAAK,UAAU,OAAO,MAAM,CAAC,CAAC;AAAA,IAC7D;AAAA,EACF;AACA,MAAI,YAAY;AAChB,QAAM,OAA0B;AAAA,IAC9B,QAAQ,QAAQ,QAAQ;AACtB,mBAAa;AACb,YAAM,KAAK,QAAQ,MAAM,IAAI,EAAE,WAAW,OAAO,CAAC,EAAE,IAAI;AAAA,IAC1D;AAAA,IACA,UAAU;AACR,aAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,YAAY,KAAK,KAAK,gBAAgB;AAAA,IAC1C,UAAU,KAAK,KAAK;AAAA,IACpB,KAAK,KAAK,KAAK;AAAA,EACjB,CAAC;AAED,QAAM,MAAuB;AAAA,IAC3B,QAAQ,KAAK,KAAK;AAAA,IAClB,KAAK,KAAK,KAAK;AAAA,IACf,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ,KAAK;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,MAAI;AACJ,MAAI;AACJ,MAAI;AACF,eAAW,MAAM,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,GAAG;AAAA,EAC7D,SAAS,KAAK;AACZ,mBAAe,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,EAChE;AAKA,QAAM,cAA0C,CAAC;AACjD,MAAI,aAAa,QAAW;AAC1B,eAAW,SAAS,KAAK,KAAK,UAAU,CAAC,GAAG;AAC1C,UAAI,MAAM,aAAa,CAAC,MAAM,UAAU,KAAK,KAAK,QAAQ,EAAG;AAC7D,UAAI;AACF,oBAAY,MAAM,IAAI,IAAI,MAAM,aAAa,OAAO;AAAA,UAClD;AAAA,UACA,UAAU,KAAK,KAAK;AAAA,UACpB,QAAQ,KAAK;AAAA,QACf,CAAC;AAAA,MACH,SAAS,KAAK;AACZ,uBAAe,UAAU,MAAM,IAAI,aAAa,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAChG;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,MAAM,MAAM;AAElB,QAAM,OAAsC;AAAA,IAC1C,QAAQ,KAAK,KAAK;AAAA,IAClB,YAAY,KAAK,KAAK,SAAS;AAAA,IAC/B,KAAK,KAAK,KAAK;AAAA,IACf,UAAW,YAAY;AAAA,IACvB;AAAA,IACA,SAAS;AAAA,IACT,YAAY,KAAK,IAAI,IAAI;AAAA,IACzB,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ;AAAA,IACR,OAAO;AAAA,EACT;AAEA,MAAI,CAAC,gBAAgB,KAAK,WAAW;AACnC,YAAQ,MAAM,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,EAC/C;AAEA,SAAO,EAAE,MAAM,gBAAgB;AACjC;AAEA,eAAe,aACb,OACA,OACqB;AACrB,SAAO,MAAM,MAAM,KAAK;AAC1B;AAEA,SAAS,wBACP,SACsD;AACtD,SAAO,CAAC,QAAQ,QAAQ;AACtB,UAAM,QAAwC,CAAC;AAC/C,WAAO;AAAA,MACL,KAAK,MAAM,YAAY;AACrB,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAkC,EAAE,MAAM,QAAQ,SAAS,GAAI,cAAc
,CAAC,EAAG;AACvF,cAAM,SAAoB;AAAA,UACxB,IAAI,UAAU;AACZ,mBAAO,aAAa,KAAK,IAAI,IAAI;AACjC,gBAAI,SAAU,QAAO,OAAO,QAAQ,QAAQ;AAC5C,kBAAM,KAAK,MAAM;AAAA,UACnB;AAAA,UACA,aAAa,KAAK,OAAO;AACvB,mBAAO,GAAG,IAAI;AAAA,UAChB;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,MACA,MAAM,QAAQ;AACZ,gBAAQ,MAAM,KAAK,KAAK,aAAa,GAAG,MAAM,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,YACP,MACA,QAC+B;AAC/B,SAAO;AAAA,IACL,QAAQ,KAAK;AAAA,IACb,YAAY,KAAK,SAAS;AAAA,IAC1B,KAAK,KAAK;AAAA,IACV,UAAU;AAAA,IACV,aAAa,CAAC;AAAA,IACd,SAAS;AAAA,IACT,YAAY;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,QAAQ;AAAA,IACR,OAAO,YAAY,MAAM;AAAA,EAC3B;AACF;AAUA,eAAe,eACb,MACe;AACf,QAAM,KAAK,MAAM,QAAQ;AAAA,IACvB,UAAU,KAAK;AAAA,IACf,UAAU,KAAK,KAAK;AAAA,IACpB,aAAa,KAAK,KAAK;AAAA,IACvB,QAAQ,KAAK,KAAK,iBAAiB;AAAA,IACnC,mBAAmB,KAAK,KAAK,4BAA4B;AAAA,IACzD,YAAY,KAAK,IAAI,EAAE,YAAY;AAAA,IACnC,iBAAiB;AAAA,EACnB,CAAC;AACH;AAIA,SAAS,oBAAoB,OAMlB;AACT,QAAM,YAAY;AAAA,IAChB,WAAW,MAAM,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,IAClE,QAAQ,MAAM,OAAO,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,EAAE,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;AAAA,IACxF,UAAU,MAAM;AAAA,IAChB,MAAM,MAAM;AAAA,IACZ,MAAM,MAAM;AAAA,EACd;AACA,SAAO,WAAW,QAAQ,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC,EAAE,OAAO,KAAK;AAC5E;AAEA,SAAS,kBACP,OACA,QACA,MACoB;AACpB,QAAM,UAA0C,CAAC;AACjD,aAAW,SAAS,QAAQ;AAC1B,UAAM,SAAmB,CAAC;AAC1B,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,YAAY,MAAM,IAAI;AACrC,UAAI,MAAM,OAAW,QAAO,KAAK,EAAE,SAAS;AAAA,IAC9C;AACA,YAAQ,MAAM,IAAI,IAAI,UAAU,QAAQ,IAAI;AAAA,EAC9C;AACA,QAAM,aAAgD,CAAC;AACvD,QAAM,iBAAiB,oBAAI,IAAsB;AACjD,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACzE,QAAI,WAAW,WAAW,EAAG;AAC7B,UAAM,OAAO,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAChE,UAAM,MAAM,eAAe,IAAI,KAAK,UAAU,KAAK,CAAC;AACpD,QAAI,KAAK,IAAI;AACb,mBAAe,IAAI,KAAK,YAAY,GAAG;AAAA,EACzC;AACA,aAAW,CAAC,YAAY,OAAO,KAAK,gBAAgB;AAClD,UAAM,KAAK,UAAU,SAAS,IAAI;AAClC,eAAW,UAAU,IAAI,EAAE,eAAe,GAAG,MAAM,MAAM,GAAG,MAAM,GAAG,GAAG,EAA
E;AAAA,EAC5E;AACA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,cAAc,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,SAAS,CAAC;AAAA,IACrD,eAAe,MAAM,OAAO,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE;AAAA,IAC7C,cAAc,MAAM,OAAO,CAAC,MAAM,EAAE,OAAO,WAAW,UAAU,CAAC,EAAE;AAAA,IACnE,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,IAC3C,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,WAAW,UAAU,CAAC,EAAE;AAAA,EAC/E;AACF;AAKA,SAAS,UAAU,SAAmB,MAA8B;AAClE,QAAM,IAAI,QAAQ;AAClB,MAAI,MAAM,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,EAAE;AAC5D,QAAM,OAAO,QAAQ,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAClD,QAAM,WAAW,QAAQ,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,SAAS,GAAG,CAAC,IAAI,KAAK,IAAI,GAAG,IAAI,CAAC;AACrF,QAAM,QAAQ,KAAK,KAAK,QAAQ;AAChC,QAAM,KAAK,mBAAmB,SAAS,MAAM,EAAE,MAAM,WAAW,IAAK,CAAC;AACtE,SAAO,EAAE,MAAM,OAAO,MAAM,CAAC,GAAG,OAAO,GAAG,KAAK,GAAG,EAAE;AACtD;","names":[]}
|
package/dist/contract/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export { C as CampaignAggregates, a as CampaignArtifactWriter, b as CampaignCellResult, c as CampaignCostMeter, d as CampaignResult, e as CampaignTraceWriter, f as CodeSurface, D as Dispatch, g as DispatchContext, G as Gate, h as GateContext, i as GateDecision, j as GateResult, k as GenerationCandidate, l as GenerationRecord, I as ImprovementDriver, J as JudgeConfig, m as JudgeDimension, n as JudgeScore, M as MutableSurface, o as Mutator, O as OptimizerConfig, S as Scenario, p as SessionScript } from '../types-
|
|
2
|
-
export { C as CampaignStorage, D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, R as RunCampaignOptions, a as RunEvalOptions, b as RunImprovementLoopOptions, c as RunImprovementLoopResult, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, r as runCampaign, k as runEval, l as runImprovementLoop } from '../run-improvement-loop-
|
|
1
|
+
export { C as CampaignAggregates, a as CampaignArtifactWriter, b as CampaignCellResult, c as CampaignCostMeter, d as CampaignResult, e as CampaignTraceWriter, f as CodeSurface, D as Dispatch, g as DispatchContext, G as Gate, h as GateContext, i as GateDecision, j as GateResult, k as GenerationCandidate, l as GenerationRecord, I as ImprovementDriver, J as JudgeConfig, m as JudgeDimension, n as JudgeScore, M as MutableSurface, o as Mutator, O as OptimizerConfig, S as Scenario, p as SessionScript } from '../types-BURGZ8Ug.js';
|
|
2
|
+
export { C as CampaignStorage, D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, R as RunCampaignOptions, a as RunEvalOptions, b as RunImprovementLoopOptions, c as RunImprovementLoopResult, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, r as runCampaign, k as runEval, l as runImprovementLoop } from '../run-improvement-loop-pJ4yrx4X.js';
|
|
3
3
|
export { D as DeploymentOutcome, F as FileSystemOutcomeStore, a as FileSystemOutcomeStoreOptions, I as InMemoryOutcomeStore, O as OutcomeStore } from '../outcome-store-BxJ3DQKJ.js';
|
|
4
4
|
import '../llm-client-BXVRUZyX.js';
|
|
5
5
|
import '../errors-mje_cKOs.js';
|
package/dist/contract/index.js
CHANGED
|
@@ -6,12 +6,12 @@ import {
|
|
|
6
6
|
heldOutGate,
|
|
7
7
|
runEval,
|
|
8
8
|
runImprovementLoop
|
|
9
|
-
} from "../chunk-
|
|
9
|
+
} from "../chunk-HRKOCLQA.js";
|
|
10
10
|
import {
|
|
11
11
|
fsCampaignStorage,
|
|
12
12
|
inMemoryCampaignStorage,
|
|
13
13
|
runCampaign
|
|
14
|
-
} from "../chunk-
|
|
14
|
+
} from "../chunk-J3EIOI3O.js";
|
|
15
15
|
import "../chunk-N4SBKEPJ.js";
|
|
16
16
|
import "../chunk-YV7J7X5N.js";
|
|
17
17
|
import {
|
package/dist/openapi.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"openapi": "3.1.0",
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "@tangle-network/agent-eval — wire protocol",
|
|
5
|
-
"version": "0.44.
|
|
5
|
+
"version": "0.44.1",
|
|
6
6
|
"description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
|
|
7
7
|
"contact": {
|
|
8
8
|
"name": "Tangle Network",
|
package/dist/rl.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { R as RunRecord, a as RunSplitTag } from './run-record-BGY6bHRh.js';
|
|
2
|
-
import { d as CampaignResult } from './types-
|
|
2
|
+
import { d as CampaignResult } from './types-BURGZ8Ug.js';
|
|
3
3
|
import { V as VerificationReport, R as Researcher, F as FailureMode, S as SteeringChange, E as ExperimentPlan, a as ExperimentResult, b as EvalCampaignResult, c as EvalCampaignOptions } from './researcher-CoJMs2Iz.js';
|
|
4
4
|
export { r as runEvalCampaign } from './researcher-CoJMs2Iz.js';
|
|
5
5
|
import { S as Span, T as TraceStore } from './store-Db2Bv8Cf.js';
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runCampaign
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-J3EIOI3O.js";
|
|
4
4
|
import "./chunk-WP7SY7AI.js";
|
|
5
5
|
import "./chunk-QYJT52YW.js";
|
|
6
6
|
import "./chunk-NSBPE2FW.js";
|
|
7
7
|
export {
|
|
8
8
|
runCampaign
|
|
9
9
|
};
|
|
10
|
-
//# sourceMappingURL=run-campaign-
|
|
10
|
+
//# sourceMappingURL=run-campaign-6UEVBPP3.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { S as Scenario, d as CampaignResult, j as GateResult, o as Mutator, I as ImprovementDriver, G as Gate, D as DispatchFn, J as JudgeConfig, L as LabeledScenarioStore, e as CampaignTraceWriter, M as MutableSurface, l as GenerationRecord } from './types-
|
|
1
|
+
import { S as Scenario, d as CampaignResult, j as GateResult, o as Mutator, I as ImprovementDriver, G as Gate, D as DispatchFn, J as JudgeConfig, L as LabeledScenarioStore, e as CampaignTraceWriter, M as MutableSurface, l as GenerationRecord } from './types-BURGZ8Ug.js';
|
|
2
2
|
import { L as LlmClientOptions } from './llm-client-BXVRUZyX.js';
|
|
3
3
|
import { RunRecord } from '@tangle-network/agent-runtime';
|
|
4
4
|
import { R as RedTeamCase } from './red-team-30II1T4o.js';
|
|
@@ -267,6 +267,22 @@ interface RunCampaignOptions<TScenario extends Scenario, TArtifact> {
|
|
|
267
267
|
* (Cloudflare Workers, Deno, edge) — the `CampaignResult` is still
|
|
268
268
|
* produced; artifacts/traces just aren't persisted to disk. */
|
|
269
269
|
storage?: CampaignStorage;
|
|
270
|
+
/**
|
|
271
|
+
* Optional per-cell placement strategy. Returns an opaque string the
|
|
272
|
+
* substrate forwards as `ctx.placement` to the Dispatch — placement-aware
|
|
273
|
+
* Dispatches (e.g. `httpDispatch` from `/adapters/http`) use it to route
|
|
274
|
+
* each cell to the right worker, region, or sandbox. When unset, every
|
|
275
|
+
* cell receives `ctx.placement = undefined` and behaves identically to
|
|
276
|
+
* the in-process case.
|
|
277
|
+
*
|
|
278
|
+
* @example
|
|
279
|
+
* cellPlacement: ({ scenario }) => scenario.tags?.includes('eu') ? 'eu-west' : 'us-east'
|
|
280
|
+
*/
|
|
281
|
+
cellPlacement?: (input: {
|
|
282
|
+
scenario: TScenario;
|
|
283
|
+
rep: number;
|
|
284
|
+
generation?: number;
|
|
285
|
+
}) => string | undefined;
|
|
270
286
|
}
|
|
271
287
|
declare function runCampaign<TScenario extends Scenario, TArtifact>(opts: RunCampaignOptions<TScenario, TArtifact>): Promise<CampaignResult<TArtifact, TScenario>>;
|
|
272
288
|
|
|
@@ -40,6 +40,14 @@ interface DispatchContext {
|
|
|
40
40
|
cycleId?: string;
|
|
41
41
|
/** Populated when the substrate resumed from a prior cache hit. */
|
|
42
42
|
resumedFrom?: string;
|
|
43
|
+
/**
|
|
44
|
+
* Opaque placement key supplied by `RunCampaignOptions.cellPlacement`.
|
|
45
|
+
* The substrate forwards it through unchanged; placement-aware Dispatch
|
|
46
|
+
* implementations (e.g. `httpDispatch` from `/adapters/http`) read it to
|
|
47
|
+
* route the cell to the right worker / region / sandbox. `undefined`
|
|
48
|
+
* when no placement strategy is configured.
|
|
49
|
+
*/
|
|
50
|
+
placement?: string;
|
|
43
51
|
}
|
|
44
52
|
/** @experimental One function: scenario + ctx → artifact. Dispatcher chooses
|
|
45
53
|
* whether to call `runMultishot`, `runLoop`, raw `streamPrompt`, anything. */
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Composing agent-eval with your observability stack
|
|
2
|
+
|
|
3
|
+
`@tangle-network/agent-eval` ships its own OpenTelemetry pipeline
|
|
4
|
+
(`@tangle-network/agent-eval/telemetry`) that emits spans for every
|
|
5
|
+
cell, judge invocation, mutator proposal, and gate decision. **It's
|
|
6
|
+
just OTel** — same protocol as Langfuse SDK, OpenLLMetry, Arize
|
|
7
|
+
Phoenix, TraceAI, and the OpenTelemetry GenAI semantic conventions.
|
|
8
|
+
|
|
9
|
+
That means: if you already instrument your agent with any OTel-native
|
|
10
|
+
observability tool, the two compose **for free at the protocol layer**.
|
|
11
|
+
This doc shows the composition pattern; no agent-eval-specific adapter
|
|
12
|
+
code required.
|
|
13
|
+
|
|
14
|
+
## TL;DR — one OTel context, two emitters
|
|
15
|
+
|
|
16
|
+
1. Set up a shared OTel tracer provider in your process (or service mesh).
|
|
17
|
+
2. Configure your observability tool (TraceAI / Langfuse / OpenLLMetry /
|
|
18
|
+
Phoenix) to register its instrumentations against that provider.
|
|
19
|
+
3. Configure agent-eval's `/telemetry` exporter against the same provider.
|
|
20
|
+
4. Run a campaign. Both sets of spans land at your OTel collector.
|
|
21
|
+
5. Filter / route / fan-out at the collector layer — Jaeger, Tempo,
|
|
22
|
+
Phoenix, Langfuse cloud, your private collector, whatever.
|
|
23
|
+
|
|
24
|
+
The Tangle substrate doesn't compete with the observability tool;
|
|
25
|
+
they're orthogonal. The tool tells you *what your agent did*; the
|
|
26
|
+
substrate tells you *what the campaign / judge / mutator decided about
|
|
27
|
+
it*. Unified at the trace level, you see both as one timeline per cell.
|
|
28
|
+
|
|
29
|
+
## Per-tool notes
|
|
30
|
+
|
|
31
|
+
### TraceAI (Future-AGI)
|
|
32
|
+
|
|
33
|
+
- TS SDK auto-instruments OpenAI/Anthropic SDKs + LangChain.
|
|
34
|
+
- Compatible with the OpenTelemetry GenAI semantic conventions.
|
|
35
|
+
- Compose: register TraceAI's instrumentations on the global tracer
|
|
36
|
+
provider, then either point both at your OTLP collector or at
|
|
37
|
+
TraceAI's hosted backend if you want their UI.
|
|
38
|
+
|
|
39
|
+
### Langfuse SDK
|
|
40
|
+
|
|
41
|
+
- Larger installed base; has its own hosted product + OSS self-host.
|
|
42
|
+
- Their OpenTelemetry-compatible mode ships LLM call spans with
|
|
43
|
+
Langfuse-specific attributes preserved.
|
|
44
|
+
- Compose: register Langfuse as an OTel processor; agent-eval's
|
|
45
|
+
campaign/judge/mutator spans appear alongside the LLM calls in their
|
|
46
|
+
UI.
|
|
47
|
+
|
|
48
|
+
### OpenLLMetry (Traceloop)
|
|
49
|
+
|
|
50
|
+
- OSS auto-instrumentation library; OTel-native by design.
|
|
51
|
+
- Wide framework coverage (LangChain, LlamaIndex, Haystack, OpenAI,
|
|
52
|
+
Anthropic).
|
|
53
|
+
- Compose: set up Traceloop's exporter; agent-eval's exporter shares
|
|
54
|
+
the same trace context per cell.
|
|
55
|
+
|
|
56
|
+
### Arize Phoenix
|
|
57
|
+
|
|
58
|
+
- OSS observability backend; strong in the eval-tooling community.
|
|
59
|
+
- OTel-native ingest; renders trace + span attributes per the GenAI
|
|
60
|
+
semantic conventions.
|
|
61
|
+
- Compose: point both exporters at your local Phoenix instance. Phoenix
|
|
62
|
+
becomes the unified UI for both LLM-call traces and campaign spans.
|
|
63
|
+
|
|
64
|
+
## Wiring pattern (reference)
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
import { trace } from '@opentelemetry/api'
|
|
68
|
+
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node'
|
|
69
|
+
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'
|
|
70
|
+
import { SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base'
|
|
71
|
+
|
|
72
|
+
// 1. One shared tracer provider for the process.
|
|
73
|
+
const provider = new NodeTracerProvider()
|
|
74
|
+
provider.addSpanProcessor(new SimpleSpanProcessor(
|
|
75
|
+
new OTLPTraceExporter({ url: 'http://localhost:4318/v1/traces' }),
|
|
76
|
+
))
|
|
77
|
+
provider.register()
|
|
78
|
+
|
|
79
|
+
// 2. Your observability tool registers against the global provider.
|
|
80
|
+
// Example for TraceAI / OpenLLMetry / Langfuse — call their init.
|
|
81
|
+
// (See each tool's docs.)
|
|
82
|
+
|
|
83
|
+
// 3. agent-eval is already OTel-native; it picks up the same global
|
|
84
|
+
// provider. Just ensure `@tangle-network/agent-eval/telemetry` is
|
|
85
|
+
// initialized for the campaign:
|
|
86
|
+
import { setOtelExporter } from '@tangle-network/agent-eval/telemetry'
|
|
87
|
+
setOtelExporter({ kind: 'otel-global' }) // use the global provider
|
|
88
|
+
|
|
89
|
+
// 4. Run your campaign — both sets of spans land at the collector.
|
|
90
|
+
import { runEval } from '@tangle-network/agent-eval/contract'
|
|
91
|
+
await runEval({ /* ... */ })
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
That's it. No new adapter needs to ship — the libs are already
|
|
95
|
+
designed to live in the same OTel ecosystem.
|
|
96
|
+
|
|
97
|
+
## When you'd want a deeper, code-level adapter
|
|
98
|
+
|
|
99
|
+
There are two cases where a thin adapter would add value beyond the
|
|
100
|
+
OTel-protocol composition:
|
|
101
|
+
|
|
102
|
+
1. **Cost-aware judging.** Your observability tool's auto-instrumented
|
|
103
|
+
spans carry token counts + cost. A custom `JudgeConfig` can read
|
|
104
|
+
them via the OTel context and refuse to score artifacts that
|
|
105
|
+
exceeded a per-call budget. Easy to write yourself; we'll ship a
|
|
106
|
+
reference helper (`costAwareJudgeFromOtel`) when a partner pulls on
|
|
107
|
+
this.
|
|
108
|
+
2. **Tool-aware judging.** Your instrumentation captures the tool-call
|
|
109
|
+
sequence (`langchain.tool.invoked`, `openai.function.called`, etc.).
|
|
110
|
+
A judge that scores "did the agent use the right tool" reads those
|
|
111
|
+
spans directly. Also straightforward; helper ships when needed.
|
|
112
|
+
|
|
113
|
+
Both of these are L1-tier ergonomic helpers; the underlying composition
|
|
114
|
+
works today without them.
|
|
115
|
+
|
|
116
|
+
## What this does NOT install
|
|
117
|
+
|
|
118
|
+
No new dependencies. No new peer deps. No `@traceai/*`, no
|
|
119
|
+
`@langfuse/*`, no `@opentelemetry/*` in our manifest. You bring the
|
|
120
|
+
observability stack you want; agent-eval just emits OTel and respects
|
|
121
|
+
whatever provider is registered.
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# Distributed driver — driver-on-A, workers-on-B (and C, D, E…)
|
|
2
|
+
|
|
3
|
+
The driver (running `runCampaign` / `runImprovementLoop` / `gepaDriver`)
|
|
4
|
+
and the worker (running your actual agent) **do not have to live in the
|
|
5
|
+
same process, machine, region, or cloud.** `Dispatch` is just a
|
|
6
|
+
function: scenario in, artifact out. Whatever returns the artifact is
|
|
7
|
+
the worker — local, remote, sandboxed, or fanned out across a fleet.
|
|
8
|
+
|
|
9
|
+
## Why you'd want this
|
|
10
|
+
|
|
11
|
+
| Pattern | Reason |
|
|
12
|
+
|---|---|
|
|
13
|
+
| **Driver on your VPC, workers on our sandbox fleet** | Driver holds secrets, training data, prompt corpus; workers stay stateless and scale horizontally |
|
|
14
|
+
| **Multi-region campaigns** | Each cell runs in the region closest to its target API (latency, compliance, data residency) |
|
|
15
|
+
| **Driver-as-a-service** | Long-running optimization process, reused across many short-lived worker invocations |
|
|
16
|
+
| **Heterogeneous workers** | One cell on a CPU container, another on a GPU box, another against a third-party API — same Dispatch shape, different placement |
|
|
17
|
+
| **Budget-isolated workers** | Worker boxes get scoped, time-bounded credentials; driver never holds production keys |
|
|
18
|
+
|
|
19
|
+
## Three new pieces in 0.45.0
|
|
20
|
+
|
|
21
|
+
| Where | What |
|
|
22
|
+
|---|---|
|
|
23
|
+
| **`DispatchContext.placement?: string`** | Opaque placement key the substrate forwards to the Dispatch. |
|
|
24
|
+
| **`RunCampaignOptions.cellPlacement?(input) → string \| undefined`** | Strategy function the substrate calls per cell to compute the placement key. |
|
|
25
|
+
| **`@tangle-network/agent-eval/adapters/http`** | `httpDispatch` (client) + `runDispatchServer` (server) — wire shape for HTTP-based remote workers. |
|
|
26
|
+
|
|
27
|
+
Both ends of the wire are in the same package; no peer dep, no separate
|
|
28
|
+
install. The substrate doesn't strategy-pick; you provide the
|
|
29
|
+
`cellPlacement` function, the substrate forwards its result, the
|
|
30
|
+
Dispatch reads it. Clean seam, no policy baked in.
|
|
31
|
+
|
|
32
|
+
## The three reference topologies
|
|
33
|
+
|
|
34
|
+
### 1. In-process (the default — what you already have)
|
|
35
|
+
|
|
36
|
+
```ts
|
|
37
|
+
await runCampaign({
|
|
38
|
+
scenarios,
|
|
39
|
+
dispatch, // runs in-process
|
|
40
|
+
judges: [judge],
|
|
41
|
+
storage,
|
|
42
|
+
runDir,
|
|
43
|
+
})
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
`ctx.placement` is `undefined`; nothing changes for existing consumers.
|
|
47
|
+
This shipped in 0.40.
|
|
48
|
+
|
|
49
|
+
### 2. Single remote worker
|
|
50
|
+
|
|
51
|
+
Driver-on-A talks to one worker-on-B over HTTP.
|
|
52
|
+
|
|
53
|
+
**Driver side (machine A):**
|
|
54
|
+
|
|
55
|
+
```ts
|
|
56
|
+
import { httpDispatch } from '@tangle-network/agent-eval/adapters/http'
|
|
57
|
+
|
|
58
|
+
const dispatch = httpDispatch<MyScenario, MyArtifact>({
|
|
59
|
+
url: 'https://worker.your-infra.com/dispatch',
|
|
60
|
+
auth: process.env.WORKER_TOKEN,
|
|
61
|
+
timeoutMs: 5 * 60 * 1000,
|
|
62
|
+
retries: 2,
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
await runImprovementLoop({ scenarios, baselineSurface, dispatchWithSurface: (surface, s, ctx) =>
|
|
66
|
+
dispatch(s, { ...ctx, /* pass the surface through your own protocol */ }),
|
|
67
|
+
/* ... */ })
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Worker side (machine B):**
|
|
71
|
+
|
|
72
|
+
```ts
|
|
73
|
+
import { runDispatchServer } from '@tangle-network/agent-eval/adapters/http'
|
|
74
|
+
|
|
75
|
+
const handle = await runDispatchServer<MyScenario, MyArtifact>({
|
|
76
|
+
dispatch: async (scenario, ctx) => {
|
|
77
|
+
// your agent — call OpenAI, LangChain, your sandbox, anything.
|
|
78
|
+
const artifact = await runMyAgent(scenario, ctx.signal)
|
|
79
|
+
return artifact
|
|
80
|
+
},
|
|
81
|
+
port: 8080,
|
|
82
|
+
auth: process.env.WORKER_TOKEN, // required; `false` only for closed networks
|
|
83
|
+
})
|
|
84
|
+
console.log(`worker listening on ${handle.port}`)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Cancellation, retries on 5xx / 408 / 429, bounded timeouts, optional
|
|
88
|
+
custom auth headers, optional `fetchImpl` override — all there.
|
|
89
|
+
|
|
90
|
+
### 3. Multi-region fan-out
|
|
91
|
+
|
|
92
|
+
Driver picks a region per cell; the same `httpDispatch` routes to
|
|
93
|
+
different worker URLs based on placement.
|
|
94
|
+
|
|
95
|
+
```ts
|
|
96
|
+
import { httpDispatch } from '@tangle-network/agent-eval/adapters/http'
|
|
97
|
+
|
|
98
|
+
const REGION_URLS: Record<string, string> = {
|
|
99
|
+
'us-east': 'https://worker-use1.your-infra.com/dispatch',
|
|
100
|
+
'eu-west': 'https://worker-euw1.your-infra.com/dispatch',
|
|
101
|
+
'ap-south': 'https://worker-aps1.your-infra.com/dispatch',
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
const dispatch = httpDispatch<MyScenario, MyArtifact>({
|
|
105
|
+
resolveUrl: ({ placement }) => REGION_URLS[placement ?? 'us-east'],
|
|
106
|
+
auth: process.env.WORKER_TOKEN,
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
await runCampaign({
|
|
110
|
+
scenarios,
|
|
111
|
+
dispatch,
|
|
112
|
+
judges: [judge],
|
|
113
|
+
storage,
|
|
114
|
+
runDir,
|
|
115
|
+
cellPlacement: ({ scenario }) => {
|
|
116
|
+
if (scenario.tags?.includes('eu')) return 'eu-west'
|
|
117
|
+
if (scenario.tags?.includes('ap')) return 'ap-south'
|
|
118
|
+
return 'us-east'
|
|
119
|
+
},
|
|
120
|
+
maxConcurrency: 8, // 8 cells fan across regions in parallel
|
|
121
|
+
})
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
`cellPlacement` is a pure function the substrate calls per cell — no
|
|
125
|
+
state. Use whatever signal you want (tags, hash of scenario id,
|
|
126
|
+
round-robin, region-affinity from a previous run, scheduling table).
|
|
127
|
+
|
|
128
|
+
## What's preserved across the wire
|
|
129
|
+
|
|
130
|
+
| Concern | How |
|
|
131
|
+
|---|---|
|
|
132
|
+
| **Cancellation** | Driver's `AbortSignal` forwards into the HTTP request; server translates `AbortError` → `499` so client doesn't retry. |
|
|
133
|
+
| **Timeouts** | Per-call `timeoutMs` on the client; server can layer its own. |
|
|
134
|
+
| **Retries** | Idempotent retries on 5xx / 408 / 429 with exponential backoff + jitter. Driver-aborts never retry. |
|
|
135
|
+
| **Auth** | Bearer token on `Authorization`; pluggable via `auth: string \| (() => string \| Promise<string>)` for rotation/refresh. |
|
|
136
|
+
| **Payload size** | Server enforces `maxBodyBytes` (default 10 MB). |
|
|
137
|
+
| **Traces** | Both ends emit OTel — if both point at the same OTLP collector, you get a unified trace per cell. See `docs/adapters-observability.md`. |
|
|
138
|
+
| **Cost** | Worker's `ctx.cost.observe(usd, source)` is local to the worker process. Roll up server-side and attach to your worker-side telemetry; we don't (yet) forward cost back to the driver. Tracked as follow-up. |
|
|
139
|
+
|
|
140
|
+
## Running the reference example
|
|
141
|
+
|
|
142
|
+
See `examples/distributed-driver/`:
|
|
143
|
+
|
|
144
|
+
```sh
|
|
145
|
+
# Terminal 1 — worker
|
|
146
|
+
pnpm tsx examples/distributed-driver/worker.ts
|
|
147
|
+
|
|
148
|
+
# Terminal 2 — driver
|
|
149
|
+
WORKER_URL=http://localhost:8080/dispatch \
|
|
150
|
+
WORKER_TOKEN=dev-token \
|
|
151
|
+
pnpm tsx examples/distributed-driver/driver.ts
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Two processes, one local TCP loopback, full self-improvement loop end
|
|
155
|
+
to end. Scaling out means pointing `WORKER_URL` at a non-loopback hostname
|
|
156
|
+
and using `cellPlacement` to fan across many of them.
|
|
157
|
+
|
|
158
|
+
## Known gaps + follow-ups
|
|
159
|
+
|
|
160
|
+
- **Cost roll-up across the wire** — worker-side `ctx.cost` observations
|
|
161
|
+
stay on the worker. We need to forward them in the response body so
|
|
162
|
+
`defaultProductionGate`'s `budgetUsd` ceiling reflects total spend, not
|
|
163
|
+
just driver-side spend. Tracked as a 0.45.x follow-up.
|
|
164
|
+
- **Per-cell artifact streaming** — when the worker writes intermediate
|
|
165
|
+
artifacts via `ctx.artifacts.write`, those land on the worker's
|
|
166
|
+
storage. For multi-worker campaigns you'll want a shared object store
|
|
167
|
+
(S3/GCS) reachable from both sides; today consumers wire that as a
|
|
168
|
+
`CampaignStorage` impl. A reference S3-backed storage is on the
|
|
169
|
+
roadmap.
|
|
170
|
+
- **gRPC / NATS / Temporal transports** — the wire is HTTP today by
|
|
171
|
+
default because everything speaks HTTP. Other transports can ship as
|
|
172
|
+
additional adapters; the `Dispatch` interface itself is
|
|
173
|
+
transport-agnostic.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-eval",
|
|
3
|
-
"version": "0.44.1",
|
|
3
|
+
"version": "0.45.0",
|
|
4
4
|
"description": "Substrate for self-improving agents: traces, verifiable rewards, preferences, GEPA / reflective mutation, auto-research, replay, sequential anytime-valid stats, and release gates.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-eval#readme",
|
|
6
6
|
"repository": {
|
|
@@ -114,6 +114,11 @@
|
|
|
114
114
|
"import": "./dist/adapters/langchain.js",
|
|
115
115
|
"default": "./dist/adapters/langchain.js"
|
|
116
116
|
},
|
|
117
|
+
"./adapters/http": {
|
|
118
|
+
"types": "./dist/adapters/http.d.ts",
|
|
119
|
+
"import": "./dist/adapters/http.js",
|
|
120
|
+
"default": "./dist/adapters/http.js"
|
|
121
|
+
},
|
|
117
122
|
"./openapi.json": {
|
|
118
123
|
"default": "./dist/openapi.json"
|
|
119
124
|
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/campaign/run-campaign.ts","../src/campaign/storage.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `runCampaign` — Pass A substrate primitive. ONE function that orchestrates\n * scenarios → dispatch → artifacts → judges → aggregates, with full\n * reproducibility (seed + manifest hash), cell-level resumability, bootstrap\n * CIs, and the `LabeledScenarioStore` capture flywheel.\n *\n * Improvement loops (optimizer / gate / autoOnPromote) ride on top of this\n * primitive but live in `presets/run-improvement-loop.ts`. This file keeps\n * the core orchestrator minimal — Phase 1 of the Pass A track.\n */\n\nimport { createHash } from 'node:crypto'\nimport { join } from 'node:path'\nimport { confidenceInterval } from '../statistics'\nimport { type CampaignStorage, fsCampaignStorage } from './storage'\nimport type {\n CampaignAggregates,\n CampaignArtifactWriter,\n CampaignCellResult,\n CampaignCostMeter,\n CampaignResult,\n CampaignTraceWriter,\n DispatchContext,\n DispatchFn,\n JudgeAggregate,\n JudgeConfig,\n JudgeScore,\n LabeledScenarioStore,\n Scenario,\n ScenarioAggregate,\n TraceSpan,\n} from './types'\n\nexport interface RunCampaignOptions<TScenario extends Scenario, TArtifact> {\n scenarios: TScenario[]\n dispatch: DispatchFn<TScenario, TArtifact>\n judges?: JudgeConfig<TArtifact, TScenario>[]\n /** Required for reproducibility. Default 42. */\n seed?: number\n /** Per-scenario replicates for CI bands. Default 1; raise to 5+ for\n * bootstrap-tight intervals on critical eval. */\n reps?: number\n /** When true (default), completed cells are cached by\n * (manifestHash, scenarioId, rep, generation). Re-runs skip cached cells. */\n resumable?: boolean\n /** Optional store — when present, every artifact + judge score is captured\n * with the configured `captureSource`. Capture is default ON; pass `'off'`\n * to disable. 
*/\n labeledStore?: LabeledScenarioStore | 'off'\n captureSource?: 'production-trace' | 'eval-run' | 'manual' | 'red-team' | 'synthetic'\n captureSourceVersionHash?: string\n /** Wall-clock cost cap across all cells. Cells beyond ceiling are skipped. */\n costCeiling?: number\n /** Max concurrent cells. Default 2. */\n maxConcurrency?: number\n /** Required: where artifacts + traces land. */\n runDir: string\n /** Tracing posture. Default is the substrate's `FileSystemTraceStore` rooted\n * at `<runDir>/traces/`. `'off'` disables capture entirely — substrate\n * refuses this when the caller wires `autoOnPromote !== 'none'`. */\n tracing?: 'on' | 'off'\n /** Test seam — override the wall clock for deterministic tests. */\n now?: () => Date\n /** Test seam — override per-cell trace writer factory. */\n buildTraceWriter?: (cellId: string, dir: string) => CampaignTraceWriter\n /** Storage backend for run/cell dirs, the resumability cache, artifacts,\n * and trace spans. Default: the Node filesystem (`fsCampaignStorage`).\n * Pass `inMemoryCampaignStorage()` to run in a filesystem-less runtime\n * (Cloudflare Workers, Deno, edge) — the `CampaignResult` is still\n * produced; artifacts/traces just aren't persisted to disk. */\n storage?: CampaignStorage\n}\n\nexport async function runCampaign<TScenario extends Scenario, TArtifact>(\n opts: RunCampaignOptions<TScenario, TArtifact>,\n): Promise<CampaignResult<TArtifact, TScenario>> {\n const seed = opts.seed ?? 42\n const reps = opts.reps ?? 1\n const resumable = opts.resumable ?? true\n const maxConcurrency = opts.maxConcurrency ?? 2\n const now = opts.now ?? (() => new Date())\n const judges = opts.judges ?? []\n const storage = opts.storage ?? 
fsCampaignStorage()\n\n storage.ensureDir(opts.runDir)\n\n const manifestHash = computeManifestHash({\n scenarios: opts.scenarios,\n judges: judges as unknown as JudgeConfig<unknown>[],\n dispatchRef: opts.dispatch.name || 'anonymous',\n seed,\n reps,\n })\n\n const startedAt = now()\n const cells: CampaignCellResult<TArtifact>[] = []\n const artifactsByPath: Record<string, string> = {}\n\n // Build the cell schedule (scenario × rep).\n const schedule: Array<{ scenario: TScenario; rep: number; cellId: string; cellSeed: number }> = []\n let cellIndex = 0\n for (const scenario of opts.scenarios) {\n for (let rep = 0; rep < reps; rep++) {\n const cellId = `${scenario.id}:${rep}`\n const cellSeed = seed + cellIndex\n schedule.push({ scenario, rep, cellId, cellSeed })\n cellIndex += 1\n }\n }\n\n // Concurrency-limited execution.\n let totalCostUsd = 0\n let costCeilingReached = false\n const abortController = new AbortController()\n // Concurrency lanes that drain the cell schedule. Named \"lanes\" — not\n // \"workers\" — to avoid clashing with the taxonomy's worker (= the agent\n // harness in a sandbox, invoked behind `dispatch`). See loop-taxonomy.md.\n const lanes: Promise<void>[] = []\n let nextIdx = 0\n const cellsRef = cells\n\n for (let i = 0; i < maxConcurrency; i++) {\n lanes.push(\n (async () => {\n while (true) {\n const myIdx = nextIdx++\n if (myIdx >= schedule.length) return\n const slot = schedule[myIdx]!\n if (costCeilingReached) {\n cellsRef.push(skippedCell(slot, 'cost_ceiling_reached'))\n continue\n }\n const result = await executeCell({\n slot,\n opts,\n manifestHash,\n resumable,\n now,\n storage,\n buildTraceWriter: opts.buildTraceWriter ?? 
defaultBuildTraceWriter(storage),\n signal: abortController.signal,\n })\n cellsRef.push(result.cell)\n totalCostUsd += result.cell.costUsd\n Object.assign(artifactsByPath, result.artifactsByPath)\n if (opts.costCeiling !== undefined && totalCostUsd >= opts.costCeiling) {\n costCeilingReached = true\n }\n // Capture into LabeledScenarioStore unless explicitly disabled.\n if (opts.labeledStore && opts.labeledStore !== 'off' && !result.cell.error) {\n await captureToStore({\n store: opts.labeledStore,\n cell: result.cell,\n scenario: slot.scenario,\n opts,\n now,\n }).catch((err) => {\n // Capture failures are non-fatal — log but don't crash the campaign.\n // (Trace would normally land here.)\n console.warn(\n `[runCampaign] capture failed for ${result.cell.cellId}: ${err instanceof Error ? err.message : String(err)}`,\n )\n })\n }\n }\n })(),\n )\n }\n await Promise.all(lanes)\n\n const endedAt = now()\n cellsRef.sort((a, b) => a.cellId.localeCompare(b.cellId))\n\n const aggregates = computeAggregates(\n cellsRef,\n judges as unknown as JudgeConfig<TArtifact>[],\n seed,\n )\n\n return {\n manifestHash,\n seed,\n startedAt: startedAt.toISOString(),\n endedAt: endedAt.toISOString(),\n durationMs: endedAt.getTime() - startedAt.getTime(),\n cells: cellsRef,\n aggregates,\n runDir: opts.runDir,\n artifactsByPath,\n scenarios: opts.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n }\n}\n\n// ── Internals ─────────────────────────────────────────────────────────\n\ninterface ExecuteCellArgs<TScenario extends Scenario, TArtifact> {\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number }\n opts: RunCampaignOptions<TScenario, TArtifact>\n manifestHash: string\n resumable: boolean\n now: () => Date\n storage: CampaignStorage\n buildTraceWriter: (cellId: string, dir: string) => CampaignTraceWriter\n signal: AbortSignal\n}\n\nasync function executeCell<TScenario extends Scenario, TArtifact>(\n args: ExecuteCellArgs<TScenario, TArtifact>,\n): Promise<{ 
cell: CampaignCellResult<TArtifact>; artifactsByPath: Record<string, string> }> {\n const storage = args.storage\n const cellDir = join(args.opts.runDir, args.slot.cellId.replace(/[^a-zA-Z0-9_-]/g, '_'))\n storage.ensureDir(cellDir)\n\n // Resumability: cache key = (manifestHash, scenarioId, rep)\n const cachePath = join(cellDir, 'cached-result.json')\n if (args.resumable) {\n const raw = storage.read(cachePath)\n if (raw !== undefined) {\n try {\n const cached = JSON.parse(raw) as CampaignCellResult<TArtifact>\n if (cached.cellId === args.slot.cellId) {\n return { cell: { ...cached, cached: true }, artifactsByPath: {} }\n }\n } catch {\n // Corrupt cache — fall through to re-run.\n }\n }\n }\n\n const startMs = Date.now()\n const trace = args.buildTraceWriter(args.slot.cellId, cellDir)\n const artifactsByPath: Record<string, string> = {}\n const artifacts: CampaignArtifactWriter = {\n async write(path, content) {\n const fullPath = join(cellDir, path)\n storage.ensureDir(join(fullPath, '..'))\n storage.write(fullPath, content)\n artifactsByPath[`${args.slot.cellId}/${path}`] = fullPath\n return fullPath\n },\n async writeJson(path, value) {\n return artifacts.write(path, JSON.stringify(value, null, 2))\n },\n }\n let costSoFar = 0\n const cost: CampaignCostMeter = {\n observe(amount, source) {\n costSoFar += amount\n trace.span(`cost.${source}`, { amountUsd: amount }).end()\n },\n current() {\n return costSoFar\n },\n }\n\n const ctx: DispatchContext = {\n cellId: args.slot.cellId,\n rep: args.slot.rep,\n seed: args.slot.cellSeed,\n signal: args.signal,\n trace,\n artifacts,\n cost,\n }\n\n let artifact: TArtifact | undefined\n let errorMessage: string | undefined\n try {\n artifact = await args.opts.dispatch(args.slot.scenario, ctx)\n } catch (err) {\n errorMessage = err instanceof Error ? err.message : String(err)\n }\n\n // Run judges (only if we have an artifact). 
A judge that throws invalidates\n // the cell — recorded as `error`, NOT folded into a fake composite:0 (a fake\n // zero is indistinguishable from a real zero and poisons every aggregate).\n const judgeScores: Record<string, JudgeScore> = {}\n if (artifact !== undefined) {\n for (const judge of args.opts.judges ?? []) {\n if (judge.appliesTo && !judge.appliesTo(args.slot.scenario)) continue\n try {\n judgeScores[judge.name] = await runJudgeCell(judge, {\n artifact,\n scenario: args.slot.scenario,\n signal: args.signal,\n })\n } catch (err) {\n errorMessage = `judge '${judge.name}' failed: ${err instanceof Error ? err.message : String(err)}`\n break\n }\n }\n }\n\n await trace.flush()\n\n const cell: CampaignCellResult<TArtifact> = {\n cellId: args.slot.cellId,\n scenarioId: args.slot.scenario.id,\n rep: args.slot.rep,\n artifact: (artifact ?? null) as TArtifact,\n judgeScores,\n costUsd: costSoFar,\n durationMs: Date.now() - startMs,\n seed: args.slot.cellSeed,\n cached: false,\n error: errorMessage,\n }\n\n if (!errorMessage && args.resumable) {\n storage.write(cachePath, JSON.stringify(cell))\n }\n\n return { cell, artifactsByPath }\n}\n\nasync function runJudgeCell<TArtifact, TScenario extends Scenario>(\n judge: JudgeConfig<TArtifact, TScenario>,\n input: { artifact: TArtifact; scenario: TScenario; signal: AbortSignal },\n): Promise<JudgeScore> {\n return judge.score(input)\n}\n\nfunction defaultBuildTraceWriter(\n storage: CampaignStorage,\n): (cellId: string, dir: string) => CampaignTraceWriter {\n return (cellId, dir) => {\n const spans: Array<Record<string, unknown>> = []\n return {\n span(name, attributes) {\n const startMs = Date.now()\n const record: Record<string, unknown> = { name, cellId, startMs, ...(attributes ?? 
{}) }\n const finish: TraceSpan = {\n end(endAttrs) {\n record.durationMs = Date.now() - startMs\n if (endAttrs) Object.assign(record, endAttrs)\n spans.push(record)\n },\n setAttribute(key, value) {\n record[key] = value\n },\n }\n return finish\n },\n async flush() {\n storage.write(join(dir, 'spans.jsonl'), spans.map((s) => JSON.stringify(s)).join('\\n'))\n },\n }\n }\n}\n\nfunction skippedCell<TScenario extends Scenario, TArtifact>(\n slot: { scenario: TScenario; rep: number; cellId: string; cellSeed: number },\n reason: string,\n): CampaignCellResult<TArtifact> {\n return {\n cellId: slot.cellId,\n scenarioId: slot.scenario.id,\n rep: slot.rep,\n artifact: null as unknown as TArtifact,\n judgeScores: {},\n costUsd: 0,\n durationMs: 0,\n seed: slot.cellSeed,\n cached: false,\n error: `skipped: ${reason}`,\n }\n}\n\ninterface CaptureArgs<TScenario extends Scenario, TArtifact> {\n store: LabeledScenarioStore\n cell: CampaignCellResult<TArtifact>\n scenario: TScenario\n opts: RunCampaignOptions<TScenario, TArtifact>\n now: () => Date\n}\n\nasync function captureToStore<TScenario extends Scenario, TArtifact>(\n args: CaptureArgs<TScenario, TArtifact>,\n): Promise<void> {\n await args.store.observe({\n scenario: args.scenario,\n artifact: args.cell.artifact,\n judgeScores: args.cell.judgeScores,\n source: args.opts.captureSource ?? 'eval-run',\n sourceVersionHash: args.opts.captureSourceVersionHash ?? 
'unknown',\n capturedAt: args.now().toISOString(),\n redactionStatus: 'raw',\n })\n}\n\n// ── Aggregates + manifest hash ────────────────────────────────────────\n\nfunction computeManifestHash(input: {\n scenarios: Scenario[]\n judges: JudgeConfig<unknown>[]\n dispatchRef: string\n seed: number\n reps: number\n}): string {\n const canonical = {\n scenarios: input.scenarios.map((s) => ({ id: s.id, kind: s.kind })),\n judges: input.judges.map((j) => ({ name: j.name, dims: j.dimensions.map((d) => d.key) })),\n dispatch: input.dispatchRef,\n seed: input.seed,\n reps: input.reps,\n }\n return createHash('sha256').update(JSON.stringify(canonical)).digest('hex')\n}\n\nfunction computeAggregates<TArtifact>(\n cells: CampaignCellResult<TArtifact>[],\n judges: JudgeConfig<TArtifact>[],\n seed: number,\n): CampaignAggregates {\n const byJudge: Record<string, JudgeAggregate> = {}\n for (const judge of judges) {\n const scores: number[] = []\n for (const cell of cells) {\n const s = cell.judgeScores[judge.name]\n if (s !== undefined) scores.push(s.composite)\n }\n byJudge[judge.name] = aggregate(scores, seed)\n }\n const byScenario: Record<string, ScenarioAggregate> = {}\n const scenarioGroups = new Map<string, number[]>()\n for (const cell of cells) {\n const composites = Object.values(cell.judgeScores).map((s) => s.composite)\n if (composites.length === 0) continue\n const mean = composites.reduce((a, b) => a + b, 0) / composites.length\n const arr = scenarioGroups.get(cell.scenarioId) ?? 
[]\n arr.push(mean)\n scenarioGroups.set(cell.scenarioId, arr)\n }\n for (const [scenarioId, samples] of scenarioGroups) {\n const ag = aggregate(samples, seed)\n byScenario[scenarioId] = { meanComposite: ag.mean, ci95: ag.ci95, n: ag.n }\n }\n return {\n byJudge,\n byScenario,\n totalCostUsd: cells.reduce((a, c) => a + c.costUsd, 0),\n cellsExecuted: cells.filter((c) => !c.error).length,\n cellsSkipped: cells.filter((c) => c.error?.startsWith('skipped:')).length,\n cellsCached: cells.filter((c) => c.cached).length,\n cellsFailed: cells.filter((c) => c.error && !c.error.startsWith('skipped:')).length,\n }\n}\n\n// Percentile bootstrap CI95 via seeded resampling. Deterministic for a given\n// seed — same campaign re-run produces identical CI bands. Falls back to\n// degenerate intervals at n<=1 (the bootstrap is undefined there).\nfunction aggregate(samples: number[], seed: number): JudgeAggregate {\n const n = samples.length\n if (n === 0) return { mean: 0, stdev: 0, ci95: [0, 0], n: 0 }\n const mean = samples.reduce((a, b) => a + b, 0) / n\n const variance = samples.reduce((a, b) => a + (b - mean) ** 2, 0) / Math.max(1, n - 1)\n const stdev = Math.sqrt(variance)\n const ci = confidenceInterval(samples, 0.95, { seed, resamples: 1000 })\n return { mean, stdev, ci95: [ci.lower, ci.upper], n }\n}\n","/**\n * @experimental\n *\n * `CampaignStorage` — the filesystem seam `runCampaign` writes through\n * (run/cell dirs, the resumability cache, per-cell artifacts, trace spans).\n *\n * The default (`fsCampaignStorage`) is the Node filesystem — identical\n * behavior to the inline `node:fs` calls it replaces, so existing CLI\n * consumers are unaffected. 
`inMemoryCampaignStorage` keeps everything in a\n * `Map`, so the substrate runs in environments WITHOUT a filesystem\n * (Cloudflare Workers, Deno Deploy, other edge runtimes) — the campaign\n * still produces its `CampaignResult` (cells + aggregates) in memory;\n * artifacts/traces simply aren't persisted to disk.\n *\n * Paths are opaque keys to the in-memory adapter — it does not parse them,\n * so the same `join(...)`-built paths work unchanged across both adapters.\n */\nexport interface CampaignStorage {\n /** Ensure a directory exists (recursive). No-op for in-memory. */\n ensureDir(dir: string): void\n /** Does this path exist (as a written file or an ensured dir)? */\n exists(path: string): boolean\n /** Read a UTF-8 file; `undefined` when missing or unreadable. */\n read(path: string): string | undefined\n /** Write a file (string or bytes). Parent dir is assumed ensured. */\n write(path: string, content: string | Uint8Array): void\n}\n\n/** Node-filesystem storage — the default. Lazily requires `node:fs` so the\n * module imports cleanly in non-Node runtimes (where the caller passes\n * `inMemoryCampaignStorage` instead and never constructs this). */\nexport function fsCampaignStorage(): CampaignStorage {\n const { existsSync, mkdirSync, readFileSync, writeFileSync } =\n require('node:fs') as typeof import('node:fs')\n return {\n ensureDir(dir) {\n if (!existsSync(dir)) mkdirSync(dir, { recursive: true })\n },\n exists(path) {\n return existsSync(path)\n },\n read(path) {\n try {\n return readFileSync(path, 'utf8')\n } catch {\n return undefined\n }\n },\n write(path, content) {\n writeFileSync(path, content as Uint8Array)\n },\n }\n}\n\n/** In-memory storage for filesystem-less runtimes. Artifacts + trace spans\n * live in a `Map` for the duration of the run; the `CampaignResult` is\n * fully populated, but nothing is persisted to disk. 
*/\nexport function inMemoryCampaignStorage(): CampaignStorage {\n const files = new Map<string, string | Uint8Array>()\n const dirs = new Set<string>()\n return {\n ensureDir(dir) {\n dirs.add(dir)\n },\n exists(path) {\n return files.has(path) || dirs.has(path)\n },\n read(path) {\n const value = files.get(path)\n if (value === undefined) return undefined\n return typeof value === 'string' ? value : new TextDecoder().decode(value)\n },\n write(path, content) {\n files.set(path, content)\n },\n }\n}\n"],"mappings":";;;;;;;;AAaA,SAAS,kBAAkB;AAC3B,SAAS,YAAY;;;ACiBd,SAAS,oBAAqC;AACnD,QAAM,EAAE,YAAY,WAAW,cAAc,cAAc,IACzD,UAAQ,IAAS;AACnB,SAAO;AAAA,IACL,UAAU,KAAK;AACb,UAAI,CAAC,WAAW,GAAG,EAAG,WAAU,KAAK,EAAE,WAAW,KAAK,CAAC;AAAA,IAC1D;AAAA,IACA,OAAO,MAAM;AACX,aAAO,WAAW,IAAI;AAAA,IACxB;AAAA,IACA,KAAK,MAAM;AACT,UAAI;AACF,eAAO,aAAa,MAAM,MAAM;AAAA,MAClC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,oBAAc,MAAM,OAAqB;AAAA,IAC3C;AAAA,EACF;AACF;AAKO,SAAS,0BAA2C;AACzD,QAAM,QAAQ,oBAAI,IAAiC;AACnD,QAAM,OAAO,oBAAI,IAAY;AAC7B,SAAO;AAAA,IACL,UAAU,KAAK;AACb,WAAK,IAAI,GAAG;AAAA,IACd;AAAA,IACA,OAAO,MAAM;AACX,aAAO,MAAM,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI;AAAA,IACzC;AAAA,IACA,KAAK,MAAM;AACT,YAAM,QAAQ,MAAM,IAAI,IAAI;AAC5B,UAAI,UAAU,OAAW,QAAO;AAChC,aAAO,OAAO,UAAU,WAAW,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK;AAAA,IAC3E;AAAA,IACA,MAAM,MAAM,SAAS;AACnB,YAAM,IAAI,MAAM,OAAO;AAAA,IACzB;AAAA,EACF;AACF;;;ADDA,eAAsB,YACpB,MAC+C;AAC/C,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,QAAM,MAAM,KAAK,QAAQ,MAAM,oBAAI,KAAK;AACxC,QAAM,SAAS,KAAK,UAAU,CAAC;AAC/B,QAAM,UAAU,KAAK,WAAW,kBAAkB;AAElD,UAAQ,UAAU,KAAK,MAAM;AAE7B,QAAM,eAAe,oBAAoB;AAAA,IACvC,WAAW,KAAK;AAAA,IAChB;AAAA,IACA,aAAa,KAAK,SAAS,QAAQ;AAAA,IACnC;AAAA,IACA;AAAA,EACF,CAAC;AAED,QAAM,YAAY,IAAI;AACtB,QAAM,QAAyC,CAAC;AAChD,QAAM,kBAA0C,CAAC;AAGjD,QAAM,WAA0F,CAAC;AACjG,MAAI,YAAY;AAChB,aAAW,YAAY,KAAK,WAAW;AACrC,aAAS,MAAM,GAAG,MAAM,MAAM,OAAO;AACnC,YAAM,SAAS,GAAG,SAAS,EAAE,IAAI,GAAG;AACpC,YAAM,WAAW,
OAAO;AACxB,eAAS,KAAK,EAAE,UAAU,KAAK,QAAQ,SAAS,CAAC;AACjD,mBAAa;AAAA,IACf;AAAA,EACF;AAGA,MAAI,eAAe;AACnB,MAAI,qBAAqB;AACzB,QAAM,kBAAkB,IAAI,gBAAgB;AAI5C,QAAM,QAAyB,CAAC;AAChC,MAAI,UAAU;AACd,QAAM,WAAW;AAEjB,WAAS,IAAI,GAAG,IAAI,gBAAgB,KAAK;AACvC,UAAM;AAAA,OACH,YAAY;AACX,eAAO,MAAM;AACX,gBAAM,QAAQ;AACd,cAAI,SAAS,SAAS,OAAQ;AAC9B,gBAAM,OAAO,SAAS,KAAK;AAC3B,cAAI,oBAAoB;AACtB,qBAAS,KAAK,YAAY,MAAM,sBAAsB,CAAC;AACvD;AAAA,UACF;AACA,gBAAM,SAAS,MAAM,YAAY;AAAA,YAC/B;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,kBAAkB,KAAK,oBAAoB,wBAAwB,OAAO;AAAA,YAC1E,QAAQ,gBAAgB;AAAA,UAC1B,CAAC;AACD,mBAAS,KAAK,OAAO,IAAI;AACzB,0BAAgB,OAAO,KAAK;AAC5B,iBAAO,OAAO,iBAAiB,OAAO,eAAe;AACrD,cAAI,KAAK,gBAAgB,UAAa,gBAAgB,KAAK,aAAa;AACtE,iCAAqB;AAAA,UACvB;AAEA,cAAI,KAAK,gBAAgB,KAAK,iBAAiB,SAAS,CAAC,OAAO,KAAK,OAAO;AAC1E,kBAAM,eAAe;AAAA,cACnB,OAAO,KAAK;AAAA,cACZ,MAAM,OAAO;AAAA,cACb,UAAU,KAAK;AAAA,cACf;AAAA,cACA;AAAA,YACF,CAAC,EAAE,MAAM,CAAC,QAAQ;AAGhB,sBAAQ;AAAA,gBACN,oCAAoC,OAAO,KAAK,MAAM,KAAK,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,cAC7G;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF,GAAG;AAAA,IACL;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,KAAK;AAEvB,QAAM,UAAU,IAAI;AACpB,WAAS,KAAK,CAAC,GAAG,MAAM,EAAE,OAAO,cAAc,EAAE,MAAM,CAAC;AAExD,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,WAAW,UAAU,YAAY;AAAA,IACjC,SAAS,QAAQ,YAAY;AAAA,IAC7B,YAAY,QAAQ,QAAQ,IAAI,UAAU,QAAQ;AAAA,IAClD,OAAO;AAAA,IACP;AAAA,IACA,QAAQ,KAAK;AAAA,IACb;AAAA,IACA,WAAW,KAAK,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,EACnE;AACF;AAeA,eAAe,YACb,MAC2F;AAC3F,QAAM,UAAU,KAAK;AACrB,QAAM,UAAU,KAAK,KAAK,KAAK,QAAQ,KAAK,KAAK,OAAO,QAAQ,mBAAmB,GAAG,CAAC;AACvF,UAAQ,UAAU,OAAO;AAGzB,QAAM,YAAY,KAAK,SAAS,oBAAoB;AACpD,MAAI,KAAK,WAAW;AAClB,UAAM,MAAM,QAAQ,KAAK,SAAS;AAClC,QAAI,QAAQ,QAAW;AACrB,UAAI;AACF,cAAM,SAAS,KAAK,MAAM,GAAG;AAC7B,YAAI,OAAO,WAAW,KAAK,KAAK,QAAQ;AACtC,iBAAO,EAAE,MAAM,EAAE,GAAG,QAAQ,QAAQ,KAAK,GAAG,iBAAiB,CAAC,EAAE;AAAA,QAClE;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,KAAK,IA
AI;AACzB,QAAM,QAAQ,KAAK,iBAAiB,KAAK,KAAK,QAAQ,OAAO;AAC7D,QAAM,kBAA0C,CAAC;AACjD,QAAM,YAAoC;AAAA,IACxC,MAAM,MAAM,MAAM,SAAS;AACzB,YAAM,WAAW,KAAK,SAAS,IAAI;AACnC,cAAQ,UAAU,KAAK,UAAU,IAAI,CAAC;AACtC,cAAQ,MAAM,UAAU,OAAO;AAC/B,sBAAgB,GAAG,KAAK,KAAK,MAAM,IAAI,IAAI,EAAE,IAAI;AACjD,aAAO;AAAA,IACT;AAAA,IACA,MAAM,UAAU,MAAM,OAAO;AAC3B,aAAO,UAAU,MAAM,MAAM,KAAK,UAAU,OAAO,MAAM,CAAC,CAAC;AAAA,IAC7D;AAAA,EACF;AACA,MAAI,YAAY;AAChB,QAAM,OAA0B;AAAA,IAC9B,QAAQ,QAAQ,QAAQ;AACtB,mBAAa;AACb,YAAM,KAAK,QAAQ,MAAM,IAAI,EAAE,WAAW,OAAO,CAAC,EAAE,IAAI;AAAA,IAC1D;AAAA,IACA,UAAU;AACR,aAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,MAAuB;AAAA,IAC3B,QAAQ,KAAK,KAAK;AAAA,IAClB,KAAK,KAAK,KAAK;AAAA,IACf,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ,KAAK;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,MAAI;AACJ,MAAI;AACJ,MAAI;AACF,eAAW,MAAM,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,GAAG;AAAA,EAC7D,SAAS,KAAK;AACZ,mBAAe,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,EAChE;AAKA,QAAM,cAA0C,CAAC;AACjD,MAAI,aAAa,QAAW;AAC1B,eAAW,SAAS,KAAK,KAAK,UAAU,CAAC,GAAG;AAC1C,UAAI,MAAM,aAAa,CAAC,MAAM,UAAU,KAAK,KAAK,QAAQ,EAAG;AAC7D,UAAI;AACF,oBAAY,MAAM,IAAI,IAAI,MAAM,aAAa,OAAO;AAAA,UAClD;AAAA,UACA,UAAU,KAAK,KAAK;AAAA,UACpB,QAAQ,KAAK;AAAA,QACf,CAAC;AAAA,MACH,SAAS,KAAK;AACZ,uBAAe,UAAU,MAAM,IAAI,aAAa,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAChG;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,MAAM,MAAM;AAElB,QAAM,OAAsC;AAAA,IAC1C,QAAQ,KAAK,KAAK;AAAA,IAClB,YAAY,KAAK,KAAK,SAAS;AAAA,IAC/B,KAAK,KAAK,KAAK;AAAA,IACf,UAAW,YAAY;AAAA,IACvB;AAAA,IACA,SAAS;AAAA,IACT,YAAY,KAAK,IAAI,IAAI;AAAA,IACzB,MAAM,KAAK,KAAK;AAAA,IAChB,QAAQ;AAAA,IACR,OAAO;AAAA,EACT;AAEA,MAAI,CAAC,gBAAgB,KAAK,WAAW;AACnC,YAAQ,MAAM,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,EAC/C;AAEA,SAAO,EAAE,MAAM,gBAAgB;AACjC;AAEA,eAAe,aACb,OACA,OACqB;AACrB,SAAO,MAAM,MAAM,KAAK;AAC1B;AAEA,SAAS,wBACP,SACsD;AACtD,SAAO,CAAC,QAAQ,QAAQ;AACtB,UAAM,QAAwC,CAAC;AAC/C,WAAO;AAAA,MACL,KAAK,MAAM,YAAY;AACrB,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAkC,EAAE,MAAM,QAAQ,SAAS,GAAI,cAAc,CAAC,EAAG;AACvF,cAAM,SAAoB;AAAA,UACxB,IAAI,UAAU;AACZ,mBAAO,aAAa,KAAK,IAAI,IAAI;AACjC,gBAAI,SAAU,QAAO,OAAO,QAA
Q,QAAQ;AAC5C,kBAAM,KAAK,MAAM;AAAA,UACnB;AAAA,UACA,aAAa,KAAK,OAAO;AACvB,mBAAO,GAAG,IAAI;AAAA,UAChB;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,MACA,MAAM,QAAQ;AACZ,gBAAQ,MAAM,KAAK,KAAK,aAAa,GAAG,MAAM,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,YACP,MACA,QAC+B;AAC/B,SAAO;AAAA,IACL,QAAQ,KAAK;AAAA,IACb,YAAY,KAAK,SAAS;AAAA,IAC1B,KAAK,KAAK;AAAA,IACV,UAAU;AAAA,IACV,aAAa,CAAC;AAAA,IACd,SAAS;AAAA,IACT,YAAY;AAAA,IACZ,MAAM,KAAK;AAAA,IACX,QAAQ;AAAA,IACR,OAAO,YAAY,MAAM;AAAA,EAC3B;AACF;AAUA,eAAe,eACb,MACe;AACf,QAAM,KAAK,MAAM,QAAQ;AAAA,IACvB,UAAU,KAAK;AAAA,IACf,UAAU,KAAK,KAAK;AAAA,IACpB,aAAa,KAAK,KAAK;AAAA,IACvB,QAAQ,KAAK,KAAK,iBAAiB;AAAA,IACnC,mBAAmB,KAAK,KAAK,4BAA4B;AAAA,IACzD,YAAY,KAAK,IAAI,EAAE,YAAY;AAAA,IACnC,iBAAiB;AAAA,EACnB,CAAC;AACH;AAIA,SAAS,oBAAoB,OAMlB;AACT,QAAM,YAAY;AAAA,IAChB,WAAW,MAAM,UAAU,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,IAClE,QAAQ,MAAM,OAAO,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,EAAE,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;AAAA,IACxF,UAAU,MAAM;AAAA,IAChB,MAAM,MAAM;AAAA,IACZ,MAAM,MAAM;AAAA,EACd;AACA,SAAO,WAAW,QAAQ,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC,EAAE,OAAO,KAAK;AAC5E;AAEA,SAAS,kBACP,OACA,QACA,MACoB;AACpB,QAAM,UAA0C,CAAC;AACjD,aAAW,SAAS,QAAQ;AAC1B,UAAM,SAAmB,CAAC;AAC1B,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,YAAY,MAAM,IAAI;AACrC,UAAI,MAAM,OAAW,QAAO,KAAK,EAAE,SAAS;AAAA,IAC9C;AACA,YAAQ,MAAM,IAAI,IAAI,UAAU,QAAQ,IAAI;AAAA,EAC9C;AACA,QAAM,aAAgD,CAAC;AACvD,QAAM,iBAAiB,oBAAI,IAAsB;AACjD,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,OAAO,OAAO,KAAK,WAAW,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS;AACzE,QAAI,WAAW,WAAW,EAAG;AAC7B,UAAM,OAAO,WAAW,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,WAAW;AAChE,UAAM,MAAM,eAAe,IAAI,KAAK,UAAU,KAAK,CAAC;AACpD,QAAI,KAAK,IAAI;AACb,mBAAe,IAAI,KAAK,YAAY,GAAG;AAAA,EACzC;AACA,aAAW,CAAC,YAAY,OAAO,KAAK,gBAAgB;AAClD,UAAM,KAAK,UAAU,SAAS,IAAI;AAClC,eAAW,UAAU,IAAI,EAAE,eAAe,GAAG,MAAM,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE;AAAA,EAC5E;AACA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,cAAc,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,SAAS,CAAC;AAAA,IA
CrD,eAAe,MAAM,OAAO,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE;AAAA,IAC7C,cAAc,MAAM,OAAO,CAAC,MAAM,EAAE,OAAO,WAAW,UAAU,CAAC,EAAE;AAAA,IACnE,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE;AAAA,IAC3C,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,WAAW,UAAU,CAAC,EAAE;AAAA,EAC/E;AACF;AAKA,SAAS,UAAU,SAAmB,MAA8B;AAClE,QAAM,IAAI,QAAQ;AAClB,MAAI,MAAM,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,EAAE;AAC5D,QAAM,OAAO,QAAQ,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAClD,QAAM,WAAW,QAAQ,OAAO,CAAC,GAAG,MAAM,KAAK,IAAI,SAAS,GAAG,CAAC,IAAI,KAAK,IAAI,GAAG,IAAI,CAAC;AACrF,QAAM,QAAQ,KAAK,KAAK,QAAQ;AAChC,QAAM,KAAK,mBAAmB,SAAS,MAAM,EAAE,MAAM,WAAW,IAAK,CAAC;AACtE,SAAO,EAAE,MAAM,OAAO,MAAM,CAAC,GAAG,OAAO,GAAG,KAAK,GAAG,EAAE;AACtD;","names":[]}
|
|
File without changes
|
|
File without changes
|