@tangle-network/agent-eval 0.44.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/http.d.ts +138 -0
- package/dist/adapters/http.js +196 -0
- package/dist/adapters/http.js.map +1 -0
- package/dist/adapters/langchain.d.ts +91 -0
- package/dist/adapters/langchain.js +34 -0
- package/dist/adapters/langchain.js.map +1 -0
- package/dist/campaign/index.d.ts +3 -3
- package/dist/campaign/index.js +2 -2
- package/dist/{chunk-H5BGRSN4.js → chunk-HRKOCLQA.js} +3 -3
- package/dist/{chunk-RXK7FXLV.js → chunk-J3EIOI3O.js} +7 -2
- package/dist/chunk-J3EIOI3O.js.map +1 -0
- package/dist/contract/index.d.ts +2 -2
- package/dist/contract/index.js +2 -2
- package/dist/openapi.json +1 -1
- package/dist/rl.d.ts +1 -1
- package/dist/{run-campaign-GNDO66B4.js → run-campaign-6UEVBPP3.js} +2 -2
- package/dist/{run-improvement-loop-CbilHQAb.d.ts → run-improvement-loop-pJ4yrx4X.d.ts} +17 -1
- package/dist/{types-DToGONFA.d.ts → types-BURGZ8Ug.d.ts} +8 -0
- package/docs/adapters-observability.md +121 -0
- package/docs/distributed-driver.md +173 -0
- package/docs/quickstart-external.md +190 -0
- package/package.json +11 -1
- package/dist/chunk-RXK7FXLV.js.map +0 -1
- /package/dist/{chunk-H5BGRSN4.js.map → chunk-HRKOCLQA.js.map} +0 -0
- /package/dist/{run-campaign-GNDO66B4.js.map → run-campaign-6UEVBPP3.js.map} +0 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { S as Scenario, D as DispatchFn, g as DispatchContext } from '../types-BURGZ8Ug.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* # `@tangle-network/agent-eval/adapters/http` — distributed Dispatch over HTTP.
|
|
5
|
+
*
|
|
6
|
+
* Decouples driver and worker. The driver (running `runImprovementLoop` or
|
|
7
|
+
* `runCampaign`) can live anywhere — your VPC, a dev laptop, a cron VM. The
|
|
8
|
+
* workers (running the actual agent) can live anywhere else — different
|
|
9
|
+
* regions, different clouds, different boxes — as long as they speak HTTP.
|
|
10
|
+
*
|
|
11
|
+
* Both sides:
|
|
12
|
+
*
|
|
13
|
+
* - **`httpDispatch({ url | resolveUrl, ... })`** — client. Returns a
|
|
14
|
+
* `Dispatch` that POSTs an `HttpDispatchRequestBody` (the scenario plus the serializable `ctx` fields) to a worker URL and parses
|
|
15
|
+
* the artifact back. AbortSignal-aware, retries on network errors, timeouts and 5xx/408/429 responses,
|
|
16
|
+
* bounded timeout per call.
|
|
17
|
+
* - **`runDispatchServer({ dispatch, port, ... })`** — server. Wraps your
|
|
18
|
+
* local `Dispatch` as an HTTP endpoint. Handles auth, JSON parsing,
|
|
19
|
+
* error mapping, and cancellation when the client aborts.
|
|
20
|
+
*
|
|
21
|
+
* # Topology examples
|
|
22
|
+
*
|
|
23
|
+
* **Single-worker:** driver on box A, worker on box B. Set
|
|
24
|
+
* `httpDispatch({ url: 'https://box-b/dispatch' })`.
|
|
25
|
+
*
|
|
26
|
+
* **Multi-region:** N workers across regions. Use `httpDispatch({ resolveUrl })`
|
|
27
|
+
* with a function that picks the URL per cell from `ctx.placement`. Combined
|
|
28
|
+
* with `cellPlacement` on `RunCampaignOptions`, the substrate fans cells
|
|
29
|
+
* across geographies in parallel.
|
|
30
|
+
*
|
|
31
|
+
* **Driver-as-a-service:** driver runs as a long-lived process or service
|
|
32
|
+
* (holds optimization state across generations); workers are stateless
|
|
33
|
+
* HTTP services that can scale horizontally per cell.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
/**
 * Client-side options for `httpDispatch`.
 *
 * Exactly one of `url` / `resolveUrl` must be set; `httpDispatch` throws at
 * construction time when neither or both are provided.
 *
 * `_TArtifact` is not referenced by any member; it exists so the parameter
 * list mirrors `DispatchFn<TScenario, TArtifact>`.
 */
interface HttpDispatchOptions<TScenario extends Scenario, _TArtifact> {
    /** Static endpoint URL. Mutually exclusive with `resolveUrl`. */
    url?: string;
    /**
     * Dynamic per-cell URL resolver, called once per dispatch call (not once
     * per retry). Receives the scenario, the cell id and the substrate
     * placement key (from `RunCampaignOptions.cellPlacement`) and returns the
     * worker URL to invoke. Mutually exclusive with `url`.
     */
    resolveUrl?: (input: {
        scenario: TScenario;
        placement?: string;
        cellId: string;
    }) => string;
    /**
     * Credential for the `Authorization` header, or a (possibly async)
     * function returning it; the function is resolved once per dispatch call.
     * A value that does not already start with `Bearer ` is prefixed with it,
     * so pass either the raw token or the full `Bearer <token>` string.
     */
    auth?: string | (() => string | Promise<string>);
    /**
     * Extra headers merged into every request. They are applied last, so a
     * `Content-Type` or `Authorization` entry here overrides the defaults.
     */
    headers?: Record<string, string>;
    /**
     * Timeout in ms applied to each attempt; every retry gets a fresh timer.
     * Default 5 minutes.
     */
    timeoutMs?: number;
    /**
     * Maximum number of retries after the first attempt. Network errors,
     * per-attempt timeouts and 5xx / 408 / 429 responses are retried with
     * exponential backoff plus jitter. Default 2.
     */
    retries?: number;
    /** Optional fetch override (auth wrappers, custom agent, mocks). */
    fetchImpl?: typeof fetch;
}
|
|
60
|
+
/**
 * JSON body POSTed by `httpDispatch` to the worker: the scenario plus the
 * serializable fields copied from the driver-side dispatch context.
 */
interface HttpDispatchRequestBody<TScenario extends Scenario> {
    /** Scenario handed to the worker's `dispatch`. */
    scenario: TScenario;
    /** Copied from `ctx.cellId`; also reported to the server's `onRequest` hook. */
    cellId: string;
    /** Copied from `ctx.rep`. */
    rep: number;
    /** Copied from `ctx.generation`; omitted from the JSON when undefined. */
    generation?: number;
    /** Copied from `ctx.seed`. */
    seed: number;
    /** Copied from `ctx.placement`; omitted from the JSON when undefined. */
    placement?: string;
    /** Copied from `ctx.cycleId`; omitted from the JSON when undefined. */
    cycleId?: string;
}
|
|
69
|
+
/**
 * JSON body of a successful (200) worker response. `httpDispatch` returns
 * the `artifact` member to its caller.
 */
interface HttpDispatchResponseBody<TArtifact> {
    /** Value the worker's `dispatch` resolved with. */
    artifact: TArtifact;
}
|
|
72
|
+
/**
 * Client half of the HTTP adapter: turns a remote worker endpoint into a
 * `Dispatch`. The worker is expected to run `runDispatchServer`, or any
 * other service that speaks the same wire shape.
 *
 * Cancellation: the per-cell `AbortSignal` supplied by the substrate is
 * forwarded with the request. On the worker side `runDispatchServer` maps
 * the resulting `AbortError` to a 499 (client-closed) so the client
 * doesn't retry.
 *
 * @param opts Endpoint, auth, timeout and retry settings; exactly one of
 *   `url` / `resolveUrl` must be provided.
 * @returns A dispatch function that resolves with the worker's artifact.
 */
declare function httpDispatch<TScenario extends Scenario, TArtifact>(opts: HttpDispatchOptions<TScenario, TArtifact>): DispatchFn<TScenario, TArtifact>;
|
|
81
|
+
/** Worker-side options for `runDispatchServer`. */
interface RunDispatchServerOptions<TScenario extends Scenario, TArtifact> {
    /** Local Dispatch to expose; invoked once for every accepted request. */
    dispatch: DispatchFn<TScenario, TArtifact>;
    /** TCP port to listen on. */
    port: number;
    /** Interface to bind. When omitted the server binds 0.0.0.0. */
    host?: string;
    /**
     * Bearer token that clients must present. Mandatory for anything other
     * than tests: the server refuses to start when this is left undefined.
     * Pass `auth: false` explicitly to run without authentication (meant
     * ONLY for closed-network / internal testing).
     */
    auth: string | false;
    /** Request path served by the endpoint. Default `/dispatch`. */
    path?: string;
    /**
     * Builds the `DispatchContext` for a request, so the worker can attach
     * whatever it needs — typically the trace writer, artifact writer and
     * cost meter. When omitted, the substrate supplies synthetic-but-typed
     * defaults; production deployments should wire real ones (e.g. ship
     * traces to your OTel collector).
     */
    contextFactory?: (req: HttpDispatchRequestBody<TScenario>, signal: AbortSignal) => Promise<DispatchContext>;
    /** Upper bound on the request body size, in bytes. Default 10 MB. */
    maxBodyBytes?: number;
    /** Observability hook, called for every turn whether it succeeded or failed. */
    onRequest?: (event: {
        cellId: string;
        durationMs: number;
        success: boolean;
        error?: unknown;
    }) => void;
}
|
|
112
|
+
/** Handle returned by `runDispatchServer` once the server is listening. */
interface DispatchServerHandle {
    /** Port the server actually bound; differs from the requested one when `port: 0` asked for an ephemeral port. */
    port: number;
    /** Stops accepting new connections and resolves once existing ones have drained. */
    close: () => Promise<void>;
}
|
|
118
|
+
/**
 * Start an HTTP server exposing a local `Dispatch` over the wire. Pair with
 * `httpDispatch` on the driver side.
 *
 * Wire shape:
 *
 *   POST <path>              (default `/dispatch`)
 *   Authorization: Bearer <token>
 *   Body:   HttpDispatchRequestBody
 *   200 OK: HttpDispatchResponseBody
 *   401:    missing/invalid auth
 *   404:    wrong method or path
 *   413:    request body larger than `maxBodyBytes`
 *   499:    dispatch rejected with an `AbortError` (client aborted before completion)
 *   500:    request handling or dispatch threw; body is `{ "error": <message> }`
 *
 * The server does not enforce a per-request timeout of its own; the time
 * bound comes from the client (`HttpDispatchOptions.timeoutMs`).
 *
 * The server is `node:http`-based to keep the runtime dependency surface
 * minimal — works in plain Node, sandbox, or any container.
 *
 * @param opts Dispatch to expose plus bind, auth and limit settings.
 * @returns A handle carrying the bound port and a `close()` function.
 * @throws Error when `opts.auth` is left undefined.
 */
declare function runDispatchServer<TScenario extends Scenario, TArtifact>(opts: RunDispatchServerOptions<TScenario, TArtifact>): Promise<DispatchServerHandle>;
|
|
137
|
+
|
|
138
|
+
export { type DispatchServerHandle, type HttpDispatchOptions, type HttpDispatchRequestBody, type HttpDispatchResponseBody, type RunDispatchServerOptions, httpDispatch, runDispatchServer };
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import "../chunk-NSBPE2FW.js";
|
|
2
|
+
|
|
3
|
+
// src/adapters/http.ts
|
|
4
|
+
/**
 * Normalise the `auth` option into a promise of the credential string.
 *
 * @param auth A string, a (possibly async) function producing one, or a
 *   falsy value when no credential is configured.
 * @returns Promise of the credential, or of `null` when `auth` is falsy.
 */
function resolveAuth(auth) {
  let credential;
  if (!auth) {
    credential = null;
  } else if (typeof auth === "string") {
    credential = auth;
  } else {
    // Provider function: may return the value directly or a promise of it.
    credential = auth();
  }
  return Promise.resolve(credential);
}
|
|
9
|
+
function httpDispatch(opts) {
|
|
10
|
+
if (!opts.url && !opts.resolveUrl) {
|
|
11
|
+
throw new Error("httpDispatch: pass exactly one of `url` or `resolveUrl`.");
|
|
12
|
+
}
|
|
13
|
+
if (opts.url && opts.resolveUrl) {
|
|
14
|
+
throw new Error("httpDispatch: pass exactly one of `url` or `resolveUrl`, not both.");
|
|
15
|
+
}
|
|
16
|
+
const timeoutMs = opts.timeoutMs ?? 5 * 60 * 1e3;
|
|
17
|
+
const maxRetries = opts.retries ?? 2;
|
|
18
|
+
const f = opts.fetchImpl ?? ((...args) => fetch(...args));
|
|
19
|
+
return async (scenario, ctx) => {
|
|
20
|
+
const url = opts.url ?? opts.resolveUrl({ scenario, placement: ctx.placement, cellId: ctx.cellId });
|
|
21
|
+
const authValue = await resolveAuth(opts.auth);
|
|
22
|
+
const body = {
|
|
23
|
+
scenario,
|
|
24
|
+
cellId: ctx.cellId,
|
|
25
|
+
rep: ctx.rep,
|
|
26
|
+
generation: ctx.generation,
|
|
27
|
+
seed: ctx.seed,
|
|
28
|
+
placement: ctx.placement,
|
|
29
|
+
cycleId: ctx.cycleId
|
|
30
|
+
};
|
|
31
|
+
let lastError;
|
|
32
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
33
|
+
const ourTimeout = AbortSignal.timeout(timeoutMs);
|
|
34
|
+
const combinedSignal = AbortSignal.any([ctx.signal, ourTimeout]);
|
|
35
|
+
try {
|
|
36
|
+
const res = await f(url, {
|
|
37
|
+
method: "POST",
|
|
38
|
+
headers: {
|
|
39
|
+
"Content-Type": "application/json",
|
|
40
|
+
...authValue ? { Authorization: authValue.startsWith("Bearer ") ? authValue : `Bearer ${authValue}` } : {},
|
|
41
|
+
...opts.headers
|
|
42
|
+
},
|
|
43
|
+
body: JSON.stringify(body),
|
|
44
|
+
signal: combinedSignal
|
|
45
|
+
});
|
|
46
|
+
if (!res.ok) {
|
|
47
|
+
const retryable = res.status >= 500 || res.status === 408 || res.status === 429;
|
|
48
|
+
if (!retryable || attempt === maxRetries) {
|
|
49
|
+
const text = await res.text().catch(() => "");
|
|
50
|
+
throw new Error(`httpDispatch ${url} failed (${res.status}): ${text.slice(0, 500)}`);
|
|
51
|
+
}
|
|
52
|
+
await sleep(2 ** attempt * 200 + Math.random() * 200);
|
|
53
|
+
continue;
|
|
54
|
+
}
|
|
55
|
+
const parsed = await res.json();
|
|
56
|
+
return parsed.artifact;
|
|
57
|
+
} catch (err) {
|
|
58
|
+
if (ctx.signal.aborted) throw err;
|
|
59
|
+
lastError = err;
|
|
60
|
+
if (attempt === maxRetries) throw err;
|
|
61
|
+
await sleep(2 ** attempt * 200 + Math.random() * 200);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
throw lastError ?? new Error("httpDispatch exhausted retries");
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * The timer is unref'd when the runtime supports it, so a pending sleep on
 * its own does not keep the process alive.
 *
 * @param ms Delay in milliseconds.
 * @returns Promise that resolves (with no value) when the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    const timer = setTimeout(wake, ms);
    const canUnref = typeof timer.unref === "function";
    if (canUnref) timer.unref();
  });
}
|
|
73
|
+
async function runDispatchServer(opts) {
|
|
74
|
+
if (opts.auth === void 0) {
|
|
75
|
+
throw new Error("runDispatchServer: 'auth' is required (pass a bearer-token string, or `auth: false` explicitly for a closed-network test deployment).");
|
|
76
|
+
}
|
|
77
|
+
const path = opts.path ?? "/dispatch";
|
|
78
|
+
const maxBytes = opts.maxBodyBytes ?? 10 * 1024 * 1024;
|
|
79
|
+
const expectedAuth = typeof opts.auth === "string" ? `Bearer ${opts.auth.replace(/^Bearer\s+/, "")}` : null;
|
|
80
|
+
const { createServer } = await import("http");
|
|
81
|
+
const server = createServer(async (req, res) => {
|
|
82
|
+
const start = Date.now();
|
|
83
|
+
let cellId = "unknown";
|
|
84
|
+
let success = false;
|
|
85
|
+
let errCaught;
|
|
86
|
+
try {
|
|
87
|
+
if (req.method !== "POST" || req.url?.split("?")[0] !== path) {
|
|
88
|
+
res.statusCode = 404;
|
|
89
|
+
res.end("not found");
|
|
90
|
+
return;
|
|
91
|
+
}
|
|
92
|
+
if (expectedAuth) {
|
|
93
|
+
const got = req.headers["authorization"];
|
|
94
|
+
if (got !== expectedAuth) {
|
|
95
|
+
res.statusCode = 401;
|
|
96
|
+
res.end("unauthorized");
|
|
97
|
+
return;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
const chunks = [];
|
|
101
|
+
let totalBytes = 0;
|
|
102
|
+
const aborter = new AbortController();
|
|
103
|
+
req.on("close", () => {
|
|
104
|
+
if (!res.writableEnded) aborter.abort();
|
|
105
|
+
});
|
|
106
|
+
for await (const chunk of req) {
|
|
107
|
+
const buf = chunk;
|
|
108
|
+
totalBytes += buf.length;
|
|
109
|
+
if (totalBytes > maxBytes) {
|
|
110
|
+
res.statusCode = 413;
|
|
111
|
+
res.end("payload too large");
|
|
112
|
+
return;
|
|
113
|
+
}
|
|
114
|
+
chunks.push(buf);
|
|
115
|
+
}
|
|
116
|
+
const body = JSON.parse(Buffer.concat(chunks).toString("utf8"));
|
|
117
|
+
cellId = body.cellId;
|
|
118
|
+
const ctx = opts.contextFactory ? await opts.contextFactory(body, aborter.signal) : {
|
|
119
|
+
cellId: body.cellId,
|
|
120
|
+
rep: body.rep,
|
|
121
|
+
generation: body.generation,
|
|
122
|
+
seed: body.seed,
|
|
123
|
+
signal: aborter.signal,
|
|
124
|
+
placement: body.placement,
|
|
125
|
+
cycleId: body.cycleId,
|
|
126
|
+
trace: NOOP_TRACE,
|
|
127
|
+
artifacts: NOOP_ARTIFACTS,
|
|
128
|
+
cost: NOOP_COST
|
|
129
|
+
};
|
|
130
|
+
const artifact = await opts.dispatch(body.scenario, ctx);
|
|
131
|
+
const responseBody = { artifact };
|
|
132
|
+
res.statusCode = 200;
|
|
133
|
+
res.setHeader("content-type", "application/json");
|
|
134
|
+
res.end(JSON.stringify(responseBody));
|
|
135
|
+
success = true;
|
|
136
|
+
} catch (err) {
|
|
137
|
+
errCaught = err;
|
|
138
|
+
if (err?.name === "AbortError") {
|
|
139
|
+
res.statusCode = 499;
|
|
140
|
+
res.end("client aborted");
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
res.statusCode = 500;
|
|
144
|
+
res.setHeader("content-type", "application/json");
|
|
145
|
+
res.end(JSON.stringify({ error: err instanceof Error ? err.message : String(err) }));
|
|
146
|
+
} finally {
|
|
147
|
+
opts.onRequest?.({
|
|
148
|
+
cellId,
|
|
149
|
+
durationMs: Date.now() - start,
|
|
150
|
+
success,
|
|
151
|
+
error: errCaught
|
|
152
|
+
});
|
|
153
|
+
}
|
|
154
|
+
});
|
|
155
|
+
await new Promise((resolve, reject) => {
|
|
156
|
+
server.once("error", reject);
|
|
157
|
+
server.listen(opts.port, opts.host ?? "0.0.0.0", () => resolve());
|
|
158
|
+
});
|
|
159
|
+
const addr = server.address();
|
|
160
|
+
const boundPort = typeof addr === "object" && addr ? addr.port : opts.port;
|
|
161
|
+
return {
|
|
162
|
+
port: boundPort,
|
|
163
|
+
close: () => new Promise((resolve, reject) => {
|
|
164
|
+
server.close((err) => err ? reject(err) : resolve());
|
|
165
|
+
})
|
|
166
|
+
};
|
|
167
|
+
}
|
|
168
|
+
var NOOP_TRACE = {
|
|
169
|
+
span: () => ({
|
|
170
|
+
end: () => {
|
|
171
|
+
},
|
|
172
|
+
setAttribute: () => {
|
|
173
|
+
},
|
|
174
|
+
setStatus: () => {
|
|
175
|
+
},
|
|
176
|
+
recordException: () => {
|
|
177
|
+
},
|
|
178
|
+
addEvent: () => {
|
|
179
|
+
}
|
|
180
|
+
})
|
|
181
|
+
};
|
|
182
|
+
// Default artifact store used when no `contextFactory` is supplied: writes
// are dropped, reads find nothing, listings are empty.
var NOOP_ARTIFACTS = {
  write: async () => undefined,
  read: async () => undefined,
  list: async () => {
    return [];
  }
};
|
|
187
|
+
// Default cost meter used when no `contextFactory` is supplied: records are
// ignored and the running total is always zero.
var NOOP_COST = {
  record: () => {},
  total: () => {
    return 0;
  }
};
|
|
192
|
+
export {
|
|
193
|
+
httpDispatch,
|
|
194
|
+
runDispatchServer
|
|
195
|
+
};
|
|
196
|
+
//# sourceMappingURL=http.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/adapters/http.ts"],"sourcesContent":["/**\n * # `@tangle-network/agent-eval/adapters/http` — distributed Dispatch over HTTP.\n *\n * Decouples driver and worker. The driver (running `runImprovementLoop` or\n * `runCampaign`) can live anywhere — your VPC, a dev laptop, a cron VM. The\n * workers (running the actual agent) can live anywhere else — different\n * regions, different clouds, different boxes — as long as they speak HTTP.\n *\n * Both sides:\n *\n * - **`httpDispatch({ url | resolveUrl, ... })`** — client. Returns a\n * `Dispatch` that POSTs `{ scenario, ctx }` to a worker URL and parses\n * the artifact back. AbortSignal-aware, retries on idempotent errors,\n * bounded timeout per call.\n * - **`runDispatchServer({ dispatch, port, ... })`** — server. Wraps your\n * local `Dispatch` as an HTTP endpoint. Handles auth, JSON parsing,\n * error mapping, and cancellation when the client aborts.\n *\n * # Topology examples\n *\n * **Single-worker:** driver on box A, worker on box B. Set\n * `httpDispatch({ url: 'https://box-b/dispatch' })`.\n *\n * **Multi-region:** N workers across regions. Use `httpDispatch({ resolveUrl })`\n * with a function that picks the URL per cell from `ctx.placement`. 
Combined\n * with `cellPlacement` on `RunCampaignOptions`, the substrate fans cells\n * across geographies in parallel.\n *\n * **Driver-as-a-service:** driver runs as a long-lived process or service\n * (holds optimization state across generations); workers are stateless\n * HTTP services that can scale horizontally per cell.\n */\n\nimport type { Dispatch, DispatchContext, Scenario } from '../contract'\n\n// ── Client ───────────────────────────────────────────────────────────\n\n// eslint-disable-next-line @typescript-eslint/no-unused-vars -- TArtifact is unused\n// in this options interface but kept as a parameter so callers can write\n// `HttpDispatchOptions<MyScenario, MyArtifact>` symmetrically with\n// `Dispatch<MyScenario, MyArtifact>`. Marking it unused at the position\n// where it bites.\nexport interface HttpDispatchOptions<TScenario extends Scenario, _TArtifact> {\n /** Static endpoint URL. Mutually exclusive with `resolveUrl`. */\n url?: string\n /**\n * Dynamic per-cell URL resolver. Receives the scenario + the substrate\n * placement key (from `RunCampaignOptions.cellPlacement`) and returns the\n * worker URL to invoke. Mutually exclusive with `url`.\n */\n resolveUrl?: (input: { scenario: TScenario; placement?: string; cellId: string }) => string\n /** Bearer token or static auth string set as `Authorization`. */\n auth?: string | (() => string | Promise<string>)\n /** Extra headers merged into every request. */\n headers?: Record<string, string>\n /** Per-call timeout in ms. Default 5 minutes. */\n timeoutMs?: number\n /** How many idempotent retries on 5xx / network errors. Default 2. */\n retries?: number\n /** Optional fetch override (auth wrappers, custom agent, mocks). 
*/\n fetchImpl?: typeof fetch\n}\n\nexport interface HttpDispatchRequestBody<TScenario extends Scenario> {\n scenario: TScenario\n cellId: string\n rep: number\n generation?: number\n seed: number\n placement?: string\n cycleId?: string\n}\n\nexport interface HttpDispatchResponseBody<TArtifact> {\n artifact: TArtifact\n}\n\nfunction resolveAuth(auth: HttpDispatchOptions<Scenario, unknown>['auth']): Promise<string | null> {\n if (!auth) return Promise.resolve(null)\n if (typeof auth === 'string') return Promise.resolve(auth)\n return Promise.resolve(auth())\n}\n\n/**\n * Wrap a remote HTTP endpoint as a `Dispatch`. The remote side should run\n * `runDispatchServer` (or any service that speaks the same wire shape).\n *\n * Cancellation: the substrate's per-cell `AbortSignal` is forwarded; the\n * server's `runDispatchServer` translates the resulting `AbortError` into\n * a 499 (client-closed) so the client doesn't retry.\n */\nexport function httpDispatch<TScenario extends Scenario, TArtifact>(\n opts: HttpDispatchOptions<TScenario, TArtifact>,\n): Dispatch<TScenario, TArtifact> {\n if (!opts.url && !opts.resolveUrl) {\n throw new Error('httpDispatch: pass exactly one of `url` or `resolveUrl`.')\n }\n if (opts.url && opts.resolveUrl) {\n throw new Error('httpDispatch: pass exactly one of `url` or `resolveUrl`, not both.')\n }\n const timeoutMs = opts.timeoutMs ?? 5 * 60 * 1000\n const maxRetries = opts.retries ?? 2\n const f: typeof fetch = opts.fetchImpl ?? ((...args) => fetch(...args))\n\n return async (scenario, ctx) => {\n const url = opts.url ?? 
opts.resolveUrl!({ scenario, placement: ctx.placement, cellId: ctx.cellId })\n const authValue = await resolveAuth(opts.auth)\n const body: HttpDispatchRequestBody<TScenario> = {\n scenario,\n cellId: ctx.cellId,\n rep: ctx.rep,\n generation: ctx.generation,\n seed: ctx.seed,\n placement: ctx.placement,\n cycleId: ctx.cycleId,\n }\n\n let lastError: unknown\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n // Compose the request signal: caller's signal OR our timeout.\n const ourTimeout = AbortSignal.timeout(timeoutMs)\n const combinedSignal = AbortSignal.any([ctx.signal, ourTimeout])\n try {\n const res = await f(url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n ...(authValue ? { Authorization: authValue.startsWith('Bearer ') ? authValue : `Bearer ${authValue}` } : {}),\n ...opts.headers,\n },\n body: JSON.stringify(body),\n signal: combinedSignal,\n })\n if (!res.ok) {\n // 4xx is non-retryable (caller error, auth, bad scenario shape).\n // 5xx / 408 / 429 / 502 / 503 / 504 are retryable.\n const retryable = res.status >= 500 || res.status === 408 || res.status === 429\n if (!retryable || attempt === maxRetries) {\n const text = await res.text().catch(() => '')\n throw new Error(`httpDispatch ${url} failed (${res.status}): ${text.slice(0, 500)}`)\n }\n // exponential backoff with jitter\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n continue\n }\n const parsed = (await res.json()) as HttpDispatchResponseBody<TArtifact>\n return parsed.artifact\n } catch (err) {\n // Caller-driven abort is terminal — never retry.\n if (ctx.signal.aborted) throw err\n lastError = err\n if (attempt === maxRetries) throw err\n await sleep(2 ** attempt * 200 + Math.random() * 200)\n }\n }\n throw lastError ?? 
new Error('httpDispatch exhausted retries')\n }\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => {\n const t = setTimeout(resolve, ms)\n // Don't keep node process alive purely for backoff sleeps.\n if (typeof (t as { unref?: () => void }).unref === 'function') (t as { unref: () => void }).unref()\n })\n}\n\n// ── Server ───────────────────────────────────────────────────────────\n\nexport interface RunDispatchServerOptions<TScenario extends Scenario, TArtifact> {\n /** The Dispatch this server exposes — what runs when a request lands. */\n dispatch: Dispatch<TScenario, TArtifact>\n /** TCP port to bind. */\n port: number\n /** Optional bind host; defaults to 0.0.0.0. */\n host?: string\n /** Required for any non-test deployment: the bearer token clients must\n * send. The substrate refuses to start without auth unless `auth: false`\n * is set explicitly (intended ONLY for closed-network/internal testing). */\n auth: string | false\n /** Path the server listens on. Default `/dispatch`. */\n path?: string\n /**\n * Per-request handler that wraps `dispatch` with whatever context the\n * worker side needs to construct a `DispatchContext` — typically the\n * trace writer, artifact writer, and cost meter. The substrate provides\n * synthetic-but-typed defaults if not supplied; production deployments\n * should wire real ones (e.g. ship traces to your OTel collector).\n */\n contextFactory?: (req: HttpDispatchRequestBody<TScenario>, signal: AbortSignal) => Promise<DispatchContext>\n /** Optional max payload size for the request body (bytes). Default 10 MB. */\n maxBodyBytes?: number\n /** Hook for observability — called on every successful or failed turn. */\n onRequest?: (event: {\n cellId: string\n durationMs: number\n success: boolean\n error?: unknown\n }) => void\n}\n\nexport interface DispatchServerHandle {\n /** The actual bound port (useful when `port: 0` requests an ephemeral port). 
*/\n port: number\n /** Stop accepting new connections and drain existing ones. */\n close: () => Promise<void>\n}\n\n/**\n * Start an HTTP server exposing a local `Dispatch` over the wire. Pair with\n * `httpDispatch` on the driver side.\n *\n * Wire shape:\n *\n * POST /dispatch\n * Authorization: Bearer <token>\n * Body: HttpDispatchRequestBody\n * 200 OK: HttpDispatchResponseBody\n * 401: missing/invalid auth\n * 408: per-request timeout exceeded\n * 499: client aborted before completion\n * 500: dispatch threw\n *\n * The server is `node:http`-based to keep the runtime dependency surface\n * minimal — works in plain Node, sandbox, or any container.\n */\nexport async function runDispatchServer<TScenario extends Scenario, TArtifact>(\n opts: RunDispatchServerOptions<TScenario, TArtifact>,\n): Promise<DispatchServerHandle> {\n if (opts.auth === undefined) {\n throw new Error(\"runDispatchServer: 'auth' is required (pass a bearer-token string, or `auth: false` explicitly for a closed-network test deployment).\")\n }\n const path = opts.path ?? '/dispatch'\n const maxBytes = opts.maxBodyBytes ?? 10 * 1024 * 1024\n const expectedAuth = typeof opts.auth === 'string' ? `Bearer ${opts.auth.replace(/^Bearer\\s+/, '')}` : null\n\n // Lazy-import node:http so the file is usable from non-Node bundlers\n // that import the client side only (e.g. an edge driver shipping\n // httpDispatch alone). 
Server side is opt-in by calling this function.\n const { createServer } = await import('node:http')\n\n const server = createServer(async (req, res) => {\n const start = Date.now()\n let cellId = 'unknown'\n let success = false\n let errCaught: unknown\n\n try {\n if (req.method !== 'POST' || req.url?.split('?')[0] !== path) {\n res.statusCode = 404\n res.end('not found')\n return\n }\n if (expectedAuth) {\n const got = req.headers['authorization']\n if (got !== expectedAuth) {\n res.statusCode = 401\n res.end('unauthorized')\n return\n }\n }\n\n // Read body up to maxBytes\n const chunks: Buffer[] = []\n let totalBytes = 0\n const aborter = new AbortController()\n req.on('close', () => {\n if (!res.writableEnded) aborter.abort()\n })\n\n for await (const chunk of req) {\n const buf = chunk as Buffer\n totalBytes += buf.length\n if (totalBytes > maxBytes) {\n res.statusCode = 413\n res.end('payload too large')\n return\n }\n chunks.push(buf)\n }\n\n const body = JSON.parse(Buffer.concat(chunks).toString('utf8')) as HttpDispatchRequestBody<TScenario>\n cellId = body.cellId\n\n const ctx: DispatchContext = opts.contextFactory\n ? 
await opts.contextFactory(body, aborter.signal)\n : {\n cellId: body.cellId,\n rep: body.rep,\n generation: body.generation,\n seed: body.seed,\n signal: aborter.signal,\n placement: body.placement,\n cycleId: body.cycleId,\n trace: NOOP_TRACE,\n artifacts: NOOP_ARTIFACTS,\n cost: NOOP_COST,\n }\n\n const artifact = await opts.dispatch(body.scenario, ctx)\n const responseBody: HttpDispatchResponseBody<TArtifact> = { artifact }\n\n res.statusCode = 200\n res.setHeader('content-type', 'application/json')\n res.end(JSON.stringify(responseBody))\n success = true\n } catch (err) {\n errCaught = err\n // Client-cancelled — they don't care about the result.\n if ((err as Error)?.name === 'AbortError') {\n res.statusCode = 499\n res.end('client aborted')\n return\n }\n res.statusCode = 500\n res.setHeader('content-type', 'application/json')\n res.end(JSON.stringify({ error: err instanceof Error ? err.message : String(err) }))\n } finally {\n opts.onRequest?.({\n cellId,\n durationMs: Date.now() - start,\n success,\n error: errCaught,\n })\n }\n })\n\n await new Promise<void>((resolve, reject) => {\n server.once('error', reject)\n server.listen(opts.port, opts.host ?? '0.0.0.0', () => resolve())\n })\n\n const addr = server.address()\n const boundPort = typeof addr === 'object' && addr ? addr.port : opts.port\n\n return {\n port: boundPort,\n close: () =>\n new Promise<void>((resolve, reject) => {\n server.close((err) => (err ? 
reject(err) : resolve()))\n }),\n }\n}\n\n// ── No-op default ctx machinery (worker can replace via contextFactory) ──\n\nconst NOOP_TRACE = {\n span: () => ({\n end: () => {},\n setAttribute: () => {},\n setStatus: () => {},\n recordException: () => {},\n addEvent: () => {},\n }),\n} as unknown as DispatchContext['trace']\n\nconst NOOP_ARTIFACTS = {\n write: async () => undefined,\n read: async () => undefined,\n list: async () => [],\n} as unknown as DispatchContext['artifacts']\n\nconst NOOP_COST = {\n record: () => {},\n total: () => 0,\n} as unknown as DispatchContext['cost']\n"],"mappings":";;;AA6EA,SAAS,YAAY,MAA8E;AACjG,MAAI,CAAC,KAAM,QAAO,QAAQ,QAAQ,IAAI;AACtC,MAAI,OAAO,SAAS,SAAU,QAAO,QAAQ,QAAQ,IAAI;AACzD,SAAO,QAAQ,QAAQ,KAAK,CAAC;AAC/B;AAUO,SAAS,aACd,MACgC;AAChC,MAAI,CAAC,KAAK,OAAO,CAAC,KAAK,YAAY;AACjC,UAAM,IAAI,MAAM,0DAA0D;AAAA,EAC5E;AACA,MAAI,KAAK,OAAO,KAAK,YAAY;AAC/B,UAAM,IAAI,MAAM,oEAAoE;AAAA,EACtF;AACA,QAAM,YAAY,KAAK,aAAa,IAAI,KAAK;AAC7C,QAAM,aAAa,KAAK,WAAW;AACnC,QAAM,IAAkB,KAAK,cAAc,IAAI,SAAS,MAAM,GAAG,IAAI;AAErE,SAAO,OAAO,UAAU,QAAQ;AAC9B,UAAM,MAAM,KAAK,OAAO,KAAK,WAAY,EAAE,UAAU,WAAW,IAAI,WAAW,QAAQ,IAAI,OAAO,CAAC;AACnG,UAAM,YAAY,MAAM,YAAY,KAAK,IAAI;AAC7C,UAAM,OAA2C;AAAA,MAC/C;AAAA,MACA,QAAQ,IAAI;AAAA,MACZ,KAAK,IAAI;AAAA,MACT,YAAY,IAAI;AAAA,MAChB,MAAM,IAAI;AAAA,MACV,WAAW,IAAI;AAAA,MACf,SAAS,IAAI;AAAA,IACf;AAEA,QAAI;AACJ,aAAS,UAAU,GAAG,WAAW,YAAY,WAAW;AAEtD,YAAM,aAAa,YAAY,QAAQ,SAAS;AAChD,YAAM,iBAAiB,YAAY,IAAI,CAAC,IAAI,QAAQ,UAAU,CAAC;AAC/D,UAAI;AACF,cAAM,MAAM,MAAM,EAAE,KAAK;AAAA,UACvB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,gBAAgB;AAAA,YAChB,GAAI,YAAY,EAAE,eAAe,UAAU,WAAW,SAAS,IAAI,YAAY,UAAU,SAAS,GAAG,IAAI,CAAC;AAAA,YAC1G,GAAG,KAAK;AAAA,UACV;AAAA,UACA,MAAM,KAAK,UAAU,IAAI;AAAA,UACzB,QAAQ;AAAA,QACV,CAAC;AACD,YAAI,CAAC,IAAI,IAAI;AAGX,gBAAM,YAAY,IAAI,UAAU,OAAO,IAAI,WAAW,OAAO,IAAI,WAAW;AAC5E,cAAI,CAAC,aAAa,YAAY,YAAY;AACxC,kBAAM,OAAO,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,EAAE;AAC5C,kBAAM,IAAI,MAAM,gBAAgB,GAAG,YAAY,IAAI,MAAM,MAAM,KAAK,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,UACrF;AAEA,gBAAM,M
AAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AACpD;AAAA,QACF;AACA,cAAM,SAAU,MAAM,IAAI,KAAK;AAC/B,eAAO,OAAO;AAAA,MAChB,SAAS,KAAK;AAEZ,YAAI,IAAI,OAAO,QAAS,OAAM;AAC9B,oBAAY;AACZ,YAAI,YAAY,WAAY,OAAM;AAClC,cAAM,MAAM,KAAK,UAAU,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,MACtD;AAAA,IACF;AACA,UAAM,aAAa,IAAI,MAAM,gCAAgC;AAAA,EAC/D;AACF;AAEA,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAM,IAAI,WAAW,SAAS,EAAE;AAEhC,QAAI,OAAQ,EAA6B,UAAU,WAAY,CAAC,EAA4B,MAAM;AAAA,EACpG,CAAC;AACH;AA6DA,eAAsB,kBACpB,MAC+B;AAC/B,MAAI,KAAK,SAAS,QAAW;AAC3B,UAAM,IAAI,MAAM,uIAAuI;AAAA,EACzJ;AACA,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,WAAW,KAAK,gBAAgB,KAAK,OAAO;AAClD,QAAM,eAAe,OAAO,KAAK,SAAS,WAAW,UAAU,KAAK,KAAK,QAAQ,cAAc,EAAE,CAAC,KAAK;AAKvG,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,MAAW;AAEjD,QAAM,SAAS,aAAa,OAAO,KAAK,QAAQ;AAC9C,UAAM,QAAQ,KAAK,IAAI;AACvB,QAAI,SAAS;AACb,QAAI,UAAU;AACd,QAAI;AAEJ,QAAI;AACF,UAAI,IAAI,WAAW,UAAU,IAAI,KAAK,MAAM,GAAG,EAAE,CAAC,MAAM,MAAM;AAC5D,YAAI,aAAa;AACjB,YAAI,IAAI,WAAW;AACnB;AAAA,MACF;AACA,UAAI,cAAc;AAChB,cAAM,MAAM,IAAI,QAAQ,eAAe;AACvC,YAAI,QAAQ,cAAc;AACxB,cAAI,aAAa;AACjB,cAAI,IAAI,cAAc;AACtB;AAAA,QACF;AAAA,MACF;AAGA,YAAM,SAAmB,CAAC;AAC1B,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB;AACpC,UAAI,GAAG,SAAS,MAAM;AACpB,YAAI,CAAC,IAAI,cAAe,SAAQ,MAAM;AAAA,MACxC,CAAC;AAED,uBAAiB,SAAS,KAAK;AAC7B,cAAM,MAAM;AACZ,sBAAc,IAAI;AAClB,YAAI,aAAa,UAAU;AACzB,cAAI,aAAa;AACjB,cAAI,IAAI,mBAAmB;AAC3B;AAAA,QACF;AACA,eAAO,KAAK,GAAG;AAAA,MACjB;AAEA,YAAM,OAAO,KAAK,MAAM,OAAO,OAAO,MAAM,EAAE,SAAS,MAAM,CAAC;AAC9D,eAAS,KAAK;AAEd,YAAM,MAAuB,KAAK,iBAC9B,MAAM,KAAK,eAAe,MAAM,QAAQ,MAAM,IAC9C;AAAA,QACE,QAAQ,KAAK;AAAA,QACb,KAAK,KAAK;AAAA,QACV,YAAY,KAAK;AAAA,QACjB,MAAM,KAAK;AAAA,QACX,QAAQ,QAAQ;AAAA,QAChB,WAAW,KAAK;AAAA,QAChB,SAAS,KAAK;AAAA,QACd,OAAO;AAAA,QACP,WAAW;AAAA,QACX,MAAM;AAAA,MACR;AAEJ,YAAM,WAAW,MAAM,KAAK,SAAS,KAAK,UAAU,GAAG;AACvD,YAAM,eAAoD,EAAE,SAAS;AAErE,UAAI,aAAa;AACjB,UAAI,UAAU,gBAAgB,kBAAkB;AAChD,UAAI,IAAI,KAAK,UAAU,YAAY,CAAC;AACpC,gBAAU;AAAA,IACZ,SAAS,KAAK;AACZ,kBAAY;AAEZ,UAAK,KAAe,SAAS,cAAc;AACzC,YAAI,aAAa;AACjB,YAAI,IAAI,gBAAgB;AACxB
;AAAA,MACF;AACA,UAAI,aAAa;AACjB,UAAI,UAAU,gBAAgB,kBAAkB;AAChD,UAAI,IAAI,KAAK,UAAU,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC,CAAC;AAAA,IACrF,UAAE;AACA,WAAK,YAAY;AAAA,QACf;AAAA,QACA,YAAY,KAAK,IAAI,IAAI;AAAA,QACzB;AAAA,QACA,OAAO;AAAA,MACT,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AAED,QAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,WAAO,KAAK,SAAS,MAAM;AAC3B,WAAO,OAAO,KAAK,MAAM,KAAK,QAAQ,WAAW,MAAM,QAAQ,CAAC;AAAA,EAClE,CAAC;AAED,QAAM,OAAO,OAAO,QAAQ;AAC5B,QAAM,YAAY,OAAO,SAAS,YAAY,OAAO,KAAK,OAAO,KAAK;AAEtE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,OAAO,MACL,IAAI,QAAc,CAAC,SAAS,WAAW;AACrC,aAAO,MAAM,CAAC,QAAS,MAAM,OAAO,GAAG,IAAI,QAAQ,CAAE;AAAA,IACvD,CAAC;AAAA,EACL;AACF;AAIA,IAAM,aAAa;AAAA,EACjB,MAAM,OAAO;AAAA,IACX,KAAK,MAAM;AAAA,IAAC;AAAA,IACZ,cAAc,MAAM;AAAA,IAAC;AAAA,IACrB,WAAW,MAAM;AAAA,IAAC;AAAA,IAClB,iBAAiB,MAAM;AAAA,IAAC;AAAA,IACxB,UAAU,MAAM;AAAA,IAAC;AAAA,EACnB;AACF;AAEA,IAAM,iBAAiB;AAAA,EACrB,OAAO,YAAY;AAAA,EACnB,MAAM,YAAY;AAAA,EAClB,MAAM,YAAY,CAAC;AACrB;AAEA,IAAM,YAAY;AAAA,EAChB,QAAQ,MAAM;AAAA,EAAC;AAAA,EACf,OAAO,MAAM;AACf;","names":[]}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { S as Scenario, n as JudgeScore, D as DispatchFn, J as JudgeConfig } from '../types-BURGZ8Ug.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* # `@tangle-network/agent-eval/adapters/langchain` — wrap any LangChain
|
|
5
|
+
* Runnable as a `Dispatch` (or `JudgeConfig`).
|
|
6
|
+
*
|
|
7
|
+
* **Why structural, not pinned**: we don't depend on `@langchain/core` at
|
|
8
|
+
* install time. The adapter accepts anything with the canonical LangChain
|
|
9
|
+
* Runnable shape (`invoke(input, config?)`), so it works with their
|
|
10
|
+
* `Runnable`, `RunnableSequence`, `RunnableMap`, `RunnablePassthrough`,
|
|
11
|
+
* and any custom Runnable-shaped object. No version pin, no peer dep,
|
|
12
|
+
* no bundle-bloat risk.
|
|
13
|
+
*
|
|
14
|
+
* **Why this exists**: the most-asked question from foreign agent
|
|
15
|
+
* builders is "I'm already on LangChain — how do I plug in?". The answer
|
|
16
|
+
* is one function. Wrap your existing Runnable, pass the Dispatch into
|
|
17
|
+
* `runEval` / `runImprovementLoop`, ship.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/** Minimal structural stand-in for a LangChain Runnable: any object exposing `invoke(input, config?)` that returns a Promise of the output. The only named config field is an optional `signal`; every other key is accepted and typed `unknown`. */
interface RunnableLike<TInput, TOutput> {
|
|
21
|
+
invoke(input: TInput, config?: {
|
|
22
|
+
signal?: AbortSignal;
|
|
23
|
+
[key: string]: unknown;
|
|
24
|
+
}): Promise<TOutput>;
|
|
25
|
+
}
|
|
26
|
+
/** Options accepted by `langchainDispatch`: the Runnable to invoke with each scenario, plus an optional config object passed along on every call. */
interface LangchainDispatchOptions<TScenario extends Scenario, TArtifact> {
|
|
27
|
+
/** The Runnable (or RunnableSequence, or anything `.invoke`able). */
|
|
28
|
+
runnable: RunnableLike<TScenario, TArtifact>;
|
|
29
|
+
/**
|
|
30
|
+
* Optional config merged into every `invoke` call — tags, metadata,
|
|
31
|
+
* callbacks, runName. The substrate's per-cell `AbortSignal` is
|
|
32
|
+
* always merged in last (and so wins).
|
|
33
|
+
*/
|
|
34
|
+
config?: Record<string, unknown>;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Wrap a LangChain Runnable as a `Dispatch`. The Runnable's input must
|
|
38
|
+
* accept the scenario (typically you'll shape it via
|
|
39
|
+
* `RunnableMap`/`RunnableLambda` upstream); its output is the artifact
|
|
40
|
+
* the engine + judges see.
|
|
41
|
+
*
|
|
42
|
+
* @param opts - The Runnable to wrap and the optional config merged into each `invoke` call.
* @returns A `DispatchFn` that invokes the Runnable with the scenario and resolves to its output.
*
* @example
|
|
43
|
+
* const chain = prompt.pipe(model).pipe(parser)
|
|
44
|
+
* const dispatch = langchainDispatch({ runnable: chain })
|
|
45
|
+
* await runEval({ scenarios, dispatch, judges: [...], storage, runDir })
|
|
46
|
+
*/
|
|
47
|
+
declare function langchainDispatch<TScenario extends Scenario, TArtifact>(opts: LangchainDispatchOptions<TScenario, TArtifact>): DispatchFn<TScenario, TArtifact>;
|
|
48
|
+
/** Options accepted by `langchainJudge`: the judge's name and dimensions, the scoring Runnable, and an optional scenario filter. */
interface LangchainJudgeOptions<TArtifact, TScenario extends Scenario> {
|
|
49
|
+
/** Judge name; appears in `CampaignResult.aggregates.byJudge`. */
|
|
50
|
+
name: string;
|
|
51
|
+
/**
|
|
52
|
+
* Dimensions the judge scores. Used both for the judge's own prompt
|
|
53
|
+
* (if it reads them) and for the aggregator's `byJudge` rollup.
|
|
54
|
+
*/
|
|
55
|
+
dimensions: {
|
|
56
|
+
key: string;
|
|
57
|
+
description: string;
|
|
58
|
+
}[];
|
|
59
|
+
/**
|
|
60
|
+
* A Runnable that takes `{ artifact, scenario }` and returns a
|
|
61
|
+
* partial `JudgeScore` — the dimensions map at minimum. `composite`
|
|
62
|
+
* is computed by averaging `dimensions` when the Runnable doesn't
|
|
63
|
+
* provide it; `notes` defaults to an empty string.
|
|
64
|
+
*/
|
|
65
|
+
runnable: RunnableLike<{
|
|
66
|
+
artifact: TArtifact;
|
|
67
|
+
scenario: TScenario;
|
|
68
|
+
}, Partial<JudgeScore>>;
|
|
69
|
+
/** Optional per-scenario predicate, copied unchanged onto the returned judge config. NOTE(review): presumably the substrate uses it to skip scenarios this judge does not cover — confirm against the engine. */
appliesTo?: (scenario: TScenario) => boolean;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Wrap a LangChain Runnable as a `JudgeConfig`. The Runnable can be any
|
|
73
|
+
* structured-output chain (e.g. `prompt.pipe(model).pipe(StructuredOutputParser)`)
|
|
74
|
+
* that returns a `Partial<JudgeScore>`.
|
|
75
|
+
*
|
|
76
|
+
* The substrate's invariant — throw on judge failure, never silently
|
|
77
|
+
* fold errors into a zero — is preserved: any error from the Runnable
|
|
78
|
+
* propagates and the substrate records a failed cell.
|
|
79
|
+
*
|
|
80
|
+
* Note: a Runnable result that carries neither `composite` nor any
* dimension values does not throw; its composite is reported as 0.
*
* @param opts - Judge name, scored dimensions, the scoring Runnable, and an optional `appliesTo` filter.
* @returns A `JudgeConfig` whose `score` invokes the Runnable with `{ artifact, scenario }`.
*
* @example
|
|
81
|
+
* const scorePrompt = ChatPromptTemplate.fromTemplate(`...`)
|
|
82
|
+
* const judgeChain = scorePrompt.pipe(judgeModel).pipe(jsonParser)
|
|
83
|
+
* const judge = langchainJudge({
|
|
84
|
+
* name: 'marketing-quality',
|
|
85
|
+
* dimensions: [{ key: 'hook_strength', description: '...' }, ...],
|
|
86
|
+
* runnable: judgeChain,
|
|
87
|
+
* })
|
|
88
|
+
*/
|
|
89
|
+
declare function langchainJudge<TArtifact, TScenario extends Scenario>(opts: LangchainJudgeOptions<TArtifact, TScenario>): JudgeConfig<TArtifact, TScenario>;
|
|
90
|
+
|
|
91
|
+
export { type LangchainDispatchOptions, type LangchainJudgeOptions, type RunnableLike, langchainDispatch, langchainJudge };
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import "../chunk-NSBPE2FW.js";
|
|
2
|
+
|
|
3
|
+
// src/adapters/langchain.ts
|
|
4
|
+
function langchainDispatch(opts) {
|
|
5
|
+
return async (scenario, ctx) => {
|
|
6
|
+
return opts.runnable.invoke(scenario, {
|
|
7
|
+
...opts.config,
|
|
8
|
+
signal: ctx.signal
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
}
|
|
12
|
+
function langchainJudge(opts) {
|
|
13
|
+
return {
|
|
14
|
+
name: opts.name,
|
|
15
|
+
dimensions: opts.dimensions,
|
|
16
|
+
appliesTo: opts.appliesTo,
|
|
17
|
+
async score({ artifact, scenario, signal }) {
|
|
18
|
+
const result = await opts.runnable.invoke({ artifact, scenario }, { signal });
|
|
19
|
+
const dims = result.dimensions ?? {};
|
|
20
|
+
const dimValues = Object.values(dims);
|
|
21
|
+
const composite = result.composite ?? (dimValues.length > 0 ? dimValues.reduce((a, b) => a + b, 0) / dimValues.length : 0);
|
|
22
|
+
return {
|
|
23
|
+
dimensions: dims,
|
|
24
|
+
composite,
|
|
25
|
+
notes: result.notes ?? ""
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
export {
|
|
31
|
+
langchainDispatch,
|
|
32
|
+
langchainJudge
|
|
33
|
+
};
|
|
34
|
+
//# sourceMappingURL=langchain.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/adapters/langchain.ts"],"sourcesContent":["/**\n * # `@tangle-network/agent-eval/adapters/langchain` — wrap any LangChain\n * Runnable as a `Dispatch` (or `JudgeConfig`).\n *\n * **Why structural, not pinned**: we don't depend on `@langchain/core` at\n * install time. The adapter accepts anything with the canonical LangChain\n * Runnable shape (`invoke(input, config?)`), so it works with their\n * `Runnable`, `RunnableSequence`, `RunnableMap`, `RunnablePassthrough`,\n * and any custom Runnable-shaped object. No version pin, no peer dep,\n * no bundle-bloat risk.\n *\n * **Why this exists**: the most-asked question from foreign agent\n * builders is \"I'm already on LangChain — how do I plug in?\". The answer\n * is one function. Wrap your existing Runnable, pass the Dispatch into\n * `runEval` / `runImprovementLoop`, ship.\n */\n\nimport type { Dispatch, JudgeConfig, JudgeScore, Scenario } from '../contract'\n\n// ── Minimal structural type ──────────────────────────────────────────\n//\n// Whatever has `invoke(input, config?)` qualifies. We accept any\n// config shape (LangChain's RunnableConfig has many optional fields)\n// — the only thing we need is the AbortSignal seam, which LangChain's\n// RunnableConfig already supports as `signal?: AbortSignal`.\n\nexport interface RunnableLike<TInput, TOutput> {\n invoke(input: TInput, config?: { signal?: AbortSignal; [key: string]: unknown }): Promise<TOutput>\n}\n\n// ── Dispatch wrapper ────────────────────────────────────────────────\n\nexport interface LangchainDispatchOptions<TScenario extends Scenario, TArtifact> {\n /** The Runnable (or RunnableSequence, or anything `.invoke`able). */\n runnable: RunnableLike<TScenario, TArtifact>\n /**\n * Optional config merged into every `invoke` call — tags, metadata,\n * callbacks, runName. 
The substrate's per-cell `AbortSignal` is\n * always merged in last (and so wins).\n */\n config?: Record<string, unknown>\n}\n\n/**\n * Wrap a LangChain Runnable as a `Dispatch`. The Runnable's input must\n * accept the scenario (typically you'll shape it via\n * `RunnableMap`/`RunnableLambda` upstream); its output is the artifact\n * the engine + judges see.\n *\n * @example\n * const chain = prompt.pipe(model).pipe(parser)\n * const dispatch = langchainDispatch({ runnable: chain })\n * await runEval({ scenarios, dispatch, judges: [...], storage, runDir })\n */\nexport function langchainDispatch<TScenario extends Scenario, TArtifact>(\n opts: LangchainDispatchOptions<TScenario, TArtifact>,\n): Dispatch<TScenario, TArtifact> {\n return async (scenario, ctx) => {\n return opts.runnable.invoke(scenario, {\n ...opts.config,\n signal: ctx.signal,\n })\n }\n}\n\n// ── Judge wrapper ───────────────────────────────────────────────────\n\nexport interface LangchainJudgeOptions<TArtifact, TScenario extends Scenario> {\n /** Judge name; appears in `CampaignResult.aggregates.byJudge`. */\n name: string\n /**\n * Dimensions the judge scores. Used both for the judge's own prompt\n * (if it reads them) and for the aggregator's `byJudge` rollup.\n */\n dimensions: { key: string; description: string }[]\n /**\n * A Runnable that takes `{ artifact, scenario }` and returns a\n * partial `JudgeScore` — the dimensions map at minimum. `composite`\n * is computed by averaging `dimensions` when the Runnable doesn't\n * provide it; `notes` defaults to an empty string.\n */\n runnable: RunnableLike<{ artifact: TArtifact; scenario: TScenario }, Partial<JudgeScore>>\n appliesTo?: (scenario: TScenario) => boolean\n}\n\n/**\n * Wrap a LangChain Runnable as a `JudgeConfig`. The Runnable can be any\n * structured-output chain (e.g. 
`prompt.pipe(model).pipe(StructuredOutputParser)`)\n * that returns a `Partial<JudgeScore>`.\n *\n * The substrate's invariant — throw on judge failure, never silently\n * fold errors into a zero — is preserved: any error from the Runnable\n * propagates and the substrate records a failed cell.\n *\n * @example\n * const scorePrompt = ChatPromptTemplate.fromTemplate(`...`)\n * const judgeChain = scorePrompt.pipe(judgeModel).pipe(jsonParser)\n * const judge = langchainJudge({\n * name: 'marketing-quality',\n * dimensions: [{ key: 'hook_strength', description: '...' }, ...],\n * runnable: judgeChain,\n * })\n */\nexport function langchainJudge<TArtifact, TScenario extends Scenario>(\n opts: LangchainJudgeOptions<TArtifact, TScenario>,\n): JudgeConfig<TArtifact, TScenario> {\n return {\n name: opts.name,\n dimensions: opts.dimensions,\n appliesTo: opts.appliesTo,\n async score({ artifact, scenario, signal }) {\n const result = await opts.runnable.invoke({ artifact, scenario }, { signal })\n const dims = (result.dimensions ?? {}) as Record<string, number>\n const dimValues = Object.values(dims)\n const composite =\n result.composite ??\n (dimValues.length > 0 ? dimValues.reduce((a, b) => a + b, 0) / dimValues.length : 0)\n return {\n dimensions: dims,\n composite,\n notes: result.notes ?? '',\n }\n },\n }\n}\n"],"mappings":";;;AAsDO,SAAS,kBACd,MACgC;AAChC,SAAO,OAAO,UAAU,QAAQ;AAC9B,WAAO,KAAK,SAAS,OAAO,UAAU;AAAA,MACpC,GAAG,KAAK;AAAA,MACR,QAAQ,IAAI;AAAA,IACd,CAAC;AAAA,EACH;AACF;AAwCO,SAAS,eACd,MACmC;AACnC,SAAO;AAAA,IACL,MAAM,KAAK;AAAA,IACX,YAAY,KAAK;AAAA,IACjB,WAAW,KAAK;AAAA,IAChB,MAAM,MAAM,EAAE,UAAU,UAAU,OAAO,GAAG;AAC1C,YAAM,SAAS,MAAM,KAAK,SAAS,OAAO,EAAE,UAAU,SAAS,GAAG,EAAE,OAAO,CAAC;AAC5E,YAAM,OAAQ,OAAO,cAAc,CAAC;AACpC,YAAM,YAAY,OAAO,OAAO,IAAI;AACpC,YAAM,YACJ,OAAO,cACN,UAAU,SAAS,IAAI,UAAU,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,UAAU,SAAS;AACpF,aAAO;AAAA,QACL,YAAY;AAAA,QACZ;AAAA,QACA,OAAO,OAAO,SAAS;AAAA,MACzB;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
|
package/dist/campaign/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export { C as CampaignStorage, D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, O as OpenAutoPrOptions, m as OpenAutoPrResult, R as RunCampaignOptions, a as RunEvalOptions, b as RunImprovementLoopOptions, c as RunImprovementLoopResult, n as RunOptimizationOptions, o as RunOptimizationResult, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, p as openAutoPr, r as runCampaign, k as runEval, l as runImprovementLoop, q as runOptimization, s as surfaceHash } from '../run-improvement-loop-
|
|
2
|
-
import { L as LabeledScenarioStore, q as LabeledScenarioWrite, r as LabeledScenarioSampleArgs, s as LabeledScenarioRecord, f as CodeSurface } from '../types-
|
|
3
|
-
export { C as CampaignAggregates, a as CampaignArtifactWriter, b as CampaignCellResult, c as CampaignCostMeter, d as CampaignResult, e as CampaignTraceWriter, g as DispatchContext, D as DispatchFn, G as Gate, h as GateContext, i as GateDecision, j as GateResult, k as GenerationCandidate, l as GenerationRecord, I as ImprovementDriver, t as JudgeAggregate, J as JudgeConfig, m as JudgeDimension, n as JudgeScore, u as LabeledScenarioSource, M as MutableSurface, o as Mutator, O as OptimizerConfig, P as ProposeContext, R as RedactionStatus, S as Scenario, v as ScenarioAggregate, p as SessionScript, T as TraceSpan } from '../types-
|
|
1
|
+
export { C as CampaignStorage, D as DefaultProductionGateOptions, E as EvolutionaryDriverOptions, G as GepaDriverOptions, H as HeldOutGateOptions, O as OpenAutoPrOptions, m as OpenAutoPrResult, R as RunCampaignOptions, a as RunEvalOptions, b as RunImprovementLoopOptions, c as RunImprovementLoopResult, n as RunOptimizationOptions, o as RunOptimizationResult, d as composeGate, e as defaultProductionGate, f as evolutionaryDriver, g as fsCampaignStorage, h as gepaDriver, i as heldOutGate, j as inMemoryCampaignStorage, p as openAutoPr, r as runCampaign, k as runEval, l as runImprovementLoop, q as runOptimization, s as surfaceHash } from '../run-improvement-loop-pJ4yrx4X.js';
|
|
2
|
+
import { L as LabeledScenarioStore, q as LabeledScenarioWrite, r as LabeledScenarioSampleArgs, s as LabeledScenarioRecord, f as CodeSurface } from '../types-BURGZ8Ug.js';
|
|
3
|
+
export { C as CampaignAggregates, a as CampaignArtifactWriter, b as CampaignCellResult, c as CampaignCostMeter, d as CampaignResult, e as CampaignTraceWriter, g as DispatchContext, D as DispatchFn, G as Gate, h as GateContext, i as GateDecision, j as GateResult, k as GenerationCandidate, l as GenerationRecord, I as ImprovementDriver, t as JudgeAggregate, J as JudgeConfig, m as JudgeDimension, n as JudgeScore, u as LabeledScenarioSource, M as MutableSurface, o as Mutator, O as OptimizerConfig, P as ProposeContext, R as RedactionStatus, S as Scenario, v as ScenarioAggregate, p as SessionScript, T as TraceSpan } from '../types-BURGZ8Ug.js';
|
|
4
4
|
import '../llm-client-BXVRUZyX.js';
|
|
5
5
|
import '../errors-mje_cKOs.js';
|
|
6
6
|
import '../raw-provider-sink-C46HDghv.js';
|
package/dist/campaign/index.js
CHANGED
|
@@ -9,12 +9,12 @@ import {
|
|
|
9
9
|
runImprovementLoop,
|
|
10
10
|
runOptimization,
|
|
11
11
|
surfaceHash
|
|
12
|
-
} from "../chunk-
|
|
12
|
+
} from "../chunk-HRKOCLQA.js";
|
|
13
13
|
import {
|
|
14
14
|
fsCampaignStorage,
|
|
15
15
|
inMemoryCampaignStorage,
|
|
16
16
|
runCampaign
|
|
17
|
-
} from "../chunk-
|
|
17
|
+
} from "../chunk-J3EIOI3O.js";
|
|
18
18
|
import "../chunk-N4SBKEPJ.js";
|
|
19
19
|
import "../chunk-YV7J7X5N.js";
|
|
20
20
|
import "../chunk-WP7SY7AI.js";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runCampaign
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-J3EIOI3O.js";
|
|
4
4
|
import {
|
|
5
5
|
buildReflectionPrompt,
|
|
6
6
|
parseReflectionResponse,
|
|
@@ -553,7 +553,7 @@ async function runImprovementLoop(opts) {
|
|
|
553
553
|
throw new Error("runImprovementLoop: autoOnPromote='pr' requires ghOwner + ghRepo.");
|
|
554
554
|
}
|
|
555
555
|
const optimization = await runOptimization(opts);
|
|
556
|
-
const { runCampaign: runCampaign2 } = await import("./run-campaign-
|
|
556
|
+
const { runCampaign: runCampaign2 } = await import("./run-campaign-6UEVBPP3.js");
|
|
557
557
|
const baselineOnHoldout = await runCampaign2({
|
|
558
558
|
...opts,
|
|
559
559
|
scenarios: opts.holdoutScenarios,
|
|
@@ -639,4 +639,4 @@ export {
|
|
|
639
639
|
surfaceHash,
|
|
640
640
|
runImprovementLoop
|
|
641
641
|
};
|
|
642
|
-
//# sourceMappingURL=chunk-
|
|
642
|
+
//# sourceMappingURL=chunk-HRKOCLQA.js.map
|
|
@@ -195,6 +195,10 @@ async function executeCell(args) {
|
|
|
195
195
|
return costSoFar;
|
|
196
196
|
}
|
|
197
197
|
};
|
|
198
|
+
const placement = args.opts.cellPlacement?.({
|
|
199
|
+
scenario: args.slot.scenario,
|
|
200
|
+
rep: args.slot.rep
|
|
201
|
+
});
|
|
198
202
|
const ctx = {
|
|
199
203
|
cellId: args.slot.cellId,
|
|
200
204
|
rep: args.slot.rep,
|
|
@@ -202,7 +206,8 @@ async function executeCell(args) {
|
|
|
202
206
|
signal: args.signal,
|
|
203
207
|
trace,
|
|
204
208
|
artifacts,
|
|
205
|
-
cost
|
|
209
|
+
cost,
|
|
210
|
+
placement
|
|
206
211
|
};
|
|
207
212
|
let artifact;
|
|
208
213
|
let errorMessage;
|
|
@@ -357,4 +362,4 @@ export {
|
|
|
357
362
|
inMemoryCampaignStorage,
|
|
358
363
|
runCampaign
|
|
359
364
|
};
|
|
360
|
-
//# sourceMappingURL=chunk-
|
|
365
|
+
//# sourceMappingURL=chunk-J3EIOI3O.js.map
|