@tangle-network/agent-runtime 0.32.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-CBQVID7G.js → chunk-5QVVET72.js} +2 -2
- package/dist/{chunk-PACEJFUE.js → chunk-7KS6UEHB.js} +277 -10
- package/dist/chunk-7KS6UEHB.js.map +1 -0
- package/dist/{chunk-UNQM6XQO.js → chunk-HSX6PFZR.js} +2 -2
- package/dist/{chunk-URDSRUPQ.js → chunk-PY6NMZYX.js} +2 -2
- package/dist/{chunk-IORDTTVB.js → chunk-Q4ZDSLBD.js} +153 -3
- package/dist/chunk-Q4ZDSLBD.js.map +1 -0
- package/dist/{chunk-XZYF3YJN.js → chunk-SQSCRJ7U.js} +7 -1
- package/dist/{chunk-XZYF3YJN.js.map → chunk-SQSCRJ7U.js.map} +1 -1
- package/dist/{chunk-XNZYEQMF.js → chunk-VVHX5RKE.js} +4 -4
- package/dist/improvement.d.ts +128 -3
- package/dist/improvement.js +86 -0
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +18 -2
- package/dist/index.js +7 -3
- package/dist/index.js.map +1 -1
- package/dist/loops.d.ts +202 -40
- package/dist/loops.js +11 -5
- package/dist/mcp/bin.js +6 -6
- package/dist/mcp/index.d.ts +3 -3
- package/dist/mcp/index.js +19 -10
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-CsgwKFq8.d.ts → otel-export-xgf4J6bo.d.ts} +23 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +3 -3
- package/dist/{types-Co507h15.d.ts → types-BZw2bqJc.d.ts} +44 -1
- package/package.json +24 -12
- package/skills/agent-runtime-adoption/SKILL.md +170 -0
- package/dist/chunk-IORDTTVB.js.map +0 -1
- package/dist/chunk-PACEJFUE.js.map +0 -1
- /package/dist/{chunk-CBQVID7G.js.map → chunk-5QVVET72.js.map} +0 -0
- /package/dist/{chunk-UNQM6XQO.js.map → chunk-HSX6PFZR.js.map} +0 -0
- /package/dist/{chunk-URDSRUPQ.js.map → chunk-PY6NMZYX.js.map} +0 -0
- /package/dist/{chunk-XNZYEQMF.js.map → chunk-VVHX5RKE.js.map} +0 -0
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
DELEGATION_STATUS_DESCRIPTION,
|
|
15
15
|
DELEGATION_STATUS_INPUT_SCHEMA,
|
|
16
16
|
DELEGATION_STATUS_TOOL_NAME
|
|
17
|
-
} from "./chunk-UNQM6XQO.js";
|
|
17
|
+
} from "./chunk-HSX6PFZR.js";
|
|
18
18
|
|
|
19
19
|
// src/mcp/openai-tools.ts
|
|
20
20
|
function buildTool(name, description, parameters) {
|
|
@@ -58,7 +58,14 @@ function mcpToolsForRuntimeMcpSubset(names) {
|
|
|
58
58
|
}
|
|
59
59
|
|
|
60
60
|
// src/otel-export.ts
|
|
61
|
-
var SCOPE = { name: "@tangle-network/agent-runtime", version: "0.
|
|
61
|
+
var SCOPE = { name: "@tangle-network/agent-runtime", version: "0.33.0" };
|
|
62
|
+
var GEN_AI = {
|
|
63
|
+
operation: "gen_ai.operation.name",
|
|
64
|
+
agentName: "gen_ai.agent.name",
|
|
65
|
+
conversationId: "gen_ai.conversation.id",
|
|
66
|
+
inputTokens: "gen_ai.usage.input_tokens",
|
|
67
|
+
outputTokens: "gen_ai.usage.output_tokens"
|
|
68
|
+
};
|
|
62
69
|
function createOtelExporter(config) {
|
|
63
70
|
const resolvedEndpoint = config?.endpoint ?? (typeof process !== "undefined" ? process.env.OTEL_EXPORTER_OTLP_ENDPOINT : void 0);
|
|
64
71
|
if (!resolvedEndpoint) return void 0;
|
|
@@ -150,6 +157,148 @@ function loopEventToOtelSpan(event, traceId, parentSpanId) {
|
|
|
150
157
|
status: { code: 1 }
|
|
151
158
|
};
|
|
152
159
|
}
|
|
160
|
+
function buildLoopOtelSpans(events, traceId, rootParentSpanId) {
|
|
161
|
+
if (events.length === 0) return [];
|
|
162
|
+
const tid = padTraceId(traceId);
|
|
163
|
+
const out = [];
|
|
164
|
+
const num = (v) => typeof v === "number" && Number.isFinite(v) ? v : void 0;
|
|
165
|
+
const str = (v) => typeof v === "string" && v.length > 0 ? v : void 0;
|
|
166
|
+
const rec = (v) => v && typeof v === "object" ? v : {};
|
|
167
|
+
const started = events.find((e) => e.kind === "loop.started");
|
|
168
|
+
const ended = events.find((e) => e.kind === "loop.ended");
|
|
169
|
+
const runId = events[0]?.runId ?? "";
|
|
170
|
+
const rootStart = started?.timestamp ?? events[0].timestamp;
|
|
171
|
+
const rootEnd = ended?.timestamp ?? events[events.length - 1].timestamp;
|
|
172
|
+
const rootId = generateSpanId();
|
|
173
|
+
const make = (spanId, parentSpanId, name, startMs, endMs, attrs, statusCode = 1) => ({
|
|
174
|
+
traceId: tid,
|
|
175
|
+
spanId,
|
|
176
|
+
parentSpanId: parentSpanId ? padSpanId(parentSpanId) : void 0,
|
|
177
|
+
name,
|
|
178
|
+
kind: 1,
|
|
179
|
+
startTimeUnixNano: msToNs(startMs),
|
|
180
|
+
endTimeUnixNano: msToNs(endMs),
|
|
181
|
+
attributes: toAttributes(attrs),
|
|
182
|
+
status: { code: statusCode }
|
|
183
|
+
});
|
|
184
|
+
const sp = rec(started?.payload);
|
|
185
|
+
const rootAttrs = {
|
|
186
|
+
[GEN_AI.operation]: "invoke_workflow",
|
|
187
|
+
[GEN_AI.conversationId]: runId,
|
|
188
|
+
"tangle.loop.driver": str(sp.driver) ?? "driver"
|
|
189
|
+
};
|
|
190
|
+
if (Array.isArray(sp.agentRunNames) && sp.agentRunNames.length > 0) {
|
|
191
|
+
rootAttrs["tangle.loop.agents"] = sp.agentRunNames.map(String).join(",");
|
|
192
|
+
}
|
|
193
|
+
if (ended) {
|
|
194
|
+
const ep = rec(ended.payload);
|
|
195
|
+
const win = num(ep.winnerIterationIndex);
|
|
196
|
+
if (win !== void 0) rootAttrs["tangle.loop.winner.iteration_index"] = win;
|
|
197
|
+
const cost = num(ep.totalCostUsd);
|
|
198
|
+
if (cost !== void 0) rootAttrs["tangle.cost.usd"] = cost;
|
|
199
|
+
const iters = num(ep.iterations);
|
|
200
|
+
if (iters !== void 0) rootAttrs["tangle.loop.iterations"] = iters;
|
|
201
|
+
}
|
|
202
|
+
out.push(make(rootId, rootParentSpanId, "loop", rootStart, rootEnd, rootAttrs));
|
|
203
|
+
const iterStartTs = /* @__PURE__ */ new Map();
|
|
204
|
+
const placementByIdx = /* @__PURE__ */ new Map();
|
|
205
|
+
let currentRoundId;
|
|
206
|
+
let pendingRound;
|
|
207
|
+
const flushRound = (endMs) => {
|
|
208
|
+
if (!pendingRound) return;
|
|
209
|
+
out.push(
|
|
210
|
+
make(pendingRound.id, rootId, "loop.round", pendingRound.start, endMs, pendingRound.attrs)
|
|
211
|
+
);
|
|
212
|
+
pendingRound = void 0;
|
|
213
|
+
};
|
|
214
|
+
for (const e of events) {
|
|
215
|
+
const p = rec(e.payload);
|
|
216
|
+
switch (e.kind) {
|
|
217
|
+
case "loop.plan": {
|
|
218
|
+
flushRound(e.timestamp);
|
|
219
|
+
const id = generateSpanId();
|
|
220
|
+
const attrs = {
|
|
221
|
+
[GEN_AI.operation]: "invoke_workflow",
|
|
222
|
+
"tangle.loop.round.index": num(p.roundIndex) ?? 0,
|
|
223
|
+
"tangle.loop.move.kind": str(p.moveKind) ?? "unknown",
|
|
224
|
+
"tangle.loop.move.width": num(p.plannedCount) ?? 0
|
|
225
|
+
};
|
|
226
|
+
const r = str(p.rationale);
|
|
227
|
+
if (r) attrs["tangle.loop.move.rationale"] = r;
|
|
228
|
+
pendingRound = { id, start: e.timestamp, attrs };
|
|
229
|
+
currentRoundId = id;
|
|
230
|
+
break;
|
|
231
|
+
}
|
|
232
|
+
case "loop.iteration.started": {
|
|
233
|
+
const idx = num(p.iterationIndex);
|
|
234
|
+
if (idx !== void 0) iterStartTs.set(idx, e.timestamp);
|
|
235
|
+
break;
|
|
236
|
+
}
|
|
237
|
+
case "loop.iteration.dispatch": {
|
|
238
|
+
const idx = num(p.iterationIndex);
|
|
239
|
+
if (idx === void 0) break;
|
|
240
|
+
const place = {};
|
|
241
|
+
const kind = str(p.placement);
|
|
242
|
+
if (kind) place["tangle.loop.placement.kind"] = kind;
|
|
243
|
+
const sid = str(p.sandboxId);
|
|
244
|
+
if (sid) place["tangle.sandbox.id"] = sid;
|
|
245
|
+
const fid = str(p.fleetId);
|
|
246
|
+
if (fid) place["tangle.fleet.id"] = fid;
|
|
247
|
+
const mid = str(p.machineId);
|
|
248
|
+
if (mid) place["tangle.machine.id"] = mid;
|
|
249
|
+
placementByIdx.set(idx, place);
|
|
250
|
+
break;
|
|
251
|
+
}
|
|
252
|
+
case "loop.iteration.ended": {
|
|
253
|
+
const idx = num(p.iterationIndex) ?? 0;
|
|
254
|
+
const start = iterStartTs.get(idx) ?? e.timestamp;
|
|
255
|
+
const err = str(p.error);
|
|
256
|
+
const attrs = {
|
|
257
|
+
[GEN_AI.operation]: "invoke_agent",
|
|
258
|
+
"tangle.loop.iteration.index": idx
|
|
259
|
+
};
|
|
260
|
+
const agent = str(p.agentRunName);
|
|
261
|
+
if (agent) attrs[GEN_AI.agentName] = agent;
|
|
262
|
+
const tu = rec(p.tokenUsage);
|
|
263
|
+
const inTok = num(tu.input);
|
|
264
|
+
if (inTok !== void 0) attrs[GEN_AI.inputTokens] = inTok;
|
|
265
|
+
const outTok = num(tu.output);
|
|
266
|
+
if (outTok !== void 0) attrs[GEN_AI.outputTokens] = outTok;
|
|
267
|
+
const cost = num(p.costUsd);
|
|
268
|
+
if (cost !== void 0) attrs["tangle.cost.usd"] = cost;
|
|
269
|
+
const verdict = rec(p.verdict);
|
|
270
|
+
if (typeof verdict.valid === "boolean") attrs["tangle.loop.verdict.valid"] = verdict.valid;
|
|
271
|
+
const score = num(verdict.score);
|
|
272
|
+
if (score !== void 0) attrs["tangle.loop.verdict.score"] = score;
|
|
273
|
+
if (err) attrs["tangle.loop.error"] = err;
|
|
274
|
+
Object.assign(attrs, placementByIdx.get(idx) ?? {});
|
|
275
|
+
out.push(
|
|
276
|
+
make(
|
|
277
|
+
generateSpanId(),
|
|
278
|
+
currentRoundId ?? rootId,
|
|
279
|
+
"loop.iteration",
|
|
280
|
+
start,
|
|
281
|
+
e.timestamp,
|
|
282
|
+
attrs,
|
|
283
|
+
err ? 2 : 1
|
|
284
|
+
)
|
|
285
|
+
);
|
|
286
|
+
break;
|
|
287
|
+
}
|
|
288
|
+
case "loop.decision": {
|
|
289
|
+
if (pendingRound) {
|
|
290
|
+
const dec = str(p.decision);
|
|
291
|
+
if (dec) pendingRound.attrs["tangle.loop.decision"] = dec;
|
|
292
|
+
flushRound(e.timestamp);
|
|
293
|
+
}
|
|
294
|
+
currentRoundId = void 0;
|
|
295
|
+
break;
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
flushRound(rootEnd);
|
|
300
|
+
return out;
|
|
301
|
+
}
|
|
153
302
|
function parseHeadersFromEnv() {
|
|
154
303
|
if (typeof process === "undefined") return {};
|
|
155
304
|
const raw = process.env.OTEL_EXPORTER_OTLP_HEADERS;
|
|
@@ -227,7 +376,8 @@ export {
|
|
|
227
376
|
mcpToolsForRuntimeMcpSubset,
|
|
228
377
|
createOtelExporter,
|
|
229
378
|
loopEventToOtelSpan,
|
|
379
|
+
buildLoopOtelSpans,
|
|
230
380
|
INTELLIGENCE_WIRE_VERSION,
|
|
231
381
|
exportEvalRuns
|
|
232
382
|
};
|
|
233
|
-
//# sourceMappingURL=chunk-IORDTTVB.js.map
|
|
383
|
+
//# sourceMappingURL=chunk-Q4ZDSLBD.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/mcp/openai-tools.ts","../src/otel-export.ts"],"sourcesContent":["/**\n * @experimental\n *\n * OpenAI Chat Completions `tools[]` projection of the 5 agent-runtime MCP\n * delegation tools.\n *\n * Use when configuring `createOpenAICompatibleBackend({ tools: ... })` so the\n * model can call `delegate_code`, `delegate_research`, `delegate_feedback`,\n * `delegation_status`, and `delegation_history` through the OpenAI-compat\n * transport (tcloud, OpenRouter, OpenAI direct, cli-bridge). The runtime\n * surfaces tool calls as `tool_call` stream events — execution is the\n * caller's responsibility (typically the parent sandbox runtime's MCP\n * mount).\n *\n * Sandbox-SDK callers do NOT need this helper: the sandbox runtime mounts\n * MCP servers natively and the in-sandbox harness discovers tools via the\n * runtime, not via an OpenAI tools array.\n *\n * Tool name + description + JSON-schema are pulled from the canonical\n * `DELEGATE_*` constants exported by `./tools/*` so the projection cannot\n * drift from the server's own validators.\n */\n\nimport type { OpenAIChatTool } from '../types'\nimport {\n DELEGATE_CODE_DESCRIPTION,\n DELEGATE_CODE_INPUT_SCHEMA,\n DELEGATE_CODE_TOOL_NAME,\n} from './tools/delegate-code'\nimport {\n DELEGATE_FEEDBACK_DESCRIPTION,\n DELEGATE_FEEDBACK_INPUT_SCHEMA,\n DELEGATE_FEEDBACK_TOOL_NAME,\n} from './tools/delegate-feedback'\nimport {\n DELEGATE_RESEARCH_DESCRIPTION,\n DELEGATE_RESEARCH_INPUT_SCHEMA,\n DELEGATE_RESEARCH_TOOL_NAME,\n} from './tools/delegate-research'\nimport {\n DELEGATION_HISTORY_DESCRIPTION,\n DELEGATION_HISTORY_INPUT_SCHEMA,\n DELEGATION_HISTORY_TOOL_NAME,\n} from './tools/delegation-history'\nimport {\n DELEGATION_STATUS_DESCRIPTION,\n DELEGATION_STATUS_INPUT_SCHEMA,\n DELEGATION_STATUS_TOOL_NAME,\n} from './tools/delegation-status'\n\nfunction buildTool(\n name: string,\n description: string,\n parameters: Readonly<Record<string, unknown>>,\n): OpenAIChatTool {\n // `parameters` 
arrives as a deeply-readonly `as const` literal. The\n // OpenAI-compat backend JSON-serializes the body so a shallow copy\n // into a plain object is sufficient — and shields callers that mutate\n // the returned descriptor from corrupting the source constant.\n return {\n type: 'function',\n function: { name, description, parameters: { ...parameters } },\n }\n}\n\n/**\n * @experimental\n *\n * Returns the 5 delegation tools projected into OpenAI Chat Completions\n * `tools[]` shape. The order is stable: `delegate_code`,\n * `delegate_research`, `delegate_feedback`, `delegation_status`,\n * `delegation_history`.\n */\nexport function mcpToolsForRuntimeMcp(): OpenAIChatTool[] {\n return [\n buildTool(\n DELEGATE_CODE_TOOL_NAME,\n DELEGATE_CODE_DESCRIPTION,\n DELEGATE_CODE_INPUT_SCHEMA as Readonly<Record<string, unknown>>,\n ),\n buildTool(\n DELEGATE_RESEARCH_TOOL_NAME,\n DELEGATE_RESEARCH_DESCRIPTION,\n DELEGATE_RESEARCH_INPUT_SCHEMA as Readonly<Record<string, unknown>>,\n ),\n buildTool(\n DELEGATE_FEEDBACK_TOOL_NAME,\n DELEGATE_FEEDBACK_DESCRIPTION,\n DELEGATE_FEEDBACK_INPUT_SCHEMA as Readonly<Record<string, unknown>>,\n ),\n buildTool(\n DELEGATION_STATUS_TOOL_NAME,\n DELEGATION_STATUS_DESCRIPTION,\n DELEGATION_STATUS_INPUT_SCHEMA as Readonly<Record<string, unknown>>,\n ),\n buildTool(\n DELEGATION_HISTORY_TOOL_NAME,\n DELEGATION_HISTORY_DESCRIPTION,\n DELEGATION_HISTORY_INPUT_SCHEMA as Readonly<Record<string, unknown>>,\n ),\n ]\n}\n\n/**\n * @experimental\n *\n * Subset filter — return only the projected tools whose `function.name`\n * appears in `names`. Useful for curated mounts (e.g. only the queue-bound\n * delegation tools, omitting `delegate_feedback`). 
Unknown names are\n * silently ignored; pass an empty array to get an empty result.\n */\nexport function mcpToolsForRuntimeMcpSubset(names: ReadonlyArray<string>): OpenAIChatTool[] {\n const allowed = new Set(names)\n return mcpToolsForRuntimeMcp().filter((tool) => allowed.has(tool.function.name))\n}\n","/**\n * OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.\n *\n * Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env\n * when no explicit config is given. Keeps the runtime dep-free from\n * @opentelemetry/sdk-trace-base — minimal OTLP/JSON serializer.\n *\n * The exporter accepts both raw OtelSpan objects and LoopTraceEvents\n * (which get converted to OTLP spans automatically).\n */\n\nexport interface OtelExportConfig {\n /** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */\n endpoint?: string\n /** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */\n headers?: Record<string, string>\n /** Batch size before flush. Default 64. */\n batchSize?: number\n /** Flush interval ms. Default 5000. */\n flushIntervalMs?: number\n /** Resource attributes stamped on every export. */\n resourceAttributes?: Record<string, string | number | boolean>\n /** Service name. Default 'agent-runtime'. */\n serviceName?: string\n}\n\nexport interface OtelExporter {\n /** Export a span. */\n exportSpan(span: OtelSpan): void\n /** Force flush pending spans. */\n flush(): Promise<void>\n /** Shutdown cleanly. 
*/\n shutdown(): Promise<void>\n}\n\nexport interface OtelSpan {\n traceId: string\n spanId: string\n parentSpanId?: string\n name: string\n kind?: number\n startTimeUnixNano: string\n endTimeUnixNano: string\n attributes?: OtelAttribute[]\n status?: { code: number; message?: string }\n}\n\nexport interface OtelAttribute {\n key: string\n value: { stringValue?: string; intValue?: string; doubleValue?: number; boolValue?: boolean }\n}\n\ninterface OtlpResourceSpans {\n resource: { attributes: OtelAttribute[] }\n scopeSpans: Array<{ scope: { name: string; version: string }; spans: OtelSpan[] }>\n}\n\ninterface OtlpExport {\n resourceSpans: OtlpResourceSpans[]\n}\n\nconst SCOPE = { name: '@tangle-network/agent-runtime', version: '0.33.0' }\n\n/**\n * Current (non-deprecated) OpenTelemetry GenAI semantic-convention keys.\n * Registry: https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/\n * NB: `gen_ai.system` / `gen_ai.usage.prompt_tokens` / `completion_tokens` are\n * DEPRECATED — do not emit them. We use `provider.name` + `input/output_tokens`.\n */\nconst GEN_AI = {\n operation: 'gen_ai.operation.name',\n agentName: 'gen_ai.agent.name',\n conversationId: 'gen_ai.conversation.id',\n inputTokens: 'gen_ai.usage.input_tokens',\n outputTokens: 'gen_ai.usage.output_tokens',\n} as const\n\n/**\n * Create an OTEL exporter. Returns undefined when no endpoint is configured.\n */\nexport function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined {\n const resolvedEndpoint =\n config?.endpoint ??\n (typeof process !== 'undefined' ? process.env.OTEL_EXPORTER_OTLP_ENDPOINT : undefined)\n if (!resolvedEndpoint) return undefined\n const endpoint: string = resolvedEndpoint\n\n const headers = config?.headers ?? parseHeadersFromEnv()\n const batchSize = config?.batchSize ?? 64\n const flushIntervalMs = config?.flushIntervalMs ?? 5000\n const serviceName = config?.serviceName ?? 
'agent-runtime'\n const resourceAttrs = config?.resourceAttributes ?? {}\n\n const pending: OtelSpan[] = []\n let timer: ReturnType<typeof setInterval> | undefined\n let stopped = false\n\n const exporter: OtelExporter = {\n exportSpan(span: OtelSpan): void {\n if (stopped) return\n pending.push(span)\n if (pending.length >= batchSize) {\n void doFlush()\n }\n },\n\n async flush(): Promise<void> {\n await doFlush()\n },\n\n async shutdown(): Promise<void> {\n stopped = true\n if (timer !== undefined) {\n clearInterval(timer)\n timer = undefined\n }\n await doFlush()\n },\n }\n\n timer = setInterval(() => {\n if (pending.length > 0) void doFlush()\n }, flushIntervalMs)\n if (typeof timer === 'object' && 'unref' in timer) {\n ;(timer as NodeJS.Timeout).unref()\n }\n\n async function doFlush(): Promise<void> {\n if (pending.length === 0) return\n const batch = pending.splice(0)\n const body: OtlpExport = {\n resourceSpans: [\n {\n resource: {\n attributes: toAttributes({\n 'service.name': serviceName,\n ...resourceAttrs,\n }),\n },\n scopeSpans: [{ scope: SCOPE, spans: batch }],\n },\n ],\n }\n const url = `${endpoint.replace(/\\/+$/, '')}/v1/traces`\n try {\n await fetch(url, {\n method: 'POST',\n headers: { 'content-type': 'application/json', ...headers },\n body: JSON.stringify(body),\n })\n } catch {\n // Best-effort — telemetry export must not crash the runtime.\n }\n }\n\n return exporter\n}\n\n/**\n * Convert a LoopTraceEvent into an OtelSpan for export.\n */\nexport function loopEventToOtelSpan(\n event: {\n kind: string\n runId: string\n timestamp: number\n payload: object\n },\n traceId: string,\n parentSpanId?: string,\n): OtelSpan {\n const spanId = generateSpanId()\n const attrs: Record<string, string | number | boolean> = {\n 'loop.event_kind': event.kind,\n 'loop.run_id': event.runId,\n }\n for (const [k, v] of Object.entries(event.payload)) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n attrs[`loop.${k}`] = v\n 
}\n }\n const ts = msToNs(event.timestamp)\n return {\n traceId: padTraceId(traceId),\n spanId,\n parentSpanId: parentSpanId ? padSpanId(parentSpanId) : undefined,\n name: event.kind,\n kind: 1,\n startTimeUnixNano: ts,\n endTimeUnixNano: ts,\n attributes: toAttributes(attrs),\n status: { code: 1 },\n }\n}\n\n/**\n * Build a nested, real-duration OTLP span tree for ONE loop run from its full\n * ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,\n * zero-duration span per event), this reconstructs the topology hierarchy a\n * GenAI trace viewer renders natively:\n *\n * loop (invoke_workflow)\n * └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}\n * ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement\n * └─ …\n *\n * Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and\n * a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /\n * verdict / placement / cost (not yet standardized). Pure: feed it a buffered\n * per-runId event array (e.g. flushed on `loop.ended`) and export the result.\n */\nexport function buildLoopOtelSpans(\n events: ReadonlyArray<{ kind: string; runId: string; timestamp: number; payload: object }>,\n traceId: string,\n rootParentSpanId?: string,\n): OtelSpan[] {\n if (events.length === 0) return []\n const tid = padTraceId(traceId)\n const out: OtelSpan[] = []\n const num = (v: unknown): number | undefined =>\n typeof v === 'number' && Number.isFinite(v) ? v : undefined\n const str = (v: unknown): string | undefined =>\n typeof v === 'string' && v.length > 0 ? v : undefined\n const rec = (v: unknown): Record<string, unknown> =>\n v && typeof v === 'object' ? (v as Record<string, unknown>) : {}\n\n const started = events.find((e) => e.kind === 'loop.started')\n const ended = events.find((e) => e.kind === 'loop.ended')\n const runId = events[0]?.runId ?? ''\n const rootStart = started?.timestamp ?? 
events[0]!.timestamp\n const rootEnd = ended?.timestamp ?? events[events.length - 1]!.timestamp\n const rootId = generateSpanId()\n\n const make = (\n spanId: string,\n parentSpanId: string | undefined,\n name: string,\n startMs: number,\n endMs: number,\n attrs: Record<string, string | number | boolean>,\n statusCode = 1,\n ): OtelSpan => ({\n traceId: tid,\n spanId,\n parentSpanId: parentSpanId ? padSpanId(parentSpanId) : undefined,\n name,\n kind: 1,\n startTimeUnixNano: msToNs(startMs),\n endTimeUnixNano: msToNs(endMs),\n attributes: toAttributes(attrs),\n status: { code: statusCode },\n })\n\n // root\n const sp = rec(started?.payload)\n const rootAttrs: Record<string, string | number | boolean> = {\n [GEN_AI.operation]: 'invoke_workflow',\n [GEN_AI.conversationId]: runId,\n 'tangle.loop.driver': str(sp.driver) ?? 'driver',\n }\n if (Array.isArray(sp.agentRunNames) && sp.agentRunNames.length > 0) {\n rootAttrs['tangle.loop.agents'] = sp.agentRunNames.map(String).join(',')\n }\n if (ended) {\n const ep = rec(ended.payload)\n const win = num(ep.winnerIterationIndex)\n if (win !== undefined) rootAttrs['tangle.loop.winner.iteration_index'] = win\n const cost = num(ep.totalCostUsd)\n if (cost !== undefined) rootAttrs['tangle.cost.usd'] = cost\n const iters = num(ep.iterations)\n if (iters !== undefined) rootAttrs['tangle.loop.iterations'] = iters\n }\n out.push(make(rootId, rootParentSpanId, 'loop', rootStart, rootEnd, rootAttrs))\n\n // rounds + iterations\n const iterStartTs = new Map<number, number>()\n const placementByIdx = new Map<number, Record<string, string>>()\n let currentRoundId: string | undefined\n let pendingRound:\n | { id: string; start: number; attrs: Record<string, string | number | boolean> }\n | undefined\n const flushRound = (endMs: number) => {\n if (!pendingRound) return\n out.push(\n make(pendingRound.id, rootId, 'loop.round', pendingRound.start, endMs, pendingRound.attrs),\n )\n pendingRound = undefined\n }\n\n for (const e of events) {\n 
const p = rec(e.payload)\n switch (e.kind) {\n case 'loop.plan': {\n flushRound(e.timestamp)\n const id = generateSpanId()\n const attrs: Record<string, string | number | boolean> = {\n [GEN_AI.operation]: 'invoke_workflow',\n 'tangle.loop.round.index': num(p.roundIndex) ?? 0,\n 'tangle.loop.move.kind': str(p.moveKind) ?? 'unknown',\n 'tangle.loop.move.width': num(p.plannedCount) ?? 0,\n }\n const r = str(p.rationale)\n if (r) attrs['tangle.loop.move.rationale'] = r\n pendingRound = { id, start: e.timestamp, attrs }\n currentRoundId = id\n break\n }\n case 'loop.iteration.started': {\n const idx = num(p.iterationIndex)\n if (idx !== undefined) iterStartTs.set(idx, e.timestamp)\n break\n }\n case 'loop.iteration.dispatch': {\n const idx = num(p.iterationIndex)\n if (idx === undefined) break\n const place: Record<string, string> = {}\n const kind = str(p.placement)\n if (kind) place['tangle.loop.placement.kind'] = kind\n const sid = str(p.sandboxId)\n if (sid) place['tangle.sandbox.id'] = sid\n const fid = str(p.fleetId)\n if (fid) place['tangle.fleet.id'] = fid\n const mid = str(p.machineId)\n if (mid) place['tangle.machine.id'] = mid\n placementByIdx.set(idx, place)\n break\n }\n case 'loop.iteration.ended': {\n const idx = num(p.iterationIndex) ?? 0\n const start = iterStartTs.get(idx) ?? 
e.timestamp\n const err = str(p.error)\n const attrs: Record<string, string | number | boolean> = {\n [GEN_AI.operation]: 'invoke_agent',\n 'tangle.loop.iteration.index': idx,\n }\n const agent = str(p.agentRunName)\n if (agent) attrs[GEN_AI.agentName] = agent\n const tu = rec(p.tokenUsage)\n const inTok = num(tu.input)\n if (inTok !== undefined) attrs[GEN_AI.inputTokens] = inTok\n const outTok = num(tu.output)\n if (outTok !== undefined) attrs[GEN_AI.outputTokens] = outTok\n const cost = num(p.costUsd)\n if (cost !== undefined) attrs['tangle.cost.usd'] = cost\n const verdict = rec(p.verdict)\n if (typeof verdict.valid === 'boolean') attrs['tangle.loop.verdict.valid'] = verdict.valid\n const score = num(verdict.score)\n if (score !== undefined) attrs['tangle.loop.verdict.score'] = score\n if (err) attrs['tangle.loop.error'] = err\n Object.assign(attrs, placementByIdx.get(idx) ?? {})\n out.push(\n make(\n generateSpanId(),\n currentRoundId ?? rootId,\n 'loop.iteration',\n start,\n e.timestamp,\n attrs,\n err ? 2 : 1,\n ),\n )\n break\n }\n case 'loop.decision': {\n if (pendingRound) {\n const dec = str(p.decision)\n if (dec) pendingRound.attrs['tangle.loop.decision'] = dec\n flushRound(e.timestamp)\n }\n currentRoundId = undefined\n break\n }\n }\n }\n flushRound(rootEnd)\n return out\n}\n\nfunction parseHeadersFromEnv(): Record<string, string> {\n if (typeof process === 'undefined') return {}\n const raw = process.env.OTEL_EXPORTER_OTLP_HEADERS\n if (!raw) return {}\n const out: Record<string, string> = {}\n for (const pair of raw.split(',')) {\n const eq = pair.indexOf('=')\n if (eq < 0) continue\n const key = pair.slice(0, eq).trim()\n const value = pair.slice(eq + 1).trim()\n if (key) out[key] = value\n }\n return out\n}\n\nfunction toAttributes(record: Record<string, string | number | boolean>): OtelAttribute[] {\n return Object.entries(record).map(([key, value]) => ({\n key,\n value:\n typeof value === 'number'\n ? Number.isInteger(value)\n ? 
{ intValue: value.toString() }\n : { doubleValue: value }\n : typeof value === 'boolean'\n ? { boolValue: value }\n : { stringValue: value },\n }))\n}\n\nfunction msToNs(ms: number): string {\n return (BigInt(Math.floor(ms)) * 1_000_000n).toString()\n}\n\nfunction padSpanId(id: string): string {\n const cleaned = id.replace(/-/g, '')\n return cleaned.slice(0, 16).padEnd(16, '0')\n}\n\nfunction padTraceId(id: string): string {\n const cleaned = id.replace(/-/g, '')\n return cleaned.slice(0, 32).padEnd(32, '0')\n}\n\nfunction generateSpanId(): string {\n const bytes = new Uint8Array(8)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 8; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n\n// ─── Eval-run ingest (self-improvement provenance) ───────────────────────────\n//\n// Tangle Intelligence has a first-class, non-trace record for self-improvement\n// runs: POST /v1/ingest/eval-runs (\"Mode D\"). Each generation carries a\n// `surfaceHash` (the proposed-change identity) + arbitrary `surface` provenance;\n// a later `gate-decided` event re-emits the same `runId` (idempotent upsert) with\n// a real `gateDecision` + `holdoutLift`, so proposal→verdict is one diffable\n// record. This is how a consumer's RSI loop records WHAT it changed, WHY, from\n// which evidence — the audit trail behind agentic self-improvement.\n\n/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */\nexport const INTELLIGENCE_WIRE_VERSION = '2026-05-26.v1'\n\nexport interface EvalRunGeneration {\n /** 0-based ordinal of this generation within the run (required by ingest). */\n index: number\n /** Identity of the proposed surface change (content-addressed hash). */\n surfaceHash: string\n /** Arbitrary provenance for this generation (rationale, evidence, source). 
*/\n surface?: unknown\n /** Per-scenario results; empty until the generation is measured. */\n cells?: unknown[]\n /** Mean composite score (0 when unmeasured — pair with labels.measured). */\n compositeMean: number\n costUsd: number\n durationMs: number\n}\n\nexport interface EvalRunEvent {\n runId: string\n runDir: string\n /** ISO timestamp. */\n timestamp: string\n status:\n | 'started'\n | 'baseline-complete'\n | 'generation-complete'\n | 'gate-decided'\n | 'finished'\n | 'errored'\n labels?: Record<string, string>\n baseline?: EvalRunGeneration\n generations?: EvalRunGeneration[]\n gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling'\n holdoutLift?: number\n totalCostUsd: number\n totalDurationMs: number\n errorMessage?: string\n}\n\nexport interface EvalRunsExportConfig {\n /** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */\n apiKey?: string\n /** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */\n base?: string\n /** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */\n idempotencyKey?: string\n}\n\nexport interface EvalRunsExportResult {\n ok: boolean\n status: number\n accepted: number\n rejected: Array<{ index: number; reason: string }>\n}\n\nconst DEFAULT_INTELLIGENCE_BASE = 'https://intelligence.tangle.tools'\n\n/**\n * Ship self-improvement eval-run events to Tangle Intelligence. Unlike the\n * best-effort span exporter, this RESOLVES with the ingest verdict (accepted /\n * rejected per event) so a consumer's loop can assert its provenance landed.\n * Throws only on a missing key or network failure.\n */\nexport async function exportEvalRuns(\n events: EvalRunEvent[],\n config?: EvalRunsExportConfig,\n): Promise<EvalRunsExportResult> {\n if (events.length === 0) return { ok: true, status: 0, accepted: 0, rejected: [] }\n const apiKey =\n config?.apiKey ?? (typeof process !== 'undefined' ? 
process.env.TANGLE_API_KEY : undefined)\n if (!apiKey)\n throw new Error('exportEvalRuns: apiKey required (pass config.apiKey or set TANGLE_API_KEY)')\n const base =\n config?.base ??\n (typeof process !== 'undefined' ? process.env.INTELLIGENCE_BASE : undefined) ??\n DEFAULT_INTELLIGENCE_BASE\n const url = `${base.replace(/\\/+$/, '')}/v1/ingest/eval-runs`\n const res = await fetch(url, {\n method: 'POST',\n headers: {\n 'content-type': 'application/json',\n authorization: `Bearer ${apiKey}`,\n 'X-Tangle-Wire-Version': INTELLIGENCE_WIRE_VERSION,\n ...(config?.idempotencyKey ? { 'Idempotency-Key': config.idempotencyKey } : {}),\n },\n body: JSON.stringify({ wireVersion: INTELLIGENCE_WIRE_VERSION, events }),\n })\n let parsed: { accepted?: number; rejected?: Array<{ index: number; reason: string }> } = {}\n try {\n parsed = (await res.json()) as typeof parsed\n } catch {\n // non-JSON body (e.g. 5xx HTML) — leave parsed empty\n }\n return {\n ok: res.ok,\n status: res.status,\n accepted: parsed.accepted ?? (res.ok ? events.length : 0),\n rejected: parsed.rejected ?? 
[],\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AAkDA,SAAS,UACP,MACA,aACA,YACgB;AAKhB,SAAO;AAAA,IACL,MAAM;AAAA,IACN,UAAU,EAAE,MAAM,aAAa,YAAY,EAAE,GAAG,WAAW,EAAE;AAAA,EAC/D;AACF;AAUO,SAAS,wBAA0C;AACxD,SAAO;AAAA,IACL;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAUO,SAAS,4BAA4B,OAAgD;AAC1F,QAAM,UAAU,IAAI,IAAI,KAAK;AAC7B,SAAO,sBAAsB,EAAE,OAAO,CAAC,SAAS,QAAQ,IAAI,KAAK,SAAS,IAAI,CAAC;AACjF;;;ACrDA,IAAM,QAAQ,EAAE,MAAM,iCAAiC,SAAS,SAAS;AAQzE,IAAM,SAAS;AAAA,EACb,WAAW;AAAA,EACX,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,aAAa;AAAA,EACb,cAAc;AAChB;AAKO,SAAS,mBAAmB,QAAqD;AACtF,QAAM,mBACJ,QAAQ,aACP,OAAO,YAAY,cAAc,QAAQ,IAAI,8BAA8B;AAC9E,MAAI,CAAC,iBAAkB,QAAO;AAC9B,QAAM,WAAmB;AAEzB,QAAM,UAAU,QAAQ,WAAW,oBAAoB;AACvD,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,kBAAkB,QAAQ,mBAAmB;AACnD,QAAM,cAAc,QAAQ,eAAe;AAC3C,QAAM,gBAAgB,QAAQ,sBAAsB,CAAC;AAErD,QAAM,UAAsB,CAAC;AAC7B,MAAI;AACJ,MAAI,UAAU;AAEd,QAAM,WAAyB;AAAA,IAC7B,WAAW,MAAsB;AAC/B,UAAI,QAAS;AACb,cAAQ,KAAK,IAAI;AACjB,UAAI,QAAQ,UAAU,WAAW;AAC/B,aAAK,QAAQ;AAAA,MACf;AAAA,IACF;AAAA,IAEA,MAAM,QAAuB;AAC3B,YAAM,QAAQ;AAAA,IAChB;AAAA,IAEA,MAAM,WAA0B;AAC9B,gBAAU;AACV,UAAI,UAAU,QAAW;AACvB,sBAAc,KAAK;AACnB,gBAAQ;AAAA,MACV;AACA,YAAM,QAAQ;AAAA,IAChB;AAAA,EACF;AAEA,UAAQ,YAAY,MAAM;AACxB,QAAI,QAAQ,SAAS,EAAG,MAAK,QAAQ;AAAA,EACvC,GAAG,eAAe;AAClB,MAAI,OAAO,UAAU,YAAY,WAAW,OAAO;AACjD;AAAC,IAAC,MAAyB,MAAM;AAAA,EACnC;AAEA,iBAAe,UAAyB;AACtC,QAAI,QAAQ,WAAW,EAAG;AAC1B,UAAM,QAAQ,QAAQ,OAAO,CAAC;AAC9B,UAAM,OAAmB;AAAA,MACvB,eAAe;AAAA,QACb;AAAA,UACE,UAAU;AAAA,YACR,YAAY,aAAa;AAAA,cACvB,gBAAgB;AAAA,cAChB,GAAG;AAAA,YACL,CAAC;AAAA,UACH;AAAA,UACA,YAAY,CAAC,EAAE,OAAO,OAAO,OAAO,MAAM,CAAC;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AACA,UAAM,MAAM,GAAG,SAAS,QAAQ,QAAQ,EAAE,CAAC;AAC3C,QAAI;AACF,YAAM,MAAM,KAAK;AAAA,QACf,QAAQ;AAAA,QACR,SAAS,EAAE,gBAAgB,oBAAoB,GAAG,QAAQ;AAAA,QAC1D,MAAM,KAAK,UAAU,IAAI;AAAA,MAC3B,CA
AC;AAAA,IACH,QAAQ;AAAA,IAER;AAAA,EACF;AAEA,SAAO;AACT;AAKO,SAAS,oBACd,OAMA,SACA,cACU;AACV,QAAM,SAAS,eAAe;AAC9B,QAAM,QAAmD;AAAA,IACvD,mBAAmB,MAAM;AAAA,IACzB,eAAe,MAAM;AAAA,EACvB;AACA,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,MAAM,OAAO,GAAG;AAClD,QAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,YAAM,QAAQ,CAAC,EAAE,IAAI;AAAA,IACvB;AAAA,EACF;AACA,QAAM,KAAK,OAAO,MAAM,SAAS;AACjC,SAAO;AAAA,IACL,SAAS,WAAW,OAAO;AAAA,IAC3B;AAAA,IACA,cAAc,eAAe,UAAU,YAAY,IAAI;AAAA,IACvD,MAAM,MAAM;AAAA,IACZ,MAAM;AAAA,IACN,mBAAmB;AAAA,IACnB,iBAAiB;AAAA,IACjB,YAAY,aAAa,KAAK;AAAA,IAC9B,QAAQ,EAAE,MAAM,EAAE;AAAA,EACpB;AACF;AAkBO,SAAS,mBACd,QACA,SACA,kBACY;AACZ,MAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AACjC,QAAM,MAAM,WAAW,OAAO;AAC9B,QAAM,MAAkB,CAAC;AACzB,QAAM,MAAM,CAAC,MACX,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,IAAI,IAAI;AACpD,QAAM,MAAM,CAAC,MACX,OAAO,MAAM,YAAY,EAAE,SAAS,IAAI,IAAI;AAC9C,QAAM,MAAM,CAAC,MACX,KAAK,OAAO,MAAM,WAAY,IAAgC,CAAC;AAEjE,QAAM,UAAU,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,cAAc;AAC5D,QAAM,QAAQ,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,YAAY;AACxD,QAAM,QAAQ,OAAO,CAAC,GAAG,SAAS;AAClC,QAAM,YAAY,SAAS,aAAa,OAAO,CAAC,EAAG;AACnD,QAAM,UAAU,OAAO,aAAa,OAAO,OAAO,SAAS,CAAC,EAAG;AAC/D,QAAM,SAAS,eAAe;AAE9B,QAAM,OAAO,CACX,QACA,cACA,MACA,SACA,OACA,OACA,aAAa,OACC;AAAA,IACd,SAAS;AAAA,IACT;AAAA,IACA,cAAc,eAAe,UAAU,YAAY,IAAI;AAAA,IACvD;AAAA,IACA,MAAM;AAAA,IACN,mBAAmB,OAAO,OAAO;AAAA,IACjC,iBAAiB,OAAO,KAAK;AAAA,IAC7B,YAAY,aAAa,KAAK;AAAA,IAC9B,QAAQ,EAAE,MAAM,WAAW;AAAA,EAC7B;AAGA,QAAM,KAAK,IAAI,SAAS,OAAO;AAC/B,QAAM,YAAuD;AAAA,IAC3D,CAAC,OAAO,SAAS,GAAG;AAAA,IACpB,CAAC,OAAO,cAAc,GAAG;AAAA,IACzB,sBAAsB,IAAI,GAAG,MAAM,KAAK;AAAA,EAC1C;AACA,MAAI,MAAM,QAAQ,GAAG,aAAa,KAAK,GAAG,cAAc,SAAS,GAAG;AAClE,cAAU,oBAAoB,IAAI,GAAG,cAAc,IAAI,MAAM,EAAE,KAAK,GAAG;AAAA,EACzE;AACA,MAAI,OAAO;AACT,UAAM,KAAK,IAAI,MAAM,OAAO;AAC5B,UAAM,MAAM,IAAI,GAAG,oBAAoB;AACvC,QAAI,QAAQ,OAAW,WAAU,oCAAoC,IAAI;AACzE,UAAM,OAAO,IAAI,GAAG,YAAY;AAChC,QAAI,SAAS,OAAW,WAAU,iBAAiB,IAAI;AACvD,UAAM,QAAQ,IAAI,GAAG,UAAU;AAC/B,QAAI,UAAU,OAAW,WAAU,wBAAwB,IAAI;AAAA,EACjE;AACA,MAAI,KAAK,KAAK,QAAQ,kBAAkB,QAAQ,
WAAW,SAAS,SAAS,CAAC;AAG9E,QAAM,cAAc,oBAAI,IAAoB;AAC5C,QAAM,iBAAiB,oBAAI,IAAoC;AAC/D,MAAI;AACJ,MAAI;AAGJ,QAAM,aAAa,CAAC,UAAkB;AACpC,QAAI,CAAC,aAAc;AACnB,QAAI;AAAA,MACF,KAAK,aAAa,IAAI,QAAQ,cAAc,aAAa,OAAO,OAAO,aAAa,KAAK;AAAA,IAC3F;AACA,mBAAe;AAAA,EACjB;AAEA,aAAW,KAAK,QAAQ;AACtB,UAAM,IAAI,IAAI,EAAE,OAAO;AACvB,YAAQ,EAAE,MAAM;AAAA,MACd,KAAK,aAAa;AAChB,mBAAW,EAAE,SAAS;AACtB,cAAM,KAAK,eAAe;AAC1B,cAAM,QAAmD;AAAA,UACvD,CAAC,OAAO,SAAS,GAAG;AAAA,UACpB,2BAA2B,IAAI,EAAE,UAAU,KAAK;AAAA,UAChD,yBAAyB,IAAI,EAAE,QAAQ,KAAK;AAAA,UAC5C,0BAA0B,IAAI,EAAE,YAAY,KAAK;AAAA,QACnD;AACA,cAAM,IAAI,IAAI,EAAE,SAAS;AACzB,YAAI,EAAG,OAAM,4BAA4B,IAAI;AAC7C,uBAAe,EAAE,IAAI,OAAO,EAAE,WAAW,MAAM;AAC/C,yBAAiB;AACjB;AAAA,MACF;AAAA,MACA,KAAK,0BAA0B;AAC7B,cAAM,MAAM,IAAI,EAAE,cAAc;AAChC,YAAI,QAAQ,OAAW,aAAY,IAAI,KAAK,EAAE,SAAS;AACvD;AAAA,MACF;AAAA,MACA,KAAK,2BAA2B;AAC9B,cAAM,MAAM,IAAI,EAAE,cAAc;AAChC,YAAI,QAAQ,OAAW;AACvB,cAAM,QAAgC,CAAC;AACvC,cAAM,OAAO,IAAI,EAAE,SAAS;AAC5B,YAAI,KAAM,OAAM,4BAA4B,IAAI;AAChD,cAAM,MAAM,IAAI,EAAE,SAAS;AAC3B,YAAI,IAAK,OAAM,mBAAmB,IAAI;AACtC,cAAM,MAAM,IAAI,EAAE,OAAO;AACzB,YAAI,IAAK,OAAM,iBAAiB,IAAI;AACpC,cAAM,MAAM,IAAI,EAAE,SAAS;AAC3B,YAAI,IAAK,OAAM,mBAAmB,IAAI;AACtC,uBAAe,IAAI,KAAK,KAAK;AAC7B;AAAA,MACF;AAAA,MACA,KAAK,wBAAwB;AAC3B,cAAM,MAAM,IAAI,EAAE,cAAc,KAAK;AACrC,cAAM,QAAQ,YAAY,IAAI,GAAG,KAAK,EAAE;AACxC,cAAM,MAAM,IAAI,EAAE,KAAK;AACvB,cAAM,QAAmD;AAAA,UACvD,CAAC,OAAO,SAAS,GAAG;AAAA,UACpB,+BAA+B;AAAA,QACjC;AACA,cAAM,QAAQ,IAAI,EAAE,YAAY;AAChC,YAAI,MAAO,OAAM,OAAO,SAAS,IAAI;AACrC,cAAM,KAAK,IAAI,EAAE,UAAU;AAC3B,cAAM,QAAQ,IAAI,GAAG,KAAK;AAC1B,YAAI,UAAU,OAAW,OAAM,OAAO,WAAW,IAAI;AACrD,cAAM,SAAS,IAAI,GAAG,MAAM;AAC5B,YAAI,WAAW,OAAW,OAAM,OAAO,YAAY,IAAI;AACvD,cAAM,OAAO,IAAI,EAAE,OAAO;AAC1B,YAAI,SAAS,OAAW,OAAM,iBAAiB,IAAI;AACnD,cAAM,UAAU,IAAI,EAAE,OAAO;AAC7B,YAAI,OAAO,QAAQ,UAAU,UAAW,OAAM,2BAA2B,IAAI,QAAQ;AACrF,cAAM,QAAQ,IAAI,QAAQ,KAAK;AAC/B,YAAI,UAAU,OAAW,OAAM,2BAA2B,IAAI;AAC9D,YAAI,IAAK,OAAM,mBAAmB,IAAI;AACtC,eAAO,OAAO,OAAO,eAAe,IAAI,GAAG,KAAK,CAAC,CAAC;AAClD,YAAI;AAAA,UACF;AAAA,YACE,eAAe;AAAA,YAC
f,kBAAkB;AAAA,YAClB;AAAA,YACA;AAAA,YACA,EAAE;AAAA,YACF;AAAA,YACA,MAAM,IAAI;AAAA,UACZ;AAAA,QACF;AACA;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,YAAI,cAAc;AAChB,gBAAM,MAAM,IAAI,EAAE,QAAQ;AAC1B,cAAI,IAAK,cAAa,MAAM,sBAAsB,IAAI;AACtD,qBAAW,EAAE,SAAS;AAAA,QACxB;AACA,yBAAiB;AACjB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,aAAW,OAAO;AAClB,SAAO;AACT;AAEA,SAAS,sBAA8C;AACrD,MAAI,OAAO,YAAY,YAAa,QAAO,CAAC;AAC5C,QAAM,MAAM,QAAQ,IAAI;AACxB,MAAI,CAAC,IAAK,QAAO,CAAC;AAClB,QAAM,MAA8B,CAAC;AACrC,aAAW,QAAQ,IAAI,MAAM,GAAG,GAAG;AACjC,UAAM,KAAK,KAAK,QAAQ,GAAG;AAC3B,QAAI,KAAK,EAAG;AACZ,UAAM,MAAM,KAAK,MAAM,GAAG,EAAE,EAAE,KAAK;AACnC,UAAM,QAAQ,KAAK,MAAM,KAAK,CAAC,EAAE,KAAK;AACtC,QAAI,IAAK,KAAI,GAAG,IAAI;AAAA,EACtB;AACA,SAAO;AACT;AAEA,SAAS,aAAa,QAAoE;AACxF,SAAO,OAAO,QAAQ,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,KAAK,OAAO;AAAA,IACnD;AAAA,IACA,OACE,OAAO,UAAU,WACb,OAAO,UAAU,KAAK,IACpB,EAAE,UAAU,MAAM,SAAS,EAAE,IAC7B,EAAE,aAAa,MAAM,IACvB,OAAO,UAAU,YACf,EAAE,WAAW,MAAM,IACnB,EAAE,aAAa,MAAM;AAAA,EAC/B,EAAE;AACJ;AAEA,SAAS,OAAO,IAAoB;AAClC,UAAQ,OAAO,KAAK,MAAM,EAAE,CAAC,IAAI,UAAY,SAAS;AACxD;AAEA,SAAS,UAAU,IAAoB;AACrC,QAAM,UAAU,GAAG,QAAQ,MAAM,EAAE;AACnC,SAAO,QAAQ,MAAM,GAAG,EAAE,EAAE,OAAO,IAAI,GAAG;AAC5C;AAEA,SAAS,WAAW,IAAoB;AACtC,QAAM,UAAU,GAAG,QAAQ,MAAM,EAAE;AACnC,SAAO,QAAQ,MAAM,GAAG,EAAE,EAAE,OAAO,IAAI,GAAG;AAC5C;AAEA,SAAS,iBAAyB;AAChC,QAAM,QAAQ,IAAI,WAAW,CAAC;AAC9B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACvE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;AAaO,IAAM,4BAA4B;AAuDzC,IAAM,4BAA4B;AAQlC,eAAsB,eACpB,QACA,QAC+B;AAC/B,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,IAAI,MAAM,QAAQ,GAAG,UAAU,GAAG,UAAU,CAAC,EAAE;AACjF,QAAM,SACJ,QAAQ,WAAW,OAAO,YAAY,cAAc,QAAQ,IAAI,iBAAiB;AACnF,MAAI,CAAC;AACH,UAAM,IAAI,MAAM,4EAA4E;AAC9F,QAAM,OACJ,QAAQ,SACP,OAAO,YAAY,cAAc,QAAQ,IAAI,oBAAoB,WAClE;AACF,QAAM,MAAM,GAAG,KAAK,QAAQ,QAAQ,EAAE,CAAC;AACvC,QAAM,MAAM,MAAM,MAAM,KAAK;AAAA,IAC3B,QA
AQ;AAAA,IACR,SAAS;AAAA,MACP,gBAAgB;AAAA,MAChB,eAAe,UAAU,MAAM;AAAA,MAC/B,yBAAyB;AAAA,MACzB,GAAI,QAAQ,iBAAiB,EAAE,mBAAmB,OAAO,eAAe,IAAI,CAAC;AAAA,IAC/E;AAAA,IACA,MAAM,KAAK,UAAU,EAAE,aAAa,2BAA2B,OAAO,CAAC;AAAA,EACzE,CAAC;AACD,MAAI,SAAqF,CAAC;AAC1F,MAAI;AACF,aAAU,MAAM,IAAI,KAAK;AAAA,EAC3B,QAAQ;AAAA,EAER;AACA,SAAO;AAAA,IACL,IAAI,IAAI;AAAA,IACR,QAAQ,IAAI;AAAA,IACZ,UAAU,OAAO,aAAa,IAAI,KAAK,OAAO,SAAS;AAAA,IACvD,UAAU,OAAO,YAAY,CAAC;AAAA,EAChC;AACF;","names":[]}
|
|
@@ -45,15 +45,21 @@ var RuntimeRunStateError = class extends AgentEvalError {
|
|
|
45
45
|
super("validation", message, options);
|
|
46
46
|
}
|
|
47
47
|
};
|
|
48
|
+
var PlannerError = class extends AgentEvalError {
|
|
49
|
+
constructor(message, options) {
|
|
50
|
+
super("validation", message, options);
|
|
51
|
+
}
|
|
52
|
+
};
|
|
48
53
|
|
|
49
54
|
export {
|
|
50
55
|
SessionMismatchError,
|
|
51
56
|
BackendTransportError,
|
|
52
57
|
RuntimeRunStateError,
|
|
58
|
+
PlannerError,
|
|
53
59
|
AgentEvalError2 as AgentEvalError,
|
|
54
60
|
ConfigError,
|
|
55
61
|
JudgeError,
|
|
56
62
|
NotFoundError,
|
|
57
63
|
ValidationError
|
|
58
64
|
};
|
|
59
|
-
//# sourceMappingURL=chunk-
|
|
65
|
+
//# sourceMappingURL=chunk-SQSCRJ7U.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/errors.ts"],"sourcesContent":["/**\n * @stable\n *\n * Error taxonomy for `@tangle-network/agent-runtime`.\n *\n * Public contract: every error this package throws as part of its consumer-\n * facing API either extends `AgentEvalError` (re-exported here for ergonomic\n * `instanceof` checks at the runtime boundary) or extends one of the\n * runtime-specific subclasses below.\n *\n * Internal invariant guards (`throw new Error('this should never happen')`)\n * remain plain `Error` — they are programmer-mistake assertions, not\n * consumer-catchable contract failures.\n *\n * Subclassing strategy: where a runtime-specific failure maps cleanly to an\n * agent-eval code (validation, config, not_found), we re-use the agent-eval\n * subclass. Runtime-only failure modes (session resume against the wrong\n * backend, backend transport errors) get fresh subclasses that still carry an\n * `AgentEvalErrorCode` so cross-package handlers can pattern-match without\n * importing the runtime.\n */\n\nimport { AgentEvalError } from '@tangle-network/agent-eval'\n\nexport {\n AgentEvalError,\n type AgentEvalErrorCode,\n CaptureIntegrityError,\n ConfigError,\n JudgeError,\n NotFoundError,\n ReplayError,\n ValidationError,\n VerificationError,\n} from '@tangle-network/agent-eval'\n\n/**\n * @stable\n *\n * Caller asked to resume a session against a backend whose `kind` does not\n * match the session's recorded backend. 
This is a routing bug — the same\n * session id was reused across two different backend implementations — and\n * is not retryable without picking the right backend.\n */\nexport class SessionMismatchError extends AgentEvalError {\n readonly sessionBackend: string\n readonly requestedBackend: string\n\n constructor(sessionBackend: string, requestedBackend: string, options?: { cause?: unknown }) {\n super(\n 'validation',\n `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,\n options,\n )\n this.sessionBackend = sessionBackend\n this.requestedBackend = requestedBackend\n }\n}\n\n/**\n * @stable\n *\n * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success\n * status. Distinct from `JudgeError` (which is structural / unrecoverable)\n * because backend failures are sometimes retryable and consumers may want to\n * branch on the upstream status code.\n */\nexport class BackendTransportError extends AgentEvalError {\n readonly backend: string\n readonly status?: number\n /**\n * Truncated upstream response body (≤2 KiB) when available. Diagnostic\n * only — surfaces in `backend_error.error.body` and `final.error.body`\n * so operators can see \"free_tier_limit\", \"invalid_api_key\", etc. 
without\n * cracking the log line open.\n */\n readonly body?: string\n\n constructor(\n backend: string,\n message: string,\n options?: { cause?: unknown; status?: number; body?: string },\n ) {\n super('config', message, options)\n this.backend = backend\n this.status = options?.status\n this.body = options?.body\n }\n}\n\n/**\n * @stable\n *\n * A runtime-run lifecycle method was called in an order the state machine does\n * not allow: `persist()` before `complete()`, `complete()` twice, etc.\n */\nexport class RuntimeRunStateError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n"],"mappings":";AAsBA,SAAS,sBAAsB;AAE/B;AAAA,EACE,kBAAAA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAUA,IAAM,uBAAN,cAAmC,eAAe;AAAA,EAC9C;AAAA,EACA;AAAA,EAET,YAAY,gBAAwB,kBAA0B,SAA+B;AAC3F;AAAA,MACE;AAAA,MACA,iBAAiB,cAAc,iBAAiB,gBAAgB;AAAA,MAChE;AAAA,IACF;AACA,SAAK,iBAAiB;AACtB,SAAK,mBAAmB;AAAA,EAC1B;AACF;AAUO,IAAM,wBAAN,cAAoC,eAAe;AAAA,EAC/C;AAAA,EACA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA;AAAA,EAET,YACE,SACA,SACA,SACA;AACA,UAAM,UAAU,SAAS,OAAO;AAChC,SAAK,UAAU;AACf,SAAK,SAAS,SAAS;AACvB,SAAK,OAAO,SAAS;AAAA,EACvB;AACF;AAQO,IAAM,uBAAN,cAAmC,eAAe;AAAA,EACvD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;","names":["AgentEvalError"]}
|
|
1
|
+
{"version":3,"sources":["../src/errors.ts"],"sourcesContent":["/**\n * @stable\n *\n * Error taxonomy for `@tangle-network/agent-runtime`.\n *\n * Public contract: every error this package throws as part of its consumer-\n * facing API either extends `AgentEvalError` (re-exported here for ergonomic\n * `instanceof` checks at the runtime boundary) or extends one of the\n * runtime-specific subclasses below.\n *\n * Internal invariant guards (`throw new Error('this should never happen')`)\n * remain plain `Error` — they are programmer-mistake assertions, not\n * consumer-catchable contract failures.\n *\n * Subclassing strategy: where a runtime-specific failure maps cleanly to an\n * agent-eval code (validation, config, not_found), we re-use the agent-eval\n * subclass. Runtime-only failure modes (session resume against the wrong\n * backend, backend transport errors) get fresh subclasses that still carry an\n * `AgentEvalErrorCode` so cross-package handlers can pattern-match without\n * importing the runtime.\n */\n\nimport { AgentEvalError } from '@tangle-network/agent-eval'\n\nexport {\n AgentEvalError,\n type AgentEvalErrorCode,\n CaptureIntegrityError,\n ConfigError,\n JudgeError,\n NotFoundError,\n ReplayError,\n ValidationError,\n VerificationError,\n} from '@tangle-network/agent-eval'\n\n/**\n * @stable\n *\n * Caller asked to resume a session against a backend whose `kind` does not\n * match the session's recorded backend. 
This is a routing bug — the same\n * session id was reused across two different backend implementations — and\n * is not retryable without picking the right backend.\n */\nexport class SessionMismatchError extends AgentEvalError {\n readonly sessionBackend: string\n readonly requestedBackend: string\n\n constructor(sessionBackend: string, requestedBackend: string, options?: { cause?: unknown }) {\n super(\n 'validation',\n `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,\n options,\n )\n this.sessionBackend = sessionBackend\n this.requestedBackend = requestedBackend\n }\n}\n\n/**\n * @stable\n *\n * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success\n * status. Distinct from `JudgeError` (which is structural / unrecoverable)\n * because backend failures are sometimes retryable and consumers may want to\n * branch on the upstream status code.\n */\nexport class BackendTransportError extends AgentEvalError {\n readonly backend: string\n readonly status?: number\n /**\n * Truncated upstream response body (≤2 KiB) when available. Diagnostic\n * only — surfaces in `backend_error.error.body` and `final.error.body`\n * so operators can see \"free_tier_limit\", \"invalid_api_key\", etc. 
without\n * cracking the log line open.\n */\n readonly body?: string\n\n constructor(\n backend: string,\n message: string,\n options?: { cause?: unknown; status?: number; body?: string },\n ) {\n super('config', message, options)\n this.backend = backend\n this.status = options?.status\n this.body = options?.body\n }\n}\n\n/**\n * @stable\n *\n * A runtime-run lifecycle method was called in an order the state machine does\n * not allow: `persist()` before `complete()`, `complete()` twice, etc.\n */\nexport class RuntimeRunStateError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n\n/**\n * @stable\n *\n * The dynamic-loop planner returned an unusable topology move — the LLM emitted\n * no parseable envelope, an unknown `kind`, or a structurally-invalid move\n * (e.g. a fanout with zero tasks). This is a structural failure of the\n * agent-authored topology, not a config mistake: the planner ran but its output\n * cannot drive the kernel. Carries `validation` so cross-package handlers can\n * pattern-match without importing the runtime. 
Fail loud — never substitute a\n * default move, or the loop silently runs a topology nobody chose.\n */\nexport class PlannerError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n"],"mappings":";AAsBA,SAAS,sBAAsB;AAE/B;AAAA,EACE,kBAAAA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAUA,IAAM,uBAAN,cAAmC,eAAe;AAAA,EAC9C;AAAA,EACA;AAAA,EAET,YAAY,gBAAwB,kBAA0B,SAA+B;AAC3F;AAAA,MACE;AAAA,MACA,iBAAiB,cAAc,iBAAiB,gBAAgB;AAAA,MAChE;AAAA,IACF;AACA,SAAK,iBAAiB;AACtB,SAAK,mBAAmB;AAAA,EAC1B;AACF;AAUO,IAAM,wBAAN,cAAoC,eAAe;AAAA,EAC/C;AAAA,EACA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA;AAAA,EAET,YACE,SACA,SACA,SACA;AACA,UAAM,UAAU,SAAS,OAAO;AAChC,SAAK,UAAU;AACf,SAAK,SAAS,SAAS;AACvB,SAAK,OAAO,SAAS;AAAA,EACvB;AACF;AAQO,IAAM,uBAAN,cAAmC,eAAe;AAAA,EACvD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;AAaO,IAAM,eAAN,cAA2B,eAAe;AAAA,EAC/C,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;","names":["AgentEvalError"]}
|
|
@@ -21,17 +21,17 @@ import {
|
|
|
21
21
|
createDelegateResearchHandler,
|
|
22
22
|
createDelegationHistoryHandler,
|
|
23
23
|
createDelegationStatusHandler
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-HSX6PFZR.js";
|
|
25
25
|
import {
|
|
26
26
|
runLocalHarness
|
|
27
27
|
} from "./chunk-GLR25NG7.js";
|
|
28
28
|
import {
|
|
29
29
|
runLoop
|
|
30
|
-
} from "./chunk-
|
|
30
|
+
} from "./chunk-7KS6UEHB.js";
|
|
31
31
|
import {
|
|
32
32
|
coderProfile,
|
|
33
33
|
multiHarnessCoderFanout
|
|
34
|
-
} from "./chunk-
|
|
34
|
+
} from "./chunk-5QVVET72.js";
|
|
35
35
|
|
|
36
36
|
// src/mcp/executor.ts
|
|
37
37
|
function createSiblingSandboxExecutor(options) {
|
|
@@ -712,4 +712,4 @@ export {
|
|
|
712
712
|
createMcpServer,
|
|
713
713
|
createInProcessTransport
|
|
714
714
|
};
|
|
715
|
-
//# sourceMappingURL=chunk-
|
|
715
|
+
//# sourceMappingURL=chunk-VVHX5RKE.js.map
|
package/dist/improvement.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { AnalystFinding } from '@tangle-network/agent-eval';
|
|
1
|
+
import { AnalystFinding, LlmClientOptions } from '@tangle-network/agent-eval';
|
|
2
2
|
import { L as LocalHarness, r as runLocalHarness } from './local-harness-KrdFTY5R.js';
|
|
3
|
-
import { LabeledScenarioStore, WorktreeAdapter, ImprovementDriver } from '@tangle-network/agent-eval/campaign';
|
|
3
|
+
import { LabeledScenarioStore, WorktreeAdapter, ImprovementDriver, Scenario, DispatchContext, JudgeConfig, Gate, CampaignStorage, GateResult, RunImprovementLoopResult } from '@tangle-network/agent-eval/campaign';
|
|
4
4
|
import { S as SurfaceImprovementEdit } from './improvement-adapter-CaZxFxTd.js';
|
|
5
5
|
import { I as ImprovementAdapter } from './types-D_MXrmJP.js';
|
|
6
6
|
import 'node:child_process';
|
|
@@ -98,6 +98,131 @@ interface AgenticGeneratorOptions {
|
|
|
98
98
|
}
|
|
99
99
|
declare function agenticGenerator(opts?: AgenticGeneratorOptions): CandidateGenerator;
|
|
100
100
|
|
|
101
|
+
/**
|
|
102
|
+
* @experimental
|
|
103
|
+
*
|
|
104
|
+
* `optimizePrompt` — identity-gated optimization for any TEXT prompt surface
|
|
105
|
+
* (system prompt, planner prompt, judge rubric, skill doc).
|
|
106
|
+
*
|
|
107
|
+
* The text-surface sibling to this module's `improvementDriver` (the
|
|
108
|
+
* CODE-surface / worktree path). Both feed agent-eval's `runImprovementLoop`;
|
|
109
|
+
* this one defaults the driver to agent-eval's `gepaDriver` (reflective text
|
|
110
|
+
* mutator) and the gate to `heldOutGate`.
|
|
111
|
+
*
|
|
112
|
+
* IDENTITY-GATED BY CONSTRUCTION — the whole point. The loop runs evals,
|
|
113
|
+
* collects per-scenario signal, proposes candidates, and the gate compares
|
|
114
|
+
* candidate-vs-baseline ON THE HELDOUT. `result.prompt` is the baseline
|
|
115
|
+
* (identity) UNLESS the gate decided `'ship'`. So wiring a surface up is safe:
|
|
116
|
+
* a surface with no beneficial mutation simply keeps its baseline. You never
|
|
117
|
+
* regress by registering a prompt — you only ever improve when the held-out
|
|
118
|
+
* data earns it.
|
|
119
|
+
*
|
|
120
|
+
* Generic over the runtime: `runWithPrompt` is the only domain seam — given a
|
|
121
|
+
* candidate prompt + scenario, run it however the surface runs (sandbox
|
|
122
|
+
* `streamPrompt`, a `runLoop`, a direct model call) and return the artifact the
|
|
123
|
+
* judges score. The optimizer never assumes how a prompt is executed.
|
|
124
|
+
*/
|
|
125
|
+
|
|
126
|
+
/** Reflection config for the default `gepaDriver`. Omit when passing a custom
|
|
127
|
+
* `driver`. */
|
|
128
|
+
interface OptimizePromptReflection {
|
|
129
|
+
/** Router transport for the reflection model. */
|
|
130
|
+
llm: LlmClientOptions;
|
|
131
|
+
/** Model that performs the reflective rewrite. */
|
|
132
|
+
model: string;
|
|
133
|
+
/** What is being optimized — orients the reflection prompt. Default
|
|
134
|
+
* `'system prompt'`. */
|
|
135
|
+
target?: string;
|
|
136
|
+
/** Surface-specific mutation levers offered to the reflector. */
|
|
137
|
+
mutationPrimitives?: string[];
|
|
138
|
+
/** H2 (`## Foo`) headings that MUST survive every candidate. gepaDriver's
|
|
139
|
+
* only structural guard — load-bearing sections of the prompt should be
|
|
140
|
+
* `##` headings so a rewrite cannot drop them. */
|
|
141
|
+
preserveSections?: string[];
|
|
142
|
+
/** Max sentence-level edits per candidate vs the parent (a textual learning
|
|
143
|
+
* rate). Caps a rewrite from wiping prior rules in one generation. */
|
|
144
|
+
maxSentenceEdits?: number;
|
|
145
|
+
}
|
|
146
|
+
/**
 * Options accepted by `optimizePrompt`.
 *
 * @experimental
 */
interface OptimizePromptOptions<TScenario extends Scenario, TArtifact> {
    /** Prompt under optimization; the identity baseline the gate protects. */
    baselinePrompt: string;
    /**
     * The domain seam. Runs a candidate prompt against one scenario and
     * resolves to the artifact the judges score. The optimizer does not care
     * HOW the prompt is executed.
     */
    runWithPrompt: (prompt: string, scenario: TScenario, ctx: DispatchContext) => Promise<TArtifact>;
    /** Training pool; scored every generation to rank candidates. */
    scenarios: Array<TScenario>;
    /**
     * Scenarios held out of training and scored ONLY for the gate's
     * baseline-vs-winner delta. Disjoint from `scenarios`, which is what
     * makes promotion measure generalization rather than memorization.
     */
    holdoutScenarios: Array<TScenario>;
    /** Scorers: deterministic checks or LLM judges. */
    judges: Array<JudgeConfig<TArtifact, TScenario>>;
    /** Destination for artifacts + traces (an opaque key under in-memory storage). */
    runDir: string;
    /**
     * Source for the default driver, a `gepaDriver` built from these
     * settings. Required UNLESS `driver` is supplied.
     */
    reflection?: OptimizePromptReflection;
    /** Replaces the improvement strategy (custom driver / deterministic tests). */
    driver?: ImprovementDriver;
    /**
     * Replaces the promotion gate. The default is `heldOutGate` over
     * `holdoutScenarios`, which costs zero extra LLM calls. Wrap
     * `defaultProductionGate` for red-team/reward-hacking hardening on
     * production wiring.
     */
    gate?: Gate<TArtifact, TScenario>;
    /**
     * Minimum held-out composite lift required to ship; forwarded to the
     * default `heldOutGate`. When omitted the gate applies its own default.
     */
    deltaThreshold?: number;
    /** Number of candidates proposed per generation. Default 4. */
    populationSize?: number;
    /** Number of generations to run. Default 3. */
    maxGenerations?: number;
    /** Number of candidates carried into the next generation. Default 2. */
    promoteTopK?: number;
    /**
     * Storage backend. Use `inMemoryCampaignStorage()` for filesystem-less /
     * test runs. Default: Node filesystem.
     */
    storage?: CampaignStorage;
    /** Seed for reproducibility. Default 42. */
    seed?: number;
    /** Replicates per scenario, for CI bands. Default 1. */
    reps?: number;
    /** Maximum number of concurrent cells. Default 2. */
    maxConcurrency?: number;
    /** Test seam that overrides the wall clock. */
    now?: () => Date;
    /**
     * Action on a shipped gate: `'pr'` opens a PR, `'none'` just reports.
     * Default `'none'`.
     */
    autoOnPromote?: 'pr' | 'none';
    /**
     * Passed through to the improvement loop only when defined.
     * NOTE(review): presumably the GitHub owner used by the `'pr'` path; confirm.
     */
    ghOwner?: string;
    /**
     * Passed through to the improvement loop only when defined.
     * NOTE(review): presumably the GitHub repository used by the `'pr'` path; confirm.
     */
    ghRepo?: string;
}
|
|
198
|
+
/**
 * Outcome reported by `optimizePrompt`.
 *
 * @experimental
 */
interface OptimizePromptResult<TArtifact, TScenario extends Scenario> {
    /**
     * The prompt to USE. It is the identity (the baseline) unless the gate
     * shipped a winner, so callers can always assign `result.prompt`
     * unconditionally.
     */
    prompt: string;
    /** True only when the gate promoted a candidate over the baseline on holdout. */
    improved: boolean;
    /** Verdict returned by the gate (`'ship' | 'hold' | 'need_more_work' | ...`). */
    decision: GateResult['decision'];
    /** Human-readable reasons supplied by the gate. */
    reasons: Array<string>;
    /** Mean held-out composite score of the baseline. */
    baselineComposite: number;
    /** Mean held-out composite score of the winner candidate. */
    winnerComposite: number;
    /** Held-out lift (winner − baseline); the gate's own `delta` when it reported one. */
    delta: number;
    /**
     * Why the winner was proposed. Present when a shipped winner carried a
     * driver rationale.
     */
    rationale?: string;
    /** Unified baseline→winner diff (empty when the winner is the baseline). */
    diff: string;
    /** The complete loop result, for callers that need generations / campaigns. */
    raw: RunImprovementLoopResult<TArtifact, TScenario>;
}
|
|
223
|
+
/**
 * Optimizes a string prompt and reports what the promotion gate decided.
 *
 * The returned `prompt` is `opts.baselinePrompt` unless the gate's decision
 * is `'ship'`, so the result can be assigned unconditionally.
 *
 * Rejects with a `ConfigError` when neither `driver` nor `reflection` is
 * supplied, or when `scenarios` or `holdoutScenarios` is empty.
 *
 * @experimental
 */
|
|
224
|
+
declare function optimizePrompt<TScenario extends Scenario, TArtifact>(opts: OptimizePromptOptions<TScenario, TArtifact>): Promise<OptimizePromptResult<TArtifact, TScenario>>;
|
|
225
|
+
|
|
101
226
|
/**
|
|
102
227
|
* @experimental
|
|
103
228
|
*
|
|
@@ -117,4 +242,4 @@ interface ReflectiveGeneratorOptions {
|
|
|
117
242
|
}
|
|
118
243
|
declare function reflectiveGenerator(opts: ReflectiveGeneratorOptions): CandidateGenerator;
|
|
119
244
|
|
|
120
|
-
export { type AgenticGeneratorOptions, type CandidateGenerator, type ImprovementDriverOptions, type ReflectiveGeneratorOptions, agenticGenerator, improvementDriver, reflectiveGenerator };
|
|
245
|
+
export { type AgenticGeneratorOptions, type CandidateGenerator, type ImprovementDriverOptions, type OptimizePromptOptions, type OptimizePromptReflection, type OptimizePromptResult, type ReflectiveGeneratorOptions, agenticGenerator, improvementDriver, optimizePrompt, reflectiveGenerator };
|
package/dist/improvement.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runLocalHarness
|
|
3
3
|
} from "./chunk-GLR25NG7.js";
|
|
4
|
+
import {
|
|
5
|
+
ConfigError
|
|
6
|
+
} from "./chunk-SQSCRJ7U.js";
|
|
4
7
|
import "./chunk-DGUM43GV.js";
|
|
5
8
|
|
|
6
9
|
// src/improvement/agentic-generator.ts
|
|
@@ -127,6 +130,88 @@ function resolveFindings(ctx) {
|
|
|
127
130
|
return ctx.findings;
|
|
128
131
|
}
|
|
129
132
|
|
|
133
|
+
// src/improvement/optimize-prompt.ts
|
|
134
|
+
import { gepaDriver, heldOutGate, runImprovementLoop } from "@tangle-network/agent-eval/campaign";
|
|
135
|
+
/**
 * Optimizes a string prompt by running `runImprovementLoop` with a driver and
 * a promotion gate, then condenses the loop result into a caller-friendly
 * summary.
 *
 * @param {object} opts - Options; see `OptimizePromptOptions`.
 * @returns {Promise<object>} Summary whose `prompt` is the baseline unless the
 *   gate decided `'ship'` AND the winner surface is a string. `raw` carries
 *   the untouched loop result.
 * @throws {ConfigError} When neither `driver` nor `reflection` is supplied,
 *   when `scenarios` / `holdoutScenarios` is missing, not an array, or empty,
 *   or (from the dispatch callback) when a non-string surface is dispatched.
 */
async function optimizePrompt(opts) {
  if (!opts.driver && !opts.reflection) {
    throw new ConfigError(
      "optimizePrompt: pass `reflection` (builds the default gepaDriver) or a custom `driver`"
    );
  }
  // Check Array.isArray before `.length` so a missing option surfaces as a
  // ConfigError like every other misconfiguration, not a raw TypeError.
  if (!Array.isArray(opts.scenarios) || opts.scenarios.length === 0) {
    throw new ConfigError("optimizePrompt: `scenarios` must be non-empty");
  }
  if (!Array.isArray(opts.holdoutScenarios) || opts.holdoutScenarios.length === 0) {
    throw new ConfigError(
      "optimizePrompt: `holdoutScenarios` must be non-empty (the gate needs it)"
    );
  }

  // Copies only the listed options that were actually provided, so omitted
  // ones are left out entirely and the callee applies its own defaults.
  const definedOnly = (...keys) =>
    Object.fromEntries(
      keys.filter((key) => opts[key] !== undefined).map((key) => [key, opts[key]])
    );

  // Only invoked when no custom driver was passed; the first guard above
  // guarantees `opts.reflection` exists in that case.
  const buildDefaultDriver = () => {
    const {
      llm,
      model,
      target,
      mutationPrimitives,
      preserveSections,
      maxSentenceEdits,
    } = opts.reflection;
    const hasConstraints = Boolean(preserveSections) || maxSentenceEdits !== undefined;
    return gepaDriver({
      llm,
      model,
      target: target ?? "system prompt",
      mutationPrimitives,
      constraints: hasConstraints ? { preserveSections, maxSentenceEdits } : undefined,
    });
  };

  const driver = opts.driver ?? buildDefaultDriver();
  const gate = opts.gate ?? heldOutGate({
    scenarios: opts.holdoutScenarios,
    ...definedOnly("deltaThreshold"),
  });

  const result = await runImprovementLoop({
    baselineSurface: opts.baselinePrompt,
    dispatchWithSurface: (surface, scenario, ctx) => {
      if (typeof surface !== "string") {
        throw new ConfigError(
          "optimizePrompt: received a CodeSurface \u2014 this entry point optimizes string prompts only"
        );
      }
      return opts.runWithPrompt(surface, scenario, ctx);
    },
    driver,
    populationSize: opts.populationSize ?? 4,
    maxGenerations: opts.maxGenerations ?? 3,
    scenarios: opts.scenarios,
    holdoutScenarios: opts.holdoutScenarios,
    judges: opts.judges,
    gate,
    autoOnPromote: opts.autoOnPromote ?? "none",
    runDir: opts.runDir,
    ...definedOnly(
      "promoteTopK",
      "ghOwner",
      "ghRepo",
      "storage",
      "seed",
      "reps",
      "maxConcurrency",
      "now"
    ),
  });

  // A shipped winner can only be handed back as `prompt` when it is a string.
  // Tying `improved` to that keeps it from reporting true while `prompt`
  // silently falls back to the baseline.
  const winnerIsPrompt = typeof result.winnerSurface === "string";
  const improved = result.gateResult.decision === "ship" && winnerIsPrompt;
  const baselineComposite = meanComposite(result.baselineOnHoldout);
  const winnerComposite = meanComposite(result.winnerOnHoldout);

  return {
    prompt: improved ? result.winnerSurface : opts.baselinePrompt,
    improved,
    decision: result.gateResult.decision,
    reasons: result.gateResult.reasons,
    baselineComposite,
    winnerComposite,
    // Prefer the gate's own delta; otherwise derive it from the two means.
    delta: result.gateResult.delta ?? winnerComposite - baselineComposite,
    ...(improved && result.winnerRationale ? { rationale: result.winnerRationale } : {}),
    diff: result.promotedDiff,
    raw: result,
  };
}
|
|
208
|
+
/**
 * Averages the `meanComposite` value across every entry of
 * `campaign.aggregates.byScenario`.
 *
 * @param {object | null | undefined} campaign - Campaign result carrying
 *   `aggregates.byScenario`, an object whose values each expose a numeric
 *   `meanComposite`.
 * @returns {number} The unweighted mean, or 0 when there are no scenario
 *   aggregates, including when `campaign` or its `aggregates` is absent.
 */
function meanComposite(campaign) {
  // Treat a missing campaign / aggregates object like "no scenarios" rather
  // than throwing on the property access.
  const perScenario = Object.values(campaign?.aggregates?.byScenario ?? {});
  if (perScenario.length === 0) {
    return 0;
  }
  let total = 0;
  for (const { meanComposite: score } of perScenario) {
    total += score;
  }
  return total / perScenario.length;
}
|
|
214
|
+
|
|
130
215
|
// src/improvement/reflective-generator.ts
|
|
131
216
|
import { spawnSync as spawnSync2 } from "child_process";
|
|
132
217
|
function reflectiveGenerator(opts) {
|
|
@@ -156,6 +241,7 @@ function applyPatch(patch, cwd) {
|
|
|
156
241
|
export {
|
|
157
242
|
agenticGenerator,
|
|
158
243
|
improvementDriver,
|
|
244
|
+
optimizePrompt,
|
|
159
245
|
reflectiveGenerator
|
|
160
246
|
};
|
|
161
247
|
//# sourceMappingURL=improvement.js.map
|