@tangle-network/agent-runtime 0.50.0 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +1 -1
- package/dist/{chunk-CM2IK7VS.js → chunk-2OU7ZQPD.js} +38 -8
- package/dist/chunk-2OU7ZQPD.js.map +1 -0
- package/dist/{chunk-OM3YNZIW.js → chunk-4JI4BCBI.js} +5 -360
- package/dist/chunk-4JI4BCBI.js.map +1 -0
- package/dist/{chunk-NDM5VXZW.js → chunk-7SP2OVYZ.js} +7 -5
- package/dist/{chunk-NDM5VXZW.js.map → chunk-7SP2OVYZ.js.map} +1 -1
- package/dist/{chunk-RHW75JW5.js → chunk-BERLUBAP.js} +2 -2
- package/dist/{chunk-BKAIVNFA.js → chunk-COAVO6QB.js} +3 -3
- package/dist/chunk-G3RGMA7C.js +361 -0
- package/dist/chunk-G3RGMA7C.js.map +1 -0
- package/dist/{chunk-ML4IXGTV.js → chunk-V2K35HF2.js} +2 -2
- package/dist/improvement.d.ts +96 -8
- package/dist/improvement.js +191 -9
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +114 -4
- package/dist/index.js +144 -18
- package/dist/index.js.map +1 -1
- package/dist/intelligence.d.ts +423 -0
- package/dist/intelligence.js +427 -0
- package/dist/intelligence.js.map +1 -0
- package/dist/loop-runner-bin.js +4 -3
- package/dist/loops.d.ts +2 -1
- package/dist/loops.js +3 -1
- package/dist/mcp/bin.js +5 -4
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.js +6 -5
- package/dist/mcp/index.js.map +1 -1
- package/dist/platform.d.ts +120 -62
- package/dist/platform.js +68 -26
- package/dist/platform.js.map +1 -1
- package/dist/runtime.d.ts +47 -8
- package/dist/runtime.js +3 -1
- package/dist/workflow.js +1 -1
- package/package.json +6 -1
- package/skills/agent-runtime-adoption/SKILL.md +41 -26
- package/skills/build-with-agent-runtime/SKILL.md +143 -0
- package/skills/loop-writer/SKILL.md +6 -7
- package/dist/chunk-CM2IK7VS.js.map +0 -1
- package/dist/chunk-OM3YNZIW.js.map +0 -1
- /package/dist/{chunk-RHW75JW5.js.map → chunk-BERLUBAP.js.map} +0 -0
- /package/dist/{chunk-BKAIVNFA.js.map → chunk-COAVO6QB.js.map} +0 -0
- /package/dist/{chunk-ML4IXGTV.js.map → chunk-V2K35HF2.js.map} +0 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
// src/otel-export.ts
|
|
2
|
+
/**
 * OTLP instrumentation scope stamped on every exported `scopeSpans` entry.
 * The version must track the published package version; the previous literal
 * ("0.33.0") was stale relative to this 0.52.0 build.
 */
var SCOPE = { name: "@tangle-network/agent-runtime", version: "0.52.0" };
|
|
3
|
+
/**
 * OpenTelemetry GenAI semantic-convention attribute keys emitted on loop spans.
 * Only the `gen_ai.*` keys listed here are used; topology, verdict, placement
 * and cost attributes use the separate `tangle.*` namespace.
 */
var GEN_AI = {
  // Operation name of the span ("invoke_workflow" / "invoke_agent" in this file).
  operation: "gen_ai.operation.name",
  // Name of the agent that ran an iteration.
  agentName: "gen_ai.agent.name",
  // Conversation identifier; the loop root span stores the run id here.
  conversationId: "gen_ai.conversation.id",
  // Token usage counters for an iteration.
  inputTokens: "gen_ai.usage.input_tokens",
  outputTokens: "gen_ai.usage.output_tokens"
};
|
|
10
|
+
/**
 * Create a batching OTLP/HTTP (JSON) span exporter.
 *
 * The endpoint is taken from `config.endpoint`, falling back to the
 * `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable; batches are POSTed to
 * `<endpoint>/v1/traces`. Headers come from `config.headers`, falling back to
 * `OTEL_EXPORTER_OTLP_HEADERS`.
 *
 * Export is best-effort: network and serialization failures are swallowed so
 * telemetry can never crash the host runtime. A failed batch is dropped, not
 * retried.
 *
 * @param config Optional settings: `endpoint`, `headers`, `batchSize`
 *   (default 64), `flushIntervalMs` (default 5000), `serviceName`
 *   (default "agent-runtime") and `resourceAttributes`.
 * @returns An exporter with `exportSpan`, `flush` and `shutdown`, or
 *   `undefined` when no endpoint is configured.
 */
function createOtelExporter(config) {
  const resolvedEndpoint = config?.endpoint ?? (typeof process !== "undefined" ? process.env.OTEL_EXPORTER_OTLP_ENDPOINT : void 0);
  if (!resolvedEndpoint) return void 0;
  // Loop-invariant: strip trailing slashes once instead of on every flush.
  const url = `${resolvedEndpoint.replace(/\/+$/, "")}/v1/traces`;
  const headers = config?.headers ?? parseHeadersFromEnv();
  const batchSize = config?.batchSize ?? 64;
  const flushIntervalMs = config?.flushIntervalMs ?? 5e3;
  const serviceName = config?.serviceName ?? "agent-runtime";
  const resourceAttrs = config?.resourceAttributes ?? {};
  const pending = [];
  // Exports that have been started but whose HTTP request has not settled yet.
  // flush()/shutdown() await these so spans sent by a fire-and-forget flush
  // (batch-size trigger or timer) are not lost when the caller exits right after.
  const inFlight = new Set();
  let timer;
  let stopped = false;

  // POST one batch. Never rejects: every failure mode is swallowed on purpose.
  async function send(batch) {
    try {
      const body = {
        resourceSpans: [
          {
            resource: {
              attributes: toAttributes({
                "service.name": serviceName,
                ...resourceAttrs
              })
            },
            scopeSpans: [{ scope: SCOPE, spans: batch }]
          }
        ]
      };
      await fetch(url, {
        method: "POST",
        headers: { "content-type": "application/json", ...headers },
        body: JSON.stringify(body)
      });
    } catch {
      // Best-effort: telemetry export must not crash the runtime.
    }
  }

  // Start exporting whatever is queued, then wait for every outstanding export.
  async function doFlush() {
    if (pending.length > 0) {
      const task = send(pending.splice(0));
      inFlight.add(task);
      // `send` never rejects, so this continuation cannot leak a rejection.
      void task.then(() => inFlight.delete(task));
    }
    await Promise.all(inFlight);
  }

  const exporter = {
    /** Queue a span; triggers a background flush once `batchSize` is reached. */
    exportSpan(span) {
      if (stopped) return;
      pending.push(span);
      if (pending.length >= batchSize) {
        void doFlush();
      }
    },
    /** Send queued spans and wait for all outstanding exports to settle. */
    async flush() {
      await doFlush();
    },
    /** Stop accepting spans, cancel the interval timer and drain everything. */
    async shutdown() {
      stopped = true;
      if (timer !== void 0) {
        clearInterval(timer);
        timer = void 0;
      }
      await doFlush();
    }
  };

  timer = setInterval(() => {
    if (pending.length > 0) void doFlush();
  }, flushIntervalMs);
  // In Node the timer is an object with unref(); unref it so the periodic
  // flush does not keep the process alive on its own.
  if (typeof timer === "object" && "unref" in timer) {
    timer.unref();
  }
  return exporter;
}
|
|
77
|
+
/**
 * Convert a single loop trace event into one flat, zero-duration OTLP span.
 *
 * Every string / number / boolean entry of `event.payload` is copied onto the
 * span as a `loop.<key>` attribute; other value types are skipped. A missing
 * or non-object payload is treated as empty instead of throwing, matching the
 * tolerance of `buildLoopSpanNodes`.
 *
 * @param event Event with `kind`, `runId`, `timestamp` (ms) and `payload`.
 * @param traceId Trace id; dashes are stripped and it is padded/truncated to 32 chars.
 * @param parentSpanId Optional parent span id, normalized to 16 chars.
 * @returns The span; start and end time are both the event timestamp.
 */
function loopEventToOtelSpan(event, traceId, parentSpanId) {
  const spanId = generateSpanId();
  const attrs = {
    "loop.event_kind": event.kind,
    "loop.run_id": event.runId
  };
  // Object.entries(null | undefined) throws; telemetry must not crash the caller.
  const payload = event.payload && typeof event.payload === "object" ? event.payload : {};
  for (const [k, v] of Object.entries(payload)) {
    if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
      attrs[`loop.${k}`] = v;
    }
  }
  const ts = msToNs(event.timestamp);
  return {
    traceId: padTraceId(traceId),
    spanId,
    parentSpanId: parentSpanId ? padSpanId(parentSpanId) : void 0,
    name: event.kind,
    kind: 1,
    startTimeUnixNano: ts,
    endTimeUnixNano: ts,
    attributes: toAttributes(attrs),
    status: { code: 1 }
  };
}
|
|
101
|
+
/**
 * Map one loop run's ordered event stream to a nested list of OTLP spans.
 *
 * The tree itself is reconstructed by `buildLoopSpanNodes`; this function only
 * converts each node to wire shape. A node without a parent (the loop root) is
 * attached to `rootParentSpanId` when one is supplied, otherwise it has no
 * parent. A node carrying an error gets status code 2, all others code 1.
 *
 * @param events Ordered events of a single run.
 * @param traceId Trace id shared by every span (normalized to 32 chars).
 * @param rootParentSpanId Optional span id to parent the loop root under.
 * @returns One span per reconstructed node.
 */
function buildLoopOtelSpans(events, traceId, rootParentSpanId) {
  const normalizedTraceId = padTraceId(traceId);
  const resolveParent = (ownParentId) => {
    if (ownParentId) return padSpanId(ownParentId);
    if (rootParentSpanId) return padSpanId(rootParentSpanId);
    return void 0;
  };
  const spans = [];
  for (const node of buildLoopSpanNodes(events)) {
    spans.push({
      traceId: normalizedTraceId,
      spanId: node.spanId,
      parentSpanId: resolveParent(node.parentSpanId),
      name: node.name,
      kind: 1,
      startTimeUnixNano: msToNs(node.startMs),
      endTimeUnixNano: msToNs(node.endMs),
      attributes: toAttributes(node.attrs),
      status: { code: node.error ? 2 : 1 }
    });
  }
  return spans;
}
|
|
115
|
+
/**
 * Reconstruct the loop -> round -> iteration span tree for one run from its
 * ordered event stream.
 *
 * - The root "loop" node spans `loop.started` .. `loop.ended`, falling back to
 *   the first / last event timestamps when either event is absent.
 * - Each `loop.plan` opens a "loop.round" node under the root; it is closed by
 *   the next `loop.plan`, by `loop.decision`, or at the root end time.
 * - Each `loop.iteration.ended` emits a "loop.iteration" node parented to the
 *   round that is active at that moment (or to the root when none is). Its
 *   start is the matching `loop.iteration.started` timestamp when seen, and it
 *   inherits placement attributes recorded by `loop.iteration.dispatch`.
 *
 * Payload fields are read defensively: wrong-typed or missing values are
 * simply omitted from the attributes.
 *
 * @param events Ordered events (`kind`, `runId`, `timestamp`, `payload`) of one run.
 * @returns Nodes with `spanId`, `parentSpanId`, `name`, `kind`, `startMs`,
 *   `endMs`, `attrs` and `error`; an empty array for an empty stream.
 */
function buildLoopSpanNodes(events) {
  if (events.length === 0) return [];

  // Defensive readers for untyped payload fields.
  const asNumber = (v) => (typeof v === "number" && Number.isFinite(v) ? v : void 0);
  const asText = (v) => (typeof v === "string" && v.length > 0 ? v : void 0);
  const asRecord = (v) => (v && typeof v === "object" ? v : {});
  const setIfDefined = (target, key, value) => {
    if (value !== void 0) target[key] = value;
  };

  const nodes = [];
  const addNode = (spanId, parentSpanId, name, kind, startMs, endMs, attrs, error = false) => {
    nodes.push({ spanId, parentSpanId, name, kind, startMs, endMs, attrs, error });
  };

  // ---- root node ---------------------------------------------------------
  const startedEvent = events.find((e) => e.kind === "loop.started");
  const endedEvent = events.find((e) => e.kind === "loop.ended");
  const rootStart = startedEvent?.timestamp ?? events[0].timestamp;
  const rootEnd = endedEvent?.timestamp ?? events[events.length - 1].timestamp;
  const rootId = generateSpanId();

  const startedPayload = asRecord(startedEvent?.payload);
  const rootAttrs = {
    [GEN_AI.operation]: "invoke_workflow",
    [GEN_AI.conversationId]: events[0]?.runId ?? "",
    "tangle.loop.driver": asText(startedPayload.driver) ?? "driver"
  };
  const agentNames = startedPayload.agentRunNames;
  if (Array.isArray(agentNames) && agentNames.length > 0) {
    rootAttrs["tangle.loop.agents"] = agentNames.map(String).join(",");
  }
  if (endedEvent) {
    const endedPayload = asRecord(endedEvent.payload);
    setIfDefined(rootAttrs, "tangle.loop.winner.iteration_index", asNumber(endedPayload.winnerIterationIndex));
    setIfDefined(rootAttrs, "tangle.cost.usd", asNumber(endedPayload.totalCostUsd));
    setIfDefined(rootAttrs, "tangle.loop.duration_ms", asNumber(endedPayload.durationMs));
    setIfDefined(rootAttrs, "tangle.loop.iterations", asNumber(endedPayload.iterations));
  }
  addNode(rootId, void 0, "loop", "loop", rootStart, rootEnd, rootAttrs);

  // ---- rounds + iterations -------------------------------------------------
  const iterationStartMs = new Map();
  const placementByIteration = new Map();
  // Round that newly ended iterations are parented to.
  let activeRoundId;
  // Round whose node has not been emitted yet (its end time is still unknown).
  let openRound;
  const closeOpenRound = (endMs) => {
    if (!openRound) return;
    addNode(openRound.id, rootId, "loop.round", "round", openRound.start, endMs, openRound.attrs);
    openRound = void 0;
  };

  for (const event of events) {
    const payload = asRecord(event.payload);
    switch (event.kind) {
      case "loop.plan": {
        // A new plan implicitly ends the previous round.
        closeOpenRound(event.timestamp);
        const roundId = generateSpanId();
        const roundIndex = asNumber(payload.roundIndex) ?? 0;
        const roundAttrs = {
          [GEN_AI.operation]: "invoke_workflow",
          "tangle.loop.round.index": roundIndex,
          "tangle.loop.move.kind": asText(payload.moveKind) ?? "unknown",
          "tangle.loop.move.round": roundIndex,
          "tangle.loop.move.width": asNumber(payload.plannedCount) ?? 0
        };
        setIfDefined(roundAttrs, "tangle.loop.move.rationale", asText(payload.rationale));
        setIfDefined(roundAttrs, "tangle.loop.move.parent_index", asNumber(payload.parentIndex));
        const childIndices = payload.childIndices;
        if (Array.isArray(childIndices) && childIndices.length > 0) {
          roundAttrs["tangle.loop.move.child_indices"] = childIndices.map(String).join(",");
        }
        openRound = { id: roundId, start: event.timestamp, attrs: roundAttrs };
        activeRoundId = roundId;
        break;
      }
      case "loop.iteration.started": {
        const index = asNumber(payload.iterationIndex);
        if (index !== void 0) iterationStartMs.set(index, event.timestamp);
        break;
      }
      case "loop.iteration.dispatch": {
        const index = asNumber(payload.iterationIndex);
        if (index === void 0) break;
        const placement = {};
        setIfDefined(placement, "tangle.loop.placement.kind", asText(payload.placement));
        setIfDefined(placement, "tangle.sandbox.id", asText(payload.sandboxId));
        setIfDefined(placement, "tangle.fleet.id", asText(payload.fleetId));
        setIfDefined(placement, "tangle.machine.id", asText(payload.machineId));
        placementByIteration.set(index, placement);
        break;
      }
      case "loop.iteration.ended": {
        const index = asNumber(payload.iterationIndex) ?? 0;
        // Without a recorded start the iteration collapses to zero duration.
        const startMs = iterationStartMs.get(index) ?? event.timestamp;
        const errorText = asText(payload.error);
        const iterationAttrs = {
          [GEN_AI.operation]: "invoke_agent",
          "tangle.loop.iteration.index": index
        };
        setIfDefined(iterationAttrs, GEN_AI.agentName, asText(payload.agentRunName));
        const usage = asRecord(payload.tokenUsage);
        setIfDefined(iterationAttrs, GEN_AI.inputTokens, asNumber(usage.input));
        setIfDefined(iterationAttrs, GEN_AI.outputTokens, asNumber(usage.output));
        setIfDefined(iterationAttrs, "tangle.cost.usd", asNumber(payload.costUsd));
        const verdict = asRecord(payload.verdict);
        if (typeof verdict.valid === "boolean") {
          iterationAttrs["tangle.loop.verdict.valid"] = verdict.valid;
        }
        setIfDefined(iterationAttrs, "tangle.loop.verdict.score", asNumber(verdict.score));
        setIfDefined(iterationAttrs, "tangle.loop.error", errorText);
        setIfDefined(iterationAttrs, "tangle.loop.iteration.group_id", asNumber(payload.groupId));
        setIfDefined(iterationAttrs, "tangle.loop.iteration.parent_index", asNumber(payload.parentIndex));
        setIfDefined(iterationAttrs, "tangle.loop.iteration.duration_ms", asNumber(payload.durationMs));
        setIfDefined(iterationAttrs, "tangle.loop.iteration.output_preview", asText(payload.outputPreview));
        // Placement recorded at dispatch time is merged last.
        Object.assign(iterationAttrs, placementByIteration.get(index) ?? {});
        addNode(
          generateSpanId(),
          activeRoundId ?? rootId,
          "loop.iteration",
          "branch",
          startMs,
          event.timestamp,
          iterationAttrs,
          errorText !== void 0
        );
        break;
      }
      case "loop.decision": {
        if (openRound) {
          setIfDefined(openRound.attrs, "tangle.loop.decision", asText(payload.decision));
          closeOpenRound(event.timestamp);
        }
        // Iterations that end after the decision attach to the root.
        activeRoundId = void 0;
        break;
      }
    }
  }
  // A round still open at the end of the stream closes with the root.
  closeOpenRound(rootEnd);
  return nodes;
}
|
|
281
|
+
/**
 * Parse `OTEL_EXPORTER_OTLP_HEADERS` into a header record.
 *
 * The variable holds comma-separated `key=value` pairs. Only the first `=` of
 * a pair separates key from value, so values may themselves contain `=`.
 * Pairs without `=` or with an empty key are skipped. Keys and values are
 * trimmed and percent-decoded, because the OTLP exporter specification defines
 * this variable in W3C Baggage format (e.g. `Authorization=Bearer%20abc`).
 * Text that is not valid percent-encoding is kept verbatim.
 *
 * @returns The parsed headers; an empty object when `process` is unavailable
 *   or the variable is unset/empty.
 */
function parseHeadersFromEnv() {
  if (typeof process === "undefined") return {};
  const raw = process.env.OTEL_EXPORTER_OTLP_HEADERS;
  if (!raw) return {};
  const decode = (text) => {
    try {
      return decodeURIComponent(text);
    } catch {
      // Malformed escape sequence (e.g. a bare "%"): fall back to the raw text.
      return text;
    }
  };
  const out = {};
  for (const pair of raw.split(",")) {
    const eq = pair.indexOf("=");
    if (eq < 0) continue;
    const key = decode(pair.slice(0, eq).trim());
    const value = decode(pair.slice(eq + 1).trim());
    if (key) out[key] = value;
  }
  return out;
}
|
|
295
|
+
/**
 * Convert a flat record into the OTLP/JSON attribute list
 * (`[{ key, value: { stringValue | intValue | doubleValue | boolValue } }]`).
 *
 * Numbers are sent as `intValue` (a decimal string) only when they are safe
 * integers. Larger integral doubles are sent as `doubleValue`: their
 * `toString()` can be exponent notation ("1e+21") or inexact digits, neither
 * of which is a valid int64 string.
 *
 * @param record Attribute values keyed by attribute name (string, number or boolean).
 * @returns One attribute entry per own enumerable property, in insertion order.
 */
function toAttributes(record) {
  const encode = (value) => {
    if (typeof value === "number") {
      return Number.isSafeInteger(value) ? { intValue: value.toString() } : { doubleValue: value };
    }
    if (typeof value === "boolean") return { boolValue: value };
    return { stringValue: value };
  };
  return Object.entries(record).map(([key, value]) => ({ key, value: encode(value) }));
}
|
|
301
|
+
/**
 * Convert a millisecond timestamp to a nanosecond decimal string.
 *
 * Fractional milliseconds are floored. A missing or non-finite input yields
 * "0" instead of letting `BigInt()` throw a RangeError, so one bad timestamp
 * cannot crash best-effort telemetry.
 *
 * @param ms Timestamp in milliseconds.
 * @returns Nanoseconds as a base-10 string.
 */
function msToNs(ms) {
  const wholeMs = Math.floor(ms);
  if (!Number.isFinite(wholeMs)) return "0";
  return (BigInt(wholeMs) * 1000000n).toString();
}
|
|
304
|
+
/**
 * Normalize an id to the 16-character span-id width: dashes are removed, the
 * result is truncated to 16 characters and right-padded with "0".
 *
 * @param id Source identifier (for example a UUID).
 * @returns A 16-character string.
 */
function padSpanId(id) {
  const withoutDashes = id.split("-").join("");
  const truncated = withoutDashes.substring(0, 16);
  return truncated.padEnd(16, "0");
}
|
|
308
|
+
/**
 * Normalize an id to the 32-character trace-id width: dashes are removed, the
 * result is truncated to 32 characters and right-padded with "0".
 *
 * @param id Source identifier (for example a UUID).
 * @returns A 32-character string.
 */
function padTraceId(id) {
  const withoutDashes = id.split("-").join("");
  const truncated = withoutDashes.substring(0, 32);
  return truncated.padEnd(32, "0");
}
|
|
312
|
+
/**
 * Generate a random span id: 8 bytes rendered as 16 lowercase hex characters.
 * Uses `crypto.getRandomValues` when the runtime exposes it and falls back to
 * `Math.random()` otherwise.
 *
 * @returns A 16-character hex string.
 */
function generateSpanId() {
  const bytes = new Uint8Array(8);
  const webCrypto = globalThis.crypto;
  if (typeof webCrypto?.getRandomValues === "function") {
    webCrypto.getRandomValues(bytes);
  } else {
    bytes.forEach((_, index) => {
      bytes[index] = Math.floor(Math.random() * 256);
    });
  }
  let hex = "";
  bytes.forEach((byte) => {
    hex += byte.toString(16).padStart(2, "0");
  });
  return hex;
}
|
|
321
|
+
var INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
|
|
322
|
+
var DEFAULT_INTELLIGENCE_BASE = "https://intelligence.tangle.tools";
|
|
323
|
+
async function exportEvalRuns(events, config) {
|
|
324
|
+
if (events.length === 0) return { ok: true, status: 0, accepted: 0, rejected: [] };
|
|
325
|
+
const apiKey = config?.apiKey ?? (typeof process !== "undefined" ? process.env.TANGLE_API_KEY : void 0);
|
|
326
|
+
if (!apiKey)
|
|
327
|
+
throw new Error("exportEvalRuns: apiKey required (pass config.apiKey or set TANGLE_API_KEY)");
|
|
328
|
+
const base = config?.base ?? (typeof process !== "undefined" ? process.env.INTELLIGENCE_BASE : void 0) ?? DEFAULT_INTELLIGENCE_BASE;
|
|
329
|
+
const url = `${base.replace(/\/+$/, "")}/v1/ingest/eval-runs`;
|
|
330
|
+
const res = await fetch(url, {
|
|
331
|
+
method: "POST",
|
|
332
|
+
headers: {
|
|
333
|
+
"content-type": "application/json",
|
|
334
|
+
authorization: `Bearer ${apiKey}`,
|
|
335
|
+
"X-Tangle-Wire-Version": INTELLIGENCE_WIRE_VERSION,
|
|
336
|
+
...config?.idempotencyKey ? { "Idempotency-Key": config.idempotencyKey } : {}
|
|
337
|
+
},
|
|
338
|
+
body: JSON.stringify({ wireVersion: INTELLIGENCE_WIRE_VERSION, events })
|
|
339
|
+
});
|
|
340
|
+
let parsed = {};
|
|
341
|
+
try {
|
|
342
|
+
parsed = await res.json();
|
|
343
|
+
} catch {
|
|
344
|
+
}
|
|
345
|
+
return {
|
|
346
|
+
ok: res.ok,
|
|
347
|
+
status: res.status,
|
|
348
|
+
accepted: parsed.accepted ?? (res.ok ? events.length : 0),
|
|
349
|
+
rejected: parsed.rejected ?? []
|
|
350
|
+
};
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
export {
|
|
354
|
+
createOtelExporter,
|
|
355
|
+
loopEventToOtelSpan,
|
|
356
|
+
buildLoopOtelSpans,
|
|
357
|
+
buildLoopSpanNodes,
|
|
358
|
+
INTELLIGENCE_WIRE_VERSION,
|
|
359
|
+
exportEvalRuns
|
|
360
|
+
};
|
|
361
|
+
//# sourceMappingURL=chunk-G3RGMA7C.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/otel-export.ts"],"sourcesContent":["/**\n * OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.\n *\n * Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env\n * when no explicit config is given. Keeps the runtime dep-free from\n * @opentelemetry/sdk-trace-base — minimal OTLP/JSON serializer.\n *\n * The exporter accepts both raw OtelSpan objects and LoopTraceEvents\n * (which get converted to OTLP spans automatically).\n */\n\nexport interface OtelExportConfig {\n /** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */\n endpoint?: string\n /** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */\n headers?: Record<string, string>\n /** Batch size before flush. Default 64. */\n batchSize?: number\n /** Flush interval ms. Default 5000. */\n flushIntervalMs?: number\n /** Resource attributes stamped on every export. */\n resourceAttributes?: Record<string, string | number | boolean>\n /** Service name. Default 'agent-runtime'. */\n serviceName?: string\n}\n\nexport interface OtelExporter {\n /** Export a span. */\n exportSpan(span: OtelSpan): void\n /** Force flush pending spans. */\n flush(): Promise<void>\n /** Shutdown cleanly. 
*/\n shutdown(): Promise<void>\n}\n\nexport interface OtelSpan {\n traceId: string\n spanId: string\n parentSpanId?: string\n name: string\n kind?: number\n startTimeUnixNano: string\n endTimeUnixNano: string\n attributes?: OtelAttribute[]\n status?: { code: number; message?: string }\n}\n\nexport interface OtelAttribute {\n key: string\n value: { stringValue?: string; intValue?: string; doubleValue?: number; boolValue?: boolean }\n}\n\ninterface OtlpResourceSpans {\n resource: { attributes: OtelAttribute[] }\n scopeSpans: Array<{ scope: { name: string; version: string }; spans: OtelSpan[] }>\n}\n\ninterface OtlpExport {\n resourceSpans: OtlpResourceSpans[]\n}\n\nconst SCOPE = { name: '@tangle-network/agent-runtime', version: '0.33.0' }\n\n/**\n * Current (non-deprecated) OpenTelemetry GenAI semantic-convention keys.\n * Registry: https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/\n * NB: `gen_ai.system` / `gen_ai.usage.prompt_tokens` / `completion_tokens` are\n * DEPRECATED — do not emit them. We use `provider.name` + `input/output_tokens`.\n */\nconst GEN_AI = {\n operation: 'gen_ai.operation.name',\n agentName: 'gen_ai.agent.name',\n conversationId: 'gen_ai.conversation.id',\n inputTokens: 'gen_ai.usage.input_tokens',\n outputTokens: 'gen_ai.usage.output_tokens',\n} as const\n\n/**\n * Create an OTEL exporter. Returns undefined when no endpoint is configured.\n */\nexport function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined {\n const resolvedEndpoint =\n config?.endpoint ??\n (typeof process !== 'undefined' ? process.env.OTEL_EXPORTER_OTLP_ENDPOINT : undefined)\n if (!resolvedEndpoint) return undefined\n const endpoint: string = resolvedEndpoint\n\n const headers = config?.headers ?? parseHeadersFromEnv()\n const batchSize = config?.batchSize ?? 64\n const flushIntervalMs = config?.flushIntervalMs ?? 5000\n const serviceName = config?.serviceName ?? 
'agent-runtime'\n const resourceAttrs = config?.resourceAttributes ?? {}\n\n const pending: OtelSpan[] = []\n let timer: ReturnType<typeof setInterval> | undefined\n let stopped = false\n\n const exporter: OtelExporter = {\n exportSpan(span: OtelSpan): void {\n if (stopped) return\n pending.push(span)\n if (pending.length >= batchSize) {\n void doFlush()\n }\n },\n\n async flush(): Promise<void> {\n await doFlush()\n },\n\n async shutdown(): Promise<void> {\n stopped = true\n if (timer !== undefined) {\n clearInterval(timer)\n timer = undefined\n }\n await doFlush()\n },\n }\n\n timer = setInterval(() => {\n if (pending.length > 0) void doFlush()\n }, flushIntervalMs)\n if (typeof timer === 'object' && 'unref' in timer) {\n ;(timer as NodeJS.Timeout).unref()\n }\n\n async function doFlush(): Promise<void> {\n if (pending.length === 0) return\n const batch = pending.splice(0)\n const body: OtlpExport = {\n resourceSpans: [\n {\n resource: {\n attributes: toAttributes({\n 'service.name': serviceName,\n ...resourceAttrs,\n }),\n },\n scopeSpans: [{ scope: SCOPE, spans: batch }],\n },\n ],\n }\n const url = `${endpoint.replace(/\\/+$/, '')}/v1/traces`\n try {\n await fetch(url, {\n method: 'POST',\n headers: { 'content-type': 'application/json', ...headers },\n body: JSON.stringify(body),\n })\n } catch {\n // Best-effort — telemetry export must not crash the runtime.\n }\n }\n\n return exporter\n}\n\n/**\n * Convert a LoopTraceEvent into an OtelSpan for export.\n */\nexport function loopEventToOtelSpan(\n event: {\n kind: string\n runId: string\n timestamp: number\n payload: object\n },\n traceId: string,\n parentSpanId?: string,\n): OtelSpan {\n const spanId = generateSpanId()\n const attrs: Record<string, string | number | boolean> = {\n 'loop.event_kind': event.kind,\n 'loop.run_id': event.runId,\n }\n for (const [k, v] of Object.entries(event.payload)) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n attrs[`loop.${k}`] = v\n 
}\n }\n const ts = msToNs(event.timestamp)\n return {\n traceId: padTraceId(traceId),\n spanId,\n parentSpanId: parentSpanId ? padSpanId(parentSpanId) : undefined,\n name: event.kind,\n kind: 1,\n startTimeUnixNano: ts,\n endTimeUnixNano: ts,\n attributes: toAttributes(attrs),\n status: { code: 1 },\n }\n}\n\n/**\n * Sink-neutral node in a reconstructed loop span tree. The root node's\n * `parentSpanId` is `undefined` — sinks decide how to parent it (the OTEL\n * mapper attaches the inherited delegation span; the delegation journal\n * leaves it as the tree root).\n */\nexport interface LoopSpanNode {\n spanId: string\n parentSpanId?: string\n /** `'loop'` | `'loop.round'` | `'loop.iteration'`. */\n name: string\n /** Topology level: loop root, plan round, or iteration branch. */\n kind: 'loop' | 'round' | 'branch'\n startMs: number\n endMs: number\n attrs: Record<string, string | number | boolean>\n /** True when the iteration carried an error — maps to OTEL status code 2. */\n error: boolean\n}\n\n/**\n * Build a nested, real-duration OTLP span tree for ONE loop run from its full\n * ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,\n * zero-duration span per event), this reconstructs the topology hierarchy a\n * GenAI trace viewer renders natively:\n *\n * loop (invoke_workflow)\n * └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}\n * ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement\n * └─ …\n *\n * Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and\n * a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /\n * verdict / placement / cost (not yet standardized). Pure: feed it a buffered\n * per-runId event array (e.g. 
flushed on `loop.ended`) and export the result.\n */\nexport function buildLoopOtelSpans(\n events: ReadonlyArray<{ kind: string; runId: string; timestamp: number; payload: object }>,\n traceId: string,\n rootParentSpanId?: string,\n): OtelSpan[] {\n const tid = padTraceId(traceId)\n return buildLoopSpanNodes(events).map((node) => ({\n traceId: tid,\n spanId: node.spanId,\n parentSpanId: node.parentSpanId\n ? padSpanId(node.parentSpanId)\n : rootParentSpanId\n ? padSpanId(rootParentSpanId)\n : undefined,\n name: node.name,\n kind: 1,\n startTimeUnixNano: msToNs(node.startMs),\n endTimeUnixNano: msToNs(node.endMs),\n attributes: toAttributes(node.attrs),\n status: { code: node.error ? 2 : 1 },\n }))\n}\n\n/**\n * Sink-neutral core behind {@link buildLoopOtelSpans}: reconstruct the\n * loop → round → branch span tree from one run's ordered `LoopTraceEvent`\n * stream. Consumed by the OTEL mapper above and by the MCP delegation\n * journal's compact trace tee — one topology reconstruction, two sinks.\n * Tolerates partial streams (a run that never reached `loop.ended` closes\n * at the last observed event's timestamp).\n */\nexport function buildLoopSpanNodes(\n events: ReadonlyArray<{ kind: string; runId: string; timestamp: number; payload: object }>,\n): LoopSpanNode[] {\n if (events.length === 0) return []\n const out: LoopSpanNode[] = []\n const num = (v: unknown): number | undefined =>\n typeof v === 'number' && Number.isFinite(v) ? v : undefined\n const str = (v: unknown): string | undefined =>\n typeof v === 'string' && v.length > 0 ? v : undefined\n const rec = (v: unknown): Record<string, unknown> =>\n v && typeof v === 'object' ? (v as Record<string, unknown>) : {}\n\n const started = events.find((e) => e.kind === 'loop.started')\n const ended = events.find((e) => e.kind === 'loop.ended')\n const runId = events[0]?.runId ?? ''\n const rootStart = started?.timestamp ?? events[0]!.timestamp\n const rootEnd = ended?.timestamp ?? 
events[events.length - 1]!.timestamp\n const rootId = generateSpanId()\n\n const make = (\n spanId: string,\n parentSpanId: string | undefined,\n name: string,\n kind: LoopSpanNode['kind'],\n startMs: number,\n endMs: number,\n attrs: Record<string, string | number | boolean>,\n error = false,\n ): LoopSpanNode => ({\n spanId,\n parentSpanId,\n name,\n kind,\n startMs,\n endMs,\n attrs,\n error,\n })\n\n // root\n const sp = rec(started?.payload)\n const rootAttrs: Record<string, string | number | boolean> = {\n [GEN_AI.operation]: 'invoke_workflow',\n [GEN_AI.conversationId]: runId,\n 'tangle.loop.driver': str(sp.driver) ?? 'driver',\n }\n if (Array.isArray(sp.agentRunNames) && sp.agentRunNames.length > 0) {\n rootAttrs['tangle.loop.agents'] = sp.agentRunNames.map(String).join(',')\n }\n if (ended) {\n const ep = rec(ended.payload)\n const win = num(ep.winnerIterationIndex)\n if (win !== undefined) rootAttrs['tangle.loop.winner.iteration_index'] = win\n const cost = num(ep.totalCostUsd)\n if (cost !== undefined) rootAttrs['tangle.cost.usd'] = cost\n const dur = num(ep.durationMs)\n if (dur !== undefined) rootAttrs['tangle.loop.duration_ms'] = dur\n const iters = num(ep.iterations)\n if (iters !== undefined) rootAttrs['tangle.loop.iterations'] = iters\n }\n out.push(make(rootId, undefined, 'loop', 'loop', rootStart, rootEnd, rootAttrs))\n\n // rounds + iterations\n const iterStartTs = new Map<number, number>()\n const placementByIdx = new Map<number, Record<string, string>>()\n let currentRoundId: string | undefined\n let pendingRound:\n | { id: string; start: number; attrs: Record<string, string | number | boolean> }\n | undefined\n const flushRound = (endMs: number) => {\n if (!pendingRound) return\n out.push(\n make(\n pendingRound.id,\n rootId,\n 'loop.round',\n 'round',\n pendingRound.start,\n endMs,\n pendingRound.attrs,\n ),\n )\n pendingRound = undefined\n }\n\n for (const e of events) {\n const p = rec(e.payload)\n switch (e.kind) {\n case 'loop.plan': {\n 
flushRound(e.timestamp)\n const id = generateSpanId()\n const roundIdx = num(p.roundIndex) ?? 0\n const attrs: Record<string, string | number | boolean> = {\n [GEN_AI.operation]: 'invoke_workflow',\n 'tangle.loop.round.index': roundIdx,\n 'tangle.loop.move.kind': str(p.moveKind) ?? 'unknown',\n 'tangle.loop.move.round': roundIdx,\n 'tangle.loop.move.width': num(p.plannedCount) ?? 0,\n }\n const r = str(p.rationale)\n if (r) attrs['tangle.loop.move.rationale'] = r\n const parent = num(p.parentIndex)\n if (parent !== undefined) attrs['tangle.loop.move.parent_index'] = parent\n if (Array.isArray(p.childIndices) && p.childIndices.length > 0) {\n attrs['tangle.loop.move.child_indices'] = p.childIndices.map(String).join(',')\n }\n pendingRound = { id, start: e.timestamp, attrs }\n currentRoundId = id\n break\n }\n case 'loop.iteration.started': {\n const idx = num(p.iterationIndex)\n if (idx !== undefined) iterStartTs.set(idx, e.timestamp)\n break\n }\n case 'loop.iteration.dispatch': {\n const idx = num(p.iterationIndex)\n if (idx === undefined) break\n const place: Record<string, string> = {}\n const kind = str(p.placement)\n if (kind) place['tangle.loop.placement.kind'] = kind\n const sid = str(p.sandboxId)\n if (sid) place['tangle.sandbox.id'] = sid\n const fid = str(p.fleetId)\n if (fid) place['tangle.fleet.id'] = fid\n const mid = str(p.machineId)\n if (mid) place['tangle.machine.id'] = mid\n placementByIdx.set(idx, place)\n break\n }\n case 'loop.iteration.ended': {\n const idx = num(p.iterationIndex) ?? 0\n const start = iterStartTs.get(idx) ?? 
e.timestamp\n const err = str(p.error)\n const attrs: Record<string, string | number | boolean> = {\n [GEN_AI.operation]: 'invoke_agent',\n 'tangle.loop.iteration.index': idx,\n }\n const agent = str(p.agentRunName)\n if (agent) attrs[GEN_AI.agentName] = agent\n const tu = rec(p.tokenUsage)\n const inTok = num(tu.input)\n if (inTok !== undefined) attrs[GEN_AI.inputTokens] = inTok\n const outTok = num(tu.output)\n if (outTok !== undefined) attrs[GEN_AI.outputTokens] = outTok\n const cost = num(p.costUsd)\n if (cost !== undefined) attrs['tangle.cost.usd'] = cost\n const verdict = rec(p.verdict)\n if (typeof verdict.valid === 'boolean') attrs['tangle.loop.verdict.valid'] = verdict.valid\n const score = num(verdict.score)\n if (score !== undefined) attrs['tangle.loop.verdict.score'] = score\n if (err) attrs['tangle.loop.error'] = err\n const gid = num(p.groupId)\n if (gid !== undefined) attrs['tangle.loop.iteration.group_id'] = gid\n const par = num(p.parentIndex)\n if (par !== undefined) attrs['tangle.loop.iteration.parent_index'] = par\n const dur = num(p.durationMs)\n if (dur !== undefined) attrs['tangle.loop.iteration.duration_ms'] = dur\n const preview = str(p.outputPreview)\n if (preview) attrs['tangle.loop.iteration.output_preview'] = preview\n Object.assign(attrs, placementByIdx.get(idx) ?? {})\n out.push(\n make(\n generateSpanId(),\n currentRoundId ?? 
rootId,\n 'loop.iteration',\n 'branch',\n start,\n e.timestamp,\n attrs,\n err !== undefined,\n ),\n )\n break\n }\n case 'loop.decision': {\n if (pendingRound) {\n const dec = str(p.decision)\n if (dec) pendingRound.attrs['tangle.loop.decision'] = dec\n flushRound(e.timestamp)\n }\n currentRoundId = undefined\n break\n }\n }\n }\n flushRound(rootEnd)\n return out\n}\n\nfunction parseHeadersFromEnv(): Record<string, string> {\n if (typeof process === 'undefined') return {}\n const raw = process.env.OTEL_EXPORTER_OTLP_HEADERS\n if (!raw) return {}\n const out: Record<string, string> = {}\n for (const pair of raw.split(',')) {\n const eq = pair.indexOf('=')\n if (eq < 0) continue\n const key = pair.slice(0, eq).trim()\n const value = pair.slice(eq + 1).trim()\n if (key) out[key] = value\n }\n return out\n}\n\nfunction toAttributes(record: Record<string, string | number | boolean>): OtelAttribute[] {\n return Object.entries(record).map(([key, value]) => ({\n key,\n value:\n typeof value === 'number'\n ? Number.isInteger(value)\n ? { intValue: value.toString() }\n : { doubleValue: value }\n : typeof value === 'boolean'\n ? 
{ boolValue: value }\n : { stringValue: value },\n }))\n}\n\nfunction msToNs(ms: number): string {\n return (BigInt(Math.floor(ms)) * 1_000_000n).toString()\n}\n\nfunction padSpanId(id: string): string {\n const cleaned = id.replace(/-/g, '')\n return cleaned.slice(0, 16).padEnd(16, '0')\n}\n\nfunction padTraceId(id: string): string {\n const cleaned = id.replace(/-/g, '')\n return cleaned.slice(0, 32).padEnd(32, '0')\n}\n\nfunction generateSpanId(): string {\n const bytes = new Uint8Array(8)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 8; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n\n// ─── Eval-run ingest (self-improvement provenance) ───────────────────────────\n//\n// Tangle Intelligence has a first-class, non-trace record for self-improvement\n// runs: POST /v1/ingest/eval-runs (\"Mode D\"). Each generation carries a\n// `surfaceHash` (the proposed-change identity) + arbitrary `surface` provenance;\n// a later `gate-decided` event re-emits the same `runId` (idempotent upsert) with\n// a real `gateDecision` + `holdoutLift`, so proposal→verdict is one diffable\n// record. This is how a consumer's RSI loop records WHAT it changed, WHY, from\n// which evidence — the audit trail behind agentic self-improvement.\n\n/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */\nexport const INTELLIGENCE_WIRE_VERSION = '2026-05-26.v1'\n\nexport interface EvalRunGeneration {\n /** 0-based ordinal of this generation within the run (required by ingest). */\n index: number\n /** Identity of the proposed surface change (content-addressed hash). */\n surfaceHash: string\n /** Arbitrary provenance for this generation (rationale, evidence, source). */\n surface?: unknown\n /** Per-scenario results; empty until the generation is measured. 
*/\n cells?: unknown[]\n /** Mean composite score (0 when unmeasured — pair with labels.measured). */\n compositeMean: number\n costUsd: number\n durationMs: number\n}\n\nexport interface EvalRunEvent {\n runId: string\n runDir: string\n /** ISO timestamp. */\n timestamp: string\n status:\n | 'started'\n | 'baseline-complete'\n | 'generation-complete'\n | 'gate-decided'\n | 'finished'\n | 'errored'\n labels?: Record<string, string>\n baseline?: EvalRunGeneration\n generations?: EvalRunGeneration[]\n gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling'\n holdoutLift?: number\n totalCostUsd: number\n totalDurationMs: number\n errorMessage?: string\n}\n\nexport interface EvalRunsExportConfig {\n /** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */\n apiKey?: string\n /** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */\n base?: string\n /** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */\n idempotencyKey?: string\n}\n\nexport interface EvalRunsExportResult {\n ok: boolean\n status: number\n accepted: number\n rejected: Array<{ index: number; reason: string }>\n}\n\nconst DEFAULT_INTELLIGENCE_BASE = 'https://intelligence.tangle.tools'\n\n/**\n * Ship self-improvement eval-run events to Tangle Intelligence. Unlike the\n * best-effort span exporter, this RESOLVES with the ingest verdict (accepted /\n * rejected per event) so a consumer's loop can assert its provenance landed.\n * Throws only on a missing key or network failure.\n */\nexport async function exportEvalRuns(\n events: EvalRunEvent[],\n config?: EvalRunsExportConfig,\n): Promise<EvalRunsExportResult> {\n if (events.length === 0) return { ok: true, status: 0, accepted: 0, rejected: [] }\n const apiKey =\n config?.apiKey ?? (typeof process !== 'undefined' ? 
process.env.TANGLE_API_KEY : undefined)\n if (!apiKey)\n throw new Error('exportEvalRuns: apiKey required (pass config.apiKey or set TANGLE_API_KEY)')\n const base =\n config?.base ??\n (typeof process !== 'undefined' ? process.env.INTELLIGENCE_BASE : undefined) ??\n DEFAULT_INTELLIGENCE_BASE\n const url = `${base.replace(/\\/+$/, '')}/v1/ingest/eval-runs`\n const res = await fetch(url, {\n method: 'POST',\n headers: {\n 'content-type': 'application/json',\n authorization: `Bearer ${apiKey}`,\n 'X-Tangle-Wire-Version': INTELLIGENCE_WIRE_VERSION,\n ...(config?.idempotencyKey ? { 'Idempotency-Key': config.idempotencyKey } : {}),\n },\n body: JSON.stringify({ wireVersion: INTELLIGENCE_WIRE_VERSION, events }),\n })\n let parsed: { accepted?: number; rejected?: Array<{ index: number; reason: string }> } = {}\n try {\n parsed = (await res.json()) as typeof parsed\n } catch {\n // non-JSON body (e.g. 5xx HTML) — leave parsed empty\n }\n return {\n ok: res.ok,\n status: res.status,\n accepted: parsed.accepted ?? (res.ok ? events.length : 0),\n rejected: parsed.rejected ?? 
[],\n }\n}\n"],"mappings":";AA6DA,IAAM,QAAQ,EAAE,MAAM,iCAAiC,SAAS,SAAS;AAQzE,IAAM,SAAS;AAAA,EACb,WAAW;AAAA,EACX,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,aAAa;AAAA,EACb,cAAc;AAChB;AAKO,SAAS,mBAAmB,QAAqD;AACtF,QAAM,mBACJ,QAAQ,aACP,OAAO,YAAY,cAAc,QAAQ,IAAI,8BAA8B;AAC9E,MAAI,CAAC,iBAAkB,QAAO;AAC9B,QAAM,WAAmB;AAEzB,QAAM,UAAU,QAAQ,WAAW,oBAAoB;AACvD,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,kBAAkB,QAAQ,mBAAmB;AACnD,QAAM,cAAc,QAAQ,eAAe;AAC3C,QAAM,gBAAgB,QAAQ,sBAAsB,CAAC;AAErD,QAAM,UAAsB,CAAC;AAC7B,MAAI;AACJ,MAAI,UAAU;AAEd,QAAM,WAAyB;AAAA,IAC7B,WAAW,MAAsB;AAC/B,UAAI,QAAS;AACb,cAAQ,KAAK,IAAI;AACjB,UAAI,QAAQ,UAAU,WAAW;AAC/B,aAAK,QAAQ;AAAA,MACf;AAAA,IACF;AAAA,IAEA,MAAM,QAAuB;AAC3B,YAAM,QAAQ;AAAA,IAChB;AAAA,IAEA,MAAM,WAA0B;AAC9B,gBAAU;AACV,UAAI,UAAU,QAAW;AACvB,sBAAc,KAAK;AACnB,gBAAQ;AAAA,MACV;AACA,YAAM,QAAQ;AAAA,IAChB;AAAA,EACF;AAEA,UAAQ,YAAY,MAAM;AACxB,QAAI,QAAQ,SAAS,EAAG,MAAK,QAAQ;AAAA,EACvC,GAAG,eAAe;AAClB,MAAI,OAAO,UAAU,YAAY,WAAW,OAAO;AACjD;AAAC,IAAC,MAAyB,MAAM;AAAA,EACnC;AAEA,iBAAe,UAAyB;AACtC,QAAI,QAAQ,WAAW,EAAG;AAC1B,UAAM,QAAQ,QAAQ,OAAO,CAAC;AAC9B,UAAM,OAAmB;AAAA,MACvB,eAAe;AAAA,QACb;AAAA,UACE,UAAU;AAAA,YACR,YAAY,aAAa;AAAA,cACvB,gBAAgB;AAAA,cAChB,GAAG;AAAA,YACL,CAAC;AAAA,UACH;AAAA,UACA,YAAY,CAAC,EAAE,OAAO,OAAO,OAAO,MAAM,CAAC;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AACA,UAAM,MAAM,GAAG,SAAS,QAAQ,QAAQ,EAAE,CAAC;AAC3C,QAAI;AACF,YAAM,MAAM,KAAK;AAAA,QACf,QAAQ;AAAA,QACR,SAAS,EAAE,gBAAgB,oBAAoB,GAAG,QAAQ;AAAA,QAC1D,MAAM,KAAK,UAAU,IAAI;AAAA,MAC3B,CAAC;AAAA,IACH,QAAQ;AAAA,IAER;AAAA,EACF;AAEA,SAAO;AACT;AAKO,SAAS,oBACd,OAMA,SACA,cACU;AACV,QAAM,SAAS,eAAe;AAC9B,QAAM,QAAmD;AAAA,IACvD,mBAAmB,MAAM;AAAA,IACzB,eAAe,MAAM;AAAA,EACvB;AACA,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,MAAM,OAAO,GAAG;AAClD,QAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,YAAM,QAAQ,CAAC,EAAE,IAAI;AAAA,IACvB;AAAA,EACF;AACA,QAAM,KAAK,OAAO,MAAM,SAAS;AACjC,SAAO;AAAA,IACL,SAAS,WAAW,OAAO;AAAA,IAC3B;AAAA,IACA,cAAc,eAAe,UAAU,YAAY,IAAI;AAAA,IACvD,MAAM,MAAM;AAAA,IACZ,MAAM;AAAA,IACN,mBAAmB;AAAA,IACnB,iBAAiB;AAAA,IACjB,YAAY,aAAa,KAAK;AAAA,IAC9B,QAAQ,EAAE,M
AAM,EAAE;AAAA,EACpB;AACF;AAsCO,SAAS,mBACd,QACA,SACA,kBACY;AACZ,QAAM,MAAM,WAAW,OAAO;AAC9B,SAAO,mBAAmB,MAAM,EAAE,IAAI,CAAC,UAAU;AAAA,IAC/C,SAAS;AAAA,IACT,QAAQ,KAAK;AAAA,IACb,cAAc,KAAK,eACf,UAAU,KAAK,YAAY,IAC3B,mBACE,UAAU,gBAAgB,IAC1B;AAAA,IACN,MAAM,KAAK;AAAA,IACX,MAAM;AAAA,IACN,mBAAmB,OAAO,KAAK,OAAO;AAAA,IACtC,iBAAiB,OAAO,KAAK,KAAK;AAAA,IAClC,YAAY,aAAa,KAAK,KAAK;AAAA,IACnC,QAAQ,EAAE,MAAM,KAAK,QAAQ,IAAI,EAAE;AAAA,EACrC,EAAE;AACJ;AAUO,SAAS,mBACd,QACgB;AAChB,MAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AACjC,QAAM,MAAsB,CAAC;AAC7B,QAAM,MAAM,CAAC,MACX,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,IAAI,IAAI;AACpD,QAAM,MAAM,CAAC,MACX,OAAO,MAAM,YAAY,EAAE,SAAS,IAAI,IAAI;AAC9C,QAAM,MAAM,CAAC,MACX,KAAK,OAAO,MAAM,WAAY,IAAgC,CAAC;AAEjE,QAAM,UAAU,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,cAAc;AAC5D,QAAM,QAAQ,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,YAAY;AACxD,QAAM,QAAQ,OAAO,CAAC,GAAG,SAAS;AAClC,QAAM,YAAY,SAAS,aAAa,OAAO,CAAC,EAAG;AACnD,QAAM,UAAU,OAAO,aAAa,OAAO,OAAO,SAAS,CAAC,EAAG;AAC/D,QAAM,SAAS,eAAe;AAE9B,QAAM,OAAO,CACX,QACA,cACA,MACA,MACA,SACA,OACA,OACA,QAAQ,WACU;AAAA,IAClB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAGA,QAAM,KAAK,IAAI,SAAS,OAAO;AAC/B,QAAM,YAAuD;AAAA,IAC3D,CAAC,OAAO,SAAS,GAAG;AAAA,IACpB,CAAC,OAAO,cAAc,GAAG;AAAA,IACzB,sBAAsB,IAAI,GAAG,MAAM,KAAK;AAAA,EAC1C;AACA,MAAI,MAAM,QAAQ,GAAG,aAAa,KAAK,GAAG,cAAc,SAAS,GAAG;AAClE,cAAU,oBAAoB,IAAI,GAAG,cAAc,IAAI,MAAM,EAAE,KAAK,GAAG;AAAA,EACzE;AACA,MAAI,OAAO;AACT,UAAM,KAAK,IAAI,MAAM,OAAO;AAC5B,UAAM,MAAM,IAAI,GAAG,oBAAoB;AACvC,QAAI,QAAQ,OAAW,WAAU,oCAAoC,IAAI;AACzE,UAAM,OAAO,IAAI,GAAG,YAAY;AAChC,QAAI,SAAS,OAAW,WAAU,iBAAiB,IAAI;AACvD,UAAM,MAAM,IAAI,GAAG,UAAU;AAC7B,QAAI,QAAQ,OAAW,WAAU,yBAAyB,IAAI;AAC9D,UAAM,QAAQ,IAAI,GAAG,UAAU;AAC/B,QAAI,UAAU,OAAW,WAAU,wBAAwB,IAAI;AAAA,EACjE;AACA,MAAI,KAAK,KAAK,QAAQ,QAAW,QAAQ,QAAQ,WAAW,SAAS,SAAS,CAAC;AAG/E,QAAM,cAAc,oBAAI,IAAoB;AAC5C,QAAM,iBAAiB,oBAAI,IAAoC;AAC/D,MAAI;AACJ,MAAI;AAGJ,QAAM,aAAa,CAAC,UAAkB;AACpC,QAAI,CAAC,aAAc;AACnB,QAAI;AAAA,MACF;AAAA,QACE,aAAa;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,QACA,aAAa;AAAA,QACb;AAAA,
QACA,aAAa;AAAA,MACf;AAAA,IACF;AACA,mBAAe;AAAA,EACjB;AAEA,aAAW,KAAK,QAAQ;AACtB,UAAM,IAAI,IAAI,EAAE,OAAO;AACvB,YAAQ,EAAE,MAAM;AAAA,MACd,KAAK,aAAa;AAChB,mBAAW,EAAE,SAAS;AACtB,cAAM,KAAK,eAAe;AAC1B,cAAM,WAAW,IAAI,EAAE,UAAU,KAAK;AACtC,cAAM,QAAmD;AAAA,UACvD,CAAC,OAAO,SAAS,GAAG;AAAA,UACpB,2BAA2B;AAAA,UAC3B,yBAAyB,IAAI,EAAE,QAAQ,KAAK;AAAA,UAC5C,0BAA0B;AAAA,UAC1B,0BAA0B,IAAI,EAAE,YAAY,KAAK;AAAA,QACnD;AACA,cAAM,IAAI,IAAI,EAAE,SAAS;AACzB,YAAI,EAAG,OAAM,4BAA4B,IAAI;AAC7C,cAAM,SAAS,IAAI,EAAE,WAAW;AAChC,YAAI,WAAW,OAAW,OAAM,+BAA+B,IAAI;AACnE,YAAI,MAAM,QAAQ,EAAE,YAAY,KAAK,EAAE,aAAa,SAAS,GAAG;AAC9D,gBAAM,gCAAgC,IAAI,EAAE,aAAa,IAAI,MAAM,EAAE,KAAK,GAAG;AAAA,QAC/E;AACA,uBAAe,EAAE,IAAI,OAAO,EAAE,WAAW,MAAM;AAC/C,yBAAiB;AACjB;AAAA,MACF;AAAA,MACA,KAAK,0BAA0B;AAC7B,cAAM,MAAM,IAAI,EAAE,cAAc;AAChC,YAAI,QAAQ,OAAW,aAAY,IAAI,KAAK,EAAE,SAAS;AACvD;AAAA,MACF;AAAA,MACA,KAAK,2BAA2B;AAC9B,cAAM,MAAM,IAAI,EAAE,cAAc;AAChC,YAAI,QAAQ,OAAW;AACvB,cAAM,QAAgC,CAAC;AACvC,cAAM,OAAO,IAAI,EAAE,SAAS;AAC5B,YAAI,KAAM,OAAM,4BAA4B,IAAI;AAChD,cAAM,MAAM,IAAI,EAAE,SAAS;AAC3B,YAAI,IAAK,OAAM,mBAAmB,IAAI;AACtC,cAAM,MAAM,IAAI,EAAE,OAAO;AACzB,YAAI,IAAK,OAAM,iBAAiB,IAAI;AACpC,cAAM,MAAM,IAAI,EAAE,SAAS;AAC3B,YAAI,IAAK,OAAM,mBAAmB,IAAI;AACtC,uBAAe,IAAI,KAAK,KAAK;AAC7B;AAAA,MACF;AAAA,MACA,KAAK,wBAAwB;AAC3B,cAAM,MAAM,IAAI,EAAE,cAAc,KAAK;AACrC,cAAM,QAAQ,YAAY,IAAI,GAAG,KAAK,EAAE;AACxC,cAAM,MAAM,IAAI,EAAE,KAAK;AACvB,cAAM,QAAmD;AAAA,UACvD,CAAC,OAAO,SAAS,GAAG;AAAA,UACpB,+BAA+B;AAAA,QACjC;AACA,cAAM,QAAQ,IAAI,EAAE,YAAY;AAChC,YAAI,MAAO,OAAM,OAAO,SAAS,IAAI;AACrC,cAAM,KAAK,IAAI,EAAE,UAAU;AAC3B,cAAM,QAAQ,IAAI,GAAG,KAAK;AAC1B,YAAI,UAAU,OAAW,OAAM,OAAO,WAAW,IAAI;AACrD,cAAM,SAAS,IAAI,GAAG,MAAM;AAC5B,YAAI,WAAW,OAAW,OAAM,OAAO,YAAY,IAAI;AACvD,cAAM,OAAO,IAAI,EAAE,OAAO;AAC1B,YAAI,SAAS,OAAW,OAAM,iBAAiB,IAAI;AACnD,cAAM,UAAU,IAAI,EAAE,OAAO;AAC7B,YAAI,OAAO,QAAQ,UAAU,UAAW,OAAM,2BAA2B,IAAI,QAAQ;AACrF,cAAM,QAAQ,IAAI,QAAQ,KAAK;AAC/B,YAAI,UAAU,OAAW,OAAM,2BAA2B,IAAI;AAC9D,YAAI,IAAK,OAAM,mBAAmB,IAAI;AACtC,cAAM,MAAM,IAAI,EAAE,OAAO;AACzB,YAAI,QAAQ,OAAW,OAAM,gCAAgC,I
AAI;AACjE,cAAM,MAAM,IAAI,EAAE,WAAW;AAC7B,YAAI,QAAQ,OAAW,OAAM,oCAAoC,IAAI;AACrE,cAAM,MAAM,IAAI,EAAE,UAAU;AAC5B,YAAI,QAAQ,OAAW,OAAM,mCAAmC,IAAI;AACpE,cAAM,UAAU,IAAI,EAAE,aAAa;AACnC,YAAI,QAAS,OAAM,sCAAsC,IAAI;AAC7D,eAAO,OAAO,OAAO,eAAe,IAAI,GAAG,KAAK,CAAC,CAAC;AAClD,YAAI;AAAA,UACF;AAAA,YACE,eAAe;AAAA,YACf,kBAAkB;AAAA,YAClB;AAAA,YACA;AAAA,YACA;AAAA,YACA,EAAE;AAAA,YACF;AAAA,YACA,QAAQ;AAAA,UACV;AAAA,QACF;AACA;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,YAAI,cAAc;AAChB,gBAAM,MAAM,IAAI,EAAE,QAAQ;AAC1B,cAAI,IAAK,cAAa,MAAM,sBAAsB,IAAI;AACtD,qBAAW,EAAE,SAAS;AAAA,QACxB;AACA,yBAAiB;AACjB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,aAAW,OAAO;AAClB,SAAO;AACT;AAEA,SAAS,sBAA8C;AACrD,MAAI,OAAO,YAAY,YAAa,QAAO,CAAC;AAC5C,QAAM,MAAM,QAAQ,IAAI;AACxB,MAAI,CAAC,IAAK,QAAO,CAAC;AAClB,QAAM,MAA8B,CAAC;AACrC,aAAW,QAAQ,IAAI,MAAM,GAAG,GAAG;AACjC,UAAM,KAAK,KAAK,QAAQ,GAAG;AAC3B,QAAI,KAAK,EAAG;AACZ,UAAM,MAAM,KAAK,MAAM,GAAG,EAAE,EAAE,KAAK;AACnC,UAAM,QAAQ,KAAK,MAAM,KAAK,CAAC,EAAE,KAAK;AACtC,QAAI,IAAK,KAAI,GAAG,IAAI;AAAA,EACtB;AACA,SAAO;AACT;AAEA,SAAS,aAAa,QAAoE;AACxF,SAAO,OAAO,QAAQ,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,KAAK,OAAO;AAAA,IACnD;AAAA,IACA,OACE,OAAO,UAAU,WACb,OAAO,UAAU,KAAK,IACpB,EAAE,UAAU,MAAM,SAAS,EAAE,IAC7B,EAAE,aAAa,MAAM,IACvB,OAAO,UAAU,YACf,EAAE,WAAW,MAAM,IACnB,EAAE,aAAa,MAAM;AAAA,EAC/B,EAAE;AACJ;AAEA,SAAS,OAAO,IAAoB;AAClC,UAAQ,OAAO,KAAK,MAAM,EAAE,CAAC,IAAI,UAAY,SAAS;AACxD;AAEA,SAAS,UAAU,IAAoB;AACrC,QAAM,UAAU,GAAG,QAAQ,MAAM,EAAE;AACnC,SAAO,QAAQ,MAAM,GAAG,EAAE,EAAE,OAAO,IAAI,GAAG;AAC5C;AAEA,SAAS,WAAW,IAAoB;AACtC,QAAM,UAAU,GAAG,QAAQ,MAAM,EAAE;AACnC,SAAO,QAAQ,MAAM,GAAG,EAAE,EAAE,OAAO,IAAI,GAAG;AAC5C;AAEA,SAAS,iBAAyB;AAChC,QAAM,QAAQ,IAAI,WAAW,CAAC;AAC9B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACvE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;AAaO,IAAM,4BAA4B;AAuDzC,IAAM,4BAA4B;AAQlC,eAAsB,eACpB,QACA,QAC+B;AAC/B,MAAI,OAAO,WAAW,EAAG,QAA
O,EAAE,IAAI,MAAM,QAAQ,GAAG,UAAU,GAAG,UAAU,CAAC,EAAE;AACjF,QAAM,SACJ,QAAQ,WAAW,OAAO,YAAY,cAAc,QAAQ,IAAI,iBAAiB;AACnF,MAAI,CAAC;AACH,UAAM,IAAI,MAAM,4EAA4E;AAC9F,QAAM,OACJ,QAAQ,SACP,OAAO,YAAY,cAAc,QAAQ,IAAI,oBAAoB,WAClE;AACF,QAAM,MAAM,GAAG,KAAK,QAAQ,QAAQ,EAAE,CAAC;AACvC,QAAM,MAAM,MAAM,MAAM,KAAK;AAAA,IAC3B,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,gBAAgB;AAAA,MAChB,eAAe,UAAU,MAAM;AAAA,MAC/B,yBAAyB;AAAA,MACzB,GAAI,QAAQ,iBAAiB,EAAE,mBAAmB,OAAO,eAAe,IAAI,CAAC;AAAA,IAC/E;AAAA,IACA,MAAM,KAAK,UAAU,EAAE,aAAa,2BAA2B,OAAO,CAAC;AAAA,EACzE,CAAC;AACD,MAAI,SAAqF,CAAC;AAC1F,MAAI;AACF,aAAU,MAAM,IAAI,KAAK;AAAA,EAC3B,QAAQ;AAAA,EAER;AACA,SAAO;AAAA,IACL,IAAI,IAAI;AAAA,IACR,QAAQ,IAAI;AAAA,IACZ,UAAU,OAAO,aAAa,IAAI,KAAK,OAAO,SAAS;AAAA,IACvD,UAAU,OAAO,YAAY,CAAC;AAAA,EAChC;AACF;","names":[]}
|
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
DELEGATION_STATUS_DESCRIPTION,
|
|
15
15
|
DELEGATION_STATUS_INPUT_SCHEMA,
|
|
16
16
|
DELEGATION_STATUS_TOOL_NAME
|
|
17
|
-
} from "./chunk-RHW75JW5.js";
|
|
17
|
+
} from "./chunk-BERLUBAP.js";
|
|
18
18
|
|
|
19
19
|
// src/mcp/openai-tools.ts
|
|
20
20
|
function buildTool(name, description, parameters) {
|
|
@@ -61,4 +61,4 @@ export {
|
|
|
61
61
|
mcpToolsForRuntimeMcp,
|
|
62
62
|
mcpToolsForRuntimeMcpSubset
|
|
63
63
|
};
|
|
64
|
-
//# sourceMappingURL=chunk-ML4IXGTV.js.map
|
|
64
|
+
//# sourceMappingURL=chunk-V2K35HF2.js.map
|
package/dist/improvement.d.ts
CHANGED
|
@@ -18,8 +18,9 @@ import 'node:child_process';
|
|
|
18
18
|
* the SAME driver at two settings of a dial.
|
|
19
19
|
* - cheap reflective path → `reflectiveGenerator` (shots=1, no sandbox;
|
|
20
20
|
* applies pre-drafted patches)
|
|
21
|
-
* - full agentic path → `agenticGenerator` (shots=N,
|
|
22
|
-
* an agent reads code +
|
|
21
|
+
* - full agentic path → `agenticGenerator` (shots=N, multi-shot
|
|
22
|
+
* verify-in-session loop; an agent reads code +
|
|
23
|
+
* report, edits, and re-tries on verifier failure)
|
|
23
24
|
* Both emit changes into a worktree the driver finalizes into a
|
|
24
25
|
* `CodeSurface{ worktreeRef }` the loop measures on the holdout. See
|
|
25
26
|
* agent-eval's `docs/design/self-improvement-engine.md`.
|
|
@@ -75,11 +76,31 @@ declare function improvementDriver(opts: ImprovementDriverOptions): ImprovementD
|
|
|
75
76
|
* candidate (which would reintroduce a host↔sandbox worktree-transport
|
|
76
77
|
* problem that does not need solving here).
|
|
77
78
|
*
|
|
78
|
-
* `maxShots` is the DEPTH dial
|
|
79
|
-
*
|
|
80
|
-
*
|
|
79
|
+
* `maxShots` is the DEPTH dial — a multi-shot verify-in-session loop, NOT the
|
|
80
|
+
* kernel `runLoop`. Each shot runs one full harness session in the (persistent)
|
|
81
|
+
* worktree; between shots the loop refines based on what the last shot produced:
|
|
82
|
+
* - empty tree → "you changed nothing, make the edits" → retry
|
|
83
|
+
* - dirty + `verify` fails → feed the verifier's failure into the next shot
|
|
84
|
+
* (the worktree persists, so the harness RESUMES atop its own failing
|
|
85
|
+
* edits with the error in hand — no `--resume` session plumbing needed,
|
|
86
|
+
* and harness-agnostic across claude/codex/opencode)
|
|
87
|
+
* - dirty + `verify` ok (or no verifier configured) → return the candidate
|
|
88
|
+
* A candidate that never verifies within `maxShots` is discarded (`applied:
|
|
89
|
+
* false`), never shipped — if you configured a verifier, a non-passing tree is
|
|
90
|
+
* not a candidate. With no verifier the legacy behavior holds: first dirty shot
|
|
91
|
+
* is the candidate.
|
|
81
92
|
*/
|
|
82
93
|
|
|
94
|
+
/** Outcome of verifying a candidate worktree. `feedback` (compiler errors,
|
|
95
|
+
* failing test output) is fed into the next shot when `ok` is false. */
|
|
96
|
+
interface VerifyResult {
|
|
97
|
+
ok: boolean;
|
|
98
|
+
feedback?: string;
|
|
99
|
+
}
|
|
100
|
+
/** Verifies the edited worktree. Sync or async; throws only on a setup fault
|
|
101
|
+
* (a candidate that fails verification returns `{ok:false}`, it does not
|
|
102
|
+
* throw). */
|
|
103
|
+
type Verifier = (worktreePath: string) => Promise<VerifyResult> | VerifyResult;
|
|
83
104
|
interface AgenticGeneratorOptions {
|
|
84
105
|
/** Local coding harness to run in the worktree. Default `claude`. */
|
|
85
106
|
harness?: LocalHarness;
|
|
@@ -91,12 +112,79 @@ interface AgenticGeneratorOptions {
|
|
|
91
112
|
report: unknown;
|
|
92
113
|
findings: AnalystFinding[];
|
|
93
114
|
}) => string;
|
|
115
|
+
/** Verify the worktree after each dirtying shot. When set, a candidate that
|
|
116
|
+
* fails verification is NOT returned — the failure feeds the next shot
|
|
117
|
+
* (verify-in-session), up to `maxShots`; a candidate that never verifies is
|
|
118
|
+
* discarded (`applied:false`), never shipped. Omitted ⇒ legacy behavior:
|
|
119
|
+
* the first dirty shot is the candidate. See `commandVerifier`. */
|
|
120
|
+
verify?: Verifier;
|
|
94
121
|
/** Test seam — inject the harness runner (defaults to `runLocalHarness`). */
|
|
95
122
|
runHarness?: typeof runLocalHarness;
|
|
96
123
|
/** Test seam — inject the worktree-dirty check (defaults to `git status`). */
|
|
97
124
|
isDirty?: (worktreePath: string) => boolean;
|
|
98
125
|
}
|
|
99
126
|
declare function agenticGenerator(opts?: AgenticGeneratorOptions): CandidateGenerator;
|
|
127
|
+
/** A `Verifier` that runs a command in the worktree: exit 0 ⇒ ok, any other
|
|
128
|
+
* exit ⇒ failed with stdout+stderr as feedback. The common case — verify by
|
|
129
|
+
* `tsc --noEmit`, `pnpm build`, or a test command. A timeout is treated as a
|
|
130
|
+
* FAILED candidate (a change that hangs the build is a bad change); a missing
|
|
131
|
+
* binary or spawn fault throws (a setup bug, not a failed candidate — no
|
|
132
|
+
* silent fallback). */
|
|
133
|
+
declare function commandVerifier(command: string, args?: string[], timeoutMs?: number): Verifier;
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Build-prompt starting points for the two buildable artifact types. There is
|
|
137
|
+
* NO `toolGenerator`/`mcpGenerator` wrapper — the factory is `agenticGenerator`
|
|
138
|
+
* + a verifier (docs/artifact-lifecycle-frontier.md), so a tool or an MCP
|
|
139
|
+
* server is built by composing the pieces directly:
|
|
140
|
+
*
|
|
141
|
+
* // a tool:
|
|
142
|
+
* agenticGenerator({ buildPrompt: toolBuildPrompt, verify: commandVerifier('pnpm', ['test']) })
|
|
143
|
+
* // an MCP server:
|
|
144
|
+
* agenticGenerator({ buildPrompt: mcpBuildPrompt, verify: mcpServeVerifier({ command: 'node', args: ['server.mjs'] }) })
|
|
145
|
+
*
|
|
146
|
+
* These are the only type-specific bit (the phrasing that points the agent at a
|
|
147
|
+
* tool vs. an MCP); the worktree, resume-on-failure loop, and improvement-loop
|
|
148
|
+
* wrapper are shared. MCP is the load-bearing target — it is how a harness
|
|
149
|
+
* acquires tools; raw tools matter where we control the loader.
|
|
150
|
+
*/
|
|
151
|
+
|
|
152
|
+
type FindingsArg = {
|
|
153
|
+
report: unknown;
|
|
154
|
+
findings: AnalystFinding[];
|
|
155
|
+
};
|
|
156
|
+
declare function toolBuildPrompt(args: FindingsArg): string;
|
|
157
|
+
declare function mcpBuildPrompt(args: FindingsArg): string;
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* `mcpServeVerifier` — the intrinsic verifier for a built MCP server: the
|
|
161
|
+
* boot-and-probe checker named in docs/artifact-lifecycle-frontier.md. A
|
|
162
|
+
* generated MCP server is only a candidate if it actually *serves* — so this
|
|
163
|
+
* boots it over stdio (the default local MCP transport) and runs the real
|
|
164
|
+
* handshake: `initialize` → `notifications/initialized` → `tools/list`, and
|
|
165
|
+
* asserts the server answers with at least `minTools` tools.
|
|
166
|
+
*
|
|
167
|
+
* Outcomes follow the `Verifier` contract: a server that fails to start, exits
|
|
168
|
+
* early, errors the handshake, times out, or exposes no tools is a FAILED
|
|
169
|
+
* candidate (`{ok:false}`, fed back into the next generation shot); a missing
|
|
170
|
+
* start binary or spawn fault THROWS (a setup bug, never a silent fallback).
|
|
171
|
+
*
|
|
172
|
+
* Protocol matches the runtime's own stdio MCP server (src/mcp/server.ts):
|
|
173
|
+
* newline-delimited JSON-RPC 2.0, protocol version 2024-11-05.
|
|
174
|
+
*/
|
|
175
|
+
|
|
176
|
+
interface McpServeSpec {
|
|
177
|
+
/** Command that starts the built MCP server in the worktree (stdio transport). */
|
|
178
|
+
command: string;
|
|
179
|
+
args?: string[];
|
|
180
|
+
/** Extra env for the server process (merged over `process.env`). */
|
|
181
|
+
env?: Record<string, string>;
|
|
182
|
+
/** Handshake timeout (ms). Default 30s. */
|
|
183
|
+
timeoutMs?: number;
|
|
184
|
+
/** Minimum tools the server must expose to pass. Default 1. */
|
|
185
|
+
minTools?: number;
|
|
186
|
+
}
|
|
187
|
+
declare function mcpServeVerifier(spec: McpServeSpec): Verifier;
|
|
100
188
|
|
|
101
189
|
/**
|
|
102
190
|
* @experimental
|
|
@@ -108,8 +196,8 @@ declare function agenticGenerator(opts?: AgenticGeneratorOptions): CandidateGene
|
|
|
108
196
|
* construction (the patches are already drafted).
|
|
109
197
|
*
|
|
110
198
|
* This is the `shots=1, sandbox=off` setting of the one improvement driver.
|
|
111
|
-
* The `agenticGenerator` (
|
|
112
|
-
* setting — both plug into the same `improvementDriver`.
|
|
199
|
+
* The `agenticGenerator` (a multi-shot verify-in-session loop) is the
|
|
200
|
+
* `shots=N` setting — both plug into the same `improvementDriver`.
|
|
113
201
|
*/
|
|
114
202
|
|
|
115
203
|
interface ReflectiveGeneratorOptions {
|
|
@@ -117,4 +205,4 @@ interface ReflectiveGeneratorOptions {
|
|
|
117
205
|
}
|
|
118
206
|
declare function reflectiveGenerator(opts: ReflectiveGeneratorOptions): CandidateGenerator;
|
|
119
207
|
|
|
120
|
-
export { type AgenticGeneratorOptions, type CandidateGenerator, type ImprovementDriverOptions, type ReflectiveGeneratorOptions, agenticGenerator, improvementDriver, reflectiveGenerator };
|
|
208
|
+
export { type AgenticGeneratorOptions, type CandidateGenerator, type ImprovementDriverOptions, type McpServeSpec, type ReflectiveGeneratorOptions, type Verifier, type VerifyResult, agenticGenerator, commandVerifier, improvementDriver, mcpBuildPrompt, mcpServeVerifier, reflectiveGenerator, toolBuildPrompt };
|