@tangle-network/agent-runtime 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -166
- package/dist/index.d.ts +488 -78
- package/dist/index.js +943 -421
- package/dist/index.js.map +1 -1
- package/package.json +17 -12
package/dist/index.js
CHANGED
|
@@ -1,11 +1,61 @@
|
|
|
1
|
-
// src/
|
|
1
|
+
// src/errors.ts
|
|
2
|
+
import { AgentEvalError } from "@tangle-network/agent-eval";
|
|
2
3
|
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
AgentEvalError as AgentEvalError2,
|
|
5
|
+
CaptureIntegrityError,
|
|
6
|
+
ConfigError,
|
|
7
|
+
JudgeError,
|
|
8
|
+
NotFoundError,
|
|
9
|
+
ReplayError,
|
|
10
|
+
ValidationError,
|
|
11
|
+
VerificationError
|
|
8
12
|
} from "@tangle-network/agent-eval";
|
|
13
|
+
/**
 * Error raised when a stored session is resumed with a backend kind that
 * differs from the one recorded on the session.
 *
 * The literal "validation" is forwarded as the first argument of the
 * AgentEvalError constructor (presumably its error code/category — confirm
 * against @tangle-network/agent-eval).
 */
var SessionMismatchError = class extends AgentEvalError {
  /** Backend kind recorded on the stored session. */
  sessionBackend;
  /** Backend kind the caller tried to resume the session with. */
  requestedBackend;

  /**
   * @param sessionBackend - backend kind stored on the session.
   * @param requestedBackend - backend kind used for the resume attempt.
   * @param options - forwarded untouched to the AgentEvalError constructor.
   */
  constructor(sessionBackend, requestedBackend, options) {
    const message = `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`;
    super("validation", message, options);
    Object.assign(this, { sessionBackend, requestedBackend });
  }
};
|
|
26
|
+
/**
 * Error raised when talking to a backend fails at the transport level.
 *
 * The literal "config" is forwarded as the first AgentEvalError constructor
 * argument (presumably the error code/category — confirm against
 * @tangle-network/agent-eval).
 */
var BackendTransportError = class extends AgentEvalError {
  /** Backend kind that produced the failure. */
  backend;
  /** Value of `options.status`, or undefined when no options/status were given. */
  status;

  /**
   * @param backend - backend kind that failed.
   * @param message - human-readable failure description.
   * @param options - forwarded to AgentEvalError; `options.status` is also
   *   copied onto the instance.
   */
  constructor(backend, message, options) {
    super("config", message, options);
    const { status } = options ?? {};
    Object.assign(this, { backend, status });
  }
};
|
|
35
|
+
/**
 * Error raised for invalid runtime-run state transitions.
 *
 * Always forwards the literal "validation" as the first AgentEvalError
 * constructor argument, followed by the caller's message and options.
 */
var RuntimeRunStateError = class extends AgentEvalError {
  /**
   * @param message - description of the invalid state change.
   * @param options - forwarded untouched to the AgentEvalError constructor.
   */
  constructor(message, options) {
    const code = "validation";
    super(code, message, options);
  }
};
|
|
40
|
+
|
|
41
|
+
// src/sessions.ts
|
|
42
|
+
/**
 * Builds a fresh session record in the "active" state.
 *
 * @param backend - backend kind stored on the session.
 * @param requestedId - id to use; any falsy value (including "") makes the
 *   function generate one with `crypto.randomUUID()`.
 * @param metadata - stored as-is on the session (may be undefined).
 * @returns session object whose createdAt and updatedAt share one timestamp.
 */
function newRuntimeSession(backend, requestedId, metadata) {
  const timestamp = nowIso();
  const id = requestedId ? requestedId : crypto.randomUUID();
  return {
    id,
    backend,
    status: "active",
    createdAt: timestamp,
    updatedAt: timestamp,
    metadata
  };
}
|
|
53
|
+
/**
 * Returns a shallow copy of the session with `updatedAt` set to the current
 * ISO timestamp. The input object is not modified.
 */
function touchSession(session) {
  const refreshed = { ...session };
  refreshed.updatedAt = nowIso();
  return refreshed;
}
|
|
56
|
+
/** Returns the current time as an ISO-8601 string (`Date#toISOString`). */
function nowIso() {
  const current = /* @__PURE__ */ new Date();
  return current.toISOString();
}
|
|
9
59
|
var InMemoryRuntimeSessionStore = class {
|
|
10
60
|
sessions = /* @__PURE__ */ new Map();
|
|
11
61
|
events = /* @__PURE__ */ new Map();
|
|
@@ -24,6 +74,277 @@ var InMemoryRuntimeSessionStore = class {
|
|
|
24
74
|
return [...this.events.get(sessionId) ?? []];
|
|
25
75
|
}
|
|
26
76
|
};
|
|
77
|
+
|
|
78
|
+
// src/backends.ts
|
|
79
|
+
/**
 * Identity helper: returns the backend definition it was given, unchanged
 * (same object reference).
 */
function createIterableBackend(options) {
  const backend = options;
  return backend;
}
|
|
82
|
+
/**
 * Creates a backend that obtains a "box" via `options.getBox` and streams a
 * single prompt through `options.streamPrompt`.
 *
 * @param options - `getBox(input, context)` and `streamPrompt(box, message, context)`
 *   are called unconditionally; `kind` (default "sandbox"), `getSessionId(box, input)`
 *   and `mapEvent(event, context)` are optional.
 * @returns backend object exposing `kind`, `start`, `resume` and `stream`.
 */
function createSandboxPromptBackend(options) {
  const kind = options.kind ?? "sandbox";
  return {
    kind,
    /** Starts a session; the id comes from getSessionId, else the requested id. */
    async start(input, context) {
      const box = await options.getBox(input, context);
      const sessionId = options.getSessionId?.(box, input) ?? context.requestedSessionId;
      return newRuntimeSession(kind, sessionId, { resumable: true });
    },
    /** Marks the stored session active again and refreshes its timestamp. */
    resume(session) {
      const reactivated = { ...session, status: "active" };
      return touchSession(reactivated);
    },
    /** Streams mapped events for one prompt. */
    async *stream(input, context) {
      const box = await options.getBox(input, context);
      // Prompt precedence: explicit message, then last message content, then task intent.
      const prompt = input.message ?? input.messages?.at(-1)?.content ?? context.task.intent;
      const rawEvents = options.streamPrompt(box, prompt, context);
      for await (const rawEvent of rawEvents) {
        // A nullish result from mapEvent falls back to the common mapping.
        const mapped = options.mapEvent?.(rawEvent, context) ?? mapCommonBackendEvent(rawEvent, context);
        if (!mapped) continue;
        yield mapped;
      }
    }
  };
}
|
|
107
|
+
/**
 * Creates a backend that streams chat completions from an OpenAI-compatible
 * HTTP endpoint (`<baseUrl>/chat/completions`, `stream: true`).
 *
 * @param options - `baseUrl`, `apiKey` and `model` are read on every stream
 *   call; `fetchImpl` (default: global `fetch`) and `kind` (default "tcloud")
 *   are optional.
 * @returns backend object exposing `kind`, `start` and `stream`.
 * @throws BackendTransportError from `stream` when the HTTP response is not OK;
 *   the error carries the response status.
 */
function createOpenAICompatibleBackend(options) {
  const fetcher = options.fetchImpl ?? fetch;
  const kind = options.kind ?? "tcloud";
  return {
    kind,
    /** Creates a new session record, honouring a requested session id. */
    start(_input, context) {
      return newRuntimeSession(kind, context.requestedSessionId);
    },
    /** Posts the chat request and yields the parsed response events. */
    async *stream(input, context) {
      // Only a single trailing slash is stripped from the base URL.
      const endpoint = `${options.baseUrl.replace(/\/$/, "")}/chat/completions`;
      const messages = input.messages ?? [
        { role: "user", content: input.message ?? context.task.intent }
      ];
      const response = await fetcher(endpoint, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${options.apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          model: options.model,
          stream: true,
          messages
        }),
        signal: context.signal
      });
      if (!response.ok) {
        // The error body is never read; cancel it so the underlying connection
        // is released instead of lingering until garbage collection.
        try {
          await response.body?.cancel?.();
        } catch {
          // Best-effort cleanup: the transport error below is what matters.
        }
        throw new BackendTransportError(kind, `chat backend returned ${response.status}`, {
          status: response.status
        });
      }
      yield* streamResponseEvents(response, context);
    }
  };
}
|
|
140
|
+
/**
 * Ensures a backend event carries `task`, `session` and `timestamp`.
 *
 * An event that already has truthy values for all three is returned as the
 * same object. Otherwise a copy is returned in which each missing (or falsy)
 * field is filled from the arguments, or from the current time for `timestamp`.
 */
function normalizeBackendStreamEvent(event, task, session) {
  const carries = (key) => key in event && Boolean(event[key]);
  if (carries("task") && carries("session") && carries("timestamp")) {
    return event;
  }
  return {
    ...event,
    task: carries("task") ? event.task : task,
    session: carries("session") ? event.session : session,
    timestamp: carries("timestamp") ? event.timestamp : nowIso()
  };
}
|
|
151
|
+
/**
 * Maps a loosely-shaped backend event onto a runtime stream event.
 *
 * Payload fields are read from `event.data` when that is an object, otherwise
 * from the event itself. Recognised `type` values:
 * - "message.part.updated" | "text_delta" | "delta" -> "text_delta"
 * - "reasoning_delta" -> "reasoning_delta"
 * - "tool_call" / "tool_result" -> same type, tool name defaulting to "tool"
 * - "result" | "final" -> "text_delta"
 *
 * @returns the mapped event, or undefined for non-object input, unknown types,
 *   and text events without a non-empty string payload.
 */
function mapCommonBackendEvent(event, context) {
  if (!event || typeof event !== "object") return void 0;
  const record = event;
  const type = String(record.type ?? "");
  const data = record.data && typeof record.data === "object" ? record.data : record;

  // Text-bearing events are dropped when no non-empty text was found.
  const textEvent = (eventType, text) => text ? {
    type: eventType,
    task: context.task,
    session: context.session,
    text,
    timestamp: nowIso()
  } : void 0;

  // tool_call and tool_result differ only in their payload key.
  const toolEvent = (eventType, payloadKey, payload) => ({
    type: eventType,
    task: context.task,
    session: context.session,
    toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? "tool",
    toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
    [payloadKey]: payload,
    timestamp: nowIso()
  });

  switch (type) {
    case "message.part.updated":
    case "text_delta":
    case "delta":
      return textEvent(
        "text_delta",
        stringValue(data.text) ?? stringValue(data.delta) ?? stringValue(record.text)
      );
    case "reasoning_delta":
      return textEvent("reasoning_delta", stringValue(data.text) ?? stringValue(record.text));
    case "tool_call":
      return toolEvent("tool_call", "args", data.args ?? data.input ?? record.args);
    case "tool_result":
      return toolEvent("tool_result", "result", data.result ?? data.output ?? record.result);
    case "result":
    case "final":
      return textEvent(
        "text_delta",
        stringValue(data.finalText) ?? stringValue(data.text) ?? stringValue(record.text)
      );
    default:
      return void 0;
  }
}
|
|
210
|
+
/**
 * Reads a streaming HTTP response body and yields the events parsed from it.
 *
 * The body is split into chunks on blank lines ("\n\n"); single lines that do
 * not start with "data:" are parsed on their own (newline-delimited payloads).
 * Each chunk is handed to `parseStreamChunk`; chunks it maps to nothing are
 * skipped.
 *
 * Line endings are normalised on the accumulated buffer rather than per
 * decoded piece, so a "\r\n" split across two network chunks is still
 * recognised and cannot glue two events together. The reader is cancelled when
 * iteration stops early (consumer break/return or an error) and its lock is
 * always released.
 *
 * @param response - object with a `body` exposing `getReader()`; a missing
 *   body yields nothing.
 * @param context - passed through to `parseStreamChunk`.
 */
async function* streamResponseEvents(response, context) {
  const body = response.body;
  if (!body) return;
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let exhausted = false;

  // Normalise after appending so a trailing "\r" meets the "\n" of the next piece.
  const append = (text) => {
    buffer = (buffer + text).replace(/\r\n/g, "\n");
  };

  // Emits every complete chunk currently in the buffer. With flush=true a
  // trailing line without "data:" prefix is parsed as well.
  function* drainStreamBuffer(flush) {
    for (; ; ) {
      const sseBoundary = buffer.indexOf("\n\n");
      if (sseBoundary >= 0) {
        const chunk = buffer.slice(0, sseBoundary);
        buffer = buffer.slice(sseBoundary + 2);
        const event = parseStreamChunk(chunk, context);
        if (event) yield event;
        continue;
      }
      const newline = buffer.indexOf("\n");
      // "data:" lines wait for their terminating blank line.
      if (newline >= 0 && !buffer.slice(0, newline).startsWith("data:")) {
        const line = buffer.slice(0, newline);
        buffer = buffer.slice(newline + 1);
        const event = parseStreamChunk(line, context);
        if (event) yield event;
        continue;
      }
      if (flush && buffer.trim() && !buffer.trimStart().startsWith("data:")) {
        const line = buffer;
        buffer = "";
        const event = parseStreamChunk(line, context);
        if (event) yield event;
        continue;
      }
      break;
    }
  }

  try {
    for (; ; ) {
      const { done, value } = await reader.read();
      if (done) {
        exhausted = true;
        break;
      }
      append(decoder.decode(value, { stream: true }));
      for (const event of drainStreamBuffer(false)) yield event;
    }
    // Flush bytes the decoder was still holding for an incomplete sequence.
    append(decoder.decode());
    for (const event of drainStreamBuffer(true)) yield event;
    // Whatever is left is an unterminated "data:" chunk.
    if (buffer.trim()) {
      const event = parseStreamChunk(buffer, context);
      if (event) yield event;
    }
  } finally {
    if (!exhausted) {
      try {
        await reader.cancel?.();
      } catch {
        // The stream is already errored or aborted; nothing left to cancel.
      }
    }
    reader.releaseLock?.();
  }
}
|
|
257
|
+
/**
 * Parses one stream chunk (an SSE event block or a bare line) into a runtime
 * stream event.
 *
 * - "data:" lines are joined with "\n" and used as the payload; without any,
 *   the trimmed chunk is the payload.
 * - Chunks made up only of SSE comments (":...") or non-data fields
 *   ("event:", "id:", "retry:") carry no payload and are ignored, so
 *   keep-alive comments are not emitted as model text.
 * - An empty payload or "[DONE]" yields undefined.
 * - JSON payloads: chat-completion text (`choices[0].delta.content`,
 *   `choices[0].message.content`, or top-level `text`) becomes a "text_delta";
 *   anything else is delegated to `mapCommonBackendEvent`.
 * - Payloads that fail to parse or map are emitted verbatim as a "text_delta".
 *
 * @returns the mapped event, or undefined when the chunk carries nothing.
 */
function parseStreamChunk(chunk, context) {
  const sseNonDataLine = /^(?::|(?:event|id|retry):)/;
  const textDelta = (text) => ({
    type: "text_delta",
    task: context.task,
    session: context.session,
    text,
    timestamp: nowIso()
  });

  const lines = chunk.split(/\r?\n/);
  const dataLines = lines.filter((line) => line.startsWith("data:"));
  if (dataLines.length === 0) {
    const meaningful = lines.filter((line) => line.trim() !== "");
    // Protocol-only chunk (comment / event name / id / retry): nothing to emit.
    if (meaningful.length > 0 && meaningful.every((line) => sseNonDataLine.test(line))) {
      return void 0;
    }
  }
  const data = dataLines.length > 0 ? dataLines.map((line) => line.slice(5).trimStart()).join("\n") : chunk.trim();
  if (!data || data === "[DONE]") return void 0;
  try {
    const parsed = JSON.parse(data);
    const choices = parsed.choices;
    const choice = Array.isArray(choices) ? choices[0] : void 0;
    const delta = choice?.delta;
    const message = choice?.message;
    const text = stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text);
    if (text) return textDelta(text);
    return mapCommonBackendEvent(parsed, context);
  } catch {
    // Deliberate fallback: non-JSON payloads are surfaced as raw text.
    return textDelta(data);
  }
}
|
|
289
|
+
/** Returns the value when it is a non-empty string, otherwise undefined. */
function stringValue(value) {
  if (typeof value !== "string") return void 0;
  return value === "" ? void 0 : value;
}
|
|
292
|
+
|
|
293
|
+
// src/readiness.ts
|
|
294
|
+
/** Readiness score below which a report without blocking gaps gets a "caveat". */
var DEFAULT_MINIMUM_READINESS_SCORE = 0.7;

/**
 * Turns a knowledge readiness report into a pass/fail decision.
 *
 * Outcomes, in order of precedence:
 * 1. any blocking missing requirement        -> "blocked" (not passed)
 * 2. readinessScore is not a finite number   -> "caveat"  (not passed)
 * 3. readinessScore below the minimum        -> "caveat"  (not passed)
 * 4. otherwise                               -> "ready"   (passed)
 *
 * Case 2 makes the gate fail closed: NaN/undefined compare false against any
 * threshold and would otherwise be reported as "ready".
 *
 * @param report - must provide `blockingMissingRequirements` and
 *   `nonBlockingGaps` arrays of objects with an `id`, plus `readinessScore`,
 *   `reason`, `recommendedAction` and `severity`.
 * @param options - `minimumScore` overrides DEFAULT_MINIMUM_READINESS_SCORE.
 * @returns decision with `passed`, `status`, `reason`, the report's score,
 *   recommended action and severity, and both lists of gap ids.
 * @throws ValidationError when `minimumScore` is not a finite number in [0, 1].
 */
function decideKnowledgeReadiness(report, options = {}) {
  const minimumScore = options.minimumScore ?? DEFAULT_MINIMUM_READINESS_SCORE;
  if (!Number.isFinite(minimumScore) || minimumScore < 0 || minimumScore > 1) {
    throw new ValidationError(
      `minimumScore must be a finite number in [0, 1]; received ${String(minimumScore)}`
    );
  }
  const blockingGapIds = report.blockingMissingRequirements.map((requirement) => requirement.id);
  const nonBlockingGapIds = report.nonBlockingGaps.map((requirement) => requirement.id);

  // Every outcome shares the same shape; only these three fields vary.
  const decision = (passed, status, reason) => ({
    passed,
    status,
    reason,
    readinessScore: report.readinessScore,
    recommendedAction: report.recommendedAction,
    severity: report.severity,
    blockingGapIds,
    nonBlockingGapIds
  });

  if (blockingGapIds.length > 0) {
    return decision(false, "blocked", report.reason);
  }
  if (!Number.isFinite(report.readinessScore)) {
    return decision(
      false,
      "caveat",
      `Knowledge readiness score ${String(report.readinessScore)} is not a finite number.`
    );
  }
  if (report.readinessScore < minimumScore) {
    return decision(
      false,
      "caveat",
      `Knowledge readiness score ${report.readinessScore.toFixed(3)} is below minimum ${minimumScore.toFixed(3)}.`
    );
  }
  return decision(true, "ready", report.reason);
}
|
|
339
|
+
|
|
340
|
+
// src/run.ts
|
|
341
|
+
import {
|
|
342
|
+
acquisitionPlansForKnowledgeGaps,
|
|
343
|
+
blockingKnowledgeEval,
|
|
344
|
+
runAgentControlLoop,
|
|
345
|
+
scoreKnowledgeReadiness,
|
|
346
|
+
userQuestionsForKnowledgeGaps
|
|
347
|
+
} from "@tangle-network/agent-eval";
|
|
27
348
|
async function runAgentTask(options) {
|
|
28
349
|
const task = options.task;
|
|
29
350
|
await emit(options.onEvent, { type: "task_start", task });
|
|
@@ -35,7 +356,13 @@ async function runAgentTask(options) {
|
|
|
35
356
|
...knowledge.blockingMissingRequirements,
|
|
36
357
|
...knowledge.nonBlockingGaps
|
|
37
358
|
]);
|
|
38
|
-
const preflight = await runKnowledgePreflight(
|
|
359
|
+
const preflight = await runKnowledgePreflight(
|
|
360
|
+
task,
|
|
361
|
+
questions,
|
|
362
|
+
acquisitionPlans,
|
|
363
|
+
options.knowledge,
|
|
364
|
+
options.onEvent
|
|
365
|
+
);
|
|
39
366
|
if (options.knowledge?.refreshReadiness && (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)) {
|
|
40
367
|
await emit(options.onEvent, { type: "readiness_start", task });
|
|
41
368
|
knowledge = await options.knowledge.refreshReadiness({
|
|
@@ -58,13 +385,26 @@ async function runAgentTask(options) {
|
|
|
58
385
|
variantId: options.variantId,
|
|
59
386
|
observe: ({ history, abortSignal }) => options.adapter.observe({ task, knowledge, history, abortSignal }),
|
|
60
387
|
validate: async ({ state, history, abortSignal }) => {
|
|
61
|
-
const readinessEval = blockingKnowledgeEval(knowledge, {
|
|
62
|
-
|
|
388
|
+
const readinessEval = blockingKnowledgeEval(knowledge, {
|
|
389
|
+
minimumScore: options.minimumReadinessScore
|
|
390
|
+
});
|
|
391
|
+
const evals = await options.adapter.validate({
|
|
392
|
+
task,
|
|
393
|
+
knowledge,
|
|
394
|
+
state,
|
|
395
|
+
history,
|
|
396
|
+
abortSignal
|
|
397
|
+
});
|
|
63
398
|
return [readinessEval, ...evals];
|
|
64
399
|
},
|
|
65
400
|
decide: (ctx) => {
|
|
66
401
|
if (isKnowledgeBlocked(ctx.evals)) {
|
|
67
|
-
return options.adapter.onKnowledgeBlocked?.({
|
|
402
|
+
return options.adapter.onKnowledgeBlocked?.({
|
|
403
|
+
task,
|
|
404
|
+
knowledge,
|
|
405
|
+
questions,
|
|
406
|
+
acquisitionPlans
|
|
407
|
+
}) ?? {
|
|
68
408
|
type: "stop",
|
|
69
409
|
pass: false,
|
|
70
410
|
score: knowledge.readinessScore,
|
|
@@ -90,7 +430,9 @@ async function runAgentTask(options) {
|
|
|
90
430
|
userAnswers: preflight.userAnswers,
|
|
91
431
|
acquiredEvidenceIds: preflight.acquiredEvidenceIds,
|
|
92
432
|
control,
|
|
93
|
-
runRecords: (options.adapter.projectRunRecords?.(control, task) ?? []).map(
|
|
433
|
+
runRecords: (options.adapter.projectRunRecords?.(control, task) ?? []).map(
|
|
434
|
+
(record) => record.scenarioId === void 0 ? { ...record, scenarioId } : record
|
|
435
|
+
)
|
|
94
436
|
};
|
|
95
437
|
}
|
|
96
438
|
function summarizeAgentTaskRun(result) {
|
|
@@ -102,7 +444,9 @@ function summarizeAgentTaskRun(result) {
|
|
|
102
444
|
readinessStatus: decideKnowledgeReadiness(result.knowledge).status,
|
|
103
445
|
readinessScore: result.knowledge.readinessScore,
|
|
104
446
|
recommendedAction: result.knowledge.recommendedAction,
|
|
105
|
-
blockingGapIds: result.knowledge.blockingMissingRequirements.map(
|
|
447
|
+
blockingGapIds: result.knowledge.blockingMissingRequirements.map(
|
|
448
|
+
(requirement) => requirement.id
|
|
449
|
+
),
|
|
106
450
|
nonBlockingGapIds: result.knowledge.nonBlockingGaps.map((requirement) => requirement.id),
|
|
107
451
|
questionCount: result.questions.length,
|
|
108
452
|
acquisitionPlanCount: result.acquisitionPlans.length,
|
|
@@ -117,17 +461,20 @@ function summarizeAgentTaskRun(result) {
|
|
|
117
461
|
async function* runAgentTaskStream(options) {
|
|
118
462
|
const task = options.task;
|
|
119
463
|
const input = { task, ...options.input ?? {} };
|
|
120
|
-
|
|
121
|
-
yield
|
|
122
|
-
const readinessStart = streamEvent({ type: "readiness_start", task });
|
|
123
|
-
yield readinessStart;
|
|
464
|
+
yield streamEvent({ type: "task_start", task });
|
|
465
|
+
yield streamEvent({ type: "readiness_start", task });
|
|
124
466
|
let knowledge = await buildReadiness(task, options.knowledge);
|
|
125
467
|
const questions = userQuestionsForKnowledgeGaps(knowledge.blockingMissingRequirements);
|
|
126
468
|
const acquisitionPlans = acquisitionPlansForKnowledgeGaps([
|
|
127
469
|
...knowledge.blockingMissingRequirements,
|
|
128
470
|
...knowledge.nonBlockingGaps
|
|
129
471
|
]);
|
|
130
|
-
const preflight = await runKnowledgePreflightStream(
|
|
472
|
+
const preflight = await runKnowledgePreflightStream(
|
|
473
|
+
task,
|
|
474
|
+
questions,
|
|
475
|
+
acquisitionPlans,
|
|
476
|
+
options.knowledge
|
|
477
|
+
);
|
|
131
478
|
for (const event of preflight.events) yield event;
|
|
132
479
|
if (options.knowledge?.refreshReadiness && (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)) {
|
|
133
480
|
yield streamEvent({ type: "readiness_start", task });
|
|
@@ -138,7 +485,9 @@ async function* runAgentTaskStream(options) {
|
|
|
138
485
|
acquiredEvidenceIds: preflight.acquiredEvidenceIds
|
|
139
486
|
});
|
|
140
487
|
}
|
|
141
|
-
const decision = decideKnowledgeReadiness(knowledge, {
|
|
488
|
+
const decision = decideKnowledgeReadiness(knowledge, {
|
|
489
|
+
minimumScore: options.minimumReadinessScore
|
|
490
|
+
});
|
|
142
491
|
yield streamEvent({ type: "readiness_end", task, knowledge, decision });
|
|
143
492
|
if (!decision.passed && decision.status === "blocked") {
|
|
144
493
|
const reason = `knowledge readiness blocked: ${decision.reason}`;
|
|
@@ -149,7 +498,16 @@ async function* runAgentTaskStream(options) {
|
|
|
149
498
|
const store = options.sessionStore;
|
|
150
499
|
const existing = options.sessionId ? await store?.get(options.sessionId) : void 0;
|
|
151
500
|
const shouldResume = Boolean(options.resume && existing);
|
|
152
|
-
let session = shouldResume && existing ? await resumeBackendSession(options.backend, existing, input, {
|
|
501
|
+
let session = shouldResume && existing ? await resumeBackendSession(options.backend, existing, input, {
|
|
502
|
+
task,
|
|
503
|
+
knowledge,
|
|
504
|
+
signal: options.signal
|
|
505
|
+
}) : await startBackendSession(
|
|
506
|
+
options.backend,
|
|
507
|
+
input,
|
|
508
|
+
{ task, knowledge, signal: options.signal },
|
|
509
|
+
options.sessionId
|
|
510
|
+
);
|
|
153
511
|
await store?.put(session);
|
|
154
512
|
const sessionEvent = streamEvent({
|
|
155
513
|
type: shouldResume ? "session_resumed" : "session_created",
|
|
@@ -158,12 +516,22 @@ async function* runAgentTaskStream(options) {
|
|
|
158
516
|
});
|
|
159
517
|
await store?.appendEvent?.(session.id, sessionEvent);
|
|
160
518
|
yield sessionEvent;
|
|
161
|
-
const backendStart = streamEvent({
|
|
519
|
+
const backendStart = streamEvent({
|
|
520
|
+
type: "backend_start",
|
|
521
|
+
task,
|
|
522
|
+
session,
|
|
523
|
+
backend: options.backend.kind
|
|
524
|
+
});
|
|
162
525
|
await store?.appendEvent?.(session.id, backendStart);
|
|
163
526
|
yield backendStart;
|
|
164
527
|
let finalText = "";
|
|
165
528
|
try {
|
|
166
|
-
for await (const rawEvent of options.backend.stream(input, {
|
|
529
|
+
for await (const rawEvent of options.backend.stream(input, {
|
|
530
|
+
task,
|
|
531
|
+
knowledge,
|
|
532
|
+
session,
|
|
533
|
+
signal: options.signal
|
|
534
|
+
})) {
|
|
167
535
|
const event = normalizeBackendStreamEvent(rawEvent, task, session);
|
|
168
536
|
if (event.type === "text_delta") finalText += event.text;
|
|
169
537
|
await store?.appendEvent?.(session.id, event);
|
|
@@ -172,14 +540,26 @@ async function* runAgentTaskStream(options) {
|
|
|
172
540
|
const completedStatus = "completed";
|
|
173
541
|
session = touchSession({ ...session, status: completedStatus });
|
|
174
542
|
await store?.put(session);
|
|
175
|
-
const backendEnd = streamEvent({
|
|
543
|
+
const backendEnd = streamEvent({
|
|
544
|
+
type: "backend_end",
|
|
545
|
+
task,
|
|
546
|
+
session,
|
|
547
|
+
backend: options.backend.kind
|
|
548
|
+
});
|
|
176
549
|
await store?.appendEvent?.(session.id, backendEnd);
|
|
177
550
|
yield backendEnd;
|
|
178
551
|
const reason = "backend completed";
|
|
179
552
|
const taskEnd = streamEvent({ type: "task_end", task, status: completedStatus, reason });
|
|
180
553
|
await store?.appendEvent?.(session.id, taskEnd);
|
|
181
554
|
yield taskEnd;
|
|
182
|
-
const final = streamEvent({
|
|
555
|
+
const final = streamEvent({
|
|
556
|
+
type: "final",
|
|
557
|
+
task,
|
|
558
|
+
session,
|
|
559
|
+
status: completedStatus,
|
|
560
|
+
reason,
|
|
561
|
+
text: finalText || void 0
|
|
562
|
+
});
|
|
183
563
|
await store?.appendEvent?.(session.id, final);
|
|
184
564
|
yield final;
|
|
185
565
|
} catch (err) {
|
|
@@ -206,50 +586,226 @@ async function* runAgentTaskStream(options) {
|
|
|
206
586
|
const taskEnd = streamEvent({ type: "task_end", task, status, reason: message });
|
|
207
587
|
await store?.appendEvent?.(session.id, taskEnd);
|
|
208
588
|
yield taskEnd;
|
|
209
|
-
const final = streamEvent({
|
|
589
|
+
const final = streamEvent({
|
|
590
|
+
type: "final",
|
|
591
|
+
task,
|
|
592
|
+
session,
|
|
593
|
+
status,
|
|
594
|
+
reason: message,
|
|
595
|
+
text: finalText || void 0
|
|
596
|
+
});
|
|
210
597
|
await store?.appendEvent?.(session.id, final);
|
|
211
598
|
yield final;
|
|
212
599
|
}
|
|
213
600
|
}
|
|
214
|
-
function
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
status: "blocked",
|
|
222
|
-
reason: report.reason,
|
|
223
|
-
readinessScore: report.readinessScore,
|
|
224
|
-
recommendedAction: report.recommendedAction,
|
|
225
|
-
severity: report.severity,
|
|
226
|
-
blockingGapIds,
|
|
227
|
-
nonBlockingGapIds
|
|
228
|
-
};
|
|
601
|
+
/**
 * Runs the optional knowledge preflight steps and reports progress through
 * `onEvent`.
 *
 * Questions are answered first (when there are questions and the provider
 * implements `answerQuestions`), then acquisition plans are executed (when
 * there are plans and the provider implements `executeAcquisitionPlans`).
 * Each step is bracketed by a `*_start` / `*_end` event.
 *
 * A provider that resolves to null/undefined is treated as "no answers" /
 * "no evidence", so the result always holds an object and an array.
 *
 * @param task - task the preflight belongs to; included in every event.
 * @param questions - questions to put to the provider.
 * @param acquisitionPlans - plans to hand to the provider.
 * @param provider - optional knowledge provider.
 * @param onEvent - optional event sink; awaited for every event.
 * @returns `{ userAnswers, acquiredEvidenceIds }`.
 */
async function runKnowledgePreflight(task, questions, acquisitionPlans, provider, onEvent) {
  let userAnswers = {};
  let acquiredEvidenceIds = [];
  if (questions.length > 0 && provider?.answerQuestions) {
    await emit(onEvent, { type: "questions_start", task, questions });
    userAnswers = await provider.answerQuestions(questions, task) ?? {};
    await emit(onEvent, { type: "questions_end", task, questions, userAnswers });
  }
  if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
    await emit(onEvent, { type: "acquisition_start", task, acquisitionPlans });
    acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task) ?? [];
    await emit(onEvent, {
      type: "acquisition_end",
      task,
      acquisitionPlans,
      acquiredEvidenceIds
    });
  }
  return { userAnswers, acquiredEvidenceIds };
}
|
|
621
|
+
/**
 * Streaming-run variant of the knowledge preflight: performs the same steps
 * but collects the progress events (each built with `streamEvent`) into an
 * array instead of pushing them to a sink.
 *
 * Questions are answered first, then acquisition plans are executed; each
 * step only runs when there is input for it and the provider implements the
 * matching method. A provider that resolves to null/undefined is treated as
 * "no answers" / "no evidence".
 *
 * @returns `{ userAnswers, acquiredEvidenceIds, events }` with the events in
 *   the order they occurred.
 */
async function runKnowledgePreflightStream(task, questions, acquisitionPlans, provider) {
  const events = [];
  let userAnswers = {};
  let acquiredEvidenceIds = [];
  if (questions.length > 0 && provider?.answerQuestions) {
    events.push(streamEvent({ type: "questions_start", task, questions }));
    userAnswers = await provider.answerQuestions(questions, task) ?? {};
    events.push(streamEvent({ type: "questions_end", task, questions, userAnswers }));
  }
  if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
    events.push(streamEvent({ type: "acquisition_start", task, acquisitionPlans }));
    acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task) ?? [];
    events.push(
      streamEvent({ type: "acquisition_end", task, acquisitionPlans, acquiredEvidenceIds })
    );
  }
  return { userAnswers, acquiredEvidenceIds, events };
}
|
|
639
|
+
/**
 * Returns a shallow copy of the event stamped with the current ISO time in
 * `timestamp` (any timestamp already on the event is overwritten).
 */
function streamEvent(event) {
  const stamped = { ...event };
  stamped.timestamp = nowIso();
  return stamped;
}
|
|
642
|
+
/**
 * Opens a new session on the backend.
 *
 * Backends with a `start` method create the session themselves and receive
 * the context extended with `requestedSessionId`; for all others a default
 * session record is built from the backend kind and the requested id.
 */
async function startBackendSession(backend, input, context, requestedSessionId) {
  if (!backend.start) {
    return newRuntimeSession(backend.kind, requestedSessionId);
  }
  const startContext = { ...context, requestedSessionId };
  return backend.start(input, startContext);
}
|
|
646
|
+
/**
 * Resumes a stored session on the given backend.
 *
 * @throws SessionMismatchError (as a rejection) when the session was recorded
 *   for a different backend kind.
 * @returns the backend's own `resume` result when it implements one, otherwise
 *   a copy of the session marked "active" with a refreshed timestamp.
 */
async function resumeBackendSession(backend, session, input, context) {
  const sameBackend = session.backend === backend.kind;
  if (!sameBackend) {
    throw new SessionMismatchError(session.backend, backend.kind);
  }
  if (!backend.resume) {
    const reactivated = { ...session, status: "active" };
    return touchSession(reactivated);
  }
  return backend.resume(session, input, context);
}
|
|
653
|
+
/**
 * Produces the knowledge readiness report for a task.
 *
 * A provider implementing `buildReadiness` is asked directly and its result is
 * returned as-is (it may be a promise). Otherwise the report is scored from
 * the task's `requiredKnowledge` (empty list when absent), with the task
 * domain and metadata passed along as metadata.
 */
function buildReadiness(task, provider) {
  if (!provider?.buildReadiness) {
    const metadata = { domain: task.domain, ...task.metadata };
    const requirements = task.requiredKnowledge ?? [];
    return scoreKnowledgeReadiness({ taskId: task.id, requirements, metadata });
  }
  return provider.buildReadiness(task);
}
|
|
661
|
+
/**
 * Reports whether the eval results contain a failed "knowledge-ready" eval.
 *
 * @param evals - array of eval results with `id` and `passed`.
 * @returns true when at least one result has id "knowledge-ready" and a falsy
 *   `passed`.
 */
function isKnowledgeBlocked(evals) {
  // De Morgan form of "some result is a failed knowledge-ready eval".
  const noneBlocked = evals.every(
    (evalResult) => evalResult.id !== "knowledge-ready" || evalResult.passed
  );
  return !noneBlocked;
}
|
|
664
|
+
/**
 * Derives the task status from a control-loop result.
 *
 * Precedence: stopped by "abort" -> "aborted"; reason mentioning
 * "knowledge readiness blocked" -> "blocked"; truthy `pass` -> "completed";
 * anything else -> "failed".
 */
function statusFromControl(control) {
  if (control.stoppedBy === "abort") return "aborted";
  const blockedByKnowledge = control.reason.includes("knowledge readiness blocked");
  if (blockedByKnowledge) return "blocked";
  return control.pass ? "completed" : "failed";
}
|
|
670
|
+
/**
 * Delivers an event to an optional sink and waits for the sink to finish.
 * A null/undefined sink makes this a no-op (still asynchronous).
 */
async function emit(sink, event) {
  const pending = sink === null || sink === void 0 ? void 0 : sink(event);
  await pending;
}
|
|
673
|
+
/**
 * Builds the context handed to the agent adapter: the task and knowledge
 * report plus the loop state copied from the control-loop context (state,
 * evals, history, budget, step index, wall time, cost figures, abort signal).
 */
function toAgentContext(task, knowledge, ctx) {
  const {
    state,
    evals,
    history,
    budget,
    stepIndex,
    wallMs,
    spentCostUsd,
    remainingCostUsd,
    abortSignal
  } = ctx;
  return {
    task,
    knowledge,
    state,
    evals,
    history,
    budget,
    stepIndex,
    wallMs,
    spentCostUsd,
    remainingCostUsd,
    abortSignal
  };
}
|
|
688
|
+
|
|
689
|
+
// src/runtime-run.ts
|
|
690
|
+
/**
 * Starts tracking one runtime run and returns a handle for it.
 *
 * The handle accumulates LLM usage from observed events, can be completed
 * exactly once with a terminal status, and can be projected to a row or
 * persisted through `options.adapter`.
 *
 * @param options - requires `workspaceId` and `taskSpec.id`; optional `id`
 *   (default `<taskSpec.id>:<random suffix>`), `now` (millisecond clock,
 *   default `Date.now`), `sessionId`, `agentId`, `scenarioId`, `adapter`.
 * @returns handle with `id`, `workspaceId`, `sessionId`, `taskSpec`, a live
 *   `status` getter, and `observe`, `cost`, `complete`, `toRow`, `persist`.
 * @throws ValidationError when `workspaceId` or `taskSpec.id` is missing.
 */
function startRuntimeRun(options) {
  if (!options.workspaceId) {
    throw new ValidationError("startRuntimeRun: workspaceId is required");
  }
  if (!options.taskSpec?.id) {
    throw new ValidationError("startRuntimeRun: taskSpec.id is required");
  }
  const now = options.now ?? Date.now;
  const startedAtMs = now();
  const startedAt = new Date(startedAtMs).toISOString();
  const id = options.id ?? `${options.taskSpec.id}:${randomSuffix()}`;

  // Mutable run state, closed over by the handle below.
  let status = "running";
  let completedAtMs;
  let resultSummary;
  let error;
  let completionMetadata;
  const ledger = { tokensIn: 0, tokensOut: 0, costUsd: 0, llmCalls: 0 };

  const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);

  // Wall time runs until completion; afterwards it is frozen.
  const snapshotCost = () => ({
    tokensIn: ledger.tokensIn,
    tokensOut: ledger.tokensOut,
    costUsd: ledger.costUsd,
    wallMs: (completedAtMs ?? now()) - startedAtMs,
    llmCalls: ledger.llmCalls
  });

  const buildRow = (extraMetadata) => ({
    id,
    workspaceId: options.workspaceId,
    sessionId: options.sessionId,
    agentId: options.agentId,
    domain: options.taskSpec.domain,
    taskId: options.taskSpec.id,
    scenarioId: options.scenarioId,
    status,
    resultSummary,
    error,
    cost: snapshotCost(),
    startedAt,
    completedAt: completedAtMs !== void 0 ? new Date(completedAtMs).toISOString() : void 0,
    metadata: mergeMetadata(completionMetadata, extraMetadata)
  });

  return {
    id,
    workspaceId: options.workspaceId,
    sessionId: options.sessionId,
    taskSpec: options.taskSpec,
    get status() {
      return status;
    },
    /** Counts "llm_call" events and adds their finite usage figures; ignores other events. */
    observe(event) {
      if (event.type !== "llm_call") return;
      ledger.llmCalls += 1;
      for (const field of ["tokensIn", "tokensOut", "costUsd"]) {
        if (isFiniteNumber(event[field])) ledger[field] += event[field];
      }
    },
    cost: snapshotCost,
    /**
     * Moves the run to a terminal status. Repeating the same terminal status
     * is a no-op; a different one throws RuntimeRunStateError. Finite figures
     * in `input.cost` replace the accumulated ones.
     */
    complete(input) {
      if (input.status === "running") {
        throw new ValidationError('complete() requires a terminal status, got "running"');
      }
      if (status !== "running") {
        if (status === input.status) return;
        throw new RuntimeRunStateError(
          `Cannot transition runtime run from "${status}" to "${input.status}"`
        );
      }
      status = input.status;
      completedAtMs = now();
      resultSummary = input.resultSummary;
      error = input.error;
      completionMetadata = input.metadata;
      if (!input.cost) return;
      for (const field of ["tokensIn", "tokensOut", "costUsd", "llmCalls"]) {
        if (isFiniteNumber(input.cost[field])) ledger[field] = input.cost[field];
      }
    },
    /** Projects the current state to a row; `metadata` is merged over the completion metadata. */
    toRow(metadata) {
      return buildRow(metadata);
    },
    /** Upserts the row through the adapter (no-op without one); rejects while still running. */
    async persist(metadata) {
      if (status === "running") {
        throw new RuntimeRunStateError("Cannot persist a runtime run before complete() is called");
      }
      if (!options.adapter) return;
      await options.adapter.upsert(buildRow(metadata));
    }
  };
}
|
|
800
|
+
/**
 * Shallow-merges two optional metadata records; keys in `extra` win.
 *
 * @returns a new object, or `undefined` when both inputs are falsy.
 */
function mergeMetadata(base, extra) {
  if (base || extra) {
    return { ...(base ?? {}), ...(extra ?? {}) };
  }
  return void 0;
}
|
|
804
|
+
/**
 * Returns a random suffix of exactly 8 base-36 characters (`0-9a-z`).
 *
 * A single `Math.random().toString(36)` can have a very short fractional
 * part (0.5 renders as "0.i", 0 as "0"), so digits are accumulated until
 * there are enough. Not cryptographically secure: use only for
 * collision-avoidance, never for secrets.
 *
 * @returns {string} 8 lowercase base-36 characters.
 */
function randomSuffix() {
  const length = 8;
  let suffix = "";
  while (suffix.length < length) {
    // slice(2) drops the leading "0." of the base-36 rendering.
    suffix += Math.random().toString(36).slice(2);
  }
  return suffix.slice(0, length);
}
|
|
807
|
+
|
|
808
|
+
// src/sanitize.ts
|
|
253
809
|
function sanitizeKnowledgeReadinessReport(report, options = {}) {
|
|
254
810
|
return {
|
|
255
811
|
taskId: report.taskId,
|
|
@@ -277,7 +833,10 @@ function sanitizeAgentRuntimeEvent(event, options = {}) {
|
|
|
277
833
|
return { ...base, knowledge: sanitizeKnowledgeReadinessReport(event.knowledge, options) };
|
|
278
834
|
}
|
|
279
835
|
if (event.type === "questions_start") {
|
|
280
|
-
return {
|
|
836
|
+
return {
|
|
837
|
+
...base,
|
|
838
|
+
questions: event.questions.map((question) => sanitizeQuestion(question, options))
|
|
839
|
+
};
|
|
281
840
|
}
|
|
282
841
|
if (event.type === "questions_end") {
|
|
283
842
|
return {
|
|
@@ -318,7 +877,12 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
|
|
|
318
877
|
};
|
|
319
878
|
}
|
|
320
879
|
if (event.type === "questions_start") {
|
|
321
|
-
return {
|
|
880
|
+
return {
|
|
881
|
+
type: event.type,
|
|
882
|
+
...withTask,
|
|
883
|
+
timestamp: event.timestamp,
|
|
884
|
+
questions: event.questions.map((question) => sanitizeQuestion(question, options))
|
|
885
|
+
};
|
|
322
886
|
}
|
|
323
887
|
if (event.type === "questions_end") {
|
|
324
888
|
return {
|
|
@@ -330,7 +894,12 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
|
|
|
330
894
|
};
|
|
331
895
|
}
|
|
332
896
|
if (event.type === "acquisition_start") {
|
|
333
|
-
return {
|
|
897
|
+
return {
|
|
898
|
+
type: event.type,
|
|
899
|
+
...withTask,
|
|
900
|
+
timestamp: event.timestamp,
|
|
901
|
+
acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan)
|
|
902
|
+
};
|
|
334
903
|
}
|
|
335
904
|
if (event.type === "acquisition_end") {
|
|
336
905
|
return {
|
|
@@ -364,6 +933,20 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
|
|
|
364
933
|
result: options.includeControlPayloads ? event.result : void 0
|
|
365
934
|
};
|
|
366
935
|
}
|
|
936
|
+
if (event.type === "llm_call") {
|
|
937
|
+
return {
|
|
938
|
+
type: event.type,
|
|
939
|
+
...withTask,
|
|
940
|
+
...withSession,
|
|
941
|
+
timestamp: event.timestamp,
|
|
942
|
+
model: event.model,
|
|
943
|
+
tokensIn: event.tokensIn,
|
|
944
|
+
tokensOut: event.tokensOut,
|
|
945
|
+
costUsd: event.costUsd,
|
|
946
|
+
latencyMs: event.latencyMs,
|
|
947
|
+
finishReason: event.finishReason
|
|
948
|
+
};
|
|
949
|
+
}
|
|
367
950
|
if (event.type === "artifact") {
|
|
368
951
|
return {
|
|
369
952
|
type: event.type,
|
|
@@ -397,198 +980,48 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
|
|
|
397
980
|
...pickPublicStreamFields(event)
|
|
398
981
|
};
|
|
399
982
|
}
|
|
400
|
-
function
|
|
401
|
-
const events = [];
|
|
983
|
+
/**
 * Builds a telemetry-safe copy of a task.
 *
 * `inputs` and `metadata` are passed through only when the matching
 * `options.includeInputs` / `options.includeMetadata` flag is truthy;
 * otherwise a present value is replaced by "[redacted]" and an absent one
 * stays `undefined`. Each required-knowledge entry is run through
 * `sanitizeKnowledgeRequirement`.
 */
function sanitizeTask(task, options) {
  const passOrRedact = (allowed, value) => {
    if (allowed) return value;
    return value ? "[redacted]" : void 0;
  };
  const requirements = task.requiredKnowledge?.map(
    (entry) => sanitizeKnowledgeRequirement(entry, options)
  );
  return {
    id: task.id,
    intent: task.intent,
    domain: task.domain,
    inputs: passOrRedact(options.includeInputs, task.inputs),
    requiredKnowledge: requirements,
    metadata: passOrRedact(options.includeMetadata, task.metadata)
  };
}
|
|
409
|
-
function
|
|
410
|
-
const events = [];
|
|
411
|
-
const eventCountsByType = {};
|
|
412
|
-
let firstSessionId;
|
|
413
|
-
let finalStatus;
|
|
414
|
-
let finalReason;
|
|
415
|
-
let finalText = "";
|
|
995
|
+
/**
 * Builds a telemetry-safe view of a runtime session.
 *
 * The resume token itself is never copied; only whether one exists is
 * reported via `hasResumeToken`. `metadata` is passed through when
 * `options.includeMetadata` is truthy, otherwise it becomes "[redacted]"
 * when present and `undefined` when absent.
 */
function sanitizeRuntimeSession(session, options) {
  const { id, backend, status, createdAt, updatedAt } = session;
  let metadata;
  if (options.includeMetadata) {
    metadata = session.metadata;
  } else if (session.metadata) {
    metadata = "[redacted]";
  } else {
    metadata = void 0;
  }
  return {
    id,
    backend,
    status,
    hasResumeToken: Boolean(session.resumeToken),
    createdAt,
    updatedAt,
    metadata
  };
}
|
|
442
|
-
function
|
|
443
|
-
const
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
return encodeServerSentEvent({
|
|
460
|
-
type: "readiness",
|
|
461
|
-
readiness: sanitizeKnowledgeReadinessReport(report, telemetryOptions)
|
|
462
|
-
}, { event, id, retry });
|
|
463
|
-
}
|
|
464
|
-
function runtimeStreamServerSentEvent(event, options = {}) {
|
|
465
|
-
const { event: sseEvent, id, retry, ...telemetryOptions } = options;
|
|
466
|
-
return encodeServerSentEvent(sanitizeRuntimeStreamEvent(event, telemetryOptions), { event: sseEvent, id, retry });
|
|
467
|
-
}
|
|
468
|
-
function createIterableBackend(options) {
|
|
469
|
-
return options;
|
|
1006
|
+
/**
 * Builds a telemetry-safe copy of a knowledge requirement.
 *
 * The description is exposed only when `options.includeRequirementDescriptions`
 * is truthy and the requirement's sensitivity is not "secret". Evidence ids
 * are exposed only when `options.includeEvidenceIds` is truthy; their count
 * is always reported.
 */
function sanitizeKnowledgeRequirement(requirement, options) {
  const isSecret = requirement.sensitivity === "secret";
  const exposeDescription = Boolean(options.includeRequirementDescriptions) && !isSecret;
  const { evidenceIds } = requirement;
  return {
    id: requirement.id,
    description: exposeDescription ? requirement.description : void 0,
    requiredFor: requirement.requiredFor,
    category: requirement.category,
    acquisitionMode: requirement.acquisitionMode,
    importance: requirement.importance,
    freshness: requirement.freshness,
    sensitivity: requirement.sensitivity,
    confidenceNeeded: requirement.confidenceNeeded,
    currentConfidence: requirement.currentConfidence,
    evidenceCount: evidenceIds.length,
    evidenceIds: options.includeEvidenceIds ? evidenceIds : void 0,
    fallbackPolicy: requirement.fallbackPolicy
  };
}
|
|
471
|
-
function
|
|
472
|
-
return {
|
|
473
|
-
kind: options.kind ?? "sandbox",
|
|
474
|
-
async start(input, context) {
|
|
475
|
-
const box = await options.getBox(input, context);
|
|
476
|
-
return newRuntimeSession(options.kind ?? "sandbox", options.getSessionId?.(box, input) ?? context.requestedSessionId, {
|
|
477
|
-
resumable: true
|
|
478
|
-
});
|
|
479
|
-
},
|
|
480
|
-
resume(session) {
|
|
481
|
-
return touchSession({ ...session, status: "active" });
|
|
482
|
-
},
|
|
483
|
-
async *stream(input, context) {
|
|
484
|
-
const box = await options.getBox(input, context);
|
|
485
|
-
const message = input.message ?? input.messages?.at(-1)?.content ?? context.task.intent;
|
|
486
|
-
for await (const event of options.streamPrompt(box, message, context)) {
|
|
487
|
-
const mapped = options.mapEvent?.(event, context) ?? mapCommonBackendEvent(event, context);
|
|
488
|
-
if (mapped) yield mapped;
|
|
489
|
-
}
|
|
490
|
-
}
|
|
491
|
-
};
|
|
492
|
-
}
|
|
493
|
-
function createOpenAICompatibleBackend(options) {
|
|
494
|
-
const fetcher = options.fetchImpl ?? fetch;
|
|
495
|
-
return {
|
|
496
|
-
kind: options.kind ?? "tcloud",
|
|
497
|
-
start(_input, context) {
|
|
498
|
-
return newRuntimeSession(options.kind ?? "tcloud", context.requestedSessionId);
|
|
499
|
-
},
|
|
500
|
-
async *stream(input, context) {
|
|
501
|
-
const response = await fetcher(`${options.baseUrl.replace(/\/$/, "")}/chat/completions`, {
|
|
502
|
-
method: "POST",
|
|
503
|
-
headers: {
|
|
504
|
-
Authorization: `Bearer ${options.apiKey}`,
|
|
505
|
-
"Content-Type": "application/json"
|
|
506
|
-
},
|
|
507
|
-
body: JSON.stringify({
|
|
508
|
-
model: options.model,
|
|
509
|
-
stream: true,
|
|
510
|
-
messages: input.messages ?? [{ role: "user", content: input.message ?? context.task.intent }]
|
|
511
|
-
}),
|
|
512
|
-
signal: context.signal
|
|
513
|
-
});
|
|
514
|
-
if (!response.ok) throw new Error(`chat backend returned ${response.status}`);
|
|
515
|
-
yield* streamResponseEvents(response, context);
|
|
516
|
-
}
|
|
517
|
-
};
|
|
518
|
-
}
|
|
519
|
-
async function runKnowledgePreflight(task, questions, acquisitionPlans, provider, onEvent) {
|
|
520
|
-
let userAnswers = {};
|
|
521
|
-
let acquiredEvidenceIds = [];
|
|
522
|
-
if (questions.length > 0 && provider?.answerQuestions) {
|
|
523
|
-
await emit(onEvent, { type: "questions_start", task, questions });
|
|
524
|
-
userAnswers = await provider.answerQuestions(questions, task);
|
|
525
|
-
await emit(onEvent, { type: "questions_end", task, questions, userAnswers });
|
|
526
|
-
}
|
|
527
|
-
if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
|
|
528
|
-
await emit(onEvent, { type: "acquisition_start", task, acquisitionPlans });
|
|
529
|
-
acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task);
|
|
530
|
-
await emit(onEvent, { type: "acquisition_end", task, acquisitionPlans, acquiredEvidenceIds });
|
|
531
|
-
}
|
|
532
|
-
return { userAnswers, acquiredEvidenceIds };
|
|
533
|
-
}
|
|
534
|
-
async function runKnowledgePreflightStream(task, questions, acquisitionPlans, provider) {
|
|
535
|
-
const events = [];
|
|
536
|
-
let userAnswers = {};
|
|
537
|
-
let acquiredEvidenceIds = [];
|
|
538
|
-
if (questions.length > 0 && provider?.answerQuestions) {
|
|
539
|
-
events.push(streamEvent({ type: "questions_start", task, questions }));
|
|
540
|
-
userAnswers = await provider.answerQuestions(questions, task);
|
|
541
|
-
events.push(streamEvent({ type: "questions_end", task, questions, userAnswers }));
|
|
542
|
-
}
|
|
543
|
-
if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
|
|
544
|
-
events.push(streamEvent({ type: "acquisition_start", task, acquisitionPlans }));
|
|
545
|
-
acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task);
|
|
546
|
-
events.push(streamEvent({ type: "acquisition_end", task, acquisitionPlans, acquiredEvidenceIds }));
|
|
547
|
-
}
|
|
548
|
-
return { userAnswers, acquiredEvidenceIds, events };
|
|
549
|
-
}
|
|
550
|
-
function sanitizeTask(task, options) {
|
|
551
|
-
return {
|
|
552
|
-
id: task.id,
|
|
553
|
-
intent: task.intent,
|
|
554
|
-
domain: task.domain,
|
|
555
|
-
inputs: options.includeInputs ? task.inputs : task.inputs ? "[redacted]" : void 0,
|
|
556
|
-
requiredKnowledge: task.requiredKnowledge?.map(
|
|
557
|
-
(requirement) => sanitizeKnowledgeRequirement(requirement, options)
|
|
558
|
-
),
|
|
559
|
-
metadata: options.includeMetadata ? task.metadata : task.metadata ? "[redacted]" : void 0
|
|
560
|
-
};
|
|
561
|
-
}
|
|
562
|
-
function sanitizeRuntimeSession(session, options) {
|
|
563
|
-
return {
|
|
564
|
-
id: session.id,
|
|
565
|
-
backend: session.backend,
|
|
566
|
-
status: session.status,
|
|
567
|
-
hasResumeToken: Boolean(session.resumeToken),
|
|
568
|
-
createdAt: session.createdAt,
|
|
569
|
-
updatedAt: session.updatedAt,
|
|
570
|
-
metadata: options.includeMetadata ? session.metadata : session.metadata ? "[redacted]" : void 0
|
|
571
|
-
};
|
|
572
|
-
}
|
|
573
|
-
function sanitizeKnowledgeRequirement(requirement, options) {
|
|
574
|
-
const includeDescription = options.includeRequirementDescriptions && requirement.sensitivity !== "secret";
|
|
575
|
-
return {
|
|
576
|
-
id: requirement.id,
|
|
577
|
-
description: includeDescription ? requirement.description : void 0,
|
|
578
|
-
requiredFor: requirement.requiredFor,
|
|
579
|
-
category: requirement.category,
|
|
580
|
-
acquisitionMode: requirement.acquisitionMode,
|
|
581
|
-
importance: requirement.importance,
|
|
582
|
-
freshness: requirement.freshness,
|
|
583
|
-
sensitivity: requirement.sensitivity,
|
|
584
|
-
confidenceNeeded: requirement.confidenceNeeded,
|
|
585
|
-
currentConfidence: requirement.currentConfidence,
|
|
586
|
-
evidenceCount: requirement.evidenceIds.length,
|
|
587
|
-
evidenceIds: options.includeEvidenceIds ? requirement.evidenceIds : void 0,
|
|
588
|
-
fallbackPolicy: requirement.fallbackPolicy
|
|
589
|
-
};
|
|
590
|
-
}
|
|
591
|
-
function sanitizeQuestion(question, options) {
|
|
1024
|
+
function sanitizeQuestion(question, options) {
|
|
592
1025
|
return {
|
|
593
1026
|
id: question.id,
|
|
594
1027
|
question: options.includeRequirementDescriptions && question.answerType !== "credential" ? question.question : void 0,
|
|
@@ -657,207 +1090,294 @@ function summarizeEvals(evals, options) {
|
|
|
657
1090
|
/**
 * Returns a new object with the same own enumerable string keys as `record`,
 * every value replaced by the literal "[redacted]".
 */
function redactRecord(record) {
  const pairs = [];
  for (const key of Object.keys(record)) {
    pairs.push([key, "[redacted]"]);
  }
  return Object.fromEntries(pairs);
}
|
|
660
|
-
function stripNewlines(value) {
|
|
661
|
-
return value.replace(/[\r\n]/g, " ");
|
|
662
|
-
}
|
|
663
|
-
function timestamp() {
|
|
664
|
-
return (/* @__PURE__ */ new Date()).toISOString();
|
|
665
|
-
}
|
|
666
|
-
function streamEvent(event) {
|
|
667
|
-
return { ...event, timestamp: timestamp() };
|
|
668
|
-
}
|
|
669
|
-
function newRuntimeSession(backend, requestedId, metadata) {
|
|
670
|
-
const now = timestamp();
|
|
671
|
-
return {
|
|
672
|
-
id: requestedId || crypto.randomUUID(),
|
|
673
|
-
backend,
|
|
674
|
-
status: "active",
|
|
675
|
-
createdAt: now,
|
|
676
|
-
updatedAt: now,
|
|
677
|
-
metadata
|
|
678
|
-
};
|
|
679
|
-
}
|
|
680
|
-
function touchSession(session) {
|
|
681
|
-
return { ...session, updatedAt: timestamp() };
|
|
682
|
-
}
|
|
683
|
-
async function startBackendSession(backend, input, context, requestedSessionId) {
|
|
684
|
-
if (backend.start) return backend.start(input, { ...context, requestedSessionId });
|
|
685
|
-
return newRuntimeSession(backend.kind, requestedSessionId);
|
|
686
|
-
}
|
|
687
|
-
async function resumeBackendSession(backend, session, input, context) {
|
|
688
|
-
if (session.backend !== backend.kind) {
|
|
689
|
-
throw new Error(`Cannot resume ${session.backend} session with ${backend.kind} backend`);
|
|
690
|
-
}
|
|
691
|
-
if (backend.resume) return backend.resume(session, input, context);
|
|
692
|
-
return touchSession({ ...session, status: "active" });
|
|
693
|
-
}
|
|
694
|
-
function normalizeBackendStreamEvent(event, task, session) {
|
|
695
|
-
if ("task" in event && event.task && "session" in event && event.session && "timestamp" in event && event.timestamp) return event;
|
|
696
|
-
return {
|
|
697
|
-
...event,
|
|
698
|
-
task: "task" in event && event.task ? event.task : task,
|
|
699
|
-
session: "session" in event && event.session ? event.session : session,
|
|
700
|
-
timestamp: "timestamp" in event && event.timestamp ? event.timestamp : timestamp()
|
|
701
|
-
};
|
|
702
|
-
}
|
|
703
1093
|
/**
 * Selects the type-specific fields of a stream event that are copied into
 * its sanitized form. Event types without a dedicated case (including
 * session_created / session_resumed) contribute no extra fields.
 */
function pickPublicStreamFields(event) {
  switch (event.type) {
    case "backend_start":
    case "backend_end":
      return { backend: event.backend };
    case "backend_error":
      return { backend: event.backend, message: event.message, recoverable: event.recoverable };
    case "task_end":
      return { status: event.status, reason: event.reason };
    case "text_delta":
    case "reasoning_delta":
      return { text: event.text };
    default:
      return {};
  }
}
|
|
711
|
-
function
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
}
|
|
720
|
-
if (type === "reasoning_delta") {
|
|
721
|
-
const text = stringValue(data.text) ?? stringValue(record.text);
|
|
722
|
-
return text ? { type: "reasoning_delta", task: context.task, session: context.session, text, timestamp: timestamp() } : void 0;
|
|
723
|
-
}
|
|
724
|
-
if (type === "tool_call") {
|
|
725
|
-
return {
|
|
726
|
-
type: "tool_call",
|
|
727
|
-
task: context.task,
|
|
728
|
-
session: context.session,
|
|
729
|
-
toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? "tool",
|
|
730
|
-
toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
|
|
731
|
-
args: data.args ?? data.input ?? record.args,
|
|
732
|
-
timestamp: timestamp()
|
|
733
|
-
};
|
|
734
|
-
}
|
|
735
|
-
if (type === "tool_result") {
|
|
736
|
-
return {
|
|
737
|
-
type: "tool_result",
|
|
738
|
-
task: context.task,
|
|
739
|
-
session: context.session,
|
|
740
|
-
toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? "tool",
|
|
741
|
-
toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
|
|
742
|
-
result: data.result ?? data.output ?? record.result,
|
|
743
|
-
timestamp: timestamp()
|
|
744
|
-
};
|
|
745
|
-
}
|
|
746
|
-
if (type === "result" || type === "final") {
|
|
747
|
-
const text = stringValue(data.finalText) ?? stringValue(data.text) ?? stringValue(record.text);
|
|
748
|
-
return text ? { type: "text_delta", task: context.task, session: context.session, text, timestamp: timestamp() } : void 0;
|
|
749
|
-
}
|
|
750
|
-
return void 0;
|
|
1104
|
+
/**
 * Creates an in-memory sink for agent runtime events.
 *
 * @param options - sanitization options forwarded to `sanitizeAgentRuntimeEvent`.
 * @returns `{ events, onEvent }`: `onEvent` appends the sanitized form of each
 *   event to the live `events` array.
 */
function createRuntimeEventCollector(options = {}) {
  const collected = [];
  const onEvent = (event) => {
    const sanitized = sanitizeAgentRuntimeEvent(event, options);
    collected.push(sanitized);
  };
  return { events: collected, onEvent };
}
|
|
752
|
-
|
|
753
|
-
const
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
let
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
const event = parseStreamChunk(buffer, context);
|
|
768
|
-
if (event) yield event;
|
|
769
|
-
}
|
|
770
|
-
function* drainStreamBuffer(flush) {
|
|
771
|
-
for (; ; ) {
|
|
772
|
-
const sseBoundary = buffer.indexOf("\n\n");
|
|
773
|
-
if (sseBoundary >= 0) {
|
|
774
|
-
const chunk = buffer.slice(0, sseBoundary);
|
|
775
|
-
buffer = buffer.slice(sseBoundary + 2);
|
|
776
|
-
const event = parseStreamChunk(chunk, context);
|
|
777
|
-
if (event) yield event;
|
|
778
|
-
continue;
|
|
779
|
-
}
|
|
780
|
-
const newline = buffer.indexOf("\n");
|
|
781
|
-
if (newline >= 0 && !buffer.slice(0, newline).startsWith("data:")) {
|
|
782
|
-
const line = buffer.slice(0, newline);
|
|
783
|
-
buffer = buffer.slice(newline + 1);
|
|
784
|
-
const event = parseStreamChunk(line, context);
|
|
785
|
-
if (event) yield event;
|
|
786
|
-
continue;
|
|
1113
|
+
/**
 * Creates an in-memory sink for runtime stream events that also tracks a
 * running summary.
 *
 * `onEvent` stores the sanitized event, counts it by type, appends
 * `text_delta` text to the final text, remembers the id of the first
 * session_created / session_resumed event, and records status and reason
 * from `final` events. `summary()` returns a snapshot of those aggregates.
 *
 * @param options - sanitization options forwarded to `sanitizeRuntimeStreamEvent`.
 */
function createRuntimeStreamEventCollector(options = {}) {
  const events = [];
  const eventCountsByType = {};
  const tracked = {
    firstSessionId: void 0,
    finalStatus: void 0,
    finalReason: void 0,
    finalText: ""
  };
  const onEvent = (event) => {
    events.push(sanitizeRuntimeStreamEvent(event, options));
    const kind = event.type;
    eventCountsByType[kind] = (eventCountsByType[kind] ?? 0) + 1;
    switch (kind) {
      case "text_delta":
        tracked.finalText += event.text;
        break;
      case "session_created":
      case "session_resumed":
        // Only the first session seen is remembered.
        if (!tracked.firstSessionId) tracked.firstSessionId = event.session.id;
        break;
      case "final":
        tracked.finalStatus = event.status;
        tracked.finalReason = event.reason;
        break;
      default:
        break;
    }
  };
  return {
    events,
    onEvent,
    summary() {
      return {
        eventCount: events.length,
        // Copied so callers cannot mutate the live counters.
        eventCountsByType: { ...eventCountsByType },
        firstSessionId: tracked.firstSessionId,
        finalStatus: tracked.finalStatus,
        finalReason: tracked.finalReason,
        finalText: tracked.finalText
      };
    }
  };
}
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
const
|
|
803
|
-
if (
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
const delta = choice?.delta;
|
|
808
|
-
const message = choice?.message;
|
|
809
|
-
const text = stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text);
|
|
810
|
-
if (text) return { type: "text_delta", task: context.task, session: context.session, text, timestamp: timestamp() };
|
|
811
|
-
return mapCommonBackendEvent(parsed, context);
|
|
812
|
-
} catch {
|
|
813
|
-
return { type: "text_delta", task: context.task, session: context.session, text: data, timestamp: timestamp() };
|
|
1146
|
+
|
|
1147
|
+
// src/sse.ts
|
|
1148
|
+
/**
 * Encodes one Server-Sent Events frame.
 *
 * @param data - a string is sent verbatim; anything else is JSON-encoded.
 *   Values `JSON.stringify` cannot represent (`undefined`, functions,
 *   symbols) are sent as `null` instead of throwing.
 * @param options - optional `id`, `event` (line breaks replaced by spaces)
 *   and `retry` (emitted only for finite, non-negative numbers, floored).
 * @returns {string} the frame text, terminated by a blank line.
 */
function encodeServerSentEvent(data, options = {}) {
  const lines = [];
  if (options.id) lines.push(`id: ${stripNewlines(options.id)}`);
  if (options.event) lines.push(`event: ${stripNewlines(options.event)}`);
  if (typeof options.retry === "number" && Number.isFinite(options.retry) && options.retry >= 0) {
    lines.push(`retry: ${Math.floor(options.retry)}`);
  }
  const payload = typeof data === "string" ? data : (JSON.stringify(data) ?? "null");
  // SSE parsers treat CRLF, a lone CR and a lone LF all as line terminators,
  // so every one of them must start a fresh `data:` line; otherwise a bare
  // CR in the payload would end the field early and could inject other fields.
  for (const line of payload.split(/\r\n|\r|\n/)) {
    lines.push(`data: ${line}`);
  }
  return `${lines.join("\n")}\n\n`;
}
|
|
816
|
-
function
|
|
817
|
-
|
|
1163
|
+
/**
 * Encodes a knowledge-readiness report as a Server-Sent Events frame.
 *
 * `event`, `id` and `retry` in `options` control the SSE framing; all
 * remaining options are forwarded to `sanitizeKnowledgeReadinessReport`.
 */
function readinessServerSentEvent(report, options = {}) {
  const { event, id, retry, ...telemetryOptions } = options;
  const body = {
    type: "readiness",
    readiness: sanitizeKnowledgeReadinessReport(report, telemetryOptions)
  };
  const framing = { event, id, retry };
  return encodeServerSentEvent(body, framing);
}
|
|
819
|
-
function
|
|
820
|
-
|
|
821
|
-
return
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
1173
|
+
/**
 * Encodes a runtime stream event as a Server-Sent Events frame.
 *
 * `event` (the SSE event name), `id` and `retry` in `options` control the
 * framing; all remaining options are forwarded to `sanitizeRuntimeStreamEvent`.
 */
function runtimeStreamServerSentEvent(event, options = {}) {
  const { event: sseEvent, id, retry, ...telemetryOptions } = options;
  const sanitized = sanitizeRuntimeStreamEvent(event, telemetryOptions);
  const framing = { event: sseEvent, id, retry };
  return encodeServerSentEvent(sanitized, framing);
}
|
|
827
|
-
function
|
|
828
|
-
return
|
|
829
|
-
}
|
|
830
|
-
function statusFromControl(control) {
|
|
831
|
-
if (control.stoppedBy === "abort") return "aborted";
|
|
832
|
-
if (control.reason.includes("knowledge readiness blocked")) return "blocked";
|
|
833
|
-
if (control.pass) return "completed";
|
|
834
|
-
return "failed";
|
|
835
|
-
}
|
|
836
|
-
async function emit(sink, event) {
|
|
837
|
-
await sink?.(event);
|
|
1181
|
+
/**
 * Replaces every CR and LF with a space so the value fits on one SSE field
 * line.
 *
 * The value is coerced with `String()` first, so non-string ids or event
 * names (for example numeric sequence ids) are encoded rather than throwing.
 *
 * @param value - the field value to flatten.
 * @returns {string} the value with no line breaks.
 */
function stripNewlines(value) {
  return String(value).replace(/[\r\n]/g, " ");
}
|
|
839
|
-
|
|
1184
|
+
|
|
1185
|
+
// src/trace-bridge.ts
|
|
1186
|
+
/**
 * Creates a bridge that converts runtime events into trace events.
 *
 * @param options - `runId` (required), optional `spanId`, and an optional
 *   `newEventId` factory; by default ids are `evt-1`, `evt-2`, ... counted
 *   per bridge instance.
 * @returns `{ toTraceEvent, drain }`: `toTraceEvent` yields `undefined` for
 *   events `projectToTraceEvent` does not project, and `drain` converts a
 *   sequence of events, dropping the unprojected ones.
 * @throws {ValidationError} when `options.runId` is missing.
 */
function createTraceBridge(options) {
  if (!options.runId) {
    throw new ValidationError("createTraceBridge: runId is required");
  }
  let sequence = 0;
  const defaultEventId = () => `evt-${++sequence}`;
  const nextEventId = options.newEventId ?? defaultEventId;
  // The span id is captured once, at bridge creation.
  const spanId = options.spanId;
  const toTraceEvent = (event) => {
    const projected = projectToTraceEvent(event);
    // An id is consumed only for events that actually produce a trace event.
    if (!projected) return void 0;
    const { kind, payload } = projected;
    return {
      eventId: nextEventId(),
      runId: options.runId,
      spanId,
      kind,
      timestamp: timestampFor(event),
      payload
    };
  };
  return {
    toTraceEvent,
    drain(events) {
      const traces = [];
      for (const candidate of events) {
        const converted = toTraceEvent(candidate);
        if (converted) traces.push(converted);
      }
      return traces;
    }
  };
}
|
|
1217
|
+
/**
 * One-shot conversion of a single runtime event into a trace event.
 *
 * A fresh bridge is built on every call, so unless `options.newEventId` is
 * supplied the bridge's default id counter starts over each time. Use
 * `createTraceBridge` directly when converting a stream of events.
 *
 * @returns the trace event, or `undefined` when the event is not projected.
 */
function toAgentEvalTrace(event, options) {
  const bridge = createTraceBridge(options);
  return bridge.toTraceEvent(event);
}
|
|
1220
|
+
/**
 * Maps a runtime event to the `{ kind, payload }` pair of a trace event.
 *
 * Most events become "log" entries tagged with a `phase`. A failed readiness
 * decision becomes "policy_violation", `backend_error` becomes "error",
 * `artifact` becomes "state_mutation", and `task_end` / `final` become
 * "error" when the status is "failed" or "aborted". Text and reasoning
 * deltas, and any unrecognized type, yield `undefined`.
 */
function projectToTraceEvent(event) {
  const asLog = (payload) => ({ kind: "log", payload });
  const outcomeKind = (status) => (status === "failed" || status === "aborted" ? "error" : "log");
  const phase = event.type;

  if (phase === "task_start") {
    return asLog({ phase: "task_start", taskId: event.task.id, intent: event.task.intent });
  }
  if (phase === "readiness_start") {
    return asLog({ phase: "readiness_start", taskId: event.task.id });
  }
  if (phase === "readiness_end") {
    const { decision } = event;
    return {
      kind: decision.passed ? "log" : "policy_violation",
      payload: {
        phase: "readiness_end",
        taskId: event.task.id,
        status: decision.status,
        readinessScore: decision.readinessScore,
        blockingGapIds: decision.blockingGapIds,
        nonBlockingGapIds: decision.nonBlockingGapIds,
        reason: decision.reason
      }
    };
  }
  if (phase === "questions_start") {
    return asLog({ phase: "questions_start", questionCount: event.questions.length });
  }
  if (phase === "questions_end") {
    return asLog({
      phase: "questions_end",
      questionCount: event.questions.length,
      answerCount: Object.keys(event.userAnswers).length
    });
  }
  if (phase === "acquisition_start") {
    return asLog({ phase: "acquisition_start", planCount: event.acquisitionPlans.length });
  }
  if (phase === "acquisition_end") {
    return asLog({
      phase: "acquisition_end",
      planCount: event.acquisitionPlans.length,
      evidenceCount: event.acquiredEvidenceIds.length
    });
  }
  if (phase === "session_created" || phase === "session_resumed") {
    return asLog({ phase, sessionId: event.session.id, backend: event.session.backend });
  }
  if (phase === "backend_start" || phase === "backend_end") {
    return asLog({ phase, backend: event.backend });
  }
  if (phase === "backend_error") {
    return {
      kind: "error",
      payload: { backend: event.backend, message: event.message, recoverable: event.recoverable }
    };
  }
  if (phase === "tool_call" || phase === "tool_result") {
    // Tool arguments and results are deliberately left out: a trace event is
    // a point-in-time marker, not the store for tool input/output.
    return asLog({ phase, toolName: event.toolName, toolCallId: event.toolCallId });
  }
  if (phase === "llm_call") {
    return asLog({
      phase: "llm_call",
      model: event.model,
      tokensIn: event.tokensIn,
      tokensOut: event.tokensOut,
      costUsd: event.costUsd,
      latencyMs: event.latencyMs,
      finishReason: event.finishReason
    });
  }
  if (phase === "artifact") {
    return {
      kind: "state_mutation",
      payload: {
        phase: "artifact",
        artifactId: event.artifactId,
        name: event.name,
        mimeType: event.mimeType
      }
    };
  }
  if (phase === "task_end" || phase === "final") {
    return {
      kind: outcomeKind(event.status),
      payload: { phase, status: event.status, reason: event.reason }
    };
  }
  // text_delta, reasoning_delta and unknown event types are not projected.
  return void 0;
}
|
|
1356
|
+
/**
 * Resolves the epoch-millisecond timestamp for an event.
 *
 * Uses the event's own `timestamp` when it is present and `Date.parse` can
 * read it; otherwise falls back to the current time.
 */
function timestampFor(event) {
  if ("timestamp" in event && event.timestamp) {
    const millis = Date.parse(event.timestamp);
    if (Number.isFinite(millis)) return millis;
  }
  return Date.now();
}
|
|
854
1362
|
export {
|
|
1363
|
+
AgentEvalError2 as AgentEvalError,
|
|
1364
|
+
BackendTransportError,
|
|
1365
|
+
CaptureIntegrityError,
|
|
1366
|
+
ConfigError,
|
|
855
1367
|
InMemoryRuntimeSessionStore,
|
|
1368
|
+
JudgeError,
|
|
1369
|
+
NotFoundError,
|
|
1370
|
+
ReplayError,
|
|
1371
|
+
RuntimeRunStateError,
|
|
1372
|
+
SessionMismatchError,
|
|
1373
|
+
ValidationError,
|
|
1374
|
+
VerificationError,
|
|
856
1375
|
createIterableBackend,
|
|
857
1376
|
createOpenAICompatibleBackend,
|
|
858
1377
|
createRuntimeEventCollector,
|
|
859
1378
|
createRuntimeStreamEventCollector,
|
|
860
1379
|
createSandboxPromptBackend,
|
|
1380
|
+
createTraceBridge,
|
|
861
1381
|
decideKnowledgeReadiness,
|
|
862
1382
|
encodeServerSentEvent,
|
|
863
1383
|
readinessServerSentEvent,
|
|
@@ -867,6 +1387,8 @@ export {
|
|
|
867
1387
|
sanitizeAgentRuntimeEvent,
|
|
868
1388
|
sanitizeKnowledgeReadinessReport,
|
|
869
1389
|
sanitizeRuntimeStreamEvent,
|
|
870
|
-
|
|
1390
|
+
startRuntimeRun,
|
|
1391
|
+
summarizeAgentTaskRun,
|
|
1392
|
+
toAgentEvalTrace
|
|
871
1393
|
};
|
|
872
1394
|
//# sourceMappingURL=index.js.map
|