@tangle-network/agent-runtime 0.5.6 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,11 +1,61 @@
1
- // src/index.ts
1
+ // src/errors.ts
2
+ import { AgentEvalError } from "@tangle-network/agent-eval";
2
3
  import {
3
- acquisitionPlansForKnowledgeGaps,
4
- blockingKnowledgeEval,
5
- runAgentControlLoop,
6
- scoreKnowledgeReadiness,
7
- userQuestionsForKnowledgeGaps
4
+ AgentEvalError as AgentEvalError2,
5
+ CaptureIntegrityError,
6
+ ConfigError,
7
+ JudgeError,
8
+ NotFoundError,
9
+ ReplayError,
10
+ ValidationError,
11
+ VerificationError
8
12
  } from "@tangle-network/agent-eval";
13
/**
 * Raised when a stored session is resumed with a backend of a different kind.
 *
 * Forwards the literal "validation" as the first argument to AgentEvalError
 * (presumably an error code/category — confirm against agent-eval).
 * Both backend kinds are kept on the instance for callers to inspect.
 */
var SessionMismatchError = class extends AgentEvalError {
  /** Backend kind recorded on the session being resumed. */
  sessionBackend;
  /** Backend kind the caller attempted to resume with. */
  requestedBackend;
  /**
   * @param sessionBackend - kind stored on the existing session
   * @param requestedBackend - kind of the backend asked to resume it
   * @param options - forwarded untouched to AgentEvalError
   */
  constructor(sessionBackend, requestedBackend, options) {
    const detail = `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`;
    super("validation", detail, options);
    this.sessionBackend = sessionBackend;
    this.requestedBackend = requestedBackend;
  }
};
26
/**
 * Raised when talking to a backend fails at the transport level.
 *
 * Forwards the literal "config" as the first argument to AgentEvalError
 * (presumably an error code/category — confirm against agent-eval).
 */
var BackendTransportError = class extends AgentEvalError {
  /** Kind of the backend that failed. */
  backend;
  /** Value of `options.status` when provided, otherwise undefined. */
  status;
  /**
   * @param backend - backend kind
   * @param message - human-readable failure description
   * @param options - forwarded to AgentEvalError; `status` is also copied onto the instance
   */
  constructor(backend, message, options) {
    super("config", message, options);
    const { status } = options ?? {};
    this.backend = backend;
    this.status = status;
  }
};
35
/**
 * Raised for invalid runtime-run state transitions.
 *
 * Forwards the literal "validation" as the first argument to AgentEvalError
 * (presumably an error code/category — confirm against agent-eval).
 */
var RuntimeRunStateError = class extends AgentEvalError {
  /**
   * @param message - description of the invalid state or transition
   * @param options - forwarded untouched to AgentEvalError
   */
  constructor(message, options) {
    const category = "validation";
    super(category, message, options);
  }
};
40
+
41
+ // src/sessions.ts
42
+ function newRuntimeSession(backend, requestedId, metadata) {
43
+ const now = nowIso();
44
+ return {
45
+ id: requestedId || crypto.randomUUID(),
46
+ backend,
47
+ status: "active",
48
+ createdAt: now,
49
+ updatedAt: now,
50
+ metadata
51
+ };
52
+ }
53
/**
 * Return a shallow copy of the session with `updatedAt` set to the current time.
 * The input session object is not modified.
 */
function touchSession(session) {
  const refreshed = { ...session };
  refreshed.updatedAt = nowIso();
  return refreshed;
}
56
/** Current wall-clock time as an ISO-8601 string (as produced by Date#toISOString). */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
9
59
  var InMemoryRuntimeSessionStore = class {
10
60
  sessions = /* @__PURE__ */ new Map();
11
61
  events = /* @__PURE__ */ new Map();
@@ -24,6 +74,277 @@ var InMemoryRuntimeSessionStore = class {
24
74
  return [...this.events.get(sessionId) ?? []];
25
75
  }
26
76
  };
77
+
78
+ // src/backends.ts
79
/**
 * Identity helper: returns the given backend definition unchanged
 * (same object reference, no copying or validation).
 */
function createIterableBackend(options) {
  const backend = options;
  return backend;
}
82
/**
 * Build a backend that obtains a "box" via `options.getBox` and streams a
 * prompt through `options.streamPrompt`.
 *
 * Options read by this function:
 *  - kind: backend kind, defaults to "sandbox"
 *  - getBox(input, context): awaited before both start and stream
 *  - getSessionId(box, input): optional; falls back to context.requestedSessionId
 *  - streamPrompt(box, message, context): async iterable of raw events
 *  - mapEvent(event, context): optional; when it returns null/undefined the
 *    event goes through mapCommonBackendEvent instead
 *
 * @returns backend object with `kind`, `start`, `resume` and `stream`
 */
function createSandboxPromptBackend(options) {
  const backendKind = options.kind ?? "sandbox";

  // Prompt precedence: explicit message, then last message content, then task intent.
  const pickPrompt = (input, context) => input.message ?? input.messages?.at(-1)?.content ?? context.task.intent;

  // Custom mapper wins; a nullish result falls through to the shared mapper.
  const translate = (event, context) => options.mapEvent?.(event, context) ?? mapCommonBackendEvent(event, context);

  return {
    kind: backendKind,
    async start(input, context) {
      const sandbox = await options.getBox(input, context);
      const sessionId = options.getSessionId?.(sandbox, input) ?? context.requestedSessionId;
      return newRuntimeSession(backendKind, sessionId, { resumable: true });
    },
    resume(session) {
      const reactivated = { ...session, status: "active" };
      return touchSession(reactivated);
    },
    async *stream(input, context) {
      const sandbox = await options.getBox(input, context);
      const prompt = pickPrompt(input, context);
      for await (const raw of options.streamPrompt(sandbox, prompt, context)) {
        const mapped = translate(raw, context);
        if (!mapped) continue;
        yield mapped;
      }
    }
  };
}
107
/**
 * Build a backend that streams from an OpenAI-compatible
 * `POST {baseUrl}/chat/completions` endpoint.
 *
 * Options read by this function:
 *  - baseUrl: endpoint root; all trailing slashes are stripped before the path is appended
 *  - apiKey: sent as `Authorization: Bearer <apiKey>`
 *  - model: forwarded in the request body
 *  - fetchImpl: optional fetch replacement (defaults to the global `fetch`)
 *  - kind: backend kind, defaults to "tcloud"
 *
 * @returns backend object with `kind`, `start` and `stream`
 * @throws BackendTransportError from `stream` when the response is not ok;
 *   the error carries the HTTP status
 */
function createOpenAICompatibleBackend(options) {
  const fetcher = options.fetchImpl ?? fetch;
  const kind = options.kind ?? "tcloud";
  return {
    kind,
    start(_input, context) {
      return newRuntimeSession(kind, context.requestedSessionId);
    },
    async *stream(input, context) {
      // Strip every trailing slash so "https://host/v1//" does not yield "//chat/completions".
      const endpoint = `${options.baseUrl.replace(/\/+$/, "")}/chat/completions`;
      const response = await fetcher(endpoint, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${options.apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          model: options.model,
          stream: true,
          messages: input.messages ?? [
            { role: "user", content: input.message ?? context.task.intent }
          ]
        }),
        signal: context.signal
      });
      if (!response.ok) {
        // An unread body can keep the underlying connection open, so discard it
        // before surfacing the failure. Cleanup is best-effort: it must never
        // mask the transport error thrown below.
        try {
          await response.body?.cancel?.();
        } catch {
          // ignore — the status-based error below is what callers need
        }
        throw new BackendTransportError(kind, `chat backend returned ${response.status}`, {
          status: response.status
        });
      }
      yield* streamResponseEvents(response, context);
    }
  };
}
140
/**
 * Ensure a backend event carries `task`, `session` and `timestamp`.
 *
 * An event that already has truthy values for all three is returned as-is
 * (same reference). Otherwise a shallow copy is returned in which each
 * missing or falsy field is filled from the arguments (or the current time).
 */
function normalizeBackendStreamEvent(event, task, session) {
  const carries = (field) => field in event && Boolean(event[field]);
  const hasTask = carries("task");
  const hasSession = carries("session");
  const hasTimestamp = carries("timestamp");
  if (hasTask && hasSession && hasTimestamp) {
    return event;
  }
  return {
    ...event,
    task: hasTask ? event.task : task,
    session: hasSession ? event.session : session,
    timestamp: hasTimestamp ? event.timestamp : nowIso()
  };
}
151
/**
 * Translate a loosely-shaped backend event into a runtime stream event.
 *
 * Recognised `type` values:
 *  - "message.part.updated" | "text_delta" | "delta" -> text_delta
 *  - "reasoning_delta"                               -> reasoning_delta
 *  - "tool_call" / "tool_result"                     -> same type, with toolName defaulting to "tool"
 *  - "result" | "final"                              -> text_delta carrying the final text
 *
 * Payload fields are read from `event.data` when that is an object, otherwise
 * from the event itself. Text events with no non-empty text are dropped.
 *
 * @returns the mapped event, or undefined when the input is not an object,
 *   has an unrecognised type, or carries no usable text
 */
function mapCommonBackendEvent(event, context) {
  if (!event || typeof event !== "object") return void 0;
  const record = event;
  const kind = String(record.type ?? "");
  const payload = record.data && typeof record.data === "object" ? record.data : record;

  // Shared envelope: type/task/session first, specific fields next, timestamp last.
  const envelope = (eventType, fields) => ({
    type: eventType,
    task: context.task,
    session: context.session,
    ...fields,
    timestamp: nowIso()
  });
  const textEvent = (eventType, text) => text ? envelope(eventType, { text }) : void 0;

  switch (kind) {
    case "message.part.updated":
    case "text_delta":
    case "delta":
      return textEvent(
        "text_delta",
        stringValue(payload.text) ?? stringValue(payload.delta) ?? stringValue(record.text)
      );
    case "reasoning_delta":
      return textEvent("reasoning_delta", stringValue(payload.text) ?? stringValue(record.text));
    case "tool_call":
      return envelope("tool_call", {
        toolName: stringValue(payload.name) ?? stringValue(record.toolName) ?? "tool",
        toolCallId: stringValue(payload.id) ?? stringValue(record.toolCallId),
        args: payload.args ?? payload.input ?? record.args
      });
    case "tool_result":
      return envelope("tool_result", {
        toolName: stringValue(payload.name) ?? stringValue(record.toolName) ?? "tool",
        toolCallId: stringValue(payload.id) ?? stringValue(record.toolCallId),
        result: payload.result ?? payload.output ?? record.result
      });
    case "result":
    case "final":
      return textEvent(
        "text_delta",
        stringValue(payload.finalText) ?? stringValue(payload.text) ?? stringValue(record.text)
      );
    default:
      return void 0;
  }
}
210
/**
 * Read a streaming HTTP response body and yield one runtime event per frame.
 *
 * Framing, as implemented by the inner drain loop:
 *  - text up to a blank line ("\n\n") is one frame (SSE style);
 *  - otherwise a single line that does not start with "data:" is one frame
 *    (newline-delimited style);
 *  - at end of stream, leftover text is parsed as a final frame.
 * Each frame is handed to parseStreamChunk; frames that map to nothing are skipped.
 *
 * CRLF is normalised on the accumulated buffer rather than per decoded chunk,
 * so a "\r\n" pair split across two network reads is still recognised.
 * The reader is always released, and the stream is cancelled if the consumer
 * stops early or an error escapes before the stream has ended.
 *
 * @param response - object exposing a `body` with `getReader()`; a missing body yields nothing
 * @param context - passed through to parseStreamChunk
 */
async function* streamResponseEvents(response, context) {
  const body = response.body;
  if (!body) return;
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let streamEnded = false;

  // Normalise after concatenation: a trailing "\r" waits in the buffer until
  // its "\n" arrives with the next chunk.
  const append = (text) => {
    buffer = (buffer + text).replace(/\r\n/g, "\n");
  };

  function* drainStreamBuffer(flush) {
    for (; ; ) {
      const sseBoundary = buffer.indexOf("\n\n");
      if (sseBoundary >= 0) {
        const chunk = buffer.slice(0, sseBoundary);
        buffer = buffer.slice(sseBoundary + 2);
        const event = parseStreamChunk(chunk, context);
        if (event) yield event;
        continue;
      }
      const newline = buffer.indexOf("\n");
      // A "data:" line is held back: it may still be completed by a blank line.
      if (newline >= 0 && !buffer.slice(0, newline).startsWith("data:")) {
        const line = buffer.slice(0, newline);
        buffer = buffer.slice(newline + 1);
        const event = parseStreamChunk(line, context);
        if (event) yield event;
        continue;
      }
      if (flush && buffer.trim() && !buffer.trimStart().startsWith("data:")) {
        const line = buffer;
        buffer = "";
        const event = parseStreamChunk(line, context);
        if (event) yield event;
        continue;
      }
      break;
    }
  }

  try {
    for (; ; ) {
      const { done, value } = await reader.read();
      if (done) {
        streamEnded = true;
        break;
      }
      append(decoder.decode(value, { stream: true }));
      for (const event of drainStreamBuffer(false)) yield event;
    }
    // Flush any bytes the decoder was holding for an incomplete sequence.
    append(decoder.decode());
    for (const event of drainStreamBuffer(true)) yield event;
    if (buffer.trim()) {
      const event = parseStreamChunk(buffer, context);
      if (event) yield event;
    }
  } finally {
    // Best-effort cleanup; it must not replace an error already propagating.
    try {
      if (!streamEnded) await reader.cancel();
      reader.releaseLock();
    } catch {
      // ignore cleanup failures
    }
  }
}
257
/**
 * Parse one stream frame (an SSE block or a single line) into a runtime event.
 *
 * - `data:` lines are extracted and joined with "\n"; without any, the trimmed
 *   frame itself is the payload.
 * - An empty payload or "[DONE]" yields nothing.
 * - JSON payloads: text is taken from `choices[0].delta.content`,
 *   `choices[0].message.content` or `text`; otherwise the object is passed to
 *   mapCommonBackendEvent.
 * - Payloads that fail to parse as JSON are emitted verbatim as a text_delta.
 * - Frames with no `data:` line that consist only of SSE comments (": ...")
 *   or `event:` / `id:` / `retry:` fields yield nothing. These are keep-alives
 *   and metadata, not content; previously they leaked into the text output
 *   through the non-JSON fallback.
 *
 * @returns a runtime event, or undefined when the frame carries no content
 */
function parseStreamChunk(chunk, context) {
  const lines = chunk.split(/\r?\n/);
  const dataLines = lines.filter((line) => line.startsWith("data:"));
  if (dataLines.length === 0) {
    const meaningful = lines.filter((line) => line.trim().length > 0);
    const controlOnly = meaningful.length > 0 && meaningful.every((line) => /^(?::|event:|id:|retry:)/.test(line));
    if (controlOnly) return void 0;
  }
  const data = dataLines.length > 0 ? dataLines.map((line) => line.slice(5).trimStart()).join("\n") : chunk.trim();
  if (!data || data === "[DONE]") return void 0;
  try {
    const parsed = JSON.parse(data);
    const choices = parsed.choices;
    const choice = Array.isArray(choices) ? choices[0] : void 0;
    const delta = choice?.delta;
    const message = choice?.message;
    const text = stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text);
    if (text) {
      return {
        type: "text_delta",
        task: context.task,
        session: context.session,
        text,
        timestamp: nowIso()
      };
    }
    return mapCommonBackendEvent(parsed, context);
  } catch {
    // Deliberate fallback: non-JSON payloads are treated as plain streamed text.
    return {
      type: "text_delta",
      task: context.task,
      session: context.session,
      text: data,
      timestamp: nowIso()
    };
  }
}
289
/** Return `value` when it is a non-empty string; otherwise undefined. */
function stringValue(value) {
  if (typeof value !== "string") return void 0;
  return value.length === 0 ? void 0 : value;
}
292
+
293
+ // src/readiness.ts
294
/** Readiness threshold used when the caller does not pass `minimumScore`. */
var DEFAULT_MINIMUM_READINESS_SCORE = 0.7;

/**
 * Turn a knowledge readiness report into a pass / caveat / blocked decision.
 *
 * Decision order:
 *  1. any blocking missing requirement       -> "blocked" (not passed)
 *  2. readiness score is not a finite number -> "caveat"  (not passed)
 *  3. readiness score below `minimumScore`   -> "caveat"  (not passed)
 *  4. otherwise                              -> "ready"   (passed)
 *
 * Step 2 makes the gate fail closed: a NaN or missing score compares false
 * against any threshold and would otherwise be reported as "ready".
 *
 * @param report - readiness report; reads reason, readinessScore,
 *   recommendedAction, severity, blockingMissingRequirements, nonBlockingGaps
 * @param options - `minimumScore` in [0, 1]; defaults to DEFAULT_MINIMUM_READINESS_SCORE
 * @returns decision with passed, status, reason, readinessScore,
 *   recommendedAction, severity, blockingGapIds, nonBlockingGapIds
 * @throws ValidationError when `minimumScore` is not a finite number in [0, 1]
 */
function decideKnowledgeReadiness(report, options = {}) {
  const minimumScore = options.minimumScore ?? DEFAULT_MINIMUM_READINESS_SCORE;
  if (!Number.isFinite(minimumScore) || minimumScore < 0 || minimumScore > 1) {
    throw new ValidationError(
      `minimumScore must be a finite number in [0, 1]; received ${String(minimumScore)}`
    );
  }
  const score = report.readinessScore;
  const blockingGapIds = report.blockingMissingRequirements.map((requirement) => requirement.id);
  const nonBlockingGapIds = report.nonBlockingGaps.map((requirement) => requirement.id);

  // Fields common to every outcome; only passed/status/reason vary.
  const decide = (passed, status, reason) => ({
    passed,
    status,
    reason,
    readinessScore: score,
    recommendedAction: report.recommendedAction,
    severity: report.severity,
    blockingGapIds,
    nonBlockingGapIds
  });

  if (blockingGapIds.length > 0) {
    return decide(false, "blocked", report.reason);
  }
  if (!Number.isFinite(score)) {
    return decide(
      false,
      "caveat",
      `Knowledge readiness score ${String(score)} is not a finite number.`
    );
  }
  if (score < minimumScore) {
    return decide(
      false,
      "caveat",
      `Knowledge readiness score ${score.toFixed(3)} is below minimum ${minimumScore.toFixed(3)}.`
    );
  }
  return decide(true, "ready", report.reason);
}
339
+
340
+ // src/run.ts
341
+ import {
342
+ acquisitionPlansForKnowledgeGaps,
343
+ blockingKnowledgeEval,
344
+ runAgentControlLoop,
345
+ scoreKnowledgeReadiness,
346
+ userQuestionsForKnowledgeGaps
347
+ } from "@tangle-network/agent-eval";
27
348
  async function runAgentTask(options) {
28
349
  const task = options.task;
29
350
  await emit(options.onEvent, { type: "task_start", task });
@@ -35,7 +356,13 @@ async function runAgentTask(options) {
35
356
  ...knowledge.blockingMissingRequirements,
36
357
  ...knowledge.nonBlockingGaps
37
358
  ]);
38
- const preflight = await runKnowledgePreflight(task, questions, acquisitionPlans, options.knowledge, options.onEvent);
359
+ const preflight = await runKnowledgePreflight(
360
+ task,
361
+ questions,
362
+ acquisitionPlans,
363
+ options.knowledge,
364
+ options.onEvent
365
+ );
39
366
  if (options.knowledge?.refreshReadiness && (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)) {
40
367
  await emit(options.onEvent, { type: "readiness_start", task });
41
368
  knowledge = await options.knowledge.refreshReadiness({
@@ -58,13 +385,26 @@ async function runAgentTask(options) {
58
385
  variantId: options.variantId,
59
386
  observe: ({ history, abortSignal }) => options.adapter.observe({ task, knowledge, history, abortSignal }),
60
387
  validate: async ({ state, history, abortSignal }) => {
61
- const readinessEval = blockingKnowledgeEval(knowledge, { minimumScore: options.minimumReadinessScore });
62
- const evals = await options.adapter.validate({ task, knowledge, state, history, abortSignal });
388
+ const readinessEval = blockingKnowledgeEval(knowledge, {
389
+ minimumScore: options.minimumReadinessScore
390
+ });
391
+ const evals = await options.adapter.validate({
392
+ task,
393
+ knowledge,
394
+ state,
395
+ history,
396
+ abortSignal
397
+ });
63
398
  return [readinessEval, ...evals];
64
399
  },
65
400
  decide: (ctx) => {
66
401
  if (isKnowledgeBlocked(ctx.evals)) {
67
- return options.adapter.onKnowledgeBlocked?.({ task, knowledge, questions, acquisitionPlans }) ?? {
402
+ return options.adapter.onKnowledgeBlocked?.({
403
+ task,
404
+ knowledge,
405
+ questions,
406
+ acquisitionPlans
407
+ }) ?? {
68
408
  type: "stop",
69
409
  pass: false,
70
410
  score: knowledge.readinessScore,
@@ -90,7 +430,9 @@ async function runAgentTask(options) {
90
430
  userAnswers: preflight.userAnswers,
91
431
  acquiredEvidenceIds: preflight.acquiredEvidenceIds,
92
432
  control,
93
- runRecords: (options.adapter.projectRunRecords?.(control, task) ?? []).map((record) => record.scenarioId === void 0 ? { ...record, scenarioId } : record)
433
+ runRecords: (options.adapter.projectRunRecords?.(control, task) ?? []).map(
434
+ (record) => record.scenarioId === void 0 ? { ...record, scenarioId } : record
435
+ )
94
436
  };
95
437
  }
96
438
  function summarizeAgentTaskRun(result) {
@@ -102,7 +444,9 @@ function summarizeAgentTaskRun(result) {
102
444
  readinessStatus: decideKnowledgeReadiness(result.knowledge).status,
103
445
  readinessScore: result.knowledge.readinessScore,
104
446
  recommendedAction: result.knowledge.recommendedAction,
105
- blockingGapIds: result.knowledge.blockingMissingRequirements.map((requirement) => requirement.id),
447
+ blockingGapIds: result.knowledge.blockingMissingRequirements.map(
448
+ (requirement) => requirement.id
449
+ ),
106
450
  nonBlockingGapIds: result.knowledge.nonBlockingGaps.map((requirement) => requirement.id),
107
451
  questionCount: result.questions.length,
108
452
  acquisitionPlanCount: result.acquisitionPlans.length,
@@ -117,17 +461,20 @@ function summarizeAgentTaskRun(result) {
117
461
  async function* runAgentTaskStream(options) {
118
462
  const task = options.task;
119
463
  const input = { task, ...options.input ?? {} };
120
- const started = streamEvent({ type: "task_start", task });
121
- yield started;
122
- const readinessStart = streamEvent({ type: "readiness_start", task });
123
- yield readinessStart;
464
+ yield streamEvent({ type: "task_start", task });
465
+ yield streamEvent({ type: "readiness_start", task });
124
466
  let knowledge = await buildReadiness(task, options.knowledge);
125
467
  const questions = userQuestionsForKnowledgeGaps(knowledge.blockingMissingRequirements);
126
468
  const acquisitionPlans = acquisitionPlansForKnowledgeGaps([
127
469
  ...knowledge.blockingMissingRequirements,
128
470
  ...knowledge.nonBlockingGaps
129
471
  ]);
130
- const preflight = await runKnowledgePreflightStream(task, questions, acquisitionPlans, options.knowledge);
472
+ const preflight = await runKnowledgePreflightStream(
473
+ task,
474
+ questions,
475
+ acquisitionPlans,
476
+ options.knowledge
477
+ );
131
478
  for (const event of preflight.events) yield event;
132
479
  if (options.knowledge?.refreshReadiness && (Object.keys(preflight.userAnswers).length > 0 || preflight.acquiredEvidenceIds.length > 0)) {
133
480
  yield streamEvent({ type: "readiness_start", task });
@@ -138,7 +485,9 @@ async function* runAgentTaskStream(options) {
138
485
  acquiredEvidenceIds: preflight.acquiredEvidenceIds
139
486
  });
140
487
  }
141
- const decision = decideKnowledgeReadiness(knowledge, { minimumScore: options.minimumReadinessScore });
488
+ const decision = decideKnowledgeReadiness(knowledge, {
489
+ minimumScore: options.minimumReadinessScore
490
+ });
142
491
  yield streamEvent({ type: "readiness_end", task, knowledge, decision });
143
492
  if (!decision.passed && decision.status === "blocked") {
144
493
  const reason = `knowledge readiness blocked: ${decision.reason}`;
@@ -149,7 +498,16 @@ async function* runAgentTaskStream(options) {
149
498
  const store = options.sessionStore;
150
499
  const existing = options.sessionId ? await store?.get(options.sessionId) : void 0;
151
500
  const shouldResume = Boolean(options.resume && existing);
152
- let session = shouldResume && existing ? await resumeBackendSession(options.backend, existing, input, { task, knowledge, signal: options.signal }) : await startBackendSession(options.backend, input, { task, knowledge, signal: options.signal }, options.sessionId);
501
+ let session = shouldResume && existing ? await resumeBackendSession(options.backend, existing, input, {
502
+ task,
503
+ knowledge,
504
+ signal: options.signal
505
+ }) : await startBackendSession(
506
+ options.backend,
507
+ input,
508
+ { task, knowledge, signal: options.signal },
509
+ options.sessionId
510
+ );
153
511
  await store?.put(session);
154
512
  const sessionEvent = streamEvent({
155
513
  type: shouldResume ? "session_resumed" : "session_created",
@@ -158,12 +516,22 @@ async function* runAgentTaskStream(options) {
158
516
  });
159
517
  await store?.appendEvent?.(session.id, sessionEvent);
160
518
  yield sessionEvent;
161
- const backendStart = streamEvent({ type: "backend_start", task, session, backend: options.backend.kind });
519
+ const backendStart = streamEvent({
520
+ type: "backend_start",
521
+ task,
522
+ session,
523
+ backend: options.backend.kind
524
+ });
162
525
  await store?.appendEvent?.(session.id, backendStart);
163
526
  yield backendStart;
164
527
  let finalText = "";
165
528
  try {
166
- for await (const rawEvent of options.backend.stream(input, { task, knowledge, session, signal: options.signal })) {
529
+ for await (const rawEvent of options.backend.stream(input, {
530
+ task,
531
+ knowledge,
532
+ session,
533
+ signal: options.signal
534
+ })) {
167
535
  const event = normalizeBackendStreamEvent(rawEvent, task, session);
168
536
  if (event.type === "text_delta") finalText += event.text;
169
537
  await store?.appendEvent?.(session.id, event);
@@ -172,14 +540,26 @@ async function* runAgentTaskStream(options) {
172
540
  const completedStatus = "completed";
173
541
  session = touchSession({ ...session, status: completedStatus });
174
542
  await store?.put(session);
175
- const backendEnd = streamEvent({ type: "backend_end", task, session, backend: options.backend.kind });
543
+ const backendEnd = streamEvent({
544
+ type: "backend_end",
545
+ task,
546
+ session,
547
+ backend: options.backend.kind
548
+ });
176
549
  await store?.appendEvent?.(session.id, backendEnd);
177
550
  yield backendEnd;
178
551
  const reason = "backend completed";
179
552
  const taskEnd = streamEvent({ type: "task_end", task, status: completedStatus, reason });
180
553
  await store?.appendEvent?.(session.id, taskEnd);
181
554
  yield taskEnd;
182
- const final = streamEvent({ type: "final", task, session, status: completedStatus, reason, text: finalText || void 0 });
555
+ const final = streamEvent({
556
+ type: "final",
557
+ task,
558
+ session,
559
+ status: completedStatus,
560
+ reason,
561
+ text: finalText || void 0
562
+ });
183
563
  await store?.appendEvent?.(session.id, final);
184
564
  yield final;
185
565
  } catch (err) {
@@ -206,50 +586,226 @@ async function* runAgentTaskStream(options) {
206
586
  const taskEnd = streamEvent({ type: "task_end", task, status, reason: message });
207
587
  await store?.appendEvent?.(session.id, taskEnd);
208
588
  yield taskEnd;
209
- const final = streamEvent({ type: "final", task, session, status, reason: message, text: finalText || void 0 });
589
+ const final = streamEvent({
590
+ type: "final",
591
+ task,
592
+ session,
593
+ status,
594
+ reason: message,
595
+ text: finalText || void 0
596
+ });
210
597
  await store?.appendEvent?.(session.id, final);
211
598
  yield final;
212
599
  }
213
600
  }
214
- function decideKnowledgeReadiness(report, options = {}) {
215
- const minimumScore = options.minimumScore ?? 0.7;
216
- const blockingGapIds = report.blockingMissingRequirements.map((requirement) => requirement.id);
217
- const nonBlockingGapIds = report.nonBlockingGaps.map((requirement) => requirement.id);
218
- if (blockingGapIds.length > 0) {
219
- return {
220
- passed: false,
221
- status: "blocked",
222
- reason: report.reason,
223
- readinessScore: report.readinessScore,
224
- recommendedAction: report.recommendedAction,
225
- severity: report.severity,
226
- blockingGapIds,
227
- nonBlockingGapIds
228
- };
601
+ async function runKnowledgePreflight(task, questions, acquisitionPlans, provider, onEvent) {
602
+ let userAnswers = {};
603
+ let acquiredEvidenceIds = [];
604
+ if (questions.length > 0 && provider?.answerQuestions) {
605
+ await emit(onEvent, { type: "questions_start", task, questions });
606
+ userAnswers = await provider.answerQuestions(questions, task);
607
+ await emit(onEvent, { type: "questions_end", task, questions, userAnswers });
229
608
  }
230
- if (report.readinessScore < minimumScore) {
231
- return {
232
- passed: false,
233
- status: "caveat",
234
- reason: `Knowledge readiness score ${report.readinessScore.toFixed(3)} is below minimum ${minimumScore.toFixed(3)}.`,
235
- readinessScore: report.readinessScore,
236
- recommendedAction: report.recommendedAction,
237
- severity: report.severity,
238
- blockingGapIds,
239
- nonBlockingGapIds
240
- };
609
+ if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
610
+ await emit(onEvent, { type: "acquisition_start", task, acquisitionPlans });
611
+ acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task);
612
+ await emit(onEvent, {
613
+ type: "acquisition_end",
614
+ task,
615
+ acquisitionPlans,
616
+ acquiredEvidenceIds
617
+ });
618
+ }
619
+ return { userAnswers, acquiredEvidenceIds };
620
+ }
621
/**
 * Run the knowledge preflight (user questions, then acquisition plans) and
 * collect the stream events describing it instead of emitting them.
 *
 * Each phase runs only when its input list is non-empty and the provider
 * implements the matching hook (`answerQuestions` / `executeAcquisitionPlans`).
 *
 * @returns `{ userAnswers, acquiredEvidenceIds, events }`; defaults are `{}`,
 *   `[]` and `[]` when a phase is skipped
 */
async function runKnowledgePreflightStream(task, questions, acquisitionPlans, provider) {
  const outcome = { userAnswers: {}, acquiredEvidenceIds: [], events: [] };
  const record = (payload) => {
    outcome.events.push(streamEvent(payload));
  };

  if (questions.length > 0 && provider?.answerQuestions) {
    record({ type: "questions_start", task, questions });
    outcome.userAnswers = await provider.answerQuestions(questions, task);
    record({ type: "questions_end", task, questions, userAnswers: outcome.userAnswers });
  }

  if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
    record({ type: "acquisition_start", task, acquisitionPlans });
    outcome.acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task);
    record({
      type: "acquisition_end",
      task,
      acquisitionPlans,
      acquiredEvidenceIds: outcome.acquiredEvidenceIds
    });
  }

  return outcome;
}
639
/**
 * Shallow-copy an event and stamp it with the current time.
 * An existing `timestamp` on the input is overwritten in the copy.
 */
function streamEvent(event) {
  const stamped = { ...event };
  stamped.timestamp = nowIso();
  return stamped;
}
642
/**
 * Start a session on the backend.
 *
 * Uses `backend.start` when the backend provides it, passing the context
 * extended with `requestedSessionId`; otherwise creates a plain session
 * record for `backend.kind`.
 */
async function startBackendSession(backend, input, context, requestedSessionId) {
  if (!backend.start) {
    return newRuntimeSession(backend.kind, requestedSessionId);
  }
  const startContext = { ...context, requestedSessionId };
  return backend.start(input, startContext);
}
646
/**
 * Resume an existing session on the backend.
 *
 * Delegates to `backend.resume` when available; otherwise the session is
 * marked "active" and its timestamp refreshed.
 *
 * @throws SessionMismatchError when the session was created by a backend of
 *   a different kind
 */
async function resumeBackendSession(backend, session, input, context) {
  const sameBackend = session.backend === backend.kind;
  if (!sameBackend) {
    throw new SessionMismatchError(session.backend, backend.kind);
  }
  if (!backend.resume) {
    const reactivated = { ...session, status: "active" };
    return touchSession(reactivated);
  }
  return backend.resume(session, input, context);
}
653
/**
 * Produce the knowledge readiness report for a task.
 *
 * A provider-supplied `buildReadiness` takes precedence. Otherwise the report
 * is scored from the task's `requiredKnowledge` (empty list when absent),
 * with metadata made of the task domain plus the task's own metadata
 * (task metadata keys win on conflict).
 */
function buildReadiness(task, provider) {
  if (!provider?.buildReadiness) {
    const request = {
      taskId: task.id,
      requirements: task.requiredKnowledge ?? [],
      metadata: { domain: task.domain, ...task.metadata }
    };
    return scoreKnowledgeReadiness(request);
  }
  return provider.buildReadiness(task);
}
661
/**
 * True when the eval list contains a failed "knowledge-ready" eval.
 */
function isKnowledgeBlocked(evals) {
  const allClear = evals.every((evalResult) => evalResult.id !== "knowledge-ready" || evalResult.passed);
  return !allClear;
}
664
/**
 * Derive the task status from a control-loop result.
 *
 * Precedence: "aborted" (stopped by abort), then "blocked" (reason mentions
 * "knowledge readiness blocked"), then "completed" (pass), else "failed".
 */
function statusFromControl(control) {
  if (control.stoppedBy === "abort") {
    return "aborted";
  }
  const blockedByKnowledge = control.reason.includes("knowledge readiness blocked");
  if (blockedByKnowledge) {
    return "blocked";
  }
  return control.pass ? "completed" : "failed";
}
670
/**
 * Deliver an event to an optional sink and wait for the sink to finish.
 * A null or undefined sink is a no-op.
 */
async function emit(sink, event) {
  if (sink == null) return;
  await sink(event);
}
673
/**
 * Combine the task and its readiness report with the control-loop context
 * into a single flat context object.
 *
 * Only the listed control-loop fields are copied; anything else on `ctx`
 * is left out.
 */
function toAgentContext(task, knowledge, ctx) {
  const {
    state,
    evals,
    history,
    budget,
    stepIndex,
    wallMs,
    spentCostUsd,
    remainingCostUsd,
    abortSignal
  } = ctx;
  return {
    task,
    knowledge,
    state,
    evals,
    history,
    budget,
    stepIndex,
    wallMs,
    spentCostUsd,
    remainingCostUsd,
    abortSignal
  };
}
688
+
689
+ // src/runtime-run.ts
690
+ function startRuntimeRun(options) {
691
+ if (!options.workspaceId) {
692
+ throw new ValidationError("startRuntimeRun: workspaceId is required");
241
693
  }
694
+ if (!options.taskSpec?.id) {
695
+ throw new ValidationError("startRuntimeRun: taskSpec.id is required");
696
+ }
697
+ const now = options.now ?? Date.now;
698
+ const startedAtMs = now();
699
+ const startedAt = new Date(startedAtMs).toISOString();
700
+ const id = options.id ?? `${options.taskSpec.id}:${randomSuffix()}`;
701
+ let status = "running";
702
+ let completedAtMs;
703
+ let resultSummary;
704
+ let error;
705
+ let completionMetadata;
706
+ const ledger = {
707
+ tokensIn: 0,
708
+ tokensOut: 0,
709
+ costUsd: 0,
710
+ wallMs: 0,
711
+ llmCalls: 0
712
+ };
713
+ const snapshotCost = () => ({
714
+ tokensIn: ledger.tokensIn,
715
+ tokensOut: ledger.tokensOut,
716
+ costUsd: ledger.costUsd,
717
+ wallMs: (completedAtMs ?? now()) - startedAtMs,
718
+ llmCalls: ledger.llmCalls
719
+ });
720
+ const buildRow = (extraMetadata) => ({
721
+ id,
722
+ workspaceId: options.workspaceId,
723
+ sessionId: options.sessionId,
724
+ agentId: options.agentId,
725
+ domain: options.taskSpec.domain,
726
+ taskId: options.taskSpec.id,
727
+ scenarioId: options.scenarioId,
728
+ status,
729
+ resultSummary,
730
+ error,
731
+ cost: snapshotCost(),
732
+ startedAt,
733
+ completedAt: completedAtMs !== void 0 ? new Date(completedAtMs).toISOString() : void 0,
734
+ metadata: mergeMetadata(completionMetadata, extraMetadata)
735
+ });
242
736
  return {
243
- passed: true,
244
- status: "ready",
245
- reason: report.reason,
246
- readinessScore: report.readinessScore,
247
- recommendedAction: report.recommendedAction,
248
- severity: report.severity,
249
- blockingGapIds,
250
- nonBlockingGapIds
737
+ id,
738
+ workspaceId: options.workspaceId,
739
+ sessionId: options.sessionId,
740
+ taskSpec: options.taskSpec,
741
+ get status() {
742
+ return status;
743
+ },
744
+ observe(event) {
745
+ if (event.type !== "llm_call") return;
746
+ ledger.llmCalls += 1;
747
+ if (typeof event.tokensIn === "number" && Number.isFinite(event.tokensIn)) {
748
+ ledger.tokensIn += event.tokensIn;
749
+ }
750
+ if (typeof event.tokensOut === "number" && Number.isFinite(event.tokensOut)) {
751
+ ledger.tokensOut += event.tokensOut;
752
+ }
753
+ if (typeof event.costUsd === "number" && Number.isFinite(event.costUsd)) {
754
+ ledger.costUsd += event.costUsd;
755
+ }
756
+ },
757
+ cost: snapshotCost,
758
+ complete(input) {
759
+ if (input.status === "running") {
760
+ throw new ValidationError('complete() requires a terminal status, got "running"');
761
+ }
762
+ if (status !== "running") {
763
+ if (status === input.status) return;
764
+ throw new RuntimeRunStateError(
765
+ `Cannot transition runtime run from "${status}" to "${input.status}"`
766
+ );
767
+ }
768
+ status = input.status;
769
+ completedAtMs = now();
770
+ resultSummary = input.resultSummary;
771
+ error = input.error;
772
+ completionMetadata = input.metadata;
773
+ if (input.cost) {
774
+ if (typeof input.cost.tokensIn === "number" && Number.isFinite(input.cost.tokensIn)) {
775
+ ledger.tokensIn = input.cost.tokensIn;
776
+ }
777
+ if (typeof input.cost.tokensOut === "number" && Number.isFinite(input.cost.tokensOut)) {
778
+ ledger.tokensOut = input.cost.tokensOut;
779
+ }
780
+ if (typeof input.cost.costUsd === "number" && Number.isFinite(input.cost.costUsd)) {
781
+ ledger.costUsd = input.cost.costUsd;
782
+ }
783
+ if (typeof input.cost.llmCalls === "number" && Number.isFinite(input.cost.llmCalls)) {
784
+ ledger.llmCalls = input.cost.llmCalls;
785
+ }
786
+ }
787
+ },
788
+ toRow(metadata) {
789
+ return buildRow(metadata);
790
+ },
791
+ async persist(metadata) {
792
+ if (status === "running") {
793
+ throw new RuntimeRunStateError("Cannot persist a runtime run before complete() is called");
794
+ }
795
+ if (!options.adapter) return;
796
+ await options.adapter.upsert(buildRow(metadata));
797
+ }
251
798
  };
252
799
  }
800
/**
 * Shallow-merge two optional metadata objects; keys in `extra` win.
 * Returns undefined when both inputs are falsy.
 */
function mergeMetadata(base, extra) {
  if (base || extra) {
    // Spreading null/undefined contributes nothing, so no explicit fallback is needed.
    return { ...base, ...extra };
  }
  return void 0;
}
804
+ function randomSuffix() {
805
+ return Math.random().toString(36).slice(2, 10);
806
+ }
807
+
808
+ // src/sanitize.ts
253
809
  function sanitizeKnowledgeReadinessReport(report, options = {}) {
254
810
  return {
255
811
  taskId: report.taskId,
@@ -277,7 +833,10 @@ function sanitizeAgentRuntimeEvent(event, options = {}) {
277
833
  return { ...base, knowledge: sanitizeKnowledgeReadinessReport(event.knowledge, options) };
278
834
  }
279
835
  if (event.type === "questions_start") {
280
- return { ...base, questions: event.questions.map((question) => sanitizeQuestion(question, options)) };
836
+ return {
837
+ ...base,
838
+ questions: event.questions.map((question) => sanitizeQuestion(question, options))
839
+ };
281
840
  }
282
841
  if (event.type === "questions_end") {
283
842
  return {
@@ -318,7 +877,12 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
318
877
  };
319
878
  }
320
879
  if (event.type === "questions_start") {
321
- return { type: event.type, ...withTask, timestamp: event.timestamp, questions: event.questions.map((question) => sanitizeQuestion(question, options)) };
880
+ return {
881
+ type: event.type,
882
+ ...withTask,
883
+ timestamp: event.timestamp,
884
+ questions: event.questions.map((question) => sanitizeQuestion(question, options))
885
+ };
322
886
  }
323
887
  if (event.type === "questions_end") {
324
888
  return {
@@ -330,7 +894,12 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
330
894
  };
331
895
  }
332
896
  if (event.type === "acquisition_start") {
333
- return { type: event.type, ...withTask, timestamp: event.timestamp, acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan) };
897
+ return {
898
+ type: event.type,
899
+ ...withTask,
900
+ timestamp: event.timestamp,
901
+ acquisitionPlans: event.acquisitionPlans.map(sanitizeAcquisitionPlan)
902
+ };
334
903
  }
335
904
  if (event.type === "acquisition_end") {
336
905
  return {
@@ -364,6 +933,20 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
364
933
  result: options.includeControlPayloads ? event.result : void 0
365
934
  };
366
935
  }
936
+ if (event.type === "llm_call") {
937
+ return {
938
+ type: event.type,
939
+ ...withTask,
940
+ ...withSession,
941
+ timestamp: event.timestamp,
942
+ model: event.model,
943
+ tokensIn: event.tokensIn,
944
+ tokensOut: event.tokensOut,
945
+ costUsd: event.costUsd,
946
+ latencyMs: event.latencyMs,
947
+ finishReason: event.finishReason
948
+ };
949
+ }
367
950
  if (event.type === "artifact") {
368
951
  return {
369
952
  type: event.type,
@@ -397,230 +980,48 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
397
980
  ...pickPublicStreamFields(event)
398
981
  };
399
982
  }
400
- function createRuntimeEventCollector(options = {}) {
401
- const events = [];
983
+ function sanitizeTask(task, options) {
402
984
  return {
403
- events,
404
- onEvent: (event) => {
405
- events.push(sanitizeAgentRuntimeEvent(event, options));
406
- }
985
+ id: task.id,
986
+ intent: task.intent,
987
+ domain: task.domain,
988
+ inputs: options.includeInputs ? task.inputs : task.inputs ? "[redacted]" : void 0,
989
+ requiredKnowledge: task.requiredKnowledge?.map(
990
+ (requirement) => sanitizeKnowledgeRequirement(requirement, options)
991
+ ),
992
+ metadata: options.includeMetadata ? task.metadata : task.metadata ? "[redacted]" : void 0
407
993
  };
408
994
  }
409
- function createRuntimeStreamEventCollector(options = {}) {
410
- const events = [];
411
- const eventCountsByType = {};
412
- let firstSessionId;
413
- let finalStatus;
414
- let finalReason;
415
- let finalText = "";
995
+ function sanitizeRuntimeSession(session, options) {
416
996
  return {
417
- events,
418
- onEvent: (event) => {
419
- events.push(sanitizeRuntimeStreamEvent(event, options));
420
- eventCountsByType[event.type] = (eventCountsByType[event.type] ?? 0) + 1;
421
- if (event.type === "text_delta") finalText += event.text;
422
- if (!firstSessionId && (event.type === "session_created" || event.type === "session_resumed")) {
423
- firstSessionId = event.session.id;
424
- }
425
- if (event.type === "final") {
426
- finalStatus = event.status;
427
- finalReason = event.reason;
428
- }
429
- },
430
- summary() {
431
- return {
432
- eventCount: events.length,
433
- eventCountsByType: { ...eventCountsByType },
434
- firstSessionId,
435
- finalStatus,
436
- finalReason,
437
- finalText
438
- };
439
- }
997
+ id: session.id,
998
+ backend: session.backend,
999
+ status: session.status,
1000
+ hasResumeToken: Boolean(session.resumeToken),
1001
+ createdAt: session.createdAt,
1002
+ updatedAt: session.updatedAt,
1003
+ metadata: options.includeMetadata ? session.metadata : session.metadata ? "[redacted]" : void 0
440
1004
  };
441
1005
  }
442
- function encodeServerSentEvent(data, options = {}) {
443
- const lines = [];
444
- if (options.id) lines.push(`id: ${stripNewlines(options.id)}`);
445
- if (options.event) lines.push(`event: ${stripNewlines(options.event)}`);
446
- if (typeof options.retry === "number" && Number.isFinite(options.retry) && options.retry >= 0) {
447
- lines.push(`retry: ${Math.floor(options.retry)}`);
448
- }
449
- const payload = typeof data === "string" ? data : JSON.stringify(data);
450
- for (const line of payload.split(/\r?\n/)) {
451
- lines.push(`data: ${line}`);
452
- }
453
- return `${lines.join("\n")}
454
-
455
- `;
456
- }
457
- function readinessServerSentEvent(report, options = {}) {
458
- const { event, id, retry, ...telemetryOptions } = options;
459
- return encodeServerSentEvent({
460
- type: "readiness",
461
- readiness: sanitizeKnowledgeReadinessReport(report, telemetryOptions)
462
- }, { event, id, retry });
463
- }
464
- function runtimeStreamServerSentEvent(event, options = {}) {
465
- const { event: sseEvent, id, retry, ...telemetryOptions } = options;
466
- return encodeServerSentEvent(sanitizeRuntimeStreamEvent(event, telemetryOptions), { event: sseEvent, id, retry });
467
- }
468
- function createIterableBackend(options) {
469
- return options;
1006
+ function sanitizeKnowledgeRequirement(requirement, options) {
1007
+ const includeDescription = options.includeRequirementDescriptions && requirement.sensitivity !== "secret";
1008
+ return {
1009
+ id: requirement.id,
1010
+ description: includeDescription ? requirement.description : void 0,
1011
+ requiredFor: requirement.requiredFor,
1012
+ category: requirement.category,
1013
+ acquisitionMode: requirement.acquisitionMode,
1014
+ importance: requirement.importance,
1015
+ freshness: requirement.freshness,
1016
+ sensitivity: requirement.sensitivity,
1017
+ confidenceNeeded: requirement.confidenceNeeded,
1018
+ currentConfidence: requirement.currentConfidence,
1019
+ evidenceCount: requirement.evidenceIds.length,
1020
+ evidenceIds: options.includeEvidenceIds ? requirement.evidenceIds : void 0,
1021
+ fallbackPolicy: requirement.fallbackPolicy
1022
+ };
470
1023
  }
471
- function createSandboxPromptBackend(options) {
472
- return {
473
- kind: options.kind ?? "sandbox",
474
- async start(input, context) {
475
- const box = await options.getBox(input, context);
476
- return newRuntimeSession(options.kind ?? "sandbox", options.getSessionId?.(box, input) ?? context.requestedSessionId, {
477
- resumable: true
478
- });
479
- },
480
- resume(session) {
481
- return touchSession({ ...session, status: "active" });
482
- },
483
- async *stream(input, context) {
484
- const box = await options.getBox(input, context);
485
- const message = input.message ?? input.messages?.at(-1)?.content ?? context.task.intent;
486
- for await (const event of options.streamPrompt(box, message, context)) {
487
- const mapped = options.mapEvent?.(event, context) ?? mapCommonBackendEvent(event, context);
488
- if (mapped) yield mapped;
489
- }
490
- }
491
- };
492
- }
493
- function createCliBridgeBackend(options) {
494
- const fetcher = options.fetchImpl ?? fetch;
495
- return {
496
- kind: options.kind ?? "cli-bridge",
497
- start(_input, context) {
498
- return newRuntimeSession(options.kind ?? "cli-bridge", context.requestedSessionId, { resumable: true });
499
- },
500
- resume(session) {
501
- return touchSession({ ...session, status: "active" });
502
- },
503
- async *stream(input, context) {
504
- const response = await fetcher(options.url, {
505
- method: "POST",
506
- headers: {
507
- "Content-Type": "application/json",
508
- ...options.bearer ? { Authorization: `Bearer ${options.bearer}` } : {}
509
- },
510
- body: JSON.stringify({
511
- sessionId: context.session.id,
512
- resumeToken: context.session.resumeToken,
513
- task: input.task,
514
- message: input.message,
515
- messages: input.messages,
516
- inputs: input.inputs
517
- }),
518
- signal: context.signal
519
- });
520
- if (!response.ok) throw new Error(`cli bridge returned ${response.status}`);
521
- yield* streamResponseEvents(response, context);
522
- }
523
- };
524
- }
525
- function createOpenAICompatibleBackend(options) {
526
- const fetcher = options.fetchImpl ?? fetch;
527
- return {
528
- kind: options.kind ?? "tcloud",
529
- start(_input, context) {
530
- return newRuntimeSession(options.kind ?? "tcloud", context.requestedSessionId);
531
- },
532
- async *stream(input, context) {
533
- const response = await fetcher(`${options.baseUrl.replace(/\/$/, "")}/chat/completions`, {
534
- method: "POST",
535
- headers: {
536
- Authorization: `Bearer ${options.apiKey}`,
537
- "Content-Type": "application/json"
538
- },
539
- body: JSON.stringify({
540
- model: options.model,
541
- stream: true,
542
- messages: input.messages ?? [{ role: "user", content: input.message ?? context.task.intent }]
543
- }),
544
- signal: context.signal
545
- });
546
- if (!response.ok) throw new Error(`chat backend returned ${response.status}`);
547
- yield* streamResponseEvents(response, context);
548
- }
549
- };
550
- }
551
- async function runKnowledgePreflight(task, questions, acquisitionPlans, provider, onEvent) {
552
- let userAnswers = {};
553
- let acquiredEvidenceIds = [];
554
- if (questions.length > 0 && provider?.answerQuestions) {
555
- await emit(onEvent, { type: "questions_start", task, questions });
556
- userAnswers = await provider.answerQuestions(questions, task);
557
- await emit(onEvent, { type: "questions_end", task, questions, userAnswers });
558
- }
559
- if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
560
- await emit(onEvent, { type: "acquisition_start", task, acquisitionPlans });
561
- acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task);
562
- await emit(onEvent, { type: "acquisition_end", task, acquisitionPlans, acquiredEvidenceIds });
563
- }
564
- return { userAnswers, acquiredEvidenceIds };
565
- }
566
- async function runKnowledgePreflightStream(task, questions, acquisitionPlans, provider) {
567
- const events = [];
568
- let userAnswers = {};
569
- let acquiredEvidenceIds = [];
570
- if (questions.length > 0 && provider?.answerQuestions) {
571
- events.push(streamEvent({ type: "questions_start", task, questions }));
572
- userAnswers = await provider.answerQuestions(questions, task);
573
- events.push(streamEvent({ type: "questions_end", task, questions, userAnswers }));
574
- }
575
- if (acquisitionPlans.length > 0 && provider?.executeAcquisitionPlans) {
576
- events.push(streamEvent({ type: "acquisition_start", task, acquisitionPlans }));
577
- acquiredEvidenceIds = await provider.executeAcquisitionPlans(acquisitionPlans, task);
578
- events.push(streamEvent({ type: "acquisition_end", task, acquisitionPlans, acquiredEvidenceIds }));
579
- }
580
- return { userAnswers, acquiredEvidenceIds, events };
581
- }
582
- function sanitizeTask(task, options) {
583
- return {
584
- id: task.id,
585
- intent: task.intent,
586
- domain: task.domain,
587
- inputs: options.includeInputs ? task.inputs : task.inputs ? "[redacted]" : void 0,
588
- requiredKnowledge: task.requiredKnowledge?.map(
589
- (requirement) => sanitizeKnowledgeRequirement(requirement, options)
590
- ),
591
- metadata: options.includeMetadata ? task.metadata : task.metadata ? "[redacted]" : void 0
592
- };
593
- }
594
- function sanitizeRuntimeSession(session, options) {
595
- return {
596
- id: session.id,
597
- backend: session.backend,
598
- status: session.status,
599
- hasResumeToken: Boolean(session.resumeToken),
600
- createdAt: session.createdAt,
601
- updatedAt: session.updatedAt,
602
- metadata: options.includeMetadata ? session.metadata : session.metadata ? "[redacted]" : void 0
603
- };
604
- }
605
- function sanitizeKnowledgeRequirement(requirement, options) {
606
- const includeDescription = options.includeRequirementDescriptions && requirement.sensitivity !== "secret";
607
- return {
608
- id: requirement.id,
609
- description: includeDescription ? requirement.description : void 0,
610
- requiredFor: requirement.requiredFor,
611
- category: requirement.category,
612
- acquisitionMode: requirement.acquisitionMode,
613
- importance: requirement.importance,
614
- freshness: requirement.freshness,
615
- sensitivity: requirement.sensitivity,
616
- confidenceNeeded: requirement.confidenceNeeded,
617
- currentConfidence: requirement.currentConfidence,
618
- evidenceCount: requirement.evidenceIds.length,
619
- evidenceIds: options.includeEvidenceIds ? requirement.evidenceIds : void 0,
620
- fallbackPolicy: requirement.fallbackPolicy
621
- };
622
- }
623
- function sanitizeQuestion(question, options) {
1024
+ function sanitizeQuestion(question, options) {
624
1025
  return {
625
1026
  id: question.id,
626
1027
  question: options.includeRequirementDescriptions && question.answerType !== "credential" ? question.question : void 0,
@@ -689,208 +1090,294 @@ function summarizeEvals(evals, options) {
689
1090
  function redactRecord(record) {
690
1091
  return Object.fromEntries(Object.keys(record).map((key) => [key, "[redacted]"]));
691
1092
  }
692
- function stripNewlines(value) {
693
- return value.replace(/[\r\n]/g, " ");
694
- }
695
- function timestamp() {
696
- return (/* @__PURE__ */ new Date()).toISOString();
697
- }
698
- function streamEvent(event) {
699
- return { ...event, timestamp: timestamp() };
700
- }
701
- function newRuntimeSession(backend, requestedId, metadata) {
702
- const now = timestamp();
703
- return {
704
- id: requestedId || crypto.randomUUID(),
705
- backend,
706
- status: "active",
707
- createdAt: now,
708
- updatedAt: now,
709
- metadata
710
- };
711
- }
712
- function touchSession(session) {
713
- return { ...session, updatedAt: timestamp() };
714
- }
715
- async function startBackendSession(backend, input, context, requestedSessionId) {
716
- if (backend.start) return backend.start(input, { ...context, requestedSessionId });
717
- return newRuntimeSession(backend.kind, requestedSessionId);
718
- }
719
- async function resumeBackendSession(backend, session, input, context) {
720
- if (session.backend !== backend.kind) {
721
- throw new Error(`Cannot resume ${session.backend} session with ${backend.kind} backend`);
722
- }
723
- if (backend.resume) return backend.resume(session, input, context);
724
- return touchSession({ ...session, status: "active" });
725
- }
726
- function normalizeBackendStreamEvent(event, task, session) {
727
- if ("task" in event && event.task && "session" in event && event.session && "timestamp" in event && event.timestamp) return event;
728
- return {
729
- ...event,
730
- task: "task" in event && event.task ? event.task : task,
731
- session: "session" in event && event.session ? event.session : session,
732
- timestamp: "timestamp" in event && event.timestamp ? event.timestamp : timestamp()
733
- };
734
- }
735
1093
  function pickPublicStreamFields(event) {
736
1094
  if (event.type === "session_created" || event.type === "session_resumed") return {};
737
- if (event.type === "backend_start" || event.type === "backend_end") return { backend: event.backend };
738
- if (event.type === "backend_error") return { backend: event.backend, message: event.message, recoverable: event.recoverable };
1095
+ if (event.type === "backend_start" || event.type === "backend_end")
1096
+ return { backend: event.backend };
1097
+ if (event.type === "backend_error") {
1098
+ return { backend: event.backend, message: event.message, recoverable: event.recoverable };
1099
+ }
739
1100
  if (event.type === "task_end") return { status: event.status, reason: event.reason };
740
1101
  if (event.type === "text_delta" || event.type === "reasoning_delta") return { text: event.text };
741
1102
  return {};
742
1103
  }
743
- function mapCommonBackendEvent(event, context) {
744
- if (!event || typeof event !== "object") return void 0;
745
- const record = event;
746
- const type = String(record.type ?? "");
747
- const data = record.data && typeof record.data === "object" ? record.data : record;
748
- if (type === "message.part.updated" || type === "text_delta" || type === "delta") {
749
- const text = stringValue(data.text) ?? stringValue(data.delta) ?? stringValue(record.text);
750
- return text ? { type: "text_delta", task: context.task, session: context.session, text, timestamp: timestamp() } : void 0;
751
- }
752
- if (type === "reasoning_delta") {
753
- const text = stringValue(data.text) ?? stringValue(record.text);
754
- return text ? { type: "reasoning_delta", task: context.task, session: context.session, text, timestamp: timestamp() } : void 0;
755
- }
756
- if (type === "tool_call") {
757
- return {
758
- type: "tool_call",
759
- task: context.task,
760
- session: context.session,
761
- toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? "tool",
762
- toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
763
- args: data.args ?? data.input ?? record.args,
764
- timestamp: timestamp()
765
- };
766
- }
767
- if (type === "tool_result") {
768
- return {
769
- type: "tool_result",
770
- task: context.task,
771
- session: context.session,
772
- toolName: stringValue(data.name) ?? stringValue(record.toolName) ?? "tool",
773
- toolCallId: stringValue(data.id) ?? stringValue(record.toolCallId),
774
- result: data.result ?? data.output ?? record.result,
775
- timestamp: timestamp()
776
- };
777
- }
778
- if (type === "result" || type === "final") {
779
- const text = stringValue(data.finalText) ?? stringValue(data.text) ?? stringValue(record.text);
780
- return text ? { type: "text_delta", task: context.task, session: context.session, text, timestamp: timestamp() } : void 0;
781
- }
782
- return void 0;
1104
+ function createRuntimeEventCollector(options = {}) {
1105
+ const events = [];
1106
+ return {
1107
+ events,
1108
+ onEvent: (event) => {
1109
+ events.push(sanitizeAgentRuntimeEvent(event, options));
1110
+ }
1111
+ };
783
1112
  }
784
- async function* streamResponseEvents(response, context) {
785
- const body = response.body;
786
- if (!body) return;
787
- const reader = body.getReader();
788
- const decoder = new TextDecoder();
789
- let buffer = "";
790
- for (; ; ) {
791
- const { done, value } = await reader.read();
792
- if (done) break;
793
- buffer += decoder.decode(value, { stream: true }).replace(/\r\n/g, "\n");
794
- for (const event of drainStreamBuffer(false)) yield event;
795
- }
796
- buffer += decoder.decode().replace(/\r\n/g, "\n");
797
- for (const event of drainStreamBuffer(true)) yield event;
798
- if (buffer.trim()) {
799
- const event = parseStreamChunk(buffer, context);
800
- if (event) yield event;
801
- }
802
- function* drainStreamBuffer(flush) {
803
- for (; ; ) {
804
- const sseBoundary = buffer.indexOf("\n\n");
805
- if (sseBoundary >= 0) {
806
- const chunk = buffer.slice(0, sseBoundary);
807
- buffer = buffer.slice(sseBoundary + 2);
808
- const event = parseStreamChunk(chunk, context);
809
- if (event) yield event;
810
- continue;
811
- }
812
- const newline = buffer.indexOf("\n");
813
- if (newline >= 0 && !buffer.slice(0, newline).startsWith("data:")) {
814
- const line = buffer.slice(0, newline);
815
- buffer = buffer.slice(newline + 1);
816
- const event = parseStreamChunk(line, context);
817
- if (event) yield event;
818
- continue;
1113
+ function createRuntimeStreamEventCollector(options = {}) {
1114
+ const events = [];
1115
+ const eventCountsByType = {};
1116
+ let firstSessionId;
1117
+ let finalStatus;
1118
+ let finalReason;
1119
+ let finalText = "";
1120
+ return {
1121
+ events,
1122
+ onEvent: (event) => {
1123
+ events.push(sanitizeRuntimeStreamEvent(event, options));
1124
+ eventCountsByType[event.type] = (eventCountsByType[event.type] ?? 0) + 1;
1125
+ if (event.type === "text_delta") finalText += event.text;
1126
+ if (!firstSessionId && (event.type === "session_created" || event.type === "session_resumed")) {
1127
+ firstSessionId = event.session.id;
819
1128
  }
820
- if (flush && buffer.trim() && !buffer.trimStart().startsWith("data:")) {
821
- const line = buffer;
822
- buffer = "";
823
- const event = parseStreamChunk(line, context);
824
- if (event) yield event;
825
- continue;
1129
+ if (event.type === "final") {
1130
+ finalStatus = event.status;
1131
+ finalReason = event.reason;
826
1132
  }
827
- break;
1133
+ },
1134
+ summary() {
1135
+ return {
1136
+ eventCount: events.length,
1137
+ eventCountsByType: { ...eventCountsByType },
1138
+ firstSessionId,
1139
+ finalStatus,
1140
+ finalReason,
1141
+ finalText
1142
+ };
828
1143
  }
829
- }
1144
+ };
830
1145
  }
831
- function parseStreamChunk(chunk, context) {
832
- const lines = chunk.split(/\r?\n/);
833
- const dataLines = lines.filter((line) => line.startsWith("data:"));
834
- const data = dataLines.length > 0 ? dataLines.map((line) => line.slice(5).trimStart()).join("\n") : chunk.trim();
835
- if (!data || data === "[DONE]") return void 0;
836
- try {
837
- const parsed = JSON.parse(data);
838
- const choice = Array.isArray(parsed.choices) ? parsed.choices[0] : void 0;
839
- const delta = choice?.delta;
840
- const message = choice?.message;
841
- const text = stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text);
842
- if (text) return { type: "text_delta", task: context.task, session: context.session, text, timestamp: timestamp() };
843
- return mapCommonBackendEvent(parsed, context);
844
- } catch {
845
- return { type: "text_delta", task: context.task, session: context.session, text: data, timestamp: timestamp() };
1146
+
1147
+ // src/sse.ts
1148
+ function encodeServerSentEvent(data, options = {}) {
1149
+ const lines = [];
1150
+ if (options.id) lines.push(`id: ${stripNewlines(options.id)}`);
1151
+ if (options.event) lines.push(`event: ${stripNewlines(options.event)}`);
1152
+ if (typeof options.retry === "number" && Number.isFinite(options.retry) && options.retry >= 0) {
1153
+ lines.push(`retry: ${Math.floor(options.retry)}`);
1154
+ }
1155
+ const payload = typeof data === "string" ? data : JSON.stringify(data);
1156
+ for (const line of payload.split(/\r?\n/)) {
1157
+ lines.push(`data: ${line}`);
846
1158
  }
1159
+ return `${lines.join("\n")}
1160
+
1161
+ `;
847
1162
  }
848
- function stringValue(value) {
849
- return typeof value === "string" && value.length > 0 ? value : void 0;
1163
+ function readinessServerSentEvent(report, options = {}) {
1164
+ const { event, id, retry, ...telemetryOptions } = options;
1165
+ return encodeServerSentEvent(
1166
+ {
1167
+ type: "readiness",
1168
+ readiness: sanitizeKnowledgeReadinessReport(report, telemetryOptions)
1169
+ },
1170
+ { event, id, retry }
1171
+ );
850
1172
  }
851
- function buildReadiness(task, provider) {
852
- if (provider?.buildReadiness) return provider.buildReadiness(task);
853
- return scoreKnowledgeReadiness({
854
- taskId: task.id,
855
- requirements: task.requiredKnowledge ?? [],
856
- metadata: { domain: task.domain, ...task.metadata }
1173
+ function runtimeStreamServerSentEvent(event, options = {}) {
1174
+ const { event: sseEvent, id, retry, ...telemetryOptions } = options;
1175
+ return encodeServerSentEvent(sanitizeRuntimeStreamEvent(event, telemetryOptions), {
1176
+ event: sseEvent,
1177
+ id,
1178
+ retry
857
1179
  });
858
1180
  }
859
- function isKnowledgeBlocked(evals) {
860
- return evals.some((evalResult) => evalResult.id === "knowledge-ready" && !evalResult.passed);
861
- }
862
- function statusFromControl(control) {
863
- if (control.stoppedBy === "abort") return "aborted";
864
- if (control.reason.includes("knowledge readiness blocked")) return "blocked";
865
- if (control.pass) return "completed";
866
- return "failed";
867
- }
868
- async function emit(sink, event) {
869
- await sink?.(event);
1181
+ function stripNewlines(value) {
1182
+ return value.replace(/[\r\n]/g, " ");
870
1183
  }
871
- function toAgentContext(task, knowledge, ctx) {
1184
+
1185
+ // src/trace-bridge.ts
1186
+ function createTraceBridge(options) {
1187
+ if (!options.runId) {
1188
+ throw new ValidationError("createTraceBridge: runId is required");
1189
+ }
1190
+ let counter = 0;
1191
+ const newEventId = options.newEventId ?? (() => `evt-${++counter}`);
1192
+ const baseSpanId = options.spanId;
1193
+ const toTraceEvent = (event) => {
1194
+ const projection = projectToTraceEvent(event);
1195
+ if (!projection) return void 0;
1196
+ return {
1197
+ eventId: newEventId(),
1198
+ runId: options.runId,
1199
+ spanId: baseSpanId,
1200
+ kind: projection.kind,
1201
+ timestamp: timestampFor(event),
1202
+ payload: projection.payload
1203
+ };
1204
+ };
872
1205
  return {
873
- task,
874
- knowledge,
875
- state: ctx.state,
876
- evals: ctx.evals,
877
- history: ctx.history,
878
- budget: ctx.budget,
879
- stepIndex: ctx.stepIndex,
880
- wallMs: ctx.wallMs,
881
- spentCostUsd: ctx.spentCostUsd,
882
- remainingCostUsd: ctx.remainingCostUsd,
883
- abortSignal: ctx.abortSignal
1206
+ toTraceEvent,
1207
+ drain(events) {
1208
+ const out = [];
1209
+ for (const event of events) {
1210
+ const trace = toTraceEvent(event);
1211
+ if (trace) out.push(trace);
1212
+ }
1213
+ return out;
1214
+ }
884
1215
  };
885
1216
  }
1217
+ function toAgentEvalTrace(event, options) {
1218
+ return createTraceBridge(options).toTraceEvent(event);
1219
+ }
1220
+ function projectToTraceEvent(event) {
1221
+ switch (event.type) {
1222
+ case "task_start":
1223
+ return {
1224
+ kind: "log",
1225
+ payload: { phase: "task_start", taskId: event.task.id, intent: event.task.intent }
1226
+ };
1227
+ case "readiness_start":
1228
+ return { kind: "log", payload: { phase: "readiness_start", taskId: event.task.id } };
1229
+ case "readiness_end":
1230
+ return {
1231
+ kind: event.decision.passed ? "log" : "policy_violation",
1232
+ payload: {
1233
+ phase: "readiness_end",
1234
+ taskId: event.task.id,
1235
+ status: event.decision.status,
1236
+ readinessScore: event.decision.readinessScore,
1237
+ blockingGapIds: event.decision.blockingGapIds,
1238
+ nonBlockingGapIds: event.decision.nonBlockingGapIds,
1239
+ reason: event.decision.reason
1240
+ }
1241
+ };
1242
+ case "questions_start":
1243
+ return {
1244
+ kind: "log",
1245
+ payload: { phase: "questions_start", questionCount: event.questions.length }
1246
+ };
1247
+ case "questions_end":
1248
+ return {
1249
+ kind: "log",
1250
+ payload: {
1251
+ phase: "questions_end",
1252
+ questionCount: event.questions.length,
1253
+ answerCount: Object.keys(event.userAnswers).length
1254
+ }
1255
+ };
1256
+ case "acquisition_start":
1257
+ return {
1258
+ kind: "log",
1259
+ payload: { phase: "acquisition_start", planCount: event.acquisitionPlans.length }
1260
+ };
1261
+ case "acquisition_end":
1262
+ return {
1263
+ kind: "log",
1264
+ payload: {
1265
+ phase: "acquisition_end",
1266
+ planCount: event.acquisitionPlans.length,
1267
+ evidenceCount: event.acquiredEvidenceIds.length
1268
+ }
1269
+ };
1270
+ case "session_created":
1271
+ case "session_resumed":
1272
+ return {
1273
+ kind: "log",
1274
+ payload: {
1275
+ phase: event.type,
1276
+ sessionId: event.session.id,
1277
+ backend: event.session.backend
1278
+ }
1279
+ };
1280
+ case "backend_start":
1281
+ case "backend_end":
1282
+ return { kind: "log", payload: { phase: event.type, backend: event.backend } };
1283
+ case "backend_error":
1284
+ return {
1285
+ kind: "error",
1286
+ payload: {
1287
+ backend: event.backend,
1288
+ message: event.message,
1289
+ recoverable: event.recoverable
1290
+ }
1291
+ };
1292
+ case "tool_call":
1293
+ return {
1294
+ kind: "log",
1295
+ payload: {
1296
+ phase: "tool_call",
1297
+ toolName: event.toolName,
1298
+ toolCallId: event.toolCallId
1299
+ // Args intentionally omitted at this layer; consumers attach the
1300
+ // payload to a `ToolSpan` if they need to retain it. Trace events
1301
+ // are point-in-time markers, not the canonical store for tool I/O.
1302
+ }
1303
+ };
1304
+ case "tool_result":
1305
+ return {
1306
+ kind: "log",
1307
+ payload: {
1308
+ phase: "tool_result",
1309
+ toolName: event.toolName,
1310
+ toolCallId: event.toolCallId
1311
+ }
1312
+ };
1313
+ case "llm_call":
1314
+ return {
1315
+ kind: "log",
1316
+ payload: {
1317
+ phase: "llm_call",
1318
+ model: event.model,
1319
+ tokensIn: event.tokensIn,
1320
+ tokensOut: event.tokensOut,
1321
+ costUsd: event.costUsd,
1322
+ latencyMs: event.latencyMs,
1323
+ finishReason: event.finishReason
1324
+ }
1325
+ };
1326
+ case "artifact":
1327
+ return {
1328
+ kind: "state_mutation",
1329
+ payload: {
1330
+ phase: "artifact",
1331
+ artifactId: event.artifactId,
1332
+ name: event.name,
1333
+ mimeType: event.mimeType
1334
+ }
1335
+ };
1336
+ case "task_end":
1337
+ return {
1338
+ kind: event.status === "failed" || event.status === "aborted" ? "error" : "log",
1339
+ payload: { phase: "task_end", status: event.status, reason: event.reason }
1340
+ };
1341
+ case "final":
1342
+ return {
1343
+ kind: event.status === "failed" || event.status === "aborted" ? "error" : "log",
1344
+ payload: { phase: "final", status: event.status, reason: event.reason }
1345
+ };
1346
+ case "text_delta":
1347
+ case "reasoning_delta":
1348
+ return void 0;
1349
+ default: {
1350
+ const exhaust = event;
1351
+ void exhaust;
1352
+ return void 0;
1353
+ }
1354
+ }
1355
+ }
1356
+ function timestampFor(event) {
1357
+ const iso = "timestamp" in event ? event.timestamp : void 0;
1358
+ if (!iso) return Date.now();
1359
+ const parsed = Date.parse(iso);
1360
+ return Number.isFinite(parsed) ? parsed : Date.now();
1361
+ }
886
1362
  export {
1363
+ AgentEvalError2 as AgentEvalError,
1364
+ BackendTransportError,
1365
+ CaptureIntegrityError,
1366
+ ConfigError,
887
1367
  InMemoryRuntimeSessionStore,
888
- createCliBridgeBackend,
1368
+ JudgeError,
1369
+ NotFoundError,
1370
+ ReplayError,
1371
+ RuntimeRunStateError,
1372
+ SessionMismatchError,
1373
+ ValidationError,
1374
+ VerificationError,
889
1375
  createIterableBackend,
890
1376
  createOpenAICompatibleBackend,
891
1377
  createRuntimeEventCollector,
892
1378
  createRuntimeStreamEventCollector,
893
1379
  createSandboxPromptBackend,
1380
+ createTraceBridge,
894
1381
  decideKnowledgeReadiness,
895
1382
  encodeServerSentEvent,
896
1383
  readinessServerSentEvent,
@@ -900,6 +1387,8 @@ export {
900
1387
  sanitizeAgentRuntimeEvent,
901
1388
  sanitizeKnowledgeReadinessReport,
902
1389
  sanitizeRuntimeStreamEvent,
903
- summarizeAgentTaskRun
1390
+ startRuntimeRun,
1391
+ summarizeAgentTaskRun,
1392
+ toAgentEvalTrace
904
1393
  };
905
1394
  //# sourceMappingURL=index.js.map