@tangle-network/agent-runtime 0.21.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +116 -1
  2. package/dist/agent.d.ts +1 -1
  3. package/dist/chunk-7HN72MF3.js +200 -0
  4. package/dist/chunk-7HN72MF3.js.map +1 -0
  5. package/dist/{chunk-Z5LKAYAS.js → chunk-CBQVID7G.js} +2 -2
  6. package/dist/chunk-IQHYOJU3.js +427 -0
  7. package/dist/chunk-IQHYOJU3.js.map +1 -0
  8. package/dist/{chunk-EDVCVFQB.js → chunk-TZ53F7M7.js} +4 -3
  9. package/dist/chunk-TZ53F7M7.js.map +1 -0
  10. package/dist/{chunk-QDNJLAEU.js → chunk-UNQM6XQO.js} +34 -433
  11. package/dist/chunk-UNQM6XQO.js.map +1 -0
  12. package/dist/{chunk-XLWPTPRP.js → chunk-URDSRUPQ.js} +2 -2
  13. package/dist/{chunk-RZAOYKCO.js → chunk-XZYF3YJN.js} +9 -1
  14. package/dist/{chunk-RZAOYKCO.js.map → chunk-XZYF3YJN.js.map} +1 -1
  15. package/dist/index.d.ts +77 -4
  16. package/dist/index.js +209 -41
  17. package/dist/index.js.map +1 -1
  18. package/dist/loops.d.ts +4 -4
  19. package/dist/loops.js +3 -3
  20. package/dist/mcp/bin.js +6 -5
  21. package/dist/mcp/bin.js.map +1 -1
  22. package/dist/mcp/index.d.ts +54 -4
  23. package/dist/mcp/index.js +60 -11
  24. package/dist/mcp/index.js.map +1 -1
  25. package/dist/otel-export-B33Cy_60.d.ts +114 -0
  26. package/dist/profiles.d.ts +3 -3
  27. package/dist/profiles.js +3 -3
  28. package/dist/{runtime-run-B2j-hvBj.d.ts → runtime-run-D5ItCKl_.d.ts} +1 -1
  29. package/dist/{types-DvJIha6w.d.ts → types-BFgFD_sl.d.ts} +87 -1
  30. package/dist/{types-Cu-SkGa0.d.ts → types-DmkRGTBn.d.ts} +18 -1
  31. package/package.json +1 -1
  32. package/dist/chunk-EDVCVFQB.js.map +0 -1
  33. package/dist/chunk-QDNJLAEU.js.map +0 -1
  34. /package/dist/{chunk-Z5LKAYAS.js.map → chunk-CBQVID7G.js.map} +0 -0
  35. /package/dist/{chunk-XLWPTPRP.js.map → chunk-URDSRUPQ.js.map} +0 -0
@@ -26,10 +26,18 @@ var SessionMismatchError = class extends AgentEvalError {
26
26
  var BackendTransportError = class extends AgentEvalError {
27
27
  backend;
28
28
  status;
29
+ /**
30
+ * Truncated upstream response body (≤2 KiB) when available. Diagnostic
31
+ * only — surfaces in `backend_error.error.body` and `final.error.body`
32
+ * so operators can see "free_tier_limit", "invalid_api_key", etc. without
33
+ * cracking the log line open.
34
+ */
35
+ body;
29
36
  constructor(backend, message, options) {
30
37
  super("config", message, options);
31
38
  this.backend = backend;
32
39
  this.status = options?.status;
40
+ this.body = options?.body;
33
41
  }
34
42
  };
35
43
  var RuntimeRunStateError = class extends AgentEvalError {
@@ -48,4 +56,4 @@ export {
48
56
  NotFoundError,
49
57
  ValidationError
50
58
  };
51
- //# sourceMappingURL=chunk-RZAOYKCO.js.map
59
+ //# sourceMappingURL=chunk-XZYF3YJN.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/errors.ts"],"sourcesContent":["/**\n * @stable\n *\n * Error taxonomy for `@tangle-network/agent-runtime`.\n *\n * Public contract: every error this package throws as part of its consumer-\n * facing API either extends `AgentEvalError` (re-exported here for ergonomic\n * `instanceof` checks at the runtime boundary) or extends one of the\n * runtime-specific subclasses below.\n *\n * Internal invariant guards (`throw new Error('this should never happen')`)\n * remain plain `Error` — they are programmer-mistake assertions, not\n * consumer-catchable contract failures.\n *\n * Subclassing strategy: where a runtime-specific failure maps cleanly to an\n * agent-eval code (validation, config, not_found), we re-use the agent-eval\n * subclass. Runtime-only failure modes (session resume against the wrong\n * backend, backend transport errors) get fresh subclasses that still carry an\n * `AgentEvalErrorCode` so cross-package handlers can pattern-match without\n * importing the runtime.\n */\n\nimport { AgentEvalError } from '@tangle-network/agent-eval'\n\nexport {\n AgentEvalError,\n type AgentEvalErrorCode,\n CaptureIntegrityError,\n ConfigError,\n JudgeError,\n NotFoundError,\n ReplayError,\n ValidationError,\n VerificationError,\n} from '@tangle-network/agent-eval'\n\n/**\n * @stable\n *\n * Caller asked to resume a session against a backend whose `kind` does not\n * match the session's recorded backend. This is a routing bug — the same\n * session id was reused across two different backend implementations — and\n * is not retryable without picking the right backend.\n */\nexport class SessionMismatchError extends AgentEvalError {\n readonly sessionBackend: string\n readonly requestedBackend: string\n\n constructor(sessionBackend: string, requestedBackend: string, options?: { cause?: unknown }) {\n super(\n 'validation',\n `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,\n options,\n )\n this.sessionBackend = sessionBackend\n this.requestedBackend = requestedBackend\n }\n}\n\n/**\n * @stable\n *\n * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success\n * status. Distinct from `JudgeError` (which is structural / unrecoverable)\n * because backend failures are sometimes retryable and consumers may want to\n * branch on the upstream status code.\n */\nexport class BackendTransportError extends AgentEvalError {\n readonly backend: string\n readonly status?: number\n\n constructor(backend: string, message: string, options?: { cause?: unknown; status?: number }) {\n super('config', message, options)\n this.backend = backend\n this.status = options?.status\n }\n}\n\n/**\n * @stable\n *\n * A runtime-run lifecycle method was called in an order the state machine does\n * not allow: `persist()` before `complete()`, `complete()` twice, etc.\n */\nexport class RuntimeRunStateError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n"],"mappings":";AAsBA,SAAS,sBAAsB;AAE/B;AAAA,EACE,kBAAAA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAUA,IAAM,uBAAN,cAAmC,eAAe;AAAA,EAC9C;AAAA,EACA;AAAA,EAET,YAAY,gBAAwB,kBAA0B,SAA+B;AAC3F;AAAA,MACE;AAAA,MACA,iBAAiB,cAAc,iBAAiB,gBAAgB;AAAA,MAChE;AAAA,IACF;AACA,SAAK,iBAAiB;AACtB,SAAK,mBAAmB;AAAA,EAC1B;AACF;AAUO,IAAM,wBAAN,cAAoC,eAAe;AAAA,EAC/C;AAAA,EACA;AAAA,EAET,YAAY,SAAiB,SAAiB,SAAgD;AAC5F,UAAM,UAAU,SAAS,OAAO;AAChC,SAAK,UAAU;AACf,SAAK,SAAS,SAAS;AAAA,EACzB;AACF;AAQO,IAAM,uBAAN,cAAmC,eAAe;AAAA,EACvD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;","names":["AgentEvalError"]}
1
+ {"version":3,"sources":["../src/errors.ts"],"sourcesContent":["/**\n * @stable\n *\n * Error taxonomy for `@tangle-network/agent-runtime`.\n *\n * Public contract: every error this package throws as part of its consumer-\n * facing API either extends `AgentEvalError` (re-exported here for ergonomic\n * `instanceof` checks at the runtime boundary) or extends one of the\n * runtime-specific subclasses below.\n *\n * Internal invariant guards (`throw new Error('this should never happen')`)\n * remain plain `Error` — they are programmer-mistake assertions, not\n * consumer-catchable contract failures.\n *\n * Subclassing strategy: where a runtime-specific failure maps cleanly to an\n * agent-eval code (validation, config, not_found), we re-use the agent-eval\n * subclass. Runtime-only failure modes (session resume against the wrong\n * backend, backend transport errors) get fresh subclasses that still carry an\n * `AgentEvalErrorCode` so cross-package handlers can pattern-match without\n * importing the runtime.\n */\n\nimport { AgentEvalError } from '@tangle-network/agent-eval'\n\nexport {\n AgentEvalError,\n type AgentEvalErrorCode,\n CaptureIntegrityError,\n ConfigError,\n JudgeError,\n NotFoundError,\n ReplayError,\n ValidationError,\n VerificationError,\n} from '@tangle-network/agent-eval'\n\n/**\n * @stable\n *\n * Caller asked to resume a session against a backend whose `kind` does not\n * match the session's recorded backend. This is a routing bug — the same\n * session id was reused across two different backend implementations — and\n * is not retryable without picking the right backend.\n */\nexport class SessionMismatchError extends AgentEvalError {\n readonly sessionBackend: string\n readonly requestedBackend: string\n\n constructor(sessionBackend: string, requestedBackend: string, options?: { cause?: unknown }) {\n super(\n 'validation',\n `Cannot resume ${sessionBackend} session with ${requestedBackend} backend`,\n options,\n )\n this.sessionBackend = sessionBackend\n this.requestedBackend = requestedBackend\n }\n}\n\n/**\n * @stable\n *\n * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success\n * status. Distinct from `JudgeError` (which is structural / unrecoverable)\n * because backend failures are sometimes retryable and consumers may want to\n * branch on the upstream status code.\n */\nexport class BackendTransportError extends AgentEvalError {\n readonly backend: string\n readonly status?: number\n /**\n * Truncated upstream response body (≤2 KiB) when available. Diagnostic\n * only — surfaces in `backend_error.error.body` and `final.error.body`\n * so operators can see \"free_tier_limit\", \"invalid_api_key\", etc. without\n * cracking the log line open.\n */\n readonly body?: string\n\n constructor(\n backend: string,\n message: string,\n options?: { cause?: unknown; status?: number; body?: string },\n ) {\n super('config', message, options)\n this.backend = backend\n this.status = options?.status\n this.body = options?.body\n }\n}\n\n/**\n * @stable\n *\n * A runtime-run lifecycle method was called in an order the state machine does\n * not allow: `persist()` before `complete()`, `complete()` twice, etc.\n */\nexport class RuntimeRunStateError extends AgentEvalError {\n constructor(message: string, options?: { cause?: unknown }) {\n super('validation', message, options)\n }\n}\n"],"mappings":";AAsBA,SAAS,sBAAsB;AAE/B;AAAA,EACE,kBAAAA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAUA,IAAM,uBAAN,cAAmC,eAAe;AAAA,EAC9C;AAAA,EACA;AAAA,EAET,YAAY,gBAAwB,kBAA0B,SAA+B;AAC3F;AAAA,MACE;AAAA,MACA,iBAAiB,cAAc,iBAAiB,gBAAgB;AAAA,MAChE;AAAA,IACF;AACA,SAAK,iBAAiB;AACtB,SAAK,mBAAmB;AAAA,EAC1B;AACF;AAUO,IAAM,wBAAN,cAAoC,eAAe;AAAA,EAC/C;AAAA,EACA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA;AAAA,EAET,YACE,SACA,SACA,SACA;AACA,UAAM,UAAU,SAAS,OAAO;AAChC,SAAK,UAAU;AACf,SAAK,SAAS,SAAS;AACvB,SAAK,OAAO,SAAS;AAAA,EACvB;AACF;AAQO,IAAM,uBAAN,cAAmC,eAAe;AAAA,EACvD,YAAY,SAAiB,SAA+B;AAC1D,UAAM,cAAc,SAAS,OAAO;AAAA,EACtC;AACF;","names":["AgentEvalError"]}
package/dist/index.d.ts CHANGED
@@ -1,8 +1,9 @@
1
1
  import { AgentEvalError, KnowledgeReadinessReport, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
2
2
  export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
3
- import { a as AgentBackendInput, b as AgentExecutionBackend, c as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, d as RunAgentTaskOptions, e as AgentTaskRunResult, f as RunAgentTaskStreamOptions, g as AgentRuntimeEvent, h as AgentTaskStatus, i as RuntimeSessionStore, j as RuntimeSession } from './types-DvJIha6w.js';
4
- export { k as AgentAdapter, l as AgentKnowledgeProvider, m as AgentRuntimeEventSink, n as AgentTaskContext, A as AgentTaskSpec } from './types-DvJIha6w.js';
5
- export { R as RuntimeRunHandle, a as RuntimeRunPersistenceAdapter, b as RuntimeRunRow, s as startRuntimeRun } from './runtime-run-B2j-hvBj.js';
3
+ import { a as AgentBackendInput, b as AgentExecutionBackend, O as OpenAIChatTool, c as OpenAIChatToolChoice, d as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, e as RunAgentTaskOptions, f as AgentTaskRunResult, g as RunAgentTaskStreamOptions, h as AgentRuntimeEvent, i as AgentTaskStatus, j as RuntimeSessionStore, k as RuntimeSession } from './types-BFgFD_sl.js';
4
+ export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext, A as AgentTaskSpec, B as BackendErrorDetail } from './types-BFgFD_sl.js';
5
+ export { O as OtelAttribute, a as OtelExportConfig, b as OtelExporter, c as OtelSpan, d as createOtelExporter, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, e as mcpToolsForRuntimeMcpSubset } from './otel-export-B33Cy_60.js';
6
+ export { R as RuntimeRunHandle, a as RuntimeRunPersistenceAdapter, b as RuntimeRunRow, s as startRuntimeRun } from './runtime-run-D5ItCKl_.js';
6
7
 
7
8
  /**
8
9
  * @stable
@@ -68,11 +69,59 @@ interface BackendRetryPolicy {
68
69
  */
69
70
  requestTimeoutMs?: number;
70
71
  }
72
+ /**
73
+ * @stable
74
+ *
75
+ * OpenAI-compat streaming backend. Routes `runAgentTaskStream` through any
76
+ * `POST /chat/completions` endpoint that speaks OpenAI's SSE protocol —
77
+ * Tangle Router, OpenAI direct, OpenRouter, Groq, DeepSeek, Together. The
78
+ * router also fronts Anthropic models in Anthropic-native SSE shape; this
79
+ * backend handles both.
80
+ *
81
+ * ### Tool calls
82
+ *
83
+ * Pass `tools` (and optionally `toolChoice`) to forward an OpenAI Chat
84
+ * Completions `tools[]` array on every request. Streamed `tool_call` chunks
85
+ * are buffered until the model finalizes them (either `finish_reason:
86
+ * 'tool_calls'` for OpenAI shape or a `content_block_stop` for Anthropic
87
+ * `tool_use` blocks proxied through the router), then emitted as a single
88
+ * `tool_call` RuntimeStreamEvent with the assembled `args`.
89
+ *
90
+ * The backend does NOT execute tools — it surfaces calls for the caller's
91
+ * own dispatcher (typically the product's MCP / sandbox runtime) to fulfill
92
+ * and feed back as a subsequent `messages` turn. This keeps the transport
93
+ * thin and lets the agent host own tool dispatch policy.
94
+ *
95
+ * ### Fail-loud errors
96
+ *
97
+ * Non-success HTTP responses (4xx/5xx) and exhausted retry budgets throw
98
+ * `BackendTransportError` from inside the `stream()` generator. The runtime
99
+ * catches the throw, yields a `backend_error` with a typed `error` field
100
+ * (`kind`, `status`, truncated `body`) and a terminal `final` event with
101
+ * `status: 'failed'` carrying the same detail. Consumers MUST map
102
+ * `final.error` onto their `RunRecord.error` — silently treating an empty
103
+ * `finalText` as "agent produced nothing" hides credit exhaustion, auth
104
+ * failure, and upstream outages.
105
+ */
71
106
  declare function createOpenAICompatibleBackend<TInput extends AgentBackendInput = AgentBackendInput>(options: {
72
107
  apiKey: string;
73
108
  baseUrl: string;
74
109
  model: string;
75
110
  kind?: string;
111
+ /**
112
+ * OpenAI Chat Completions `tools[]` definitions surfaced to the model on
113
+ * every request. Omit to send a tool-free request (existing behavior).
114
+ * The runtime makes no assumption about the dispatcher — calls stream out
115
+ * as `tool_call` events and the caller is responsible for executing them
116
+ * and feeding `tool_result` messages back on a follow-up turn.
117
+ */
118
+ tools?: ReadonlyArray<OpenAIChatTool>;
119
+ /**
120
+ * OpenAI Chat Completions `tool_choice`. Default `undefined` (request
121
+ * omits the field; provider falls back to its own default — typically
122
+ * `'auto'`).
123
+ */
124
+ toolChoice?: OpenAIChatToolChoice;
76
125
  fetchImpl?: typeof fetch;
77
126
  retry?: BackendRetryPolicy;
78
127
  }): AgentExecutionBackend<TInput>;
@@ -220,6 +269,30 @@ declare function deriveExecutionId(input: {
220
269
  * importing the runtime.
221
270
  */
222
271
 
272
+ /**
273
+ * @stable
274
+ *
275
+ * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success
276
+ * status. Distinct from `JudgeError` (which is structural / unrecoverable)
277
+ * because backend failures are sometimes retryable and consumers may want to
278
+ * branch on the upstream status code.
279
+ */
280
+ declare class BackendTransportError extends AgentEvalError {
281
+ readonly backend: string;
282
+ readonly status?: number;
283
+ /**
284
+ * Truncated upstream response body (≤2 KiB) when available. Diagnostic
285
+ * only — surfaces in `backend_error.error.body` and `final.error.body`
286
+ * so operators can see "free_tier_limit", "invalid_api_key", etc. without
287
+ * cracking the log line open.
288
+ */
289
+ readonly body?: string;
290
+ constructor(backend: string, message: string, options?: {
291
+ cause?: unknown;
292
+ status?: number;
293
+ body?: string;
294
+ });
295
+ }
223
296
  /**
224
297
  * @stable
225
298
  *
@@ -504,4 +577,4 @@ declare function readinessServerSentEvent(report: KnowledgeReadinessReport, opti
504
577
  /** @stable */
505
578
  declare function runtimeStreamServerSentEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions & ServerSentEventOptions): string;
506
579
 
507
- export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, DEFAULT_ROUTER_BASE_URL, InMemoryRuntimeSessionStore, type ModelInfo, type ResolvedChatModel, type RouterEnv, type RunChatTurnInput, type RuntimeEventCollector, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, cleanModelId, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, decideKnowledgeReadiness, deriveExecutionId, getModels, handleChatTurn, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, validateChatModelId };
580
+ export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, DEFAULT_ROUTER_BASE_URL, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, type ResolvedChatModel, type RouterEnv, type RunChatTurnInput, type RuntimeEventCollector, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, cleanModelId, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, decideKnowledgeReadiness, deriveExecutionId, getModels, handleChatTurn, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, validateChatModelId };
package/dist/index.js CHANGED
@@ -1,3 +1,10 @@
1
+ import {
2
+ createOtelExporter,
3
+ loopEventToOtelSpan,
4
+ mcpToolsForRuntimeMcp,
5
+ mcpToolsForRuntimeMcpSubset
6
+ } from "./chunk-7HN72MF3.js";
7
+ import "./chunk-UNQM6XQO.js";
1
8
  import {
2
9
  AgentEvalError,
3
10
  BackendTransportError,
@@ -7,7 +14,7 @@ import {
7
14
  RuntimeRunStateError,
8
15
  SessionMismatchError,
9
16
  ValidationError
10
- } from "./chunk-RZAOYKCO.js";
17
+ } from "./chunk-XZYF3YJN.js";
11
18
  import "./chunk-DGUM43GV.js";
12
19
 
13
20
  // src/sessions.ts
@@ -144,14 +151,19 @@ function createOpenAICompatibleBackend(options) {
144
151
  },
145
152
  async *stream(input, context) {
146
153
  const url = `${options.baseUrl.replace(/\/$/, "")}/chat/completions`;
147
- const requestBody = JSON.stringify({
154
+ const bodyPayload = {
148
155
  model: options.model,
149
156
  stream: true,
150
157
  stream_options: { include_usage: true },
151
158
  messages: input.messages ?? [
152
159
  { role: "user", content: input.message ?? context.task.intent }
153
160
  ]
154
- });
161
+ };
162
+ if (options.tools && options.tools.length > 0) {
163
+ bodyPayload.tools = options.tools;
164
+ if (options.toolChoice !== void 0) bodyPayload.tool_choice = options.toolChoice;
165
+ }
166
+ const requestBody = JSON.stringify(bodyPayload);
155
167
  let response;
156
168
  let lastStatus = 0;
157
169
  let lastThrown;
@@ -198,14 +210,23 @@ function createOpenAICompatibleBackend(options) {
198
210
  );
199
211
  }
200
212
  if (!response.ok) {
213
+ let body;
214
+ try {
215
+ const raw = await response.text();
216
+ body = raw.length > MAX_ERROR_BODY_BYTES ? `${raw.slice(0, MAX_ERROR_BODY_BYTES)}\u2026` : raw;
217
+ } catch {
218
+ body = void 0;
219
+ }
201
220
  throw new BackendTransportError(kind, `chat backend returned ${lastStatus || "unknown"}`, {
202
- status: lastStatus || 0
221
+ status: lastStatus || 0,
222
+ body
203
223
  });
204
224
  }
205
225
  yield* streamResponseEvents(response, context, options.model);
206
226
  }
207
227
  };
208
228
  }
229
+ var MAX_ERROR_BODY_BYTES = 2048;
209
230
  function normalizeBackendStreamEvent(event, task, session) {
210
231
  if ("task" in event && event.task && "session" in event && event.session && "timestamp" in event && event.timestamp) {
211
232
  return event;
@@ -315,6 +336,7 @@ async function* streamResponseEvents(response, context, requestedModel) {
315
336
  const decoder = new TextDecoder();
316
337
  let buffer = "";
317
338
  const usage = { saw: false };
339
+ const toolCalls = /* @__PURE__ */ new Map();
318
340
  const startedAt = Date.now();
319
341
  for (; ; ) {
320
342
  const { done, value } = await reader.read();
@@ -325,9 +347,9 @@ async function* streamResponseEvents(response, context, requestedModel) {
325
347
  buffer += decoder.decode().replace(/\r\n/g, "\n");
326
348
  for (const event of drainStreamBuffer(true)) yield event;
327
349
  if (buffer.trim()) {
328
- const event = parseStreamChunk(buffer, context, usage);
329
- if (event) yield event;
350
+ for (const event of parseStreamChunk(buffer, context, usage, toolCalls)) yield event;
330
351
  }
352
+ for (const event of flushPendingToolCalls(toolCalls, context)) yield event;
331
353
  if (usage.saw) {
332
354
  yield {
333
355
  type: "llm_call",
@@ -350,56 +372,124 @@ async function* streamResponseEvents(response, context, requestedModel) {
350
372
  if (sseBoundary >= 0) {
351
373
  const chunk = buffer.slice(0, sseBoundary);
352
374
  buffer = buffer.slice(sseBoundary + 2);
353
- const event = parseStreamChunk(chunk, context, usage);
354
- if (event) yield event;
375
+ for (const event of parseStreamChunk(chunk, context, usage, toolCalls)) yield event;
355
376
  continue;
356
377
  }
357
378
  const newline = buffer.indexOf("\n");
358
379
  if (newline >= 0 && !buffer.slice(0, newline).startsWith("data:")) {
359
380
  const line = buffer.slice(0, newline);
360
381
  buffer = buffer.slice(newline + 1);
361
- const event = parseStreamChunk(line, context, usage);
362
- if (event) yield event;
382
+ for (const event of parseStreamChunk(line, context, usage, toolCalls)) yield event;
363
383
  continue;
364
384
  }
365
385
  if (flush && buffer.trim() && !buffer.trimStart().startsWith("data:")) {
366
386
  const line = buffer;
367
387
  buffer = "";
368
- const event = parseStreamChunk(line, context, usage);
369
- if (event) yield event;
388
+ for (const event of parseStreamChunk(line, context, usage, toolCalls)) yield event;
370
389
  continue;
371
390
  }
372
391
  break;
373
392
  }
374
393
  }
375
394
  }
376
- function parseStreamChunk(chunk, context, usage) {
395
+ function* parseStreamChunk(chunk, context, usage, toolCalls) {
377
396
  const lines = chunk.split(/\r?\n/);
378
397
  const dataLines = lines.filter((line) => line.startsWith("data:"));
379
398
  const data = dataLines.length > 0 ? dataLines.map((line) => line.slice(5).trimStart()).join("\n") : chunk.trim();
380
- if (!data || data === "[DONE]") return void 0;
399
+ if (!data || data === "[DONE]") return;
400
+ let parsed;
381
401
  try {
382
- const parsed = JSON.parse(data);
383
- captureStreamUsage(parsed, usage);
384
- const choices = parsed.choices;
385
- const choice = Array.isArray(choices) ? choices[0] : void 0;
386
- const delta = choice?.delta;
387
- const message = choice?.message;
388
- const text = stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text);
389
- if (text) {
390
- return {
391
- type: "text_delta",
392
- task: context.task,
393
- session: context.session,
394
- text,
395
- timestamp: nowIso()
396
- };
402
+ parsed = JSON.parse(data);
403
+ } catch {
404
+ yield {
405
+ type: "text_delta",
406
+ task: context.task,
407
+ session: context.session,
408
+ text: data,
409
+ timestamp: nowIso()
410
+ };
411
+ return;
412
+ }
413
+ captureStreamUsage(parsed, usage);
414
+ const choices = parsed.choices;
415
+ const choice = Array.isArray(choices) ? choices[0] : void 0;
416
+ const delta = choice?.delta;
417
+ const message = choice?.message;
418
+ const deltaToolCalls = delta?.tool_calls;
419
+ if (Array.isArray(deltaToolCalls)) {
420
+ for (const tc of deltaToolCalls) {
421
+ if (!tc || typeof tc !== "object") continue;
422
+ const rec = tc;
423
+ const idx = numberValue(rec.index) ?? 0;
424
+ const key = `openai:${idx}`;
425
+ const acc = toolCalls.get(key) ?? { argsRaw: "", source: "openai", finalized: false };
426
+ const id = stringValue(rec.id);
427
+ if (id) acc.id = id;
428
+ const fn = rec.function;
429
+ const name = stringValue(fn?.name);
430
+ if (name) acc.name = name;
431
+ const args = stringValue(fn?.arguments);
432
+ if (args) acc.argsRaw += args;
433
+ toolCalls.set(key, acc);
434
+ }
435
+ }
436
+ const messageToolCalls = message?.tool_calls;
437
+ if (Array.isArray(messageToolCalls)) {
438
+ for (const tc of messageToolCalls) {
439
+ if (!tc || typeof tc !== "object") continue;
440
+ const rec = tc;
441
+ const fn = rec.function;
442
+ const idx = numberValue(rec.index) ?? messageToolCalls.indexOf(tc);
443
+ const key = `openai:${idx}`;
444
+ const acc = toolCalls.get(key) ?? { argsRaw: "", source: "openai", finalized: false };
445
+ const id = stringValue(rec.id);
446
+ if (id) acc.id = id;
447
+ const name = stringValue(fn?.name);
448
+ if (name) acc.name = name;
449
+ const args = stringValue(fn?.arguments);
450
+ if (args) acc.argsRaw += args;
451
+ acc.finalized = true;
452
+ toolCalls.set(key, acc);
453
+ }
454
+ }
455
+ const finishReason = stringValue(choice?.finish_reason);
456
+ if (finishReason === "tool_calls") {
457
+ for (const [key, acc] of toolCalls) {
458
+ if (acc.source === "openai" && !acc.finalized) acc.finalized = true;
459
+ toolCalls.set(key, acc);
397
460
  }
398
- if (stringValue(parsed.type) === "content_block_delta") {
399
- const d = parsed.delta;
461
+ }
462
+ const eventType = stringValue(parsed.type);
463
+ if (eventType === "content_block_start") {
464
+ const block = parsed.content_block;
465
+ if (block && stringValue(block.type) === "tool_use") {
466
+ const idx = numberValue(parsed.index) ?? 0;
467
+ const key = `anthropic:${idx}`;
468
+ toolCalls.set(key, {
469
+ id: stringValue(block.id),
470
+ name: stringValue(block.name),
471
+ argsRaw: "",
472
+ source: "anthropic",
473
+ finalized: false
474
+ });
475
+ }
476
+ }
477
+ if (eventType === "content_block_delta") {
478
+ const d = parsed.delta;
479
+ const dType = stringValue(d?.type);
480
+ if (dType === "input_json_delta") {
481
+ const idx = numberValue(parsed.index) ?? 0;
482
+ const key = `anthropic:${idx}`;
483
+ const acc = toolCalls.get(key);
484
+ if (acc) {
485
+ const partial = stringValue(d?.partial_json) ?? "";
486
+ acc.argsRaw += partial;
487
+ toolCalls.set(key, acc);
488
+ }
489
+ } else {
400
490
  const text2 = stringValue(d?.text);
401
491
  if (text2) {
402
- return {
492
+ yield {
403
493
  type: "text_delta",
404
494
  task: context.task,
405
495
  session: context.session,
@@ -408,17 +498,65 @@ function parseStreamChunk(chunk, context, usage) {
408
498
  };
409
499
  }
410
500
  }
411
- return mapCommonBackendEvent(parsed, context);
412
- } catch {
413
- return {
501
+ }
502
+ if (eventType === "content_block_stop") {
503
+ const idx = numberValue(parsed.index) ?? 0;
504
+ const key = `anthropic:${idx}`;
505
+ const acc = toolCalls.get(key);
506
+ if (acc) {
507
+ acc.finalized = true;
508
+ toolCalls.set(key, acc);
509
+ }
510
+ }
511
+ for (const event of drainFinalizedToolCalls(toolCalls, context)) yield event;
512
+ const text = stringValue(delta?.content) ?? stringValue(message?.content) ?? stringValue(parsed.text);
513
+ if (text) {
514
+ yield {
414
515
  type: "text_delta",
415
516
  task: context.task,
416
517
  session: context.session,
417
- text: data,
518
+ text,
418
519
  timestamp: nowIso()
419
520
  };
521
+ return;
522
+ }
523
+ const mapped = mapCommonBackendEvent(parsed, context);
524
+ if (mapped) yield mapped;
525
+ }
526
+ function* drainFinalizedToolCalls(toolCalls, context) {
527
+ for (const [key, acc] of toolCalls) {
528
+ if (!acc.finalized) continue;
529
+ toolCalls.delete(key);
530
+ yield buildToolCallEvent(acc, context);
531
+ }
532
+ }
533
+ function* flushPendingToolCalls(toolCalls, context) {
534
+ for (const [key, acc] of toolCalls) {
535
+ toolCalls.delete(key);
536
+ yield buildToolCallEvent(acc, context);
420
537
  }
421
538
  }
539
+ function buildToolCallEvent(acc, context) {
540
+ let args = acc.argsRaw;
541
+ if (acc.argsRaw.length > 0) {
542
+ try {
543
+ args = JSON.parse(acc.argsRaw);
544
+ } catch {
545
+ args = acc.argsRaw;
546
+ }
547
+ } else {
548
+ args = {};
549
+ }
550
+ return {
551
+ type: "tool_call",
552
+ task: context.task,
553
+ session: context.session,
554
+ toolName: acc.name ?? "tool",
555
+ toolCallId: acc.id,
556
+ args,
557
+ timestamp: nowIso()
558
+ };
559
+ }
422
560
  function captureStreamUsage(parsed, usage) {
423
561
  const model = stringValue(parsed.model);
424
562
  if (model && !usage.model) usage.model = model;
@@ -888,13 +1026,21 @@ async function* runAgentTaskStream(options) {
888
1026
  } catch (stopErr) {
889
1027
  stopErrorMessage = stopErr instanceof Error ? stopErr.message : String(stopErr);
890
1028
  }
1029
+ const combinedMessage = stopErrorMessage ? `${message}; backend stop failed: ${stopErrorMessage}` : message;
1030
+ const errorDetail = err instanceof BackendTransportError ? {
1031
+ kind: "transport",
1032
+ message: combinedMessage,
1033
+ status: err.status,
1034
+ body: err.body
1035
+ } : { kind: "backend", message: combinedMessage };
891
1036
  const backendError = streamEvent({
892
1037
  type: "backend_error",
893
1038
  task,
894
1039
  session,
895
1040
  backend: options.backend.kind,
896
- message: stopErrorMessage ? `${message}; backend stop failed: ${stopErrorMessage}` : message,
897
- recoverable: !options.signal?.aborted
1041
+ message: combinedMessage,
1042
+ recoverable: !options.signal?.aborted,
1043
+ error: errorDetail
898
1044
  });
899
1045
  await store?.appendEvent?.(session.id, backendError);
900
1046
  yield backendError;
@@ -908,7 +1054,8 @@ async function* runAgentTaskStream(options) {
908
1054
  session,
909
1055
  status,
910
1056
  reason: message,
911
- text: finalText || void 0
1057
+ text: finalText || void 0,
1058
+ error: errorDetail
912
1059
  });
913
1060
  await store?.appendEvent?.(session.id, final);
914
1061
  yield final;
@@ -1289,6 +1436,12 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
1289
1436
  };
1290
1437
  }
1291
1438
  if (event.type === "final") {
1439
+ const sanitizedError = event.error !== void 0 ? {
1440
+ kind: event.error.kind,
1441
+ message: event.error.message,
1442
+ status: event.error.status,
1443
+ body: options.includeControlPayloads ? event.error.body : void 0
1444
+ } : void 0;
1292
1445
  return {
1293
1446
  type: event.type,
1294
1447
  ...withTask,
@@ -1297,7 +1450,8 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
1297
1450
  status: event.status,
1298
1451
  reason: event.reason,
1299
1452
  text: options.includeControlPayloads ? event.text : void 0,
1300
- metadata: options.includeMetadata ? event.metadata : void 0
1453
+ metadata: options.includeMetadata ? event.metadata : void 0,
1454
+ ...sanitizedError !== void 0 ? { error: sanitizedError } : {}
1301
1455
  };
1302
1456
  }
1303
1457
  return {
@@ -1423,7 +1577,16 @@ function pickPublicStreamFields(event) {
1423
1577
  if (event.type === "backend_start" || event.type === "backend_end")
1424
1578
  return { backend: event.backend };
1425
1579
  if (event.type === "backend_error") {
1426
- return { backend: event.backend, message: event.message, recoverable: event.recoverable };
1580
+ const sanitizedError = event.error !== void 0 ? {
1581
+ kind: event.error.kind,
1582
+ status: event.error.status
1583
+ } : void 0;
1584
+ return {
1585
+ backend: event.backend,
1586
+ message: event.message,
1587
+ recoverable: event.recoverable,
1588
+ ...sanitizedError !== void 0 ? { error: sanitizedError } : {}
1589
+ };
1427
1590
  }
1428
1591
  if (event.type === "task_end") return { status: event.status, reason: event.reason };
1429
1592
  if (event.type === "text_delta" || event.type === "reasoning_delta") return { text: event.text };
@@ -1511,6 +1674,7 @@ function stripNewlines(value) {
1511
1674
  }
1512
1675
  export {
1513
1676
  AgentEvalError,
1677
+ BackendTransportError,
1514
1678
  ConfigError,
1515
1679
  DEFAULT_ROUTER_BASE_URL,
1516
1680
  InMemoryRuntimeSessionStore,
@@ -1521,6 +1685,7 @@ export {
1521
1685
  cleanModelId,
1522
1686
  createIterableBackend,
1523
1687
  createOpenAICompatibleBackend,
1688
+ createOtelExporter,
1524
1689
  createRuntimeEventCollector,
1525
1690
  createRuntimeStreamEventCollector,
1526
1691
  createSandboxPromptBackend,
@@ -1528,6 +1693,9 @@ export {
1528
1693
  deriveExecutionId,
1529
1694
  getModels,
1530
1695
  handleChatTurn,
1696
+ loopEventToOtelSpan,
1697
+ mcpToolsForRuntimeMcp,
1698
+ mcpToolsForRuntimeMcpSubset,
1531
1699
  readinessServerSentEvent,
1532
1700
  resolveChatModel,
1533
1701
  resolveRouterBaseUrl,