npm - @galdor/provider-anthropic - Versions diffs - 0.3.0 - Mend

@galdor/provider-anthropic 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/src/anthropic.test.ts ADDED Viewed

@@ -0,0 +1,194 @@
+/**
+ * Behavioral tests for the Anthropic adapter.
+ *
+ * Each case stands up an ephemeral local HTTP server that impersonates the
+ * Messages API, points a freshly constructed provider at it, and asserts on both
+ * the outgoing wire request and the parsed result — covering request shaping,
+ * typed error mapping, and SSE stream reassembly.
+ */
+import { afterEach, describe, expect, test } from "bun:test";
+import { ContentType, messageText } from "@galdor/core/schema";
+import { APIError, collectStream, RateLimitError } from "@galdor/core/provider";
+import { newAnthropic } from "./index.ts";
+// Handle to the stub HTTP server started by the currently running test, if any.
+let server: { stop(): void; url: string } | undefined;
+// Stop and forget the stub server after every test so cases never share a listener.
+afterEach(() => {
+  if (server) server.stop();
+  server = undefined;
+});
+/**
+ * Start a throwaway HTTP server on an OS-assigned port (`port: 0`) that answers
+ * every request with `handler`, and register it in `server` so the `afterEach`
+ * hook can stop it.
+ *
+ * A server left registered by an earlier call is stopped first, so calling
+ * `serve` more than once inside a single test cannot leak a listener.
+ *
+ * @param handler - Produces the stubbed response for each incoming request.
+ * @returns The base URL (`http://localhost:<port>`) of the new server.
+ */
+function serve(handler: (req: Request) => Response | Promise<Response>): string {
+  server?.stop(); // release a previously registered stub before replacing it
+  const s = Bun.serve({ port: 0, fetch: handler });
+  server = { stop: () => s.stop(true), url: `http://localhost:${s.port}` };
+  return server.url;
+}
+describe("AnthropicProvider.generate", () => {
+  test("sends the wire request and parses text + tool calls + usage", async () => {
+    let received: any; // JSON body captured by the stub server
+    const url = serve(async (req) => {
+      received = await req.json();
+      return Response.json({
+        id: "msg_1",
+        type: "message",
+        role: "assistant",
+        model: "claude-haiku-4-5",
+        content: [
+          { type: "text", text: "the answer" },
+          { type: "tool_use", id: "t1", name: "add", input: { a: 1, b: 2 } },
+        ],
+        stop_reason: "tool_use",
+        usage: { input_tokens: 10, output_tokens: 5 },
+      });
+    });
+    const p = newAnthropic({ apiKey: "sk-test", baseURL: url });
+    const resp = await p.generate({
+      model: "claude-haiku-4-5",
+      messages: [
+        { role: "system", content: [{ type: "text", text: "be terse" }] },
+        { role: "user", content: [{ type: "text", text: "hi" }] },
+      ],
+    });
+    // Request shaping: the system message is hoisted into the top-level `system` array, max_tokens falls back to 4096, and the first conversation turn is the user message.
+    expect(received.system[0].text).toBe("be terse");
+    expect(received.max_tokens).toBe(4096);
+    expect(received.messages[0].role).toBe("user");
+    // Response parsing: text, the tool call, the stop reason and input-token usage are surfaced on the result.
+    expect(messageText(resp.message)).toBe("the answer");
+    expect(resp.message.toolCalls?.[0]).toEqual({ id: "t1", name: "add", arguments: { a: 1, b: 2 } });
+    expect(resp.stopReason).toBe("tool_use");
+    expect(resp.usage.inputTokens).toBe(10);
+    // providerRaw passthrough: the raw response bytes are attached and decode back to the served JSON.
+    expect(resp.providerRaw).toBeInstanceOf(Uint8Array);
+    const rawDecoded = JSON.parse(new TextDecoder().decode(resp.providerRaw!));
+    expect(rawDecoded.id).toBe("msg_1");
+  });
+  test("maps a 429 to a typed RateLimitError", async () => {
+    const url = serve(
+      () =>
+        new Response(JSON.stringify({ type: "error", error: { type: "rate_limit_error", message: "slow down" } }), {
+          status: 429,
+          headers: { "retry-after": "7" },
+        }),
+    );
+    const p = newAnthropic({ apiKey: "sk-test", baseURL: url });
+    try {
+      await p.generate({ model: "m", messages: [{ role: "user", content: [{ type: "text", text: "x" }] }] });
+      throw new Error("should have thrown"); // only reached if generate() resolved; the instanceof check below then fails the test
+    } catch (e) {
+      expect(e).toBeInstanceOf(RateLimitError);
+      expect((e as RateLimitError).retryAfter).toBe(7); // taken from the retry-after header served above
+    }
+  });
+});
+describe("AnthropicProvider.stream", () => {
+  test("parses an SSE sequence into events that collectStream reassembles", async () => {
+    const sse = [
+      `event: message_start\ndata: ${JSON.stringify({ type: "message_start", message: { model: "claude-haiku-4-5", usage: { input_tokens: 3 } } })}\n\n`,
+      `event: content_block_start\ndata: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "text" } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hello " } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "world" } })}\n\n`,
+      `event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 2 } })}\n\n`,
+      `event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`,
+    ].join("");
+    const url = serve(() => new Response(sse, { headers: { "content-type": "text/event-stream" } }));
+    const p = newAnthropic({ apiKey: "sk-test", baseURL: url });
+    const resp = await collectStream(p.stream({ model: "m", messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }] }));
+    expect(messageText(resp.message)).toBe("hello world"); // the two text deltas are concatenated in order
+    expect(resp.stopReason).toBe("end_turn");
+  });
+  test("picks up input_tokens from message_delta and passes an unknown stop_reason through as-is", async () => {
+    const sse = [
+      `event: message_start\ndata: ${JSON.stringify({ type: "message_start", message: { model: "claude-haiku-4-5", usage: { input_tokens: 3 } } })}\n\n`,
+      `event: content_block_start\ndata: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "text" } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hi" } })}\n\n`,
+      // This message_delta raises input_tokens from 3 to 11 and carries a stop_reason the
+      // adapter does not special-case; both values must reach the final response.
+      `event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "model_context_window_exceeded" }, usage: { input_tokens: 11, output_tokens: 2 } })}\n\n`,
+      `event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`,
+    ].join("");
+    const url = serve(() => new Response(sse, { headers: { "content-type": "text/event-stream" } }));
+    const p = newAnthropic({ apiKey: "sk-test", baseURL: url });
+    const resp = await collectStream(p.stream({ model: "m", messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }] }));
+    expect(resp.usage.inputTokens).toBe(11); // revised by message_delta, not stuck at the message_start value of 3
+    expect(resp.stopReason as string).toBe("model_context_window_exceeded"); // passed through unchanged, not coerced to end_turn
+  });
+  test("accumulates thinking_delta + signature_delta into a thinking ContentPart", async () => {
+    const sse = [
+      `event: message_start\ndata: ${JSON.stringify({ type: "message_start", message: { model: "claude-haiku-4-5", usage: { input_tokens: 3 } } })}\n\n`,
+      `event: content_block_start\ndata: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "thinking" } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "thinking_delta", thinking: "let me " } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "thinking_delta", thinking: "reason" } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "signature_delta", signature: "sig-abc" } })}\n\n`,
+      `event: content_block_stop\ndata: ${JSON.stringify({ type: "content_block_stop", index: 0 })}\n\n`,
+      `event: content_block_start\ndata: ${JSON.stringify({ type: "content_block_start", index: 1, content_block: { type: "text" } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 1, delta: { type: "text_delta", text: "the answer" } })}\n\n`,
+      `event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 2 } })}\n\n`,
+      `event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`,
+    ].join("");
+    const url = serve(() => new Response(sse, { headers: { "content-type": "text/event-stream" } }));
+    const p = newAnthropic({ apiKey: "sk-test", baseURL: url });
+    const resp = await collectStream(p.stream({ model: "m", messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }] }));
+    // The thinking deltas must not leak into the message text...
+    expect(messageText(resp.message)).toBe("the answer");
+    // ...but are reassembled into one thinking ContentPart that carries the signature.
+    const thinking = resp.message.content.find((p) => p.type === ContentType.Thinking);
+    expect(thinking).toBeDefined();
+    expect(thinking?.text).toBe("let me reason");
+    expect(thinking?.signature).toBe("sig-abc");
+  });
+  test("throws a classified error on a mid-stream error frame", async () => {
+    const sse = [
+      `event: message_start\ndata: ${JSON.stringify({ type: "message_start", message: { model: "claude-haiku-4-5", usage: { input_tokens: 3 } } })}\n\n`,
+      `event: content_block_start\ndata: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "text" } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "partial" } })}\n\n`,
+      `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "rate_limit_error", message: "overloaded" } })}\n\n`,
+    ].join("");
+    const url = serve(() => new Response(sse, { headers: { "content-type": "text/event-stream" } }));
+    const p = newAnthropic({ apiKey: "sk-test", baseURL: url });
+    try {
+      for await (const _ev of p.stream({ model: "m", messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }] })) {
+        // Drain events; iteration is expected to throw once the error frame is reached.
+      }
+      throw new Error("should have thrown"); // only reached if the stream ended cleanly; the instanceof checks below then fail the test
+    } catch (e) {
+      expect(e).toBeInstanceOf(APIError);
+      expect(e).toBeInstanceOf(RateLimitError);
+      expect((e as APIError).message).toBe("overloaded");
+    }
+  });
+  test("folds cache token counts from message_start into the final usage", async () => {
+    const sse = [
+      `event: message_start\ndata: ${JSON.stringify({ type: "message_start", message: { model: "claude-haiku-4-5", usage: { input_tokens: 3, cache_creation_input_tokens: 11, cache_read_input_tokens: 22 } } })}\n\n`,
+      `event: content_block_start\ndata: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "text" } })}\n\n`,
+      `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "ok" } })}\n\n`,
+      `event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 2 } })}\n\n`,
+      `event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`,
+    ].join("");
+    const url = serve(() => new Response(sse, { headers: { "content-type": "text/event-stream" } }));
+    const p = newAnthropic({ apiKey: "sk-test", baseURL: url });
+    const resp = await collectStream(p.stream({ model: "m", messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }] }));
+    expect(resp.usage.inputTokens).toBe(3); // from message_start; the message_delta here carries no input_tokens
+    expect(resp.usage.outputTokens).toBe(2); // from message_delta
+    expect(resp.usage.cacheCreationTokens).toBe(11);
+    expect(resp.usage.cacheReadTokens).toBe(22);
+  });
+});

package/src/convert.ts ADDED Viewed

@@ -0,0 +1,376 @@
+/**
+ * Conversion between galdor's shared schema and the Anthropic Messages API
+ * wire shape.
+ *
+ * One direction, {@link buildRequest}, lowers a galdor {@link Request} into the
+ * snake_case JSON the Messages API expects: system messages are hoisted into a
+ * dedicated `system` array, content parts and tool calls become typed blocks,
+ * extended thinking and structured output are expressed in Anthropic's own
+ * terms, and prompt-caching markers are attached to the final block of a span.
+ * The other direction, {@link responseFromWire} (with
+ * {@link extractStructuredOutput} and {@link usageFromWire}), collapses a wire
+ * response back into a galdor {@link Response}.
+ */
+import { InvalidRequestError, type Request, type Response, type ToolChoice } from "@galdor/core/provider";
+import {
+  ContentType,
+  type ContentPart,
+  type ImageContent,
+  type Message,
+  messageText,
+  Role,
+  type StopReason,
+  textPart,
+} from "@galdor/core/schema";
+/**
+ * Default `max_tokens` sent when the caller leaves {@link Request.maxTokens}
+ * unset. The Messages API requires the field, so a concrete value is always
+ * supplied. When reasoning is enabled, {@link buildRequest} may raise the wire
+ * value above this to make room for the thinking budget.
+ */
+export const DEFAULT_MAX_TOKENS = 4096;
+/**
+ * Create the typed error thrown when a request cannot be lowered to the wire
+ * shape — an empty model, an unknown role, or content that cannot be converted.
+ *
+ * The error is tagged with provider `"anthropic"` and a `statusCode` of 0, which
+ * lets callers tell a local build failure apart from a transport error.
+ *
+ * @param message - Human-readable description of what made the request invalid.
+ * @returns The error to throw; this helper does not throw it itself.
+ */
+function invalidRequest(message: string): InvalidRequestError {
+  return new InvalidRequestError({
+    kind: "invalid_request",
+    provider: "anthropic",
+    statusCode: 0,
+    message,
+  });
+}
+// ── Wire types (Anthropic JSON, snake_case) ──────────────────────────────────
+/**
+ * A single Anthropic content block in its on-the-wire JSON form.
+ *
+ * One loosely-typed shape covers every block kind: `type` selects which of the
+ * optional fields are meaningful.
+ */
+interface WireBlock {
+  type: string; // block kind; this file emits/reads "text", "image", "tool_use", "tool_result", "thinking", "redacted_thinking"
+  text?: string; // body of a "text" block
+  source?: { type: string; media_type?: string; data?: string; url?: string }; // "image" payload: a URL reference, or base64 data with its MIME type
+  id?: string; // call id of a "tool_use" block
+  name?: string; // tool name of a "tool_use" block
+  input?: unknown; // arguments of a "tool_use" block
+  thinking?: string; // reasoning text of a "thinking" block
+  signature?: string; // signature accompanying a "thinking" block
+  data?: string; // payload of a "redacted_thinking" block
+  tool_use_id?: string; // id of the call a "tool_result" block answers
+  content?: WireBlock[]; // nested blocks of a "tool_result"
+  is_error?: boolean; // NOTE(review): never set in this file — presumably flags a failed tool_result; confirm
+  cache_control?: { type: string }; // prompt-caching marker, stamped on the last block of a span
+}
+/** One turn in the Anthropic conversation array (role plus content blocks). */
+interface WireMessage {
+  role: string; // buildRequest emits "user" or "assistant"; tool results travel in a "user" turn
+  content: WireBlock[]; // blocks of the turn, in order
+}
+/** Request body of the Anthropic Messages API in its JSON wire shape. */
+export interface MessageRequest {
+  model: string;
+  messages: WireMessage[]; // conversation turns; system messages are carried in `system` instead
+  system?: Array<{ type: string; text: string; cache_control?: { type: string } }>; // hoisted system messages, one text entry each
+  max_tokens: number; // always present; falls back to DEFAULT_MAX_TOKENS
+  temperature?: number; // removed by buildRequest when thinking is enabled
+  top_p?: number; // removed by buildRequest when thinking is enabled
+  stop_sequences?: string[];
+  stream?: boolean;
+  tools?: Array<{ name: string; description?: string; input_schema: unknown }>;
+  tool_choice?: { type: string; name?: string }; // `name` is set only for the forced "tool" choice
+  thinking?: { type: string; budget_tokens: number }; // extended-thinking settings
+  metadata?: { user_id?: string };
+}
+/** Response body of a non-streaming Anthropic Messages API call. */
+export interface MessageResponse {
+  id: string;
+  type: string;
+  role: string;
+  model: string; // copied onto the galdor response by responseFromWire
+  content: WireBlock[]; // text, tool_use, thinking and redacted_thinking blocks are consumed; other kinds are ignored
+  stop_reason: string; // passed through normalizeStopReason
+  stop_sequence?: string; // not read by the conversion code in this file
+  usage: WireUsage;
+}
+/** Token-usage block reported by the Messages API, including cache counters. */
+export interface WireUsage {
+  input_tokens: number; // surfaced as `inputTokens`
+  output_tokens: number; // surfaced as `outputTokens`
+  cache_creation_input_tokens?: number; // surfaced as `cacheCreationTokens`, 0 when absent
+  cache_read_input_tokens?: number; // surfaced as `cacheReadTokens`, 0 when absent
+}
+/**
+ * Pick the name of the tool that backs forced structured output.
+ *
+ * @param name - The caller-supplied schema name, if any.
+ * @returns `name` when it is a non-empty string, otherwise `"structured_output"`.
+ */
+function structuredToolName(name: string | undefined): string {
+  if (!name) return "structured_output";
+  return name;
+}
+/**
+ * Base64-encode raw bytes so they can travel inline in a JSON body.
+ *
+ * @param data - The bytes to encode.
+ * @returns The standard (padded) base64 text of `data`; empty input yields `""`.
+ */
+function toBase64(data: Uint8Array): string {
+  const bytes = Buffer.from(data);
+  return bytes.toString("base64");
+}
+/** The `source` object of an image block, with `undefined` excluded. */
+type WireImageSource = NonNullable<WireBlock["source"]>;
+/**
+ * Produce the `source` of an image block.
+ *
+ * A non-empty URL wins and is sent as a reference; otherwise non-empty inline
+ * bytes are sent base64-encoded together with their MIME type.
+ *
+ * @param img - The image content to lower.
+ * @returns A `url` source or a `base64` source.
+ * @throws {InvalidRequestError} When inline data is present but its MIME type is missing, or
+ * when the part carries neither a URL nor data.
+ */
+function imageToWire(img: ImageContent): WireImageSource {
+  if (img.url) {
+    return { type: "url", url: img.url };
+  }
+  const hasInlineBytes = Boolean(img.data && img.data.length > 0);
+  if (!hasInlineBytes) {
+    throw invalidRequest("anthropic: image part with no url or data");
+  }
+  if (!img.media) {
+    throw invalidRequest("anthropic: inline image missing media (MIME type)");
+  }
+  return { type: "base64", media_type: img.media, data: toBase64(img.data!) };
+}
+/**
+ * Lower galdor content parts to Anthropic content blocks, then stamp the
+ * message's cache-control marker (if any) on the last block produced.
+ *
+ * Thinking and redacted-thinking parts without a signature are silently
+ * omitted, since unsigned reasoning cannot be sent back to the API.
+ *
+ * @param parts - The content parts of one message, in order.
+ * @param cc - The message's cache-control setting, or `undefined` for none.
+ * @returns The wire blocks, in the same order as the parts that produced them.
+ * @throws {InvalidRequestError} On an image part missing its image, or an unsupported part type.
+ */
+function partsToWire(parts: ContentPart[], cc: Message["cacheControl"]): WireBlock[] {
+  const blocks: WireBlock[] = [];
+  for (const part of parts) {
+    if (part.type === ContentType.Text) {
+      blocks.push({ type: "text", text: part.text ?? "" });
+    } else if (part.type === ContentType.Image) {
+      if (!part.image) throw invalidRequest("anthropic: image part with nil image");
+      blocks.push({ type: "image", source: imageToWire(part.image) });
+    } else if (part.type === ContentType.Thinking) {
+      // Only signed reasoning can be replayed.
+      if (part.signature) {
+        blocks.push({ type: "thinking", thinking: part.text ?? "", signature: part.signature });
+      }
+    } else if (part.type === ContentType.RedactedThinking) {
+      // The redacted payload is carried in the part's signature field.
+      if (part.signature) {
+        blocks.push({ type: "redacted_thinking", data: part.signature });
+      }
+    } else {
+      throw invalidRequest(`anthropic: unsupported content type ${part.type}`);
+    }
+  }
+  applyCacheControl(blocks, cc);
+  return blocks;
+}
+/**
+ * Attach a cache-control marker to the final block of a span, in place.
+ *
+ * Does nothing when no marker is configured or the span is empty.
+ *
+ * @param blocks - The span of blocks; its last element is mutated.
+ * @param cc - The cache-control setting whose `type` is copied onto the block.
+ */
+function applyCacheControl(blocks: WireBlock[], cc: Message["cacheControl"]): void {
+  if (!cc || blocks.length === 0) return;
+  const tail = blocks[blocks.length - 1]!;
+  tail.cache_control = { type: cc.type };
+}
+/**
+ * Map a galdor {@link ToolChoice} onto the Anthropic `tool_choice` object.
+ *
+ * `"none"` and `"auto"` keep their names; `"required"` becomes Anthropic's
+ * `"any"`. Every other value, including `undefined`, yields no wire choice.
+ *
+ * @param c - The requested tool choice, if any.
+ * @returns The wire choice, or `undefined` to leave the field unset (provider default).
+ */
+function toolChoiceToWire(c: ToolChoice | undefined): MessageRequest["tool_choice"] {
+  if (c === "none") return { type: "none" };
+  if (c === "required") return { type: "any" };
+  if (c === "auto") return { type: "auto" };
+  return undefined;
+}
+/**
+ * Translate a galdor {@link Request} into an Anthropic {@link MessageRequest}.
+ *
+ * System messages are hoisted into the `system` array; user and assistant
+ * messages become conversation turns. A tool message becomes a `tool_result`
+ * block that is appended to the immediately preceding `user` turn when there is
+ * one (so consecutive tool results share a turn), and otherwise opens a new
+ * `user` turn. Enabling reasoning sets a thinking budget (clamped to a minimum
+ * of 1024), grows `max_tokens` when it would not exceed that budget, and drops
+ * the sampling controls `temperature` and `top_p`. A `json_schema` response
+ * format is realized as a single forced tool call whose input schema is the
+ * requested one; it replaces any caller-supplied `tools` and `toolChoice`.
+ *
+ * @param req - The galdor request to lower.
+ * @param stream - Whether to set the wire `stream` flag.
+ * @returns The fully-formed Anthropic request body.
+ * @throws {InvalidRequestError} When the model is empty, a role is unknown, or content cannot be converted.
+ * @example
+ * const wire = buildRequest({ model: "claude-haiku-4-5", messages }, false);
+ */
+export function buildRequest(req: Request, stream: boolean): MessageRequest {
+  if (req.model === "") throw invalidRequest("anthropic: model is required");
+  let maxTokens = req.maxTokens ?? DEFAULT_MAX_TOKENS; // caller's limit, or the default when unset
+  const out: MessageRequest = { model: req.model, messages: [], max_tokens: maxTokens, stream };
+  if (req.temperature !== undefined) out.temperature = req.temperature;
+  if (req.topP !== undefined) out.top_p = req.topP;
+  if (req.stopSequences) out.stop_sequences = req.stopSequences;
+  if (req.reasoning?.enabled) {
+    let budget = req.reasoning.budget ?? 0;
+    if (budget < 1024) budget = 1024; // clamp to the 1024-token floor
+    if (out.max_tokens <= budget) out.max_tokens = budget + maxTokens; // grow to budget + requested limit when the limit would not exceed the budget
+    out.thinking = { type: "enabled", budget_tokens: budget };
+    delete out.temperature; // incompatible with extended thinking
+    delete out.top_p; // dropped together with temperature
+  }
+  for (const m of req.messages) {
+    switch (m.role) {
+      case Role.System:
+        (out.system ??= []).push({ type: "text", text: messageText(m), ...(m.cacheControl ? { cache_control: { type: m.cacheControl.type } } : {}) });
+        break;
+      case Role.User:
+        out.messages.push({ role: "user", content: partsToWire(m.content, m.cacheControl) });
+        break;
+      case Role.Assistant: {
+        const blocks = partsToWire(m.content, undefined); // marker is applied below, after tool calls are appended
+        for (const tc of m.toolCalls ?? []) {
+          const input = tc.arguments === undefined || tc.arguments === null ? {} : tc.arguments; // missing arguments are sent as an empty object
+          blocks.push({ type: "tool_use", id: tc.id, name: tc.name, input });
+        }
+        applyCacheControl(blocks, m.cacheControl);
+        out.messages.push({ role: "assistant", content: blocks });
+        break;
+      }
+      case Role.Tool: {
+        const block: WireBlock = {
+          type: "tool_result",
+          tool_use_id: m.toolCallId ?? "",
+          content: [{ type: "text", text: messageText(m) }],
+        };
+        const last = out.messages.at(-1);
+        if (last && last.role === "user") last.content.push(block); // fold into the preceding user turn
+        else out.messages.push({ role: "user", content: [block] });
+        break;
+      }
+      default:
+        throw invalidRequest(`anthropic: unknown role ${m.role}`);
+    }
+  }
+  if (req.tools && req.tools.length > 0) {
+    out.tools = req.tools.map((t) => ({ name: t.name, description: t.description, input_schema: t.schema }));
+  }
+  const tc = toolChoiceToWire(req.toolChoice);
+  if (tc) out.tool_choice = tc;
+  // Structured output → one forced tool whose input_schema is the requested schema; overwrites tools and tool_choice set above.
+  if (req.responseFormat?.type === "json_schema") {
+    const name = structuredToolName(req.responseFormat.name);
+    out.tools = [{ name, description: "Respond by calling this tool with the structured result.", input_schema: req.responseFormat.schema }];
+    out.tool_choice = { type: "tool", name };
+  }
+  const uid = req.metadata?.user_id;
+  if (uid) out.metadata = { user_id: uid }; // metadata is only sent when a non-empty user id is present
+  return out;
+}
+/**
+ * Map a wire stop-reason string to a galdor {@link StopReason}.
+ *
+ * The recognised reasons (`end_turn`, `max_tokens`, `tool_use`,
+ * `stop_sequence`, `refusal`) map to themselves. Empty and unknown reasons are
+ * also returned exactly as received (matching the oracle) rather than being
+ * coerced to `end_turn`, so the result is always the input value.
+ *
+ * @param s - The `stop_reason` reported on the wire.
+ * @returns `s`, typed as a {@link StopReason}.
+ */
+function normalizeStopReason(s: string): StopReason {
+  // Known and unknown values alike pass straight through.
+  return s as StopReason;
+}
+/**
+ * Translate a wire usage block into galdor's camelCase usage shape.
+ *
+ * @param u - The usage object reported by the Messages API.
+ * @returns Input, output, cache-creation and cache-read token counts; any
+ * count that is missing (`null` or `undefined`) is reported as zero.
+ */
+export function usageFromWire(u: WireUsage) {
+  const inputTokens = u.input_tokens ?? 0;
+  const outputTokens = u.output_tokens ?? 0;
+  const cacheCreationTokens = u.cache_creation_input_tokens ?? 0;
+  const cacheReadTokens = u.cache_read_input_tokens ?? 0;
+  return { inputTokens, outputTokens, cacheCreationTokens, cacheReadTokens };
+}
+/**
+ * Turn a non-streaming Anthropic {@link MessageResponse} into a galdor {@link Response}.
+ *
+ * Blocks are visited in order: non-empty `text` and `thinking` blocks become
+ * assistant content parts (a thinking part keeps its signature when one is
+ * present), `tool_use` blocks become tool calls, and a `redacted_thinking`
+ * block is kept with its data stored as the part's signature. Empty blocks and
+ * blocks of any other kind are skipped.
+ *
+ * @param r - The decoded wire response.
+ * @param raw - Optional raw response bytes, attached as `providerRaw` when given.
+ * @returns The assembled response with message, stop reason, usage and model.
+ */
+export function responseFromWire(r: MessageResponse, raw?: Uint8Array): Response {
+  const message: Message = { role: Role.Assistant, content: [] };
+  const toolCalls = [];
+  for (const block of r.content) {
+    if (block.type === "text") {
+      if (block.text) message.content.push(textPart(block.text));
+    } else if (block.type === "tool_use") {
+      toolCalls.push({ id: block.id ?? "", name: block.name ?? "", arguments: (block.input ?? {}) as never });
+    } else if (block.type === "thinking") {
+      if (block.thinking) {
+        const signed = block.signature ? { signature: block.signature } : {};
+        message.content.push({ type: ContentType.Thinking, text: block.thinking, ...signed });
+      }
+    } else if (block.type === "redacted_thinking") {
+      if (block.data) message.content.push({ type: ContentType.RedactedThinking, signature: block.data });
+    }
+  }
+  if (toolCalls.length > 0) message.toolCalls = toolCalls;
+  const stopReason = normalizeStopReason(r.stop_reason);
+  const usage = usageFromWire(r.usage);
+  const attachedRaw = raw ? { providerRaw: raw } : {};
+  return { message, stopReason, usage, model: r.model, ...attachedRaw };
+}
+/**
+ * Replace a forced structured-output tool call with plain message text.
+ *
+ * The first tool call whose name matches the structured-output tool has its
+ * arguments serialized to JSON, and that text becomes the whole assistant
+ * message, so callers receive the structured result as text rather than as a
+ * tool invocation. The message on `resp` is replaced in place, so the caller's
+ * object is modified. With no matching call the response is left as it was.
+ *
+ * @param resp - The response produced by {@link responseFromWire}.
+ * @param schemaName - The configured schema name, resolved the same way as in {@link buildRequest}.
+ * @returns The same `resp` object, possibly with its message rewritten.
+ */
+export function extractStructuredOutput(resp: Response, schemaName: string | undefined): Response {
+  const toolName = structuredToolName(schemaName);
+  const match = (resp.message.toolCalls ?? []).find((call) => call.name === toolName);
+  if (match !== undefined) {
+    resp.message = { role: Role.Assistant, content: [textPart(JSON.stringify(match.arguments))] };
+  }
+  return resp;
+}