npm - veryfront - Versions diffs - 0.1.207 → 0.1.209 - Mend

veryfront 0.1.207 → 0.1.209

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/esm/deno.js +1 -1
package/esm/src/provider/runtime-loader.d.ts +47 -0
package/esm/src/provider/runtime-loader.d.ts.map +1 -1
package/esm/src/provider/runtime-loader.js +1386 -70
package/esm/src/provider/types.d.ts +2 -0
package/esm/src/provider/types.d.ts.map +1 -1
package/esm/src/utils/version-constant.d.ts +1 -1
package/esm/src/utils/version-constant.js +1 -1
package/package.json +1 -1
package/src/deno.js +1 -1
package/src/src/provider/runtime-loader.ts +1872 -102
package/src/src/provider/types.ts +2 -0
package/src/src/utils/version-constant.ts +1 -1

package/esm/src/provider/runtime-loader.js CHANGED Viewed

@@ -13,6 +13,9 @@ function getAnthropicMessagesUrl(baseURL) {
 function getOpenAIChatCompletionsUrl(baseURL) {
     return joinUrl(baseURL ?? DEFAULT_OPENAI_BASE_URL, "chat/completions");
 }
+/**
+ * Build the URL of the OpenAI `responses` endpoint, falling back to
+ * DEFAULT_OPENAI_BASE_URL when the caller supplies no (nullish) base URL.
+ */
+function getOpenAIResponsesUrl(baseURL) {
+    const root = baseURL ?? DEFAULT_OPENAI_BASE_URL;
+    return joinUrl(root, "responses");
+}
 function getGoogleGenerateContentUrl(baseURL, modelId) {
     return joinUrl(baseURL ?? DEFAULT_GOOGLE_BASE_URL, `models/${encodeURIComponent(modelId)}:generateContent`);
 }
@@ -77,26 +80,184 @@ function extractGoogleUsageTokens(payload) {
     const promptTokenCount = usageMetadata?.promptTokenCount;
     return typeof promptTokenCount === "number" ? promptTokenCount : undefined;
 }
-async function readErrorMessage(response) {
-    const text = await response.text();
-    return text.trim() || `${response.status} ${response.statusText}`.trim();
+/**
+ * Create an append-only collector for warnings.
+ *
+ * `push(warning)` appends one entry; `drain()` returns a shallow copy of
+ * everything collected so far. Note that `drain()` does not clear the
+ * collector, so repeated calls return the same entries again.
+ */
+function createWarningCollector() {
+    const collected = [];
+    const push = (warning) => {
+        collected.push(warning);
+    };
+    // Hand out a copy so callers cannot mutate the internal list.
+    const drain = () => [...collected];
+    return { push, drain };
+}
+/**
+ * Base class for typed provider errors. The `retryable` flag is the
+ * primary signal for callers (or a retry wrapper) to decide whether to
+ * re-issue the request. `retryAfterMs` is set when the provider gave an
+ * explicit delay hint (Retry-After header, Retry-Info trailer).
+ *
+ * @param {object} options
+ * @param {string} options.message - Human-readable error message.
+ * @param {string} options.provider - Provider key the failing request targeted.
+ * @param {number} options.status - HTTP status of the failing response.
+ * @param {boolean} options.retryable - Whether re-issuing the request may succeed.
+ * @param {number} [options.retryAfterMs] - Delay hint in milliseconds, if any.
+ */
+export class ProviderError extends Error {
+    provider;
+    status;
+    retryable;
+    retryAfterMs;
+    constructor(options) {
+        super(options.message);
+        // `new.target` is the class actually being constructed, so every
+        // subclass reports its own name without overriding the constructor.
+        // (The previous import.meta ponyfill lookup threw when the ponyfill
+        // was not installed and never yielded a class name.)
+        this.name = new.target.name;
+        this.provider = options.provider;
+        this.status = options.status;
+        this.retryable = options.retryable;
+        // Leave the field at its declared `undefined` unless a hint was given.
+        if (options.retryAfterMs !== undefined) {
+            this.retryAfterMs = options.retryAfterMs;
+        }
+    }
+}
+/**
+ * Provider reports it is overloaded (Anthropic 529, OpenAI/Google 503).
+ * buildProviderError() constructs this with `retryable: true` and forwards
+ * `retryAfterMs` when a Retry-After header could be parsed.
+ */
+export class ProviderOverloadedError extends ProviderError {
+}
+/**
+ * Provider is rate limiting this API key (OpenAI/Google 429 with Retry-After).
+ * buildProviderError() uses this for an OpenAI 429 whose error code is not
+ * `insufficient_quota` and for a Google 429 whose error code is not
+ * `RESOURCE_EXHAUSTED`; it is constructed with `retryable: true`.
+ */
+export class ProviderRateLimitError extends ProviderError {
+}
+/**
+ * Provider account quota is exhausted — non-retryable.
+ * buildProviderError() uses this for an OpenAI 429 with error code
+ * `insufficient_quota` and a Google 429 with error code `RESOURCE_EXHAUSTED`;
+ * it is constructed with `retryable: false` and no `retryAfterMs`.
+ */
+export class ProviderQuotaError extends ProviderError {
+}
+/**
+ * Non-retryable 4xx/5xx that doesn't fit another bucket.
+ * This is the fall-through result of buildProviderError() and is also thrown
+ * by requestStream() when a successful response has no stream body.
+ */
+export class ProviderRequestError extends ProviderError {
+}
+/**
+ * Convert a Retry-After header value into a delay in milliseconds.
+ *
+ * Accepts either a non-negative number of seconds or an HTTP-date. Dates in
+ * the past yield 0. Returns `undefined` when the header is missing, empty,
+ * or matches neither form.
+ */
+function parseRetryAfterMs(header) {
+    if (!header) {
+        return undefined;
+    }
+    const seconds = Number(header);
+    const isDelaySeconds = Number.isFinite(seconds) && seconds >= 0;
+    if (isDelaySeconds) {
+        return Math.round(seconds * 1000);
+    }
+    // HTTP-date form (rare in practice for LLM providers).
+    const retryAt = Date.parse(header);
+    return Number.isNaN(retryAt) ? undefined : Math.max(0, retryAt - Date.now());
+}
+/**
+ * Turn a non-2xx provider response into the most specific ProviderError
+ * subclass available. The response body is consumed as text and, when it
+ * parses as JSON, the first string among `error.code`, `error.type` and
+ * `error.status` is used to disambiguate statuses that HTTP alone cannot —
+ * notably OpenAI `insufficient_quota` vs `rate_limit_exceeded`, which both
+ * arrive as 429.
+ *
+ * Classification:
+ *  - anthropic 529, openai/google 503 → ProviderOverloadedError (retryable)
+ *  - openai 429 + `insufficient_quota`, google 429 + `RESOURCE_EXHAUSTED`
+ *    → ProviderQuotaError (non-retryable)
+ *  - any other openai/google 429 → ProviderRateLimitError (retryable)
+ *  - everything else → ProviderRequestError (non-retryable)
+ *
+ * Retryable errors carry `retryAfterMs` when the Retry-After header parses.
+ * The message is the trimmed body, or "<status> <statusText>" when the body
+ * is blank.
+ */
+async function buildProviderError(provider, response) {
+    const bodyText = await response.text();
+    const { status } = response;
+    const message = bodyText.trim() || `${status} ${response.statusText}`.trim();
+    const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
+    // Error bodies are not guaranteed to be JSON; treat parse failures as
+    // "no structured error information".
+    let payload;
+    try {
+        payload = JSON.parse(bodyText);
+    }
+    catch {
+        payload = undefined;
+    }
+    const errorRecord = readRecord(payload?.error);
+    const errorCode = [errorRecord?.code, errorRecord?.type, errorRecord?.status]
+        .find((candidate) => typeof candidate === "string");
+    const retryHint = retryAfterMs !== undefined ? { retryAfterMs } : {};
+    const retryableError = (ErrorClass) => new ErrorClass({ provider, status, message, retryable: true, ...retryHint });
+    const terminalError = (ErrorClass) => new ErrorClass({ provider, status, message, retryable: false });
+    const isOpenAIOrGoogle = provider === "openai" || provider === "google";
+    // Anthropic signals overload with 529 ({ error: { type: "overloaded_error" } }
+    // in the body); OpenAI and Google use 503.
+    const isOverloaded = (provider === "anthropic" && status === 529) ||
+        (isOpenAIOrGoogle && status === 503);
+    if (isOverloaded) {
+        return retryableError(ProviderOverloadedError);
+    }
+    if (isOpenAIOrGoogle && status === 429) {
+        // A 429 splits on the body's error code: a hard quota code is surfaced
+        // as non-retryable so callers don't hot-loop on retries that cannot
+        // succeed; any other 429 is an ordinary rate limit.
+        const quotaCode = provider === "openai" ? "insufficient_quota" : "RESOURCE_EXHAUSTED";
+        return errorCode === quotaCode
+            ? terminalError(ProviderQuotaError)
+            : retryableError(ProviderRateLimitError);
+    }
+    return terminalError(ProviderRequestError);
 }
 async function requestJson(options) {
     const response = await options.fetchImpl(options.url, options.init);
     if (!response.ok) {
-        const message = await readErrorMessage(response);
-        throw new Error(`${options.providerLabel} request failed: ${message}`);
+        const err = await buildProviderError(options.providerKind, response);
+        err.message = `${options.providerLabel} request failed: ${err.message}`;
+        throw err;
     }
     return response.json();
 }
 async function requestStream(options) {
     const response = await options.fetchImpl(options.url, options.init);
     if (!response.ok) {
-        const message = await readErrorMessage(response);
-        throw new Error(`${options.providerLabel} request failed: ${message}`);
+        const err = await buildProviderError(options.providerKind, response);
+        err.message = `${options.providerLabel} request failed: ${err.message}`;
+        throw err;
     }
     if (!response.body) {
-        throw new Error(`${options.providerLabel} request failed: stream body missing`);
+        throw new ProviderRequestError({
+            provider: options.providerKind,
+            status: response.status,
+            message: `${options.providerLabel} request failed: stream body missing`,
+            retryable: false,
+        });
     }
     return response.body;
 }
@@ -133,6 +294,11 @@ function toOpenAICompatibleMessages(prompt) {
                         text += part.text;
                         continue;
                     }
+                    // OpenAI Chat Completions has no roundtrip slot for Anthropic
+                    // thinking blocks — they get dropped on replay. Anthropic-only.
+                    if (part.type === "reasoning") {
+                        continue;
+                    }
                     toolCalls.push({
                         id: part.toolCallId,
                         type: "function",
@@ -222,6 +388,8 @@ function extractAnthropicUsage(payload) {
     }
     const inputTokens = usage.input_tokens;
     const outputTokens = usage.output_tokens;
+    const cacheCreationInputTokens = usage.cache_creation_input_tokens;
+    const cacheReadInputTokens = usage.cache_read_input_tokens;
     return {
         inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
         outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
@@ -229,6 +397,8 @@ function extractAnthropicUsage(payload) {
             ? (typeof inputTokens === "number" ? inputTokens : 0) +
                 (typeof outputTokens === "number" ? outputTokens : 0)
             : undefined,
+        ...(typeof cacheCreationInputTokens === "number" ? { cacheCreationInputTokens } : {}),
+        ...(typeof cacheReadInputTokens === "number" ? { cacheReadInputTokens } : {}),
     };
 }
 function mergeUsage(current, next) {
@@ -240,10 +410,15 @@ function mergeUsage(current, next) {
     }
     const inputTokens = next.inputTokens ?? current.inputTokens;
     const outputTokens = next.outputTokens ?? current.outputTokens;
+    const cacheCreationInputTokens = next.cacheCreationInputTokens ??
+        current.cacheCreationInputTokens;
+    const cacheReadInputTokens = next.cacheReadInputTokens ?? current.cacheReadInputTokens;
     return {
         inputTokens,
         outputTokens,
         totalTokens: (inputTokens ?? 0) + (outputTokens ?? 0),
+        ...(cacheCreationInputTokens !== undefined ? { cacheCreationInputTokens } : {}),
+        ...(cacheReadInputTokens !== undefined ? { cacheReadInputTokens } : {}),
     };
 }
 function normalizeAnthropicToolChoice(toolChoice) {
@@ -258,6 +433,23 @@ function toSnakeCaseRecord(record) {
         value,
     ]));
 }
+/**
+ * Recursively rewrite object keys from camelCase to snake_case (used for
+ * Anthropic mcp_servers, where authorizationToken / toolConfiguration /
+ * allowedTools all need conversion).
+ *
+ * Arrays are mapped element by element, non-null objects are rebuilt with
+ * converted keys, and every other value is returned unchanged.
+ */
+function deepSnakeCase(value) {
+    if (Array.isArray(value)) {
+        return value.map((item) => deepSnakeCase(item));
+    }
+    if (value === null || typeof value !== "object") {
+        return value;
+    }
+    const convertedEntries = Object.entries(value).map(([key, nested]) => {
+        const snakeKey = key.replace(/[A-Z]/g, (upper) => `_${upper.toLowerCase()}`);
+        return [snakeKey, deepSnakeCase(nested)];
+    });
+    return Object.fromEntries(convertedEntries);
+}
 function pushAnthropicUserContent(messages, content) {
     if (content.length === 0) {
         return;
@@ -272,7 +464,23 @@ function pushAnthropicUserContent(messages, content) {
         content,
     });
 }
-function toAnthropicMessages(prompt) {
+/**
+ * Translate a {@link ProviderCacheTtl} into Anthropic's `cache_control` shape.
+ *
+ * - `undefined` / `false` → `undefined` (caching not requested)
+ * - `"1h"` → `{ type: "ephemeral", ttl: "1h" }` (extended 1-hour cache)
+ * - anything else, e.g. `true` / `"5m"` → `{ type: "ephemeral" }` (5-minute default)
+ */
+function resolveAnthropicCacheControlBlock(ttl) {
+    const cachingRequested = ttl !== undefined && ttl !== false;
+    if (!cachingRequested) {
+        return undefined;
+    }
+    return ttl === "1h" ? { type: "ephemeral", ttl: "1h" } : { type: "ephemeral" };
+}
+function toAnthropicMessages(prompt, systemCacheControl) {
     const systemParts = [];
     const messages = [];
     for (const message of prompt) {
@@ -291,11 +499,32 @@ function toAnthropicMessages(prompt) {
             case "assistant":
                 messages.push({
                     role: "assistant",
-                    content: message.content.map((part) => part.type === "text" ? { type: "text", text: part.text } : {
-                        type: "tool_use",
-                        id: part.toolCallId,
-                        name: part.toolName,
-                        input: part.input,
+                    content: message.content.map((part) => {
+                        if (part.type === "text") {
+                            return { type: "text", text: part.text };
+                        }
+                        if (part.type === "reasoning") {
+                            // Redacted thinking blocks roundtrip as the encrypted blob
+                            // form Anthropic gave us. Plain thinking blocks need the
+                            // signature to verify on the server.
+                            if (typeof part.redactedData === "string") {
+                                return {
+                                    type: "redacted_thinking",
+                                    data: part.redactedData,
+                                };
+                            }
+                            return {
+                                type: "thinking",
+                                thinking: part.text ?? "",
+                                ...(typeof part.signature === "string" ? { signature: part.signature } : {}),
+                            };
+                        }
+                        return {
+                            type: "tool_use",
+                            id: part.toolCallId,
+                            name: part.toolName,
+                            input: part.input,
+                        };
                     }),
                 });
                 break;
@@ -308,12 +537,55 @@ function toAnthropicMessages(prompt) {
                 break;
         }
     }
-    return {
-        ...(systemParts.length > 0 ? { system: systemParts.join("\n\n") } : {}),
-        messages,
-    };
+    if (systemParts.length === 0) {
+        return { messages };
+    }
+    const joined = systemParts.join("\n\n");
+    // Cache-controlled system prompts must use the array-of-blocks form so the
+    // breakpoint lands on an individual content block. Callers that don't opt
+    // in keep the legacy raw-string form for backward compatibility.
+    if (systemCacheControl) {
+        return {
+            system: [{
+                    type: "text",
+                    text: joined,
+                    cache_control: systemCacheControl,
+                }],
+            messages,
+        };
+    }
+    return { system: joined, messages };
+}
+/**
+ * Short-name → latest-versioned-type alias map for Anthropic provider tools.
+ *
+ * Anthropic tool types are date-stamped (e.g. `code_execution_20260120`) so
+ * callers either pin a version or get the latest. We accept both: a caller
+ * can pass `anthropic.code_execution` and we map to the latest known version,
+ * or pass `anthropic.code_execution_20250522` and we forward verbatim.
+ *
+ * Versions chosen here are the latest documented releases as of 2026-04-15
+ * — see https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview.
+ * When Anthropic ships newer versions, update this map.
+ */
+const ANTHROPIC_TOOL_VERSION_ALIASES = {
+    code_execution: "code_execution_20260120",
+    computer_use: "computer_20250124",
+    computer: "computer_20250124",
+    text_editor: "text_editor_20250728",
+    bash: "bash_20250124",
+    memory: "memory_20250818",
+    web_search: "web_search_20250305",
+    web_fetch: "web_fetch_20250910",
+};
+/**
+ * Resolve the tool type that follows the `anthropic.` prefix of a tool id.
+ *
+ * @param {string} rawType - Tool type as written by the caller.
+ * @returns {string} `rawType` unchanged when it already ends in an 8-digit
+ *   date stamp or has no alias; otherwise the aliased versioned type.
+ */
+function resolveAnthropicProviderType(rawType) {
+    // Already-versioned types (contain a date stamp suffix) pass through verbatim.
+    if (/_\d{8}$/.test(rawType)) {
+        return rawType;
+    }
+    // Only own keys count as aliases: a bare property read would also find
+    // inherited members such as `constructor` or `toString` and return a
+    // function instead of a type string.
+    return Object.hasOwn(ANTHROPIC_TOOL_VERSION_ALIASES, rawType)
+        ? ANTHROPIC_TOOL_VERSION_ALIASES[rawType]
+        : rawType;
 }
-function toAnthropicTools(tools) {
+function toAnthropicTools(tools, toolsCacheControl) {
     if (!tools) {
         return undefined;
     }
@@ -330,17 +602,31 @@ function toAnthropicTools(tools) {
         if (!tool.id.startsWith("anthropic.")) {
             continue;
         }
-        const providerType = tool.id.slice("anthropic.".length);
-        if (providerType.length === 0) {
+        const rawType = tool.id.slice("anthropic.".length);
+        if (rawType.length === 0) {
             continue;
         }
         normalized.push({
-            type: providerType,
+            type: resolveAnthropicProviderType(rawType),
             name: tool.name,
             ...toSnakeCaseRecord(tool.args),
         });
     }
-    return normalized.length > 0 ? normalized : undefined;
+    if (normalized.length === 0) {
+        return undefined;
+    }
+    // Attach the cache breakpoint to the final tool entry so Anthropic caches
+    // the entire tools block up to and including that definition. Earlier tool
+    // entries are implicitly covered by the same breakpoint per Anthropic's
+    // walk-backward cache lookup behaviour.
+    if (toolsCacheControl) {
+        const lastIndex = normalized.length - 1;
+        normalized[lastIndex] = {
+            ...normalized[lastIndex],
+            cache_control: toolsCacheControl,
+        };
+    }
+    return normalized;
 }
 function createAnthropicRequestHeaders(options) {
     const headers = new Headers(options.extraHeaders);
@@ -397,27 +683,185 @@ function resolveAnthropicMaxTokens(modelId, callerMaxOutputTokens) {
     }
     return requested;
 }
-function buildAnthropicMessagesRequest(modelId, providerName, options, stream) {
-    const { system, messages } = toAnthropicMessages(options.prompt);
+/**
+ * Map the unified reasoning option to an Anthropic `thinking.budget_tokens`
+ * value.
+ *
+ * Returns `undefined` unless `option.enabled` is exactly `true`. An explicit
+ * numeric `budgetTokens` of at least 1024 (the minimum budget this function
+ * will emit) wins; otherwise the effort tier decides:
+ * low → 1024, high → 16384, max → 32768, medium or anything else → 4096.
+ */
+function resolveAnthropicThinkingBudget(option) {
+    if (option?.enabled !== true) {
+        return undefined;
+    }
+    const { budgetTokens, effort } = option;
+    const hasUsableOverride = typeof budgetTokens === "number" && budgetTokens >= 1024;
+    if (hasUsableOverride) {
+        return budgetTokens;
+    }
+    if (effort === "low") {
+        return 1024;
+    }
+    if (effort === "high") {
+        return 16_384;
+    }
+    if (effort === "max") {
+        return 32_768;
+    }
+    // "medium" and any unrecognized effort share the default budget.
+    return 4096;
+}
+function buildAnthropicMessagesRequest(modelId, providerName, options, stream, warnings) {
+    const systemCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.system);
+    const toolsCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.tools);
+    const { system, messages } = toAnthropicMessages(options.prompt, systemCacheControl);
+    const anthropicTools = toAnthropicTools(options.tools, toolsCacheControl);
+    const thinkingBudget = resolveAnthropicThinkingBudget(options.reasoning);
+    const thinkingEnabled = thinkingBudget !== undefined;
+    // Anthropic doesn't support these unified options at all — emit warnings
+    // so callers don't quietly pass values that have zero effect.
+    if (options.presencePenalty !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "presencePenalty",
+            details: "Anthropic Messages API has no equivalent and the value was dropped.",
+        });
+    }
+    if (options.frequencyPenalty !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "frequencyPenalty",
+            details: "Anthropic Messages API has no equivalent and the value was dropped.",
+        });
+    }
+    if (options.seed !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "seed",
+            details: "Anthropic Messages API does not support deterministic seeding.",
+        });
+    }
+    if (options.topK !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "topK",
+            details: "Anthropic Messages API does not expose top_k on this surface.",
+        });
+    }
+    if (options.stopSequences && options.stopSequences.length > 4) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "stopSequences",
+            details: `Anthropic accepts at most 4 stop sequences; ${options.stopSequences.length} were provided and the extras were truncated.`,
+        });
+    }
+    if (thinkingEnabled && options.temperature !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "temperature",
+            details: "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
+        });
+    }
+    if (thinkingEnabled && options.topP !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "topP",
+            details: "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
+        });
+    }
+    if (options.responseFormat && options.responseFormat.type !== "text") {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "anthropic",
+            setting: "responseFormat",
+            details: "Anthropic Messages API does not have a structured-output response_format equivalent. Use a tool with the schema as input_schema instead.",
+        });
+    }
+    // Anthropic requires max_tokens > budget_tokens when thinking is enabled.
+    // Growing max_tokens by the thinking budget preserves the caller's intended
+    // output budget, and we clamp the sum at the model's advertised maximum so
+    // the request never exceeds the API's hard cap.
+    const baseMaxTokens = resolveAnthropicMaxTokens(modelId, options.maxOutputTokens);
+    const maxTokens = thinkingEnabled
+        ? Math.min(baseMaxTokens + (thinkingBudget ?? 0), getAnthropicModelCapabilities(modelId).maxOutputTokens)
+        : baseMaxTokens;
     const body = {
         model: modelId,
         messages,
-        max_tokens: resolveAnthropicMaxTokens(modelId, options.maxOutputTokens),
+        max_tokens: maxTokens,
         ...(stream ? { stream: true } : {}),
         ...(system ? { system } : {}),
-        ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
-        ...(options.topP !== undefined ? { top_p: options.topP } : {}),
+        // Sampling params are mutually exclusive with thinking on Anthropic — the
+        // API rejects the combo outright. Omit them from the body when thinking is
+        // on (callers see thinking's output instead of what they'd have gotten from
+        // custom sampling, which is the documented tradeoff). The drop is not
+        // silent: `unsupported-setting` warnings are pushed earlier in this function.
+        ...(!thinkingEnabled && options.temperature !== undefined
+            ? { temperature: options.temperature }
+            : {}),
+        ...(!thinkingEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
         ...(options.stopSequences && options.stopSequences.length > 0
-            ? { stop_sequences: options.stopSequences }
+            ? { stop_sequences: options.stopSequences.slice(0, 4) }
             : {}),
-        ...(toAnthropicTools(options.tools) ? { tools: toAnthropicTools(options.tools) } : {}),
+        ...(anthropicTools ? { tools: anthropicTools } : {}),
         ...(options.toolChoice !== undefined
             ? { tool_choice: normalizeAnthropicToolChoice(options.toolChoice) }
             : {}),
+        ...(thinkingEnabled ? { thinking: { type: "enabled", budget_tokens: thinkingBudget } } : {}),
+        ...(typeof options.userId === "string" && options.userId.length > 0
+            ? { metadata: { user_id: options.userId } }
+            : {}),
+        ...(options.mcpServers && options.mcpServers.length > 0
+            ? { mcp_servers: deepSnakeCase(options.mcpServers) }
+            : {}),
+        ...(options.anthropicContainer !== undefined ? { container: options.anthropicContainer } : {}),
     };
     Object.assign(body, readProviderOptions(options.providerOptions, "anthropic", providerName));
     return body;
 }
+/**
+ * Best-effort camelCase normalization of a single Anthropic citation
+ * record. Handles the union of fields across web_search_result_location,
+ * web_fetch_result_location, char_location, page_location, and
+ * content_block_location citation kinds — see
+ * https://docs.claude.com/en/docs/build-with-claude/citations
+ *
+ * Returns `undefined` when `raw` is not a record or has no non-empty string
+ * `type`. Otherwise returns `{ type, ... }` containing only those known
+ * fields whose value has the expected primitive type.
+ */
+function normalizeAnthropicCitation(raw) {
+    const record = readRecord(raw);
+    if (!record || typeof record.type !== "string" || !record.type) {
+        return undefined;
+    }
+    // [wire field, normalized field, expected typeof] — order fixes the key
+    // order of the returned object.
+    const fieldMap = [
+        ["cited_text", "citedText", "string"],
+        ["url", "url", "string"],
+        ["title", "title", "string"],
+        ["start_char_index", "startCharIndex", "number"],
+        ["end_char_index", "endCharIndex", "number"],
+        ["start_block_index", "startBlockIndex", "number"],
+        ["end_block_index", "endBlockIndex", "number"],
+        ["start_page_number", "startPageNumber", "number"],
+        ["end_page_number", "endPageNumber", "number"],
+        ["document_index", "documentIndex", "number"],
+        ["document_title", "documentTitle", "string"],
+    ];
+    const citation = { type: record.type };
+    for (const [wireKey, normalizedKey, expectedType] of fieldMap) {
+        const fieldValue = record[wireKey];
+        if (typeof fieldValue === expectedType) {
+            citation[normalizedKey] = fieldValue;
+        }
+    }
+    return citation;
+}
 function buildAnthropicGenerateResult(payload) {
     const record = readRecord(payload);
     const content = Array.isArray(record?.content) ? record.content : [];
@@ -426,7 +870,40 @@ function buildAnthropicGenerateResult(payload) {
         const block = readRecord(blockValue);
         const blockType = typeof block?.type === "string" ? block.type : undefined;
         if (blockType === "text" && typeof block?.text === "string" && block.text.length > 0) {
-            normalized.push({ type: "text", text: block.text });
+            const citationsRaw = Array.isArray(block.citations) ? block.citations : undefined;
+            const citations = citationsRaw
+                ?.flatMap((c) => {
+                const normalizedCitation = normalizeAnthropicCitation(c);
+                return normalizedCitation ? [normalizedCitation] : [];
+            });
+            normalized.push({
+                type: "text",
+                text: block.text,
+                ...(citations && citations.length > 0 ? { citations } : {}),
+            });
+            continue;
+        }
+        // Thinking blocks carry the cleartext trace plus a signature that
+        // Anthropic uses to verify on subsequent turns. Surfacing both lets
+        // callers persist them as `reasoning` content parts and replay on
+        // the next turn so Claude can continue from the same thinking.
+        if (blockType === "thinking") {
+            normalized.push({
+                type: "reasoning",
+                ...(typeof block?.thinking === "string" ? { text: block.thinking } : {}),
+                ...(typeof block?.signature === "string" ? { signature: block.signature } : {}),
+            });
+            continue;
+        }
+        // Redacted thinking blocks arrive when Claude's safety classifier
+        // hides the trace. Pass the encrypted blob through opaquely so the
+        // caller can replay it on the next turn (Anthropic still needs the
+        // blob to verify continuity even though it can't read it).
+        if (blockType === "redacted_thinking" && typeof block?.data === "string") {
+            normalized.push({
+                type: "reasoning",
+                redactedData: block.data,
+            });
             continue;
         }
         if ((blockType === "tool_use" || blockType === "server_tool_use") &&
@@ -537,6 +1014,19 @@ async function* streamAnthropicCompatibleParts(stream) {
                     }
                     continue;
                 }
+                // Redacted thinking blocks arrive as opaque encrypted payloads when
+                // Claude's safety classifier flags the reasoning trace. Surface them
+                // as a zero-length reasoning block so callers know thinking happened
+                // without leaking the (legitimately hidden) contents.
+                if (blockType === "redacted_thinking") {
+                    const reasoningId = `thinking-${index}`;
+                    reasoningBlocks.set(index, { id: reasoningId });
+                    yield {
+                        type: "reasoning-start",
+                        id: reasoningId,
+                    };
+                    continue;
+                }
                 if ((blockType === "tool_use" || blockType === "server_tool_use") &&
                     typeof contentBlock?.id === "string" &&
                     typeof contentBlock?.name === "string") {
@@ -696,10 +1186,13 @@ function extractOpenAIUsage(payload) {
     const inputTokens = usage.prompt_tokens;
     const outputTokens = usage.completion_tokens;
     const totalTokens = usage.total_tokens;
+    const promptTokensDetails = readRecord(usage.prompt_tokens_details);
+    const cachedTokens = promptTokensDetails?.cached_tokens;
     return {
         inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
         outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
         totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
+        ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
     };
 }
 function extractOpenAIContentText(content) {
@@ -742,14 +1235,81 @@ function extractOpenAIToolCalls(message) {
     }
     return normalized;
 }
-function buildOpenAIChatRequest(modelId, providerName, options, stream) {
+/**
+ * Reports whether a model id belongs to OpenAI's o-series reasoning models.
+ *
+ * The id must begin with `o1`, `o3` or `o4`, followed either by a hyphen
+ * (for example `o3-mini`) or by the end of the string. Request builders
+ * use the answer to decide whether sampling parameters must be withheld.
+ *
+ * @param {string} modelId - Model identifier as sent to the API.
+ * @returns {boolean} `true` when the id carries a reasoning-model prefix.
+ */
+function isOpenAIReasoningModel(modelId) {
+    const reasoningPrefix = /^o[134](-|$)/;
+    return reasoningPrefix.exec(modelId) !== null;
+}
+/**
+ * Translates the unified reasoning option into OpenAI's `reasoning_effort`.
+ *
+ * @param {{ enabled?: boolean, effort?: string } | null | undefined} option
+ *   Unified reasoning option; anything other than `enabled === true`
+ *   counts as "reasoning not requested".
+ * @returns {"low" | "medium" | "high" | undefined} `undefined` when
+ *   reasoning is not requested. Otherwise the effort level, with "max"
+ *   folded into "high" and any missing or unknown effort treated as
+ *   "medium".
+ */
+function resolveOpenAIReasoningEffort(option) {
+    const reasoningRequested = Boolean(option) && option.enabled === true;
+    if (!reasoningRequested) {
+        return undefined;
+    }
+    const requested = option.effort;
+    if (requested === "low") {
+        return "low";
+    }
+    if (requested === "high" || requested === "max") {
+        return "high";
+    }
+    // "medium" and every unrecognised or absent effort share the default.
+    return "medium";
+}
+function buildOpenAIChatRequest(modelId, providerName, options, stream, warnings) {
+    const isReasoningModel = isOpenAIReasoningModel(modelId);
+    const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
+    const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
+    // OpenAI Chat Completions has no top_k parameter, so the value cannot
+    // be forwarded. Quietly accepting it would mislead callers into
+    // thinking it took effect.
+    if (options.topK !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "openai",
+            setting: "topK",
+            details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
+        });
+    }
+    // Reasoning models (o1 / o3 / o4) reject sampling params outright. Emit
+    // warnings at build time so callers see *why* the value didn't apply
+    // rather than a 400 from the API.
+    if (reasoningEnabled) {
+        const dropped = [
+            ["temperature", "temperature"],
+            ["topP", "top_p"],
+            ["presencePenalty", "presence_penalty"],
+            ["frequencyPenalty", "frequency_penalty"],
+        ];
+        for (const [key, openaiName] of dropped) {
+            if (options[key] !== undefined) {
+                warnings.push({
+                    type: "unsupported-setting",
+                    provider: "openai",
+                    setting: key,
+                    details: `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
+                });
+            }
+        }
+    }
     const body = {
         model: modelId,
         messages: toOpenAICompatibleMessages(options.prompt),
         ...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
         ...(options.maxOutputTokens !== undefined ? { max_tokens: options.maxOutputTokens } : {}),
-        ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
-        ...(options.topP !== undefined ? { top_p: options.topP } : {}),
+        // OpenAI reasoning models reject temperature / top_p / frequency / presence.
+        // Omit them (warnings were recorded above) so the API doesn't bounce the request.
+        ...(!reasoningEnabled && options.temperature !== undefined
+            ? { temperature: options.temperature }
+            : {}),
+        ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
         ...(options.stopSequences && options.stopSequences.length > 0
             ? { stop: options.stopSequences }
             : {}),
@@ -758,10 +1318,37 @@ function buildOpenAIChatRequest(modelId, providerName, options, stream) {
             : {}),
         ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
         ...(options.seed !== undefined ? { seed: options.seed } : {}),
-        ...(options.presencePenalty !== undefined ? { presence_penalty: options.presencePenalty } : {}),
-        ...(options.frequencyPenalty !== undefined
+        ...(!reasoningEnabled && options.presencePenalty !== undefined
+            ? { presence_penalty: options.presencePenalty }
+            : {}),
+        ...(!reasoningEnabled && options.frequencyPenalty !== undefined
             ? { frequency_penalty: options.frequencyPenalty }
             : {}),
+        ...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
+        ...(typeof options.userId === "string" && options.userId.length > 0
+            ? { user: options.userId }
+            : {}),
+        ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
+        ...(options.parallelToolCalls !== undefined
+            ? { parallel_tool_calls: options.parallelToolCalls }
+            : {}),
+        ...(options.responseFormat && options.responseFormat.type !== "text"
+            ? {
+                response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
+                    type: "json_schema",
+                    json_schema: {
+                        name: options.responseFormat.name,
+                        ...(typeof options.responseFormat.description === "string"
+                            ? { description: options.responseFormat.description }
+                            : {}),
+                        schema: unwrapToolInputSchema(options.responseFormat.schema),
+                        ...(options.responseFormat.strict !== undefined
+                            ? { strict: options.responseFormat.strict }
+                            : {}),
+                    },
+                },
+            }
+            : {}),
     };
     Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
     return body;
@@ -791,10 +1378,14 @@ function extractGoogleUsage(payload) {
     const inputTokens = usage.promptTokenCount;
     const outputTokens = usage.candidatesTokenCount;
     const totalTokens = usage.totalTokenCount;
+    const cachedContentTokenCount = usage.cachedContentTokenCount;
     return {
         inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
         outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
         totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
+        ...(typeof cachedContentTokenCount === "number"
+            ? { cacheReadInputTokens: cachedContentTokenCount }
+            : {}),
     };
 }
 function toGoogleContents(prompt) {
@@ -813,18 +1404,29 @@ function toGoogleContents(prompt) {
                     parts: [{ text: readTextParts(message.content) }],
                 });
                 break;
-            case "assistant":
-                contents.push({
-                    role: "model",
-                    parts: message.content.map((part) => part.type === "text" ? { text: part.text } : {
+            case "assistant": {
+                // Anthropic-only `reasoning` parts have no Gemini equivalent
+                // and are dropped on replay.
+                const parts = [];
+                for (const part of message.content) {
+                    if (part.type === "text") {
+                        parts.push({ text: part.text });
+                        continue;
+                    }
+                    if (part.type === "reasoning") {
+                        continue;
+                    }
+                    parts.push({
                         functionCall: {
                             id: part.toolCallId,
                             name: part.toolName,
                             args: part.input,
                         },
-                    }),
-                });
+                    });
+                }
+                contents.push({ role: "model", parts });
                 break;
+            }
             case "tool":
                 contents.push({
                     role: "user",
@@ -852,14 +1454,37 @@ function toGoogleTools(tools) {
     if (!tools) {
         return undefined;
     }
-    const functionDeclarations = tools.flatMap((tool) => tool.type === "function"
-        ? [{
+    const functionDeclarations = [];
+    const providerEntries = [];
+    for (const tool of tools) {
+        if (tool.type === "function") {
+            functionDeclarations.push({
                 name: tool.name,
                 ...(typeof tool.description === "string" ? { description: tool.description } : {}),
                 parameters: unwrapToolInputSchema(tool.inputSchema),
-            }]
-        : []);
-    return functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined;
+            });
+            continue;
+        }
+        // Gemini provider tools — code_execution, google_search,
+        // google_search_retrieval — each lives in its own tools[] entry
+        // with a single key keyed by the camelCase tool name and an
+        // optional config payload (caller-provided tool.args).
+        if (!tool.id.startsWith("google.")) {
+            continue;
+        }
+        const providerType = tool.id.slice("google.".length);
+        if (providerType.length === 0) {
+            continue;
+        }
+        const camelKey = providerType.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
+        providerEntries.push({ [camelKey]: tool.args ?? {} });
+    }
+    const result = [];
+    if (functionDeclarations.length > 0) {
+        result.push({ functionDeclarations });
+    }
+    result.push(...providerEntries);
+    return result.length > 0 ? result : undefined;
 }
 function unwrapToolInputSchema(inputSchema) {
     if (typeof inputSchema !== "object" || inputSchema === null || Array.isArray(inputSchema)) {
@@ -884,7 +1509,11 @@ function normalizeGoogleToolChoice(toolChoice) {
         }
     }
     const record = readRecord(toolChoice);
-    if (record?.type === "tool" && typeof record.name === "string") {
+    if (!record)
+        return undefined;
+    // Single-tool restriction: { type: "tool", name } — pin to one
+    // function via mode: ANY + allowedFunctionNames: [name].
+    if (record.type === "tool" && typeof record.name === "string") {
         return {
             functionCallingConfig: {
                 mode: "ANY",
@@ -892,9 +1521,66 @@ function normalizeGoogleToolChoice(toolChoice) {
             },
         };
     }
+    // Multi-tool restriction: { type: "tools", names: string[] } — pin
+    // to a subset via mode: ANY + the full allowedFunctionNames array.
+    if (record.type === "tools" && Array.isArray(record.names)) {
+        const names = record.names.filter((n) => typeof n === "string");
+        if (names.length > 0) {
+            return {
+                functionCallingConfig: {
+                    mode: "ANY",
+                    allowedFunctionNames: names,
+                },
+            };
+        }
+    }
+    // Explicit mode forms: { type: "auto" | "none" | "any" }.
+    if (record.type === "auto") {
+        return { functionCallingConfig: { mode: "AUTO" } };
+    }
+    if (record.type === "none") {
+        return { functionCallingConfig: { mode: "NONE" } };
+    }
+    if (record.type === "any" || record.type === "required") {
+        return { functionCallingConfig: { mode: "ANY" } };
+    }
     return undefined;
 }
+/**
+ * Builds Gemini's `thinkingConfig` from the unified reasoning option.
+ *
+ * The result always sets `includeThoughts: true`. `thinkingBudget` is the
+ * caller's numeric `budgetTokens` when one is given; otherwise it is
+ * derived from `effort`: low = 512, medium (also the fallback for a
+ * missing or unknown effort) = 2048, high = 8192, max = -1 (Gemini's
+ * marker for a dynamic, uncapped budget).
+ *
+ * @param {{ enabled?: boolean, effort?: string, budgetTokens?: number } | null | undefined} option
+ *   Unified reasoning option.
+ * @returns {{ includeThoughts: true, thinkingBudget: number } | undefined}
+ *   `undefined` unless `option.enabled === true`.
+ */
+function resolveGoogleThinkingConfig(option) {
+    if (option?.enabled !== true) {
+        return undefined;
+    }
+    // An explicit token budget always wins over the coarse effort level.
+    if (typeof option.budgetTokens === "number") {
+        return { includeThoughts: true, thinkingBudget: option.budgetTokens };
+    }
+    const effort = option.effort;
+    let thinkingBudget = 2048;
+    if (effort === "low") {
+        thinkingBudget = 512;
+    }
+    else if (effort === "high") {
+        thinkingBudget = 8192;
+    }
+    else if (effort === "max") {
+        thinkingBudget = -1;
+    }
+    return { includeThoughts: true, thinkingBudget };
+}
 function buildGoogleGenerationConfig(options) {
+    const thinkingConfig = resolveGoogleThinkingConfig(options.reasoning);
     const config = {
         ...(options.maxOutputTokens !== undefined ? { maxOutputTokens: options.maxOutputTokens } : {}),
         ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
@@ -904,11 +1590,48 @@ function buildGoogleGenerationConfig(options) {
             ? { stopSequences: options.stopSequences }
             : {}),
         ...(options.seed !== undefined ? { seed: options.seed } : {}),
+        ...(thinkingConfig ? { thinkingConfig } : {}),
     };
     return Object.keys(config).length > 0 ? config : undefined;
 }
-function buildGoogleGenerateContentRequest(providerName, options) {
+function buildGoogleGenerateContentRequest(providerName, options, warnings) {
+    // Google generate-content surface doesn't accept presence/frequency
+    // penalties on most current models. Emit warnings and let the request
+    // through without them.
+    if (options.presencePenalty !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "google",
+            setting: "presencePenalty",
+            details: "Gemini generateContent does not accept presencePenalty; the value was dropped.",
+        });
+    }
+    if (options.frequencyPenalty !== undefined) {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "google",
+            setting: "frequencyPenalty",
+            details: "Gemini generateContent does not accept frequencyPenalty; the value was dropped.",
+        });
+    }
+    if (options.responseFormat && options.responseFormat.type !== "text") {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "google",
+            setting: "responseFormat",
+            details: "Gemini uses generationConfig.responseMimeType + responseSchema for structured outputs, which is a separate surface and not yet wired through this option.",
+        });
+    }
     const { systemInstruction, contents } = toGoogleContents(options.prompt);
+    const generationConfig = buildGoogleGenerationConfig(options);
+    // requestLabels wins over userId-derived labels: when callers explicitly
+    // provide a label map, that's the source of truth. Otherwise fall back
+    // to {user_id} derived from the unified userId option.
+    const labels = options.requestLabels && Object.keys(options.requestLabels).length > 0
+        ? options.requestLabels
+        : typeof options.userId === "string" && options.userId.length > 0
+            ? { user_id: options.userId }
+            : undefined;
     const body = {
         contents,
         ...(systemInstruction ? { systemInstruction } : {}),
@@ -916,8 +1639,13 @@ function buildGoogleGenerateContentRequest(providerName, options) {
         ...(normalizeGoogleToolChoice(options.toolChoice)
             ? { toolConfig: normalizeGoogleToolChoice(options.toolChoice) }
             : {}),
-        ...(buildGoogleGenerationConfig(options)
-            ? { generationConfig: buildGoogleGenerationConfig(options) }
+        ...(generationConfig ? { generationConfig } : {}),
+        ...(labels ? { labels } : {}),
+        ...(typeof options.googleCachedContent === "string" && options.googleCachedContent.length > 0
+            ? { cachedContent: options.googleCachedContent }
+            : {}),
+        ...(options.googleSafetySettings && options.googleSafetySettings.length > 0
+            ? { safetySettings: options.googleSafetySettings }
             : {}),
     };
     Object.assign(body, readProviderOptions(options.providerOptions, "google", providerName));
@@ -961,10 +1689,18 @@ function buildGoogleGenerateResult(payload) {
             });
         }
     }
+    // Gemini grounding (google_search / google_search_retrieval) returns
+    // a per-candidate groundingMetadata object with web search queries,
+    // grounding chunks, and citation indices into the response text.
+    // Pass it through opaquely so callers can render footnotes / source
+    // chips / "Search results" UI without parsing the wire shape.
+    const candidate = extractFirstGoogleCandidate(payload);
+    const groundingMetadata = readRecord(candidate?.groundingMetadata);
     return {
         content,
-        finishReason: normalizeGoogleFinishReason(extractFirstGoogleCandidate(payload)?.finishReason),
+        finishReason: normalizeGoogleFinishReason(candidate?.finishReason),
         usage: extractGoogleUsage(payload),
+        ...(groundingMetadata ? { groundingMetadata } : {}),
     };
 }
 async function* streamGoogleCompatibleParts(stream) {
@@ -1238,11 +1974,13 @@ export function createOpenAIModelRuntime(config, modelId) {
         doGenerate(optionsForRuntime) {
             const options = optionsForRuntime;
             const url = getOpenAIChatCompletionsUrl(config.baseURL);
-            const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false);
+            const warnings = createWarningCollector();
+            const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false, warnings);
             return requestJson({
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "openai",
+                providerKind: "openai",
                 init: {
                     method: "POST",
                     headers: createRequestHeaders({
@@ -1253,16 +1991,24 @@ export function createOpenAIModelRuntime(config, modelId) {
                     body: JSON.stringify(body),
                     signal: options.abortSignal,
                 },
-            }).then(buildOpenAIGenerateResult);
+            }).then((payload) => {
+                const drained = warnings.drain();
+                return {
+                    ...buildOpenAIGenerateResult(payload),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
         },
         doStream(optionsForRuntime) {
             const options = optionsForRuntime;
             const url = getOpenAIChatCompletionsUrl(config.baseURL);
-            const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true);
+            const warnings = createWarningCollector();
+            const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true, warnings);
             return requestStream({
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "openai",
+                providerKind: "openai",
                 init: {
                     method: "POST",
                     headers: createRequestHeaders({
@@ -1273,9 +2019,549 @@ export function createOpenAIModelRuntime(config, modelId) {
                     body: JSON.stringify(body),
                     signal: options.abortSignal,
                 },
-            }).then((responseStream) => ({
-                stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
-            }));
+            }).then((responseStream) => {
+                const drained = warnings.drain();
+                return {
+                    stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
+        },
+    };
+}
+/**
+ * Reshapes the unified prompt into the Responses API request pieces.
+ *
+ * Mapping performed per message role:
+ *  - system: non-empty content is collected and joined with a blank line
+ *    into the top-level `instructions` string.
+ *  - user: one `{ role: "user" }` item holding a single `input_text` part.
+ *  - assistant: consecutive text parts are grouped into an assistant
+ *    message of `output_text` parts; each reasoning part becomes a
+ *    standalone `reasoning` item and each remaining part a standalone
+ *    `function_call` item. Text gathered so far is emitted before any
+ *    standalone item so the original ordering is kept.
+ *  - tool: every part becomes a `function_call_output` item.
+ *
+ * @param {Array<object>} prompt - Unified prompt messages.
+ * @returns {{ instructions?: string, input: Array<object> }} `instructions`
+ *   is present only when at least one non-empty system message was seen.
+ */
+function toOpenAIResponsesInput(prompt) {
+    const systemTexts = [];
+    const items = [];
+    for (const message of prompt) {
+        const role = message.role;
+        if (role === "system") {
+            if (message.content.length > 0) {
+                systemTexts.push(message.content);
+            }
+        }
+        else if (role === "user") {
+            const text = readTextParts(message.content);
+            items.push({ role: "user", content: [{ type: "input_text", text }] });
+        }
+        else if (role === "assistant") {
+            let pendingText = [];
+            // Emits the text gathered so far as one assistant message.
+            const flushPendingText = () => {
+                if (pendingText.length === 0) {
+                    return;
+                }
+                items.push({ role: "assistant", content: pendingText });
+                pendingText = [];
+            };
+            for (const part of message.content) {
+                if (part.type === "text") {
+                    pendingText.push({ type: "output_text", text: part.text });
+                    continue;
+                }
+                // Reasoning and tool-call parts are top-level items, so any
+                // text that preceded them has to be written out first.
+                flushPendingText();
+                if (part.type === "reasoning") {
+                    const hasText = typeof part.text === "string" && part.text.length > 0;
+                    const summary = hasText ? [{ type: "summary_text", text: part.text }] : [];
+                    items.push({
+                        type: "reasoning",
+                        ...(typeof part.signature === "string" ? { encrypted_content: part.signature } : {}),
+                        summary,
+                    });
+                    continue;
+                }
+                items.push({
+                    type: "function_call",
+                    call_id: part.toolCallId,
+                    name: part.toolName,
+                    arguments: stringifyJsonValue(part.input),
+                });
+            }
+            flushPendingText();
+        }
+        else if (role === "tool") {
+            for (const part of message.content) {
+                items.push({
+                    type: "function_call_output",
+                    call_id: part.toolCallId,
+                    output: stringifyJsonValue(part.output.value),
+                });
+            }
+        }
+    }
+    const instructionsField = systemTexts.length > 0
+        ? { instructions: systemTexts.join("\n\n") }
+        : {};
+    return { ...instructionsField, input: items };
+}
+/**
+ * Converts unified tool definitions into Responses API `tools` entries.
+ *
+ * Function tools are emitted flat, as `{ type: "function", name,
+ * description?, parameters }`. Any other tool is kept only when its id
+ * starts with "openai." and has a non-empty remainder; that remainder
+ * becomes the entry's `type` and the tool's args are spread in after
+ * snake-casing. Every other tool is ignored.
+ *
+ * @param {Array<object> | undefined | null} tools - Unified tool list.
+ * @returns {Array<object> | undefined} `undefined` when no tools were
+ *   given or none of them produced an entry.
+ */
+function toOpenAIResponsesTools(tools) {
+    if (!tools) {
+        return undefined;
+    }
+    const providerPrefix = "openai.";
+    const entries = [];
+    for (const tool of tools) {
+        if (tool.type === "function") {
+            const entry = { type: "function", name: tool.name };
+            if (typeof tool.description === "string") {
+                entry.description = tool.description;
+            }
+            entry.parameters = unwrapToolInputSchema(tool.inputSchema);
+            entries.push(entry);
+        }
+        else if (tool.id.startsWith(providerPrefix)) {
+            const nativeType = tool.id.slice(providerPrefix.length);
+            if (nativeType.length > 0) {
+                entries.push({ type: nativeType, ...toSnakeCaseRecord(tool.args) });
+            }
+        }
+    }
+    return entries.length === 0 ? undefined : entries;
+}
+/**
+ * Assembles the JSON body for an OpenAI Responses API call.
+ *
+ * Settings that cannot be forwarded are reported through `warnings`
+ * instead of being dropped without notice:
+ *  - `topK`, `seed` and `stopSequences` are never sent by this builder.
+ *  - `presencePenalty` / `frequencyPenalty` are never sent either; the
+ *    warning text depends on whether reasoning is active.
+ *  - `temperature` / `topP` are sent only when reasoning is inactive.
+ *
+ * Reasoning counts as active when the model id is an o-series reasoning
+ * model or when the unified reasoning option resolves to an effort.
+ *
+ * @param {string} modelId - Model id placed in the `model` field.
+ * @param {string} providerName - Provider alias used to look up provider options.
+ * @param {object} options - Unified call options (prompt, tools, sampling, ...).
+ * @param {boolean} stream - When truthy, `stream: true` is added to the body.
+ * @param {{ push(warning: object): void }} warnings - Collector for dropped settings.
+ * @returns {object} Request body; provider options are merged in last and
+ *   therefore override the fields computed here.
+ */
+function buildOpenAIResponsesRequest(modelId, providerName, options, stream, warnings) {
+    const isReasoningModel = isOpenAIReasoningModel(modelId);
+    const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
+    const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
+    const warnUnsupported = (setting, details) => {
+        warnings.push({
+            type: "unsupported-setting",
+            provider: "openai",
+            setting,
+            details,
+        });
+    };
+    if (options.topK !== undefined) {
+        warnUnsupported("topK", "OpenAI Responses API does not expose top_k; the value was dropped.");
+    }
+    if (reasoningEnabled) {
+        // Same param-sanitization rules as Chat Completions: reasoning
+        // models reject sampling params. Drop with a warning.
+        const dropped = [
+            ["temperature", "temperature"],
+            ["topP", "top_p"],
+            ["presencePenalty", "presence_penalty"],
+            ["frequencyPenalty", "frequency_penalty"],
+        ];
+        for (const [key, openaiName] of dropped) {
+            if (options[key] !== undefined) {
+                warnUnsupported(key, `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`);
+            }
+        }
+    }
+    else {
+        // The body below never carries the penalties, even for
+        // non-reasoning models, so tell the caller they had no effect.
+        const neverSent = [
+            ["presencePenalty", "presence_penalty"],
+            ["frequencyPenalty", "frequency_penalty"],
+        ];
+        for (const [key, openaiName] of neverSent) {
+            if (options[key] !== undefined) {
+                warnUnsupported(key, `OpenAI Responses API does not accept ${openaiName}; the value was dropped.`);
+            }
+        }
+    }
+    // seed and stop sequences are likewise absent from the body built
+    // below, regardless of reasoning mode.
+    if (options.seed !== undefined) {
+        warnUnsupported("seed", "OpenAI Responses API does not accept seed; the value was dropped.");
+    }
+    if (options.stopSequences && options.stopSequences.length > 0) {
+        warnUnsupported("stopSequences", "OpenAI Responses API does not accept stop sequences; the value was dropped.");
+    }
+    const { instructions, input } = toOpenAIResponsesInput(options.prompt);
+    const responsesTools = toOpenAIResponsesTools(options.tools);
+    const body = {
+        model: modelId,
+        input,
+        ...(instructions !== undefined ? { instructions } : {}),
+        ...(stream ? { stream: true } : {}),
+        ...(options.maxOutputTokens !== undefined
+            ? { max_output_tokens: options.maxOutputTokens }
+            : {}),
+        ...(!reasoningEnabled && options.temperature !== undefined
+            ? { temperature: options.temperature }
+            : {}),
+        ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
+        ...(responsesTools ? { tools: responsesTools } : {}),
+        ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
+        // The Responses API surfaces reasoning effort + summary verbosity
+        // in a structured `reasoning` object instead of a flat field. We
+        // request "auto" summary so callers see structured summary parts
+        // without having to opt into them per request.
+        ...(reasoningEffort !== undefined
+            ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
+            : {}),
+        ...(typeof options.userId === "string" && options.userId.length > 0
+            ? { user: options.userId }
+            : {}),
+        ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
+        ...(options.parallelToolCalls !== undefined
+            ? { parallel_tool_calls: options.parallelToolCalls }
+            : {}),
+        // Responses API uses `text.format` instead of Chat Completions'
+        // `response_format`. The shape is similar but nested under `text`.
+        ...(options.responseFormat && options.responseFormat.type !== "text"
+            ? {
+                text: {
+                    format: options.responseFormat.type === "json" ? { type: "json_object" } : {
+                        type: "json_schema",
+                        name: options.responseFormat.name,
+                        ...(typeof options.responseFormat.description === "string"
+                            ? { description: options.responseFormat.description }
+                            : {}),
+                        schema: unwrapToolInputSchema(options.responseFormat.schema),
+                        ...(options.responseFormat.strict !== undefined
+                            ? { strict: options.responseFormat.strict }
+                            : {}),
+                    },
+                },
+            }
+            : {}),
+    };
+    // Provider options are applied last so callers can override any field.
+    Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
+    return body;
+}
+/**
+ * Reads token usage from a Responses API payload.
+ *
+ * Usage is looked up under `payload.response.usage` first and then under
+ * `payload.usage`. The Responses API names the counters `input_tokens` /
+ * `output_tokens`; cached input tokens are read from
+ * `input_tokens_details.cached_tokens`. Reasoning-token details are not
+ * surfaced by this function.
+ *
+ * @param {unknown} payload - Parsed response body or stream event.
+ * @returns {{ inputTokens?: number, outputTokens?: number, totalTokens?: number, cacheReadInputTokens?: number } | undefined}
+ *   `undefined` when no usage record is found. `totalTokens` falls back
+ *   to the sum of the available input/output counts when the payload does
+ *   not report a numeric total.
+ */
+function extractOpenAIResponsesUsage(payload) {
+    const root = readRecord(payload);
+    const nested = readRecord(root?.response);
+    const usage = readRecord(nested?.usage) ?? readRecord(root?.usage);
+    if (!usage) {
+        return undefined;
+    }
+    const numberOrUndefined = (value) => (typeof value === "number" ? value : undefined);
+    const inputTokens = numberOrUndefined(usage.input_tokens);
+    const outputTokens = numberOrUndefined(usage.output_tokens);
+    let totalTokens = numberOrUndefined(usage.total_tokens);
+    const hasAnyCount = inputTokens !== undefined || outputTokens !== undefined;
+    if (totalTokens === undefined && hasAnyCount) {
+        totalTokens = (inputTokens ?? 0) + (outputTokens ?? 0);
+    }
+    const result = { inputTokens, outputTokens, totalTokens };
+    const cachedTokens = readRecord(usage.input_tokens_details)?.cached_tokens;
+    if (typeof cachedTokens === "number") {
+        result.cacheReadInputTokens = cachedTokens;
+    }
+    return result;
+}
+/**
+ * Maps a Responses API status string onto the unified finish reason.
+ *
+ * @param {unknown} raw - Status value taken from the response payload.
+ * @returns {{ unified: string, raw: string } | null} `null` when `raw` is
+ *   not a string or the response is still "in_progress"; otherwise an
+ *   object pairing the unified reason with the original status.
+ */
+function normalizeOpenAIResponsesFinishReason(raw) {
+    if (typeof raw !== "string") {
+        return null;
+    }
+    switch (raw) {
+        case "completed":
+            return { unified: "stop", raw };
+        case "incomplete":
+            return { unified: "length", raw };
+        case "failed":
+            return { unified: "error", raw };
+        case "in_progress":
+            return null;
+        default:
+            // Keep the return shape uniform: returning the bare string here
+            // would break callers that read `.unified` / `.raw`.
+            // NOTE(review): assumes "other" is a member of the unified
+            // finish-reason set; confirm against the provider types.
+            return { unified: "other", raw };
+    }
+}
+function buildOpenAIResponsesGenerateResult(payload) {
+    const record = readRecord(payload);
+    const output = Array.isArray(record?.output) ? record.output : [];
+    const content = [];
+    for (const item of output) {
+        const itemRecord = readRecord(item);
+        const itemType = typeof itemRecord?.type === "string" ? itemRecord.type : undefined;
+        if (itemType === "message" && Array.isArray(itemRecord?.content)) {
+            // A message item bundles one or more output_text parts. Concat
+            // their texts into a single text content entry.
+            let text = "";
+            for (const part of itemRecord.content) {
+                const p = readRecord(part);
+                if (typeof p?.type === "string" && p.type === "output_text" && typeof p.text === "string") {
+                    text += p.text;
+                }
+            }
+            if (text.length > 0) {
+                content.push({ type: "text", text });
+            }
+            continue;
+        }
+        if (itemType === "function_call") {
+            content.push({
+                type: "tool-call",
+                toolCallId: typeof itemRecord?.call_id === "string"
+                    ? itemRecord.call_id
+                    : (typeof itemRecord?.id === "string" ? itemRecord.id : ""),
+                toolName: typeof itemRecord?.name === "string" ? itemRecord.name : "",
+                input: typeof itemRecord?.arguments === "string"
+                    ? itemRecord.arguments
+                    : stringifyJsonValue(itemRecord?.arguments ?? {}),
+            });
+            continue;
+        }
+        if (itemType === "reasoning") {
+            const summary = Array.isArray(itemRecord?.summary) ? itemRecord.summary : [];
+            const summaries = [];
+            for (const s of summary) {
+                const sr = readRecord(s);
+                if (typeof sr?.text === "string" && sr.text.length > 0) {
+                    summaries.push({
+                        ...(typeof sr?.id === "string" ? { id: sr.id } : {}),
+                        text: sr.text,
+                    });
+                }
+            }
+            content.push({
+                type: "reasoning",
+                ...(summaries.length > 0 ? { summaries } : {}),
+                ...(typeof itemRecord?.encrypted_content === "string"
+                    ? { signature: itemRecord.encrypted_content }
+                    : {}),
+            });
+            continue;
+        }
+    }
+    return {
+        content,
+        finishReason: normalizeOpenAIResponsesFinishReason(record?.status),
+        usage: extractOpenAIResponsesUsage(payload),
+    };
+}
+/**
+ * Parse the Responses API streaming event grammar into the same UI part
+ * shapes the existing OpenAI / Anthropic / Google streams emit. The
+ * Responses API uses a strict event-typed protocol — every event has a
+ * `type` field naming the lifecycle phase — instead of the loose
+ * `delta`-based shape Chat Completions uses.
+ *
+ * Events handled (the `[DONE]` sentinel, events without a string `type`,
+ * and any other event type are skipped):
+ * - `response.output_item.added`: registers `function_call` and
+ *   `reasoning` items by their item id.
+ * - `response.output_text.delta`: yields `text-delta` for non-empty deltas.
+ * - `response.reasoning_summary_text.delta`: yields `reasoning-start` once
+ *   per registered item, then `reasoning-delta`.
+ * - `response.function_call_arguments.delta`: yields `tool-input-start`
+ *   once per registered call, then `tool-input-delta`.
+ * - `response.output_item.done`: yields `reasoning-end` (only if a start
+ *   was emitted) or `tool-call` with the accumulated argument text.
+ * - `response.completed` / `response.failed` / `response.incomplete`:
+ *   record the finish reason and usage.
+ *
+ * Exactly one `finish` part is yielded last. Its `finishReason` stays
+ * `null` when no terminal event set it, and `usage` is included only when
+ * one was extracted.
+ *
+ * NOTE(review): whatever `parseSseChunk` leaves in `remainder` when the
+ * stream ends is never parsed, and the decoder is never flushed — confirm
+ * a final event without a trailing delimiter cannot be lost.
+ * NOTE(review): the `tool-call` input comes only from accumulated deltas;
+ * `item.arguments` on the `output_item.done` event is not consulted, so a
+ * call that streamed no deltas is emitted with an empty string — confirm
+ * this is intended.
+ *
+ * @param stream Async-iterable of chunks accepted by `TextDecoder.decode`.
+ * @yields Stream parts as described above.
+ */
+async function* streamOpenAIResponsesParts(stream) {
+    const decoder = new TextDecoder();
+    let buffer = ""; // decoded text not yet consumed by parseSseChunk
+    const reasoningBlocks = new Map(); // item id -> { id, emittedStart }
+    const functionCalls = new Map(); // item id -> { id, toolCallId, name, arguments }
+    const startedToolCalls = new Set(); // item ids that already emitted tool-input-start
+    let finishReason = null;
+    let usage;
+    let reasoningCounter = 0; // source of the locally generated `reasoning-N` part ids
+    for await (const chunk of stream) {
+        buffer += decoder.decode(chunk, { stream: true });
+        const parsed = parseSseChunk(buffer);
+        buffer = parsed.remainder; // carry the unparsed tail into the next chunk
+        for (const event of parsed.events) {
+            if (event === "[DONE]")
+                continue;
+            const record = readRecord(event);
+            const type = typeof record?.type === "string" ? record.type : undefined;
+            if (!type)
+                continue;
+            // response.output_item.added: a new output item begins. Track
+            // function_call items so their argument deltas can be attributed,
+            // and reasoning items so summary deltas can group correctly.
+            if (type === "response.output_item.added") {
+                const item = readRecord(record?.item);
+                const itemType = typeof item?.type === "string" ? item.type : undefined;
+                const itemId = typeof item?.id === "string" ? item.id : undefined;
+                if (itemType === "function_call" && itemId) {
+                    const callId = typeof item?.call_id === "string" ? item.call_id : itemId; // fall back to the item id
+                    const name = typeof item?.name === "string" ? item.name : "";
+                    functionCalls.set(itemId, {
+                        id: itemId,
+                        toolCallId: callId,
+                        name,
+                        arguments: "",
+                    });
+                }
+                if (itemType === "reasoning" && itemId) {
+                    reasoningBlocks.set(itemId, {
+                        id: `reasoning-${reasoningCounter++}`,
+                        emittedStart: false,
+                    });
+                }
+                continue;
+            }
+            // response.output_text.delta: text chunk for a message item.
+            if (type === "response.output_text.delta" && typeof record?.delta === "string") {
+                if (record.delta.length > 0) {
+                    yield { type: "text-delta", delta: record.delta };
+                }
+                continue;
+            }
+            // response.reasoning_summary_text.delta: reasoning summary text
+            // chunk. The first delta on an item lazily emits the
+            // reasoning-start event so callers can group deltas into a part.
+            if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
+                const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
+                const state = itemId ? reasoningBlocks.get(itemId) : undefined; // deltas for unregistered items are dropped
+                if (state && record.delta.length > 0) {
+                    if (!state.emittedStart) {
+                        yield { type: "reasoning-start", id: state.id };
+                        state.emittedStart = true;
+                    }
+                    yield { type: "reasoning-delta", id: state.id, delta: record.delta };
+                }
+                continue;
+            }
+            // response.function_call_arguments.delta: tool call argument
+            // chunk. The first delta lazily emits tool-input-start.
+            if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
+                const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
+                const state = itemId ? functionCalls.get(itemId) : undefined; // deltas for unregistered calls are dropped
+                if (state && record.delta.length > 0) {
+                    if (!startedToolCalls.has(state.id)) {
+                        yield {
+                            type: "tool-input-start",
+                            id: state.toolCallId,
+                            toolName: state.name,
+                        };
+                        startedToolCalls.add(state.id);
+                    }
+                    state.arguments += record.delta; // accumulated text becomes the tool-call input
+                    yield {
+                        type: "tool-input-delta",
+                        id: state.toolCallId,
+                        delta: record.delta,
+                    };
+                }
+                continue;
+            }
+            // response.output_item.done: an item has finished emitting deltas.
+            // Close any reasoning or function-call streams that were open.
+            if (type === "response.output_item.done") {
+                const item = readRecord(record?.item);
+                const itemType = typeof item?.type === "string" ? item.type : undefined;
+                const itemId = typeof item?.id === "string" ? item.id : undefined;
+                if (itemType === "reasoning" && itemId) {
+                    const state = reasoningBlocks.get(itemId);
+                    if (state?.emittedStart) {
+                        yield { type: "reasoning-end", id: state.id };
+                    }
+                    reasoningBlocks.delete(itemId); // prevents a second reasoning-end at end-of-stream
+                }
+                if (itemType === "function_call" && itemId) {
+                    const state = functionCalls.get(itemId);
+                    if (state) {
+                        yield {
+                            type: "tool-call",
+                            toolCallId: state.toolCallId,
+                            toolName: state.name,
+                            input: state.arguments,
+                        };
+                    }
+                    functionCalls.delete(itemId);
+                }
+                continue;
+            }
+            // response.completed: terminal event with the final response object
+            // (status + usage). Capture both for the final finish part.
+            if (type === "response.completed") {
+                usage = extractOpenAIResponsesUsage(record) ?? usage; // keep any earlier usage if none is present here
+                const responseRecord = readRecord(record?.response);
+                finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
+                continue;
+            }
+            if (type === "response.failed" || type === "response.incomplete") {
+                const responseRecord = readRecord(record?.response);
+                finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
+                    (type === "response.failed"
+                        ? { unified: "error", raw: "failed" }
+                        : { unified: "length", raw: "incomplete" }); // fallback derived from the event type itself
+                usage = extractOpenAIResponsesUsage(record) ?? usage;
+                continue;
+            }
+        }
+    }
+    // Close any reasoning streams still open at end-of-stream (defensive
+    // — a clean Responses API stream always closes them via output_item.done).
+    for (const state of reasoningBlocks.values()) {
+        if (state.emittedStart) {
+            yield { type: "reasoning-end", id: state.id };
+        }
+    }
+    yield {
+        type: "finish",
+        finishReason,
+        ...(usage ? { usage } : {}),
+    };
+}
+export function createOpenAIResponsesRuntime(config, modelId) {
+    const fetchImpl = config.fetch ?? globalThis.fetch;
+    return {
+        provider: config.name ?? "openai",
+        modelId,
+        specificationVersion: "v3",
+        supportedUrls: {},
+        doGenerate(optionsForRuntime) {
+            const options = optionsForRuntime;
+            const url = getOpenAIResponsesUrl(config.baseURL);
+            const warnings = createWarningCollector();
+            const body = buildOpenAIResponsesRequest(modelId, config.name ?? "openai", options, false, warnings);
+            return requestJson({
+                url,
+                fetchImpl,
+                providerLabel: config.name ?? "openai",
+                providerKind: "openai",
+                init: {
+                    method: "POST",
+                    headers: createRequestHeaders({
+                        apiKeyHeaderName: "authorization",
+                        apiKey: `Bearer ${config.apiKey}`,
+                        extraHeaders: options.headers,
+                    }),
+                    body: JSON.stringify(body),
+                    signal: options.abortSignal,
+                },
+            }).then((payload) => {
+                const drained = warnings.drain();
+                return {
+                    ...buildOpenAIResponsesGenerateResult(payload),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
+        },
+        doStream(optionsForRuntime) {
+            const options = optionsForRuntime;
+            const url = getOpenAIResponsesUrl(config.baseURL);
+            const warnings = createWarningCollector();
+            const body = buildOpenAIResponsesRequest(modelId, config.name ?? "openai", options, true, warnings);
+            return requestStream({
+                url,
+                fetchImpl,
+                providerLabel: config.name ?? "openai",
+                providerKind: "openai",
+                init: {
+                    method: "POST",
+                    headers: createRequestHeaders({
+                        apiKeyHeaderName: "authorization",
+                        apiKey: `Bearer ${config.apiKey}`,
+                        extraHeaders: options.headers,
+                    }),
+                    body: JSON.stringify(body),
+                    signal: options.abortSignal,
+                },
+            }).then((responseStream) => {
+                const drained = warnings.drain();
+                return {
+                    stream: ReadableStream.from(streamOpenAIResponsesParts(responseStream)),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
         },
     };
 }
@@ -1289,11 +2575,13 @@ export function createAnthropicModelRuntime(config, modelId) {
         doGenerate(optionsForRuntime) {
             const options = optionsForRuntime;
             const url = getAnthropicMessagesUrl(config.baseURL);
-            const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, false);
+            const warnings = createWarningCollector();
+            const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, false, warnings);
             return requestJson({
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "anthropic",
+                providerKind: "anthropic",
                 init: {
                     method: "POST",
                     headers: createAnthropicRequestHeaders({
@@ -1304,16 +2592,24 @@ export function createAnthropicModelRuntime(config, modelId) {
                     body: JSON.stringify(body),
                     signal: options.abortSignal,
                 },
-            }).then(buildAnthropicGenerateResult);
+            }).then((payload) => {
+                const drained = warnings.drain();
+                return {
+                    ...buildAnthropicGenerateResult(payload),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
         },
         doStream(optionsForRuntime) {
             const options = optionsForRuntime;
             const url = getAnthropicMessagesUrl(config.baseURL);
-            const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, true);
+            const warnings = createWarningCollector();
+            const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, true, warnings);
             return requestStream({
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "anthropic",
+                providerKind: "anthropic",
                 init: {
                     method: "POST",
                     headers: createAnthropicRequestHeaders({
@@ -1324,9 +2620,13 @@ export function createAnthropicModelRuntime(config, modelId) {
                     body: JSON.stringify(body),
                     signal: options.abortSignal,
                 },
-            }).then((responseStream) => ({
-                stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
-            }));
+            }).then((responseStream) => {
+                const drained = warnings.drain();
+                return {
+                    stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
         },
     };
 }
@@ -1340,11 +2640,13 @@ export function createGoogleModelRuntime(config, modelId) {
         doGenerate(optionsForRuntime) {
             const options = optionsForRuntime;
             const url = getGoogleGenerateContentUrl(config.baseURL, modelId);
-            const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
+            const warnings = createWarningCollector();
+            const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
             return requestJson({
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "google",
+                providerKind: "google",
                 init: {
                     method: "POST",
                     headers: createRequestHeaders({
@@ -1355,16 +2657,24 @@ export function createGoogleModelRuntime(config, modelId) {
                     body: JSON.stringify(body),
                     signal: options.abortSignal,
                 },
-            }).then(buildGoogleGenerateResult);
+            }).then((payload) => {
+                const drained = warnings.drain();
+                return {
+                    ...buildGoogleGenerateResult(payload),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
         },
         doStream(optionsForRuntime) {
             const options = optionsForRuntime;
             const url = getGoogleStreamGenerateContentUrl(config.baseURL, modelId);
-            const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
+            const warnings = createWarningCollector();
+            const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
             return requestStream({
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "google",
+                providerKind: "google",
                 init: {
                     method: "POST",
                     headers: createRequestHeaders({
@@ -1375,9 +2685,13 @@ export function createGoogleModelRuntime(config, modelId) {
                     body: JSON.stringify(body),
                     signal: options.abortSignal,
                 },
-            }).then((responseStream) => ({
-                stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
-            }));
+            }).then((responseStream) => {
+                const drained = warnings.drain();
+                return {
+                    stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
+                    ...(drained.length > 0 ? { warnings: drained } : {}),
+                };
+            });
         },
     };
 }
@@ -1400,6 +2714,7 @@ export function createOpenAIEmbeddingRuntime(config, modelId) {
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "openai",
+                providerKind: "openai",
                 init: {
                     method: "POST",
                     headers: {
@@ -1442,6 +2757,7 @@ export function createGoogleEmbeddingRuntime(config, modelId) {
                 url,
                 fetchImpl,
                 providerLabel: config.name ?? "google",
+                providerKind: "google",
                 init: {
                     method: "POST",
                     headers: {