npm - @prometheus-ai/ai - Versions diffs - 0.5.4 → 0.5.8 - Mend

@prometheus-ai/ai 0.5.4 → 0.5.8

This diff shows the changes between two publicly available versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (377)

package/dist/types/auth-broker/remote-store.d.ts +2 -1
package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
package/dist/types/auth-gateway/server.d.ts +19 -0
package/dist/types/auth-gateway/types.d.ts +9 -3
package/dist/types/auth-retry.d.ts +119 -0
package/dist/types/auth-storage.d.ts +217 -8
package/dist/types/errors.d.ts +24 -0
package/dist/types/index.d.ts +5 -9
package/dist/types/provider-details.d.ts +1 -1
package/dist/types/providers/amazon-bedrock.d.ts +12 -6
package/dist/types/providers/anthropic-client.d.ts +10 -3
package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
package/dist/types/providers/anthropic-wire.d.ts +3 -3
package/dist/types/providers/anthropic.d.ts +41 -34
package/dist/types/providers/aws-credentials.d.ts +8 -0
package/dist/types/providers/azure-openai-responses.d.ts +1 -0
package/dist/types/providers/google-gemini-cli.d.ts +22 -1
package/dist/types/providers/google-shared.d.ts +22 -0
package/dist/types/providers/google-types.d.ts +13 -1
package/dist/types/providers/mock.d.ts +8 -3
package/dist/types/providers/ollama.d.ts +6 -0
package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
package/dist/types/providers/openai-chat-server.d.ts +3 -3
package/dist/types/providers/openai-chat-wire.d.ts +644 -0
package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
package/dist/types/providers/openai-codex-responses.d.ts +31 -2
package/dist/types/providers/openai-completions-compat.d.ts +2 -25
package/dist/types/providers/openai-completions.d.ts +2 -10
package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
package/dist/types/providers/openai-responses-server.d.ts +2 -2
package/dist/types/providers/openai-responses-shared.d.ts +49 -9
package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
package/dist/types/providers/openai-responses.d.ts +13 -4
package/dist/types/providers/prometheus-native-client.d.ts +9 -0
package/dist/types/providers/prometheus-native-server.d.ts +4 -3
package/dist/types/providers/transform-messages.d.ts +1 -2
package/dist/types/rate-limit-utils.d.ts +3 -2
package/dist/types/registry/aimlapi.d.ts +4 -0
package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
package/dist/types/registry/amazon-bedrock.d.ts +5 -0
package/dist/types/registry/anthropic.d.ts +10 -0
package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
package/dist/types/registry/cerebras.d.ts +7 -0
package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
package/dist/types/registry/cursor.d.ts +7 -0
package/dist/types/registry/deepseek.d.ts +8 -0
package/dist/types/registry/derived.d.ts +5 -0
package/dist/types/registry/firepass.d.ts +16 -0
package/dist/types/registry/fireworks.d.ts +7 -0
package/dist/types/registry/github-copilot.d.ts +7 -0
package/dist/types/registry/gitlab-duo.d.ts +9 -0
package/dist/types/registry/google-antigravity.d.ts +9 -0
package/dist/types/registry/google-gemini-cli.d.ts +9 -0
package/dist/types/registry/google-vertex.d.ts +5 -0
package/dist/types/registry/google.d.ts +4 -0
package/dist/types/registry/groq.d.ts +4 -0
package/dist/types/registry/huggingface.d.ts +7 -0
package/dist/types/registry/index.d.ts +4 -0
package/dist/types/registry/kagi.d.ts +14 -0
package/dist/types/registry/kilo.d.ts +7 -0
package/dist/types/registry/kimi-code.d.ts +7 -0
package/dist/types/registry/litellm.d.ts +13 -0
package/dist/types/registry/lm-studio.d.ts +8 -0
package/dist/types/registry/minimax-code-cn.d.ts +6 -0
package/dist/types/registry/minimax-code.d.ts +6 -0
package/dist/types/registry/minimax.d.ts +4 -0
package/dist/types/registry/mistral.d.ts +4 -0
package/dist/types/registry/moonshot.d.ts +7 -0
package/dist/types/registry/nanogpt.d.ts +7 -0
package/dist/types/registry/nvidia.d.ts +7 -0
package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
package/dist/types/registry/ollama-cloud.d.ts +7 -0
package/dist/types/registry/ollama.d.ts +12 -0
package/dist/types/registry/openai-codex-device.d.ts +8 -0
package/dist/types/registry/openai-codex.d.ts +9 -0
package/dist/types/registry/openai.d.ts +4 -0
package/dist/types/registry/opencode-go.d.ts +6 -0
package/dist/types/registry/opencode-zen.d.ts +6 -0
package/dist/types/registry/openrouter.d.ts +13 -0
package/dist/types/registry/parallel.d.ts +14 -0
package/dist/types/registry/perplexity.d.ts +7 -0
package/dist/types/registry/qianfan.d.ts +7 -0
package/dist/types/registry/qwen-portal.d.ts +7 -0
package/dist/types/registry/registry.d.ts +272 -0
package/dist/types/registry/synthetic.d.ts +6 -0
package/dist/types/registry/tavily.d.ts +14 -0
package/dist/types/registry/together.d.ts +6 -0
package/dist/types/registry/types.d.ts +51 -0
package/dist/types/registry/venice.d.ts +13 -0
package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
package/dist/types/registry/vllm.d.ts +7 -0
package/dist/types/registry/wafer-pass.d.ts +6 -0
package/dist/types/registry/wafer-serverless.d.ts +6 -0
package/dist/types/registry/xai-oauth.d.ts +7 -0
package/dist/types/registry/xai.d.ts +4 -0
package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
package/dist/types/registry/xiaomi.d.ts +6 -0
package/dist/types/registry/zai.d.ts +7 -0
package/dist/types/registry/zenmux.d.ts +7 -0
package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
package/dist/types/stream.d.ts +9 -1
package/dist/types/types.d.ts +56 -295
package/dist/types/usage/google-antigravity.d.ts +15 -1
package/dist/types/usage/openai-codex-reset.d.ts +79 -0
package/dist/types/usage/openai-codex.d.ts +1 -0
package/dist/types/usage.d.ts +77 -4
package/dist/types/utils/abort.d.ts +6 -0
package/dist/types/utils/event-stream.d.ts +2 -0
package/dist/types/utils/http-inspector.d.ts +0 -1
package/dist/types/utils/idle-iterator.d.ts +35 -0
package/dist/types/utils/openai-http.d.ts +58 -0
package/dist/types/utils/request-debug.d.ts +3 -0
package/dist/types/utils/retry-after.d.ts +1 -0
package/dist/types/utils/schema/fields.d.ts +5 -0
package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
package/dist/types/utils/schema/stamps.d.ts +7 -15
package/dist/types/utils/sse-debug.d.ts +0 -5
package/dist/types/utils/stream-markup-healing.d.ts +2 -0
package/dist/types/utils.d.ts +1 -5
package/package.json +17 -29
package/src/auth-broker/remote-store.ts +10 -1
package/src/auth-broker/snapshot-cache.ts +1 -1
package/src/auth-broker/wire-schemas.ts +1 -1
package/src/auth-gateway/http.ts +1 -1
package/src/auth-gateway/server.ts +95 -30
package/src/auth-gateway/types.ts +10 -2
package/src/auth-retry.ts +238 -0
package/src/auth-storage.ts +935 -430
package/src/errors.ts +32 -0
package/src/index.ts +9 -14
package/src/provider-details.ts +1 -1
package/src/providers/__tests__/google-auth.test.ts +144 -0
package/src/providers/amazon-bedrock.ts +70 -40
package/src/providers/anthropic-client.ts +15 -13
package/src/providers/anthropic-messages-server-schema.ts +17 -7
package/src/providers/anthropic-messages-server.ts +88 -20
package/src/providers/anthropic-wire.ts +4 -3
package/src/providers/anthropic.ts +1234 -621
package/src/providers/aws-credentials.ts +47 -5
package/src/providers/aws-eventstream.ts +5 -0
package/src/providers/azure-openai-responses.ts +117 -67
package/src/providers/cursor.ts +30 -30
package/src/providers/github-copilot-headers.ts +1 -1
package/src/providers/gitlab-duo.ts +36 -29
package/src/providers/google-auth.ts +71 -8
package/src/providers/google-gemini-cli.ts +118 -22
package/src/providers/google-shared.ts +163 -43
package/src/providers/google-types.ts +10 -1
package/src/providers/kimi.ts +1 -1
package/src/providers/mock.ts +11 -3
package/src/providers/ollama.ts +64 -7
package/src/providers/openai-anthropic-shim.ts +17 -8
package/src/providers/openai-chat-server-schema.ts +9 -3
package/src/providers/openai-chat-server.ts +82 -16
package/src/providers/openai-chat-wire.ts +847 -0
package/src/providers/openai-codex/request-transformer.ts +129 -34
package/src/providers/openai-codex/response-handler.ts +22 -1
package/src/providers/openai-codex-responses.ts +699 -247
package/src/providers/openai-completions-compat.ts +8 -308
package/src/providers/openai-completions.ts +416 -267
package/src/providers/openai-responses-server-schema.ts +15 -9
package/src/providers/openai-responses-server.ts +162 -114
package/src/providers/openai-responses-shared.ts +320 -82
package/src/providers/openai-responses-wire.ts +6391 -0
package/src/providers/openai-responses.ts +382 -176
package/src/providers/prometheus-native-client.ts +27 -11
package/src/providers/prometheus-native-server.ts +44 -17
package/src/providers/transform-messages.ts +311 -120
package/src/providers/vision-guard.ts +5 -3
package/src/rate-limit-utils.ts +13 -3
package/src/registry/aimlapi.ts +6 -0
package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
package/src/registry/amazon-bedrock.ts +22 -0
package/src/registry/anthropic.ts +26 -0
package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
package/src/{utils/oauth → registry}/cerebras.ts +8 -1
package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
package/src/registry/cursor.ts +20 -0
package/src/{utils/oauth → registry}/deepseek.ts +9 -17
package/src/registry/derived.ts +9 -0
package/src/{utils/oauth → registry}/firepass.ts +10 -2
package/src/{utils/oauth → registry}/fireworks.ts +8 -1
package/src/registry/github-copilot.ts +22 -0
package/src/registry/gitlab-duo.ts +19 -0
package/src/registry/google-antigravity.ts +21 -0
package/src/registry/google-gemini-cli.ts +21 -0
package/src/registry/google-vertex.ts +38 -0
package/src/registry/google.ts +6 -0
package/src/registry/groq.ts +6 -0
package/src/{utils/oauth → registry}/huggingface.ts +8 -19
package/src/registry/index.ts +4 -0
package/src/{utils/oauth → registry}/kagi.ts +9 -11
package/src/{utils/oauth → registry}/kilo.ts +11 -6
package/src/registry/kimi-code.ts +17 -0
package/src/{utils/oauth → registry}/litellm.ts +8 -12
package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
package/src/registry/minimax-code-cn.ts +12 -0
package/src/registry/minimax-code.ts +12 -0
package/src/registry/minimax.ts +6 -0
package/src/registry/mistral.ts +6 -0
package/src/{utils/oauth → registry}/moonshot.ts +8 -9
package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
package/src/{utils/oauth → registry}/nvidia.ts +8 -18
package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
package/src/{utils → registry}/oauth/anthropic.ts +38 -17
package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
package/src/registry/oauth/gitlab-duo.ts +198 -0
package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
package/src/registry/oauth/index.ts +164 -0
package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
package/src/{utils → registry}/oauth/types.ts +7 -51
package/src/{utils → registry}/oauth/wafer.ts +1 -1
package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
package/src/{utils/oauth → registry}/ollama.ts +8 -13
package/src/registry/openai-codex-device.ts +18 -0
package/src/registry/openai-codex.ts +19 -0
package/src/registry/openai.ts +6 -0
package/src/registry/opencode-go.ts +12 -0
package/src/registry/opencode-zen.ts +12 -0
package/src/{utils/oauth → registry}/openrouter.ts +10 -2
package/src/{utils/oauth → registry}/parallel.ts +9 -11
package/src/registry/perplexity.ts +13 -0
package/src/{utils/oauth → registry}/qianfan.ts +8 -17
package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
package/src/registry/registry.ts +149 -0
package/src/{utils/oauth → registry}/synthetic.ts +7 -1
package/src/{utils/oauth → registry}/tavily.ts +10 -12
package/src/{utils/oauth → registry}/together.ts +7 -1
package/src/registry/types.ts +56 -0
package/src/{utils/oauth → registry}/venice.ts +8 -12
package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
package/src/{utils/oauth → registry}/vllm.ts +9 -16
package/src/registry/wafer-pass.ts +12 -0
package/src/registry/wafer-serverless.ts +12 -0
package/src/registry/xai-oauth.ts +17 -0
package/src/registry/xai.ts +6 -0
package/src/registry/xiaomi-token-plan-ams.ts +12 -0
package/src/registry/xiaomi-token-plan-cn.ts +12 -0
package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
package/src/registry/xiaomi.ts +12 -0
package/src/{utils/oauth → registry}/zai.ts +10 -22
package/src/{utils/oauth → registry}/zenmux.ts +8 -1
package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
package/src/stream.ts +229 -199
package/src/types.ts +63 -384
package/src/usage/claude.ts +4 -2
package/src/usage/github-copilot.ts +4 -2
package/src/usage/google-antigravity.ts +196 -28
package/src/usage/kimi.ts +1 -1
package/src/usage/minimax-code.ts +5 -6
package/src/usage/openai-codex-reset.ts +174 -0
package/src/usage/openai-codex.ts +19 -2
package/src/usage/zai.ts +2 -1
package/src/usage.ts +93 -4
package/src/utils/abort.ts +14 -0
package/src/utils/event-stream.ts +17 -0
package/src/utils/http-inspector.ts +4 -12
package/src/utils/idle-iterator.ts +250 -79
package/src/utils/openai-http.ts +157 -0
package/src/utils/request-debug.ts +67 -19
package/src/utils/retry-after.ts +1 -1
package/src/utils/retry.ts +23 -2
package/src/utils/schema/CONSTRAINTS.md +4 -2
package/src/utils/schema/fields.ts +16 -0
package/src/utils/schema/json-schema-validator.ts +19 -1
package/src/utils/schema/normalize.ts +80 -8
package/src/utils/schema/stamps.ts +22 -10
package/src/utils/schema/wire.ts +2 -2
package/src/utils/sse-debug.ts +0 -271
package/src/utils/stream-markup-healing.ts +50 -8
package/src/utils/validation.ts +49 -13
package/src/utils.ts +2 -26
package/dist/types/model-cache.d.ts +0 -17
package/dist/types/model-manager.d.ts +0 -64
package/dist/types/model-thinking.d.ts +0 -100
package/dist/types/models.d.ts +0 -12
package/dist/types/provider-models/bundled-references.d.ts +0 -4
package/dist/types/provider-models/descriptors.d.ts +0 -50
package/dist/types/provider-models/google.d.ts +0 -24
package/dist/types/provider-models/index.d.ts +0 -5
package/dist/types/provider-models/ollama.d.ts +0 -7
package/dist/types/provider-models/openai-compat.d.ts +0 -323
package/dist/types/provider-models/special.d.ts +0 -16
package/dist/types/utils/discovery/antigravity.d.ts +0 -61
package/dist/types/utils/discovery/codex.d.ts +0 -38
package/dist/types/utils/discovery/cursor.d.ts +0 -23
package/dist/types/utils/discovery/gemini.d.ts +0 -25
package/dist/types/utils/discovery/index.d.ts +0 -4
package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
package/dist/types/utils/oauth/cerebras.d.ts +0 -1
package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/deepseek.d.ts +0 -10
package/dist/types/utils/oauth/firepass.d.ts +0 -1
package/dist/types/utils/oauth/fireworks.d.ts +0 -1
package/dist/types/utils/oauth/huggingface.d.ts +0 -19
package/dist/types/utils/oauth/kagi.d.ts +0 -17
package/dist/types/utils/oauth/kilo.d.ts +0 -5
package/dist/types/utils/oauth/litellm.d.ts +0 -18
package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
package/dist/types/utils/oauth/moonshot.d.ts +0 -1
package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
package/dist/types/utils/oauth/nvidia.d.ts +0 -18
package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
package/dist/types/utils/oauth/ollama.d.ts +0 -18
package/dist/types/utils/oauth/openrouter.d.ts +0 -1
package/dist/types/utils/oauth/parallel.d.ts +0 -17
package/dist/types/utils/oauth/qianfan.d.ts +0 -17
package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
package/dist/types/utils/oauth/synthetic.d.ts +0 -1
package/dist/types/utils/oauth/tavily.d.ts +0 -17
package/dist/types/utils/oauth/together.d.ts +0 -1
package/dist/types/utils/oauth/venice.d.ts +0 -18
package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/vllm.d.ts +0 -16
package/dist/types/utils/oauth/zai.d.ts +0 -18
package/dist/types/utils/oauth/zenmux.d.ts +0 -1
package/dist/types/utils/oauth/zhipu.d.ts +0 -18
package/src/model-cache.ts +0 -129
package/src/model-manager.ts +0 -469
package/src/model-thinking.ts +0 -756
package/src/models.json +0 -60287
package/src/models.json.d.ts +0 -9
package/src/models.ts +0 -56
package/src/provider-models/bundled-references.ts +0 -38
package/src/provider-models/descriptors.ts +0 -364
package/src/provider-models/google.ts +0 -88
package/src/provider-models/index.ts +0 -5
package/src/provider-models/ollama.ts +0 -153
package/src/provider-models/openai-compat.ts +0 -2904
package/src/provider-models/special.ts +0 -67
package/src/utils/discovery/antigravity.ts +0 -261
package/src/utils/discovery/codex.ts +0 -371
package/src/utils/discovery/cursor.ts +0 -306
package/src/utils/discovery/gemini.ts +0 -248
package/src/utils/discovery/index.ts +0 -4
package/src/utils/discovery/openai-compatible.ts +0 -224
package/src/utils/oauth/gitlab-duo.ts +0 -123
package/src/utils/oauth/index.ts +0 -502
/package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
/package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
/package/src/{utils → registry}/oauth/callback-server.ts +0 -0
/package/src/{utils → registry}/oauth/cursor.ts +0 -0
/package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
/package/src/{utils → registry}/oauth/kimi.ts +0 -0
/package/src/{utils → registry}/oauth/oauth.html +0 -0
/package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
/package/src/{utils → registry}/oauth/opencode.ts +0 -0
/package/src/{utils → registry}/oauth/perplexity.ts +0 -0
/package/src/{utils → registry}/oauth/pkce.ts +0 -0

package/src/providers/google-shared.ts CHANGED Viewed

@@ -2,8 +2,10 @@
  * Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
  */
+import { scheduler } from "node:timers/promises";
+import { calculateCost } from "@prometheus-ai/catalog/models";
 import { extractHttpStatusFromError, readSseJson } from "@prometheus-ai/utils";
-import { calculateCost } from "../models";
+import { ProviderHttpError } from "../errors";
 import type {
 	Api,
 	AssistantMessage,
@@ -20,7 +22,7 @@ import type {
 } from "../types";
 import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
-import { finalizeErrorMessage, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
+import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
 import { normalizeSchemaForCCA, normalizeSchemaForGoogle, toolWireSchema } from "../utils/schema";
 import type {
 	Content,
@@ -45,6 +47,11 @@ export type {
 } from "./google-types";
 export { normalizeSchemaForGoogle };
+/** Non-2xx response (or in-stream error chunk) from the Google Generative Language / Vertex API. */
+export class GoogleApiError extends ProviderHttpError {
+	override readonly name = "GoogleApiError";
+}
 type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex";
 /**
@@ -160,7 +167,19 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
 	const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
+	// Gemini < 3 image tool results go in a separate user turn, but parallel tool results must
+	// stay a single contiguous functionResponse turn ("number of function response parts is not
+	// equal to number of function call parts"). Buffer image turns and flush them only after the
+	// merged functionResponse turn is complete.
+	let pendingToolImageParts: Part[] = [];
+	const flushPendingToolImages = () => {
+		if (pendingToolImageParts.length === 0) return;
+		contents.push({ role: "user", parts: pendingToolImageParts });
+		pendingToolImageParts = [];
+	};
 	for (const msg of transformedMessages) {
+		if (msg.role !== "toolResult") flushPendingToolImages();
 		if (msg.role === "user" || msg.role === "developer") {
 			if (typeof msg.content === "string") {
 				// Skip empty user messages
@@ -314,15 +333,13 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
 				});
 			}
-			// For Gemini < 3, add images in a separate user message
+			// For Gemini < 3, buffer images for a separate user message after the functionResponse turn
 			if (hasImages && !modelSupportsMultimodalFunctionResponse) {
-				contents.push({
-					role: "user",
-					parts: [{ text: "Tool result image:" }, ...imageParts],
-				});
+				pendingToolImageParts.push({ text: "Tool result image:" }, ...imageParts);
 			}
 		}
 	}
+	flushPendingToolImages();
 	return contents;
 }
@@ -355,7 +372,7 @@ export function convertTools(
 				description: tool.description || "",
 				...(useParameters
 					? { parameters: normalizeSchemaForCCA(toolWireSchema(tool)) }
-					: { parametersJsonSchema: toolWireSchema(tool) }),
+					: { parametersJsonSchema: normalizeSchemaForGoogle(toolWireSchema(tool)) }),
 			})),
 		},
 	];
@@ -422,6 +439,47 @@ export function mapStopReasonString(reason: string): StopReason {
 	}
 }
+/**
+ * Bounded retries for the well-known Gemini "empty response" failure: a benign
+ * `finishReason: STOP` carrying only an empty/whitespace text part and no tool call.
+ * Shared by the public/Vertex `streamGoogleGenAI` path and the Cloud Code Assist
+ * (`google-gemini-cli`/`google-antigravity`) provider so both apply the same policy.
+ */
+export const MAX_EMPTY_STREAM_RETRIES = 2;
+export const EMPTY_STREAM_BASE_DELAY_MS = 500;
+/**
+ * Whether a completed Google assistant message carries content worth delivering.
+ *
+ * A tool call or any non-whitespace text counts as meaningful. An empty/whitespace-only
+ * text part — or thinking that never produced an answer — is the "empty response" failure:
+ * delivered as-is the agent loop has nothing to act on and silently halts, so the request
+ * must be retried instead of surfaced.
+ */
+export function hasMeaningfulGoogleContent(output: AssistantMessage): boolean {
+	for (const block of output.content) {
+		if (block.type === "toolCall") return true;
+		if (block.type === "text" && block.text.trim().length > 0) return true;
+	}
+	return false;
+}
+/** Wipe a streamed message between empty-response retries so the next attempt starts clean. */
+function resetGoogleStreamOutputForRetry(output: AssistantMessage): void {
+	output.content = [];
+	output.usage = {
+		input: 0,
+		output: 0,
+		cacheRead: 0,
+		cacheWrite: 0,
+		totalTokens: 0,
+		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+	};
+	output.stopReason = "stop";
+	output.errorMessage = undefined;
+	output.timestamp = Date.now();
+}
 /**
  * Module-local counter for generating unique tool call IDs across Google providers.
  * Shared so that a single monotonically-increasing sequence is used regardless of which
@@ -527,6 +585,7 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
 	const blockIndex = () => blocks.length - 1;
 	let currentBlock: TextContent | ThinkingContent | null = null;
 	let firstTokenSeen = false;
+	let sawFinishReason = false;
 	const flushCurrent = () => {
 		if (!currentBlock) return;
@@ -534,10 +593,23 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
 	};
 	for await (const chunk of googleStream) {
+		if (chunk.error) {
+			const detail = chunk.error.message || chunk.error.status || "unknown error";
+			const message = `Google API stream error: ${detail}`;
+			throw typeof chunk.error.code === "number" && chunk.error.code >= 400
+				? new GoogleApiError(message, chunk.error.code)
+				: new Error(message);
+		}
+		if (!chunk.candidates?.length && chunk.promptFeedback?.blockReason) {
+			const detail = chunk.promptFeedback.blockReasonMessage;
+			throw new Error(
+				`Request blocked by Google (${chunk.promptFeedback.blockReason})${detail ? `: ${detail}` : ""}`,
+			);
+		}
 		const candidate = chunk.candidates?.[0];
 		if (candidate?.content?.parts) {
 			for (const part of candidate.content.parts) {
-				if (part.text !== undefined) {
+				if (part.text !== undefined && part.text !== "") {
 					if (!firstTokenSeen) {
 						firstTokenSeen = true;
 						onFirstToken?.();
@@ -578,6 +650,18 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
 							partial: output,
 						});
 					}
+				} else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
+					if (currentBlock.type === "thinking") {
+						currentBlock.thinkingSignature = retainThoughtSignature(
+							currentBlock.thinkingSignature,
+							part.thoughtSignature,
+						);
+					} else if (retainTextSignature) {
+						currentBlock.textSignature = retainThoughtSignature(
+							currentBlock.textSignature,
+							part.thoughtSignature,
+						);
+					}
 				}
 				if (part.functionCall) {
@@ -606,9 +690,17 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
 		}
 		if (candidate?.finishReason) {
-			output.stopReason = mapStopReason(candidate.finishReason);
-			if (output.content.some(b => b.type === "toolCall")) {
+			sawFinishReason = true;
+			const mapped = mapStopReason(candidate.finishReason);
+			// Only let a trailing tool call upgrade benign finishes; SAFETY/MALFORMED_FUNCTION_CALL
+			// and friends must surface as errors even when earlier chunks carried valid tool calls.
+			if ((mapped === "stop" || mapped === "length") && output.content.some(b => b.type === "toolCall")) {
 				output.stopReason = "toolUse";
+			} else {
+				output.stopReason = mapped;
+				if (mapped === "error") {
+					output.errorMessage = `Generation failed with finish reason: ${candidate.finishReason}`;
+				}
 			}
 		}
@@ -645,6 +737,10 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
 		throw new Error("Request was aborted");
 	}
+	if (!sawFinishReason) {
+		throw new Error("Google API stream ended without a finish reason (connection dropped or response truncated)");
+	}
 	if (output.stopReason === "aborted" || output.stopReason === "error") {
 		throw new Error(output.errorMessage ?? "An unknown error occurred");
 	}
@@ -804,41 +900,65 @@ export function streamGoogleGenAI<T extends "google-generative-ai" | "google-ver
 				headers: plan.headers,
 			};
-			const wireBody = paramsToWireBody(params);
+			const bodyJson = JSON.stringify(paramsToWireBody(params));
 			const fetchImpl = plan.fetch ?? options?.fetch ?? (globalThis.fetch.bind(globalThis) as FetchImpl);
-			const response = await fetchImpl(plan.url, {
-				method: "POST",
-				headers: { ...plan.headers, "Content-Type": "application/json", Accept: "text/event-stream" },
-				body: JSON.stringify(wireBody),
-				signal: options?.signal,
-			});
-			if (!response.ok) {
-				const errorText = await response.text().catch(() => "");
-				throw withHttpStatus(
-					new Error(`Google API error (${response.status}): ${extractGoogleErrorMessage(errorText)}`),
-					response.status,
-				);
-			}
-			if (!response.body) {
-				throw new Error("Google API returned an empty response body");
-			}
-			const googleStream = readSseJson<GenerateContentResponse>(response.body, options?.signal, event =>
-				options?.onSseEvent?.({ event: event.event, data: event.data, raw: [...event.raw] }, model),
-			);
+			const openStream = async (): Promise<ReadableStream<Uint8Array>> => {
+				const response = await fetchImpl(plan.url, {
+					method: "POST",
+					headers: { ...plan.headers, "Content-Type": "application/json", Accept: "text/event-stream" },
+					body: bodyJson,
+					signal: options?.signal,
+				});
+				if (!response.ok) {
+					const errorText = await response.text().catch(() => "");
+					throw new GoogleApiError(
+						`Google API error (${response.status}): ${extractGoogleErrorMessage(errorText)}`,
+						response.status,
+						{ headers: response.headers },
+					);
+				}
+				if (!response.body) {
+					throw new Error("Google API returned an empty response body");
+				}
+				return response.body as ReadableStream<Uint8Array>;
+			};
+			let body = await openStream();
 			stream.push({ type: "start", partial: output });
-			await consumeGoogleStream({
-				googleStream,
-				output,
-				stream,
-				model,
-				options,
-				retainTextSignature,
-				onFirstToken: () => {
-					firstTokenTime = Date.now();
-				},
-			});
+			// Gemini occasionally finishes with `finishReason: STOP` while emitting only an empty
+			// text part and no tool call. Delivered as-is the agent receives a blank message and
+			// silently halts mid-task, so retry a bounded number of times before giving up.
+			for (let emptyAttempt = 0; ; emptyAttempt++) {
+				const googleStream = readSseJson<GenerateContentResponse>(body, options?.signal, event =>
+					options?.onSseEvent?.({ event: event.event, data: event.data, raw: [...event.raw] }, model),
+				);
+				await consumeGoogleStream({
+					googleStream,
+					output,
+					stream,
+					model,
+					options,
+					retainTextSignature,
+					onFirstToken: () => {
+						firstTokenTime = Date.now();
+					},
+				});
+				if (output.stopReason !== "stop" || hasMeaningfulGoogleContent(output)) break;
+				if (emptyAttempt >= MAX_EMPTY_STREAM_RETRIES) {
+					throw new Error(
+						`Google API returned an empty response (finishReason STOP with no content) after ${MAX_EMPTY_STREAM_RETRIES + 1} attempts`,
+					);
+				}
+				try {
+					await scheduler.wait(EMPTY_STREAM_BASE_DELAY_MS * 2 ** emptyAttempt, { signal: options?.signal });
+				} catch {
+					throw new Error("Request was aborted");
+				}
+				resetGoogleStreamOutputForRetry(output);
+				body = await openStream();
+			}
 			output.duration = Date.now() - startTime;
 			if (firstTokenTime) output.ttft = firstTokenTime - startTime;

package/src/providers/google-types.ts CHANGED Viewed

@@ -157,11 +157,20 @@ export interface UsageMetadata {
 	cachedContentTokenCount?: number;
 }
+/** Prompt-level safety feedback; `blockReason` is set (with no candidates) when the prompt is blocked. */
+export interface PromptFeedback {
+	blockReason?: string;
+	blockReasonMessage?: string;
+	[key: string]: unknown; // any other feedback fields are accepted but left untyped
+}
 /** Single SSE chunk's parsed JSON body. */
 export interface GenerateContentResponse {
 	candidates?: Candidate[];
 	usageMetadata?: UsageMetadata;
 	modelVersion?: string;
 	responseId?: string;
-	promptFeedback?: Record<string, unknown>;
+	promptFeedback?: PromptFeedback;
+	/** In-band stream failure (quota, internal error) delivered as a final JSON event. */
+	error?: { code?: number; message?: string; status?: string };
 }

package/src/providers/kimi.ts CHANGED Viewed

@@ -9,9 +9,9 @@
  * Note: Kimi calculates TPM rate limits based on max_tokens, not actual output.
  */
+import { getKimiCommonHeaders } from "../registry/oauth/kimi";
 import type { Api, Context, Model } from "../types";
 import type { AssistantMessageEventStream } from "../utils/event-stream";
-import { getKimiCommonHeaders } from "../utils/oauth/kimi";
 import {
 	type OpenAIAnthropicApiFormat,
 	type OpenAIAnthropicShimOptions,

package/src/providers/mock.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * Mock provider for tests.
  *
  * Implements `Model<"mock">` + `streamMock` so test code can drive
- * pi-agent-core / streamSimple-shaped consumers without an HTTP client.
+ * agent-core / streamSimple-shaped consumers without an HTTP client.
  *
  * Usage:
  *
@@ -49,6 +49,7 @@ import type {
 	Context,
 	Model,
 	SimpleStreamOptions,
+	StopDetails,
 	StopReason,
 	TextContent,
 	ThinkingContent,
@@ -81,6 +82,10 @@ export interface MockResponse {
 	content?: ReadonlyArray<MockContent>;
 	/** Stop reason. Defaults to `"toolUse"` when content has tool calls, else `"stop"`. */
 	stopReason?: StopReason;
+	/** Structured terminal stop classification, e.g. Anthropic refusal metadata. */
+	stopDetails?: StopDetails | null;
+	/** Error text paired with an explicit `"error"` stop reason. */
+	errorMessage?: string;
 	/** Usage stats. Missing fields default to 0; missing `cost.total` is recomputed from components. */
 	usage?: Partial<Omit<Usage, "cost">> & { cost?: Partial<Usage["cost"]> };
 	/** Pre-set responseId. */
@@ -168,6 +173,7 @@ export class MockModel implements Model<MockApi> {
 	readonly cost: Model["cost"];
 	readonly contextWindow: number;
 	readonly maxTokens: number;
+	readonly compat = undefined;
 	/** Recorded calls in invocation order. */
 	readonly calls: MockCall[] = [];
@@ -226,7 +232,7 @@ export function createMockModel(options: MockModelOptions = {}): MockModel {
 	return new MockModel(options);
 }
-/** Stream function for `Model<"mock">`. Matches the @prometheus-ai/ai per-provider stream signature. */
+/** Stream function for `Model<"mock">`. Matches the Prometheus AI per-provider stream signature. */
 export function streamMock(
 	model: Model<Api>,
 	context: Context,
@@ -250,7 +256,7 @@ export function streamMock(
 }
 /** Convenience: register the mock provider with the global custom API registry. */
-export function registerMockApi(sourceId = "prometheus-ai/mock"): void {
+export function registerMockApi(sourceId = "@prometheus-ai/ai/mock"): void {
 	registerCustomApi(MOCK_API, streamMock, sourceId);
 }
@@ -388,6 +394,8 @@ async function runMock(
 	const reason: StopReason = response.stopReason ?? (hasToolCall ? ("toolUse" as StopReason) : ("stop" as StopReason));
 	partial.stopReason = reason;
+	partial.stopDetails = response.stopDetails;
+	partial.errorMessage = response.errorMessage;
 	partial.usage = mergeUsage(response.usage);
 	partial.duration = Date.now() - startedAt;

package/src/providers/ollama.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { extractHttpStatusFromError, fetchWithRetry } from "@prometheus-ai/utils";
+import { ProviderHttpError } from "../errors";
 import { getEnvApiKey } from "../stream";
 import type {
 	Api,
@@ -16,7 +17,8 @@ import type {
 } from "../types";
 import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
-import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
+import { type CapturedHttpErrorResponse, finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
+import { getOpenAIStreamFirstEventTimeoutMs, getOpenAIStreamIdleTimeoutMs } from "../utils/idle-iterator";
 import { parseStreamingJson } from "../utils/json-parse";
 import { toolWireSchema } from "../utils/schema/wire";
 import {
@@ -27,8 +29,14 @@ import {
 } from "../utils/stream-markup-healing";
 import { transformMessages } from "./transform-messages";
+/** Non-2xx response from the Ollama `/api/chat` endpoint. */
+export class OllamaApiError extends ProviderHttpError {
+	override readonly name = "OllamaApiError"; // replaces the inherited name so the error reports itself as Ollama-specific
+}
 export interface OllamaChatOptions extends StreamOptions {
 	reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
+	disableReasoning?: boolean; // when true and the model supports reasoning, the request sends `think: false` regardless of `reasoning`
 	toolChoice?: ToolChoice;
 }
@@ -91,7 +99,14 @@ function normalizeBaseUrl(baseUrl?: string): string {
 	return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
 }
-function mapReasoning(reasoning: OllamaChatOptions["reasoning"]): boolean | "low" | "medium" | "high" | undefined {
+function mapReasoning(
+	reasoning: OllamaChatOptions["reasoning"],
+	disableReasoning: boolean | undefined,
+	modelReasoning: boolean,
+): boolean | "low" | "medium" | "high" | undefined {
+	if (disableReasoning && modelReasoning) {
+		return false;
+	}
 	switch (reasoning) {
 		case "minimal":
 		case "low":
@@ -258,7 +273,7 @@ function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefin
 }
 function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
-	const think = mapReasoning(options?.reasoning);
+	const think = mapReasoning(options?.reasoning, options?.disableReasoning, model.reasoning);
 	const toolChoice = mapToolChoice(options?.toolChoice);
 	const selectedTools = selectToolsForToolChoice(context.tools, options?.toolChoice);
 	const tools = convertTools(selectedTools);
@@ -268,11 +283,32 @@ function createChatBody(model: Model<"ollama-chat">, context: Context, options:
 		...(tools ? { tools } : {}),
 		...(think !== undefined ? { think } : {}),
 		...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
-		...(options?.maxTokens !== undefined ? { options: { num_predict: options.maxTokens } } : {}),
+		...(options?.maxTokens !== undefined && !model.omitMaxOutputTokens
+			? { options: { num_predict: options.maxTokens } }
+			: {}),
 		stream: true,
 	};
 }
+async function captureHttpErrorResponse(response: Response): Promise<CapturedHttpErrorResponse> { // best-effort snapshot of a failed response: status, headers, raw body text and parsed JSON when available
+	let bodyText: string | undefined;
+	let bodyJson: unknown;
+	try {
+		bodyText = await response.text(); // consumes the response body
+		if (bodyText.trim()) { // skip JSON parsing for empty or whitespace-only bodies
+			try {
+				bodyJson = JSON.parse(bodyText) as unknown;
+			} catch {} // body is not valid JSON: keep bodyText, leave bodyJson undefined
+		}
+	} catch {} // reading the body failed: bodyText stays undefined, status and headers are still returned
+	return {
+		status: response.status,
+		headers: response.headers,
+		bodyText,
+		bodyJson,
+	};
+}
 async function* iterateNdjson(stream: ReadableStream<Uint8Array>): AsyncGenerator<OllamaChatChunk> {
 	const reader = stream.getReader();
 	const decoder = new TextDecoder();
@@ -376,6 +412,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
 		let firstTokenTime: number | undefined;
 		const output = createEmptyOutput(model);
 		let rawRequestDump: RawHttpRequestDump | undefined;
+		let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
 		let activeThinkingIndex: number | undefined;
 		let activeTextIndex: number | undefined;
 		const activeToolIndices = new Set<number>();
@@ -489,6 +526,22 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
 				url: `${baseUrl}/api/chat`,
 				body,
 			};
+			// Direct callers that bypass `register-builtins` (which installs
+			// the iterator-level watchdog) need a pre-response timer alongside
+			// `timeout: false`; otherwise an Ollama server that accepts the
+			// POST and never streams headers would hang forever (issue #2422).
+			const idleTimeoutMs = options.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
+			const firstEventTimeoutMs =
+				options.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
+			const preResponseWatchdog =
+				firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
+					? AbortSignal.timeout(firstEventTimeoutMs)
+					: undefined;
+			const fetchSignal = preResponseWatchdog
+				? options.signal
+					? AbortSignal.any([options.signal, preResponseWatchdog])
+					: preResponseWatchdog
+				: options.signal;
 			const response = await fetchWithRetry(`${baseUrl}/api/chat`, {
 				method: "POST",
 				headers: {
@@ -498,12 +551,16 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
 					"Content-Type": "application/json",
 				},
 				body: JSON.stringify(body),
-				signal: options.signal,
+				signal: fetchSignal,
 				defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
 				fetch: options.fetch,
+				timeout: false,
 			});
 			if (!response.ok) {
-				throw new Error(`HTTP ${response.status} from ${baseUrl}/api/chat`);
+				capturedErrorResponse = await captureHttpErrorResponse(response);
+				throw new OllamaApiError(`HTTP ${response.status} from ${baseUrl}/api/chat`, response.status, {
+					headers: response.headers,
+				});
 			}
 			if (!response.body) {
 				throw new Error("Ollama returned an empty response body");
@@ -631,7 +688,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
 			}
 			output.stopReason = options.signal?.aborted ? "aborted" : "error";
 			output.errorStatus = extractHttpStatusFromError(error);
-			output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
+			output.errorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
 			output.duration = Date.now() - startTime;
 			if (firstTokenTime) {
 				output.ttft = firstTokenTime - startTime;

package/src/providers/openai-anthropic-shim.ts CHANGED Viewed

@@ -8,8 +8,9 @@
  * here once.
  */
+import { buildModel } from "@prometheus-ai/catalog/build";
 import { ANTHROPIC_THINKING } from "../stream";
-import type { Context, Model, SimpleStreamOptions } from "../types";
+import type { Context, Model, ModelSpec, SimpleStreamOptions } from "../types";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { createProviderErrorMessage } from "./error-message";
 import { streamAnthropic, streamOpenAICompletions } from "./register-builtins";
@@ -44,6 +45,9 @@ export function streamOpenAIAnthropicShim(
 ): AssistantMessageEventStream {
 	const stream = new AssistantMessageEventStream();
 	const format = options?.format ?? config.defaultFormat;
+	// The resolver form of `apiKey` is resolved upstream in `streamSimple`;
+	// this shim only ever receives a static bearer string.
+	const apiKey = typeof options?.apiKey === "string" ? options.apiKey : undefined;
 	(async () => {
 		try {
@@ -53,7 +57,7 @@ export function streamOpenAIAnthropicShim(
 			};
 			if (format === "anthropic") {
-				const anthropicModel: Model<"anthropic-messages"> = {
+				const anthropicModel = buildModel({
 					id: model.id,
 					name: model.name,
 					api: "anthropic-messages",
@@ -65,7 +69,7 @@ export function streamOpenAIAnthropicShim(
 					reasoning: model.reasoning,
 					input: model.input,
 					cost: model.cost,
-				};
+				} as ModelSpec<"anthropic-messages">);
 				const reasoningEffort = options?.reasoning;
 				const thinkingEnabled = !!reasoningEffort && model.reasoning;
@@ -74,14 +78,14 @@ export function streamOpenAIAnthropicShim(
 					: undefined;
 				const innerStream = streamAnthropic(anthropicModel, context, {
-					apiKey: options?.apiKey,
+					apiKey,
 					temperature: options?.temperature,
 					topP: options?.topP,
 					topK: options?.topK,
 					minP: options?.minP,
 					presencePenalty: options?.presencePenalty,
 					repetitionPenalty: options?.repetitionPenalty,
-					maxTokens: options?.maxTokens ?? model.maxTokens,
+					maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
 					signal: options?.signal,
 					headers: mergedHeaders,
 					sessionId: options?.sessionId,
@@ -98,19 +102,24 @@ export function streamOpenAIAnthropicShim(
 				}
 			} else {
 				const openaiModel: Model<"openai-completions"> = config.openaiBaseUrl
-					? { ...model, baseUrl: config.openaiBaseUrl, headers: mergedHeaders }
+					? buildModel({
+							...model,
+							baseUrl: config.openaiBaseUrl,
+							headers: mergedHeaders,
+							compat: model.compatConfig,
+						} as ModelSpec<"openai-completions">)
 					: model;
 				const reasoningEffort = options?.reasoning;
 				const innerStream = streamOpenAICompletions(openaiModel, context, {
-					apiKey: options?.apiKey,
+					apiKey,
 					temperature: options?.temperature,
 					topP: options?.topP,
 					topK: options?.topK,
 					minP: options?.minP,
 					presencePenalty: options?.presencePenalty,
 					repetitionPenalty: options?.repetitionPenalty,
-					maxTokens: options?.maxTokens ?? model.maxTokens,
+					maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
 					signal: options?.signal,
 					headers: mergedHeaders,
 					sessionId: options?.sessionId,

package/src/providers/openai-chat-server-schema.ts CHANGED Viewed

@@ -7,6 +7,8 @@
  * non-strict defaults (e.g. `stream_options.include_obfuscation`) — does not
  * trip 400s on shapes we simply ignore.
  */
+import { z } from "zod/v4";
 import type {
 	ChatCompletionContentPart,
 	ChatCompletionCreateParams,
@@ -14,8 +16,7 @@ import type {
 	ChatCompletionMessageToolCall,
 	ChatCompletionTool,
 	ChatCompletionToolChoiceOption,
-} from "openai/resources/chat/completions";
-import * as z from "zod/v4";
+} from "./openai-chat-wire";
 // ─── User-message content parts ─────────────────────────────────────────────
@@ -27,7 +28,7 @@ export const textPartSchema = z.object({
 /**
  * OpenAI documents `image_url` as either `{ url: string, detail?: ... }` or —
  * older clients — a bare string. Accept both shapes; downstream we extract a
- * URL. `detail` is accepted for forward-compat but currently dropped (@prometheus-ai/ai's
+ * URL. `detail` is accepted for forward-compat but currently dropped (Prometheus AI's
  * `ImageContent` has no detail field — TODO: plumb through if/when added).
  */
 export const imagePartSchema = z.object({
@@ -145,6 +146,11 @@ export const assistantMessageSchema = z.object({
 	role: z.literal("assistant"),
 	content: baseContent.optional(),
 	tool_calls: z.array(toolCallSchema).optional(),
+	// DeepSeek-style reasoning channel. The gateway emits it on the way out
+	// (encodeResponse/encodeStream); accept it back so thinking-mode
+	// continuations replay the model's actual reasoning instead of a
+	// synthesized placeholder.
+	reasoning_content: z.string().nullish(),
 });
 export const toolMessageSchema = z.object({