npm - @prometheus-ai/ai - Versions diffs - 0.5.3 → 0.5.8 - Mend

@prometheus-ai/ai 0.5.3 → 0.5.8

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (377)

package/dist/types/auth-broker/remote-store.d.ts +2 -1
package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
package/dist/types/auth-gateway/server.d.ts +19 -0
package/dist/types/auth-gateway/types.d.ts +9 -3
package/dist/types/auth-retry.d.ts +119 -0
package/dist/types/auth-storage.d.ts +217 -8
package/dist/types/errors.d.ts +24 -0
package/dist/types/index.d.ts +5 -9
package/dist/types/provider-details.d.ts +1 -1
package/dist/types/providers/amazon-bedrock.d.ts +12 -6
package/dist/types/providers/anthropic-client.d.ts +10 -3
package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
package/dist/types/providers/anthropic-wire.d.ts +3 -3
package/dist/types/providers/anthropic.d.ts +41 -34
package/dist/types/providers/aws-credentials.d.ts +8 -0
package/dist/types/providers/azure-openai-responses.d.ts +1 -0
package/dist/types/providers/google-gemini-cli.d.ts +22 -1
package/dist/types/providers/google-shared.d.ts +22 -0
package/dist/types/providers/google-types.d.ts +13 -1
package/dist/types/providers/mock.d.ts +8 -3
package/dist/types/providers/ollama.d.ts +6 -0
package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
package/dist/types/providers/openai-chat-server.d.ts +3 -3
package/dist/types/providers/openai-chat-wire.d.ts +644 -0
package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
package/dist/types/providers/openai-codex-responses.d.ts +31 -2
package/dist/types/providers/openai-completions-compat.d.ts +2 -25
package/dist/types/providers/openai-completions.d.ts +2 -10
package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
package/dist/types/providers/openai-responses-server.d.ts +2 -2
package/dist/types/providers/openai-responses-shared.d.ts +49 -9
package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
package/dist/types/providers/openai-responses.d.ts +13 -4
package/dist/types/providers/prometheus-native-client.d.ts +9 -0
package/dist/types/providers/prometheus-native-server.d.ts +4 -3
package/dist/types/providers/transform-messages.d.ts +1 -2
package/dist/types/rate-limit-utils.d.ts +3 -2
package/dist/types/registry/aimlapi.d.ts +4 -0
package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
package/dist/types/registry/amazon-bedrock.d.ts +5 -0
package/dist/types/registry/anthropic.d.ts +10 -0
package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
package/dist/types/registry/cerebras.d.ts +7 -0
package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
package/dist/types/registry/cursor.d.ts +7 -0
package/dist/types/registry/deepseek.d.ts +8 -0
package/dist/types/registry/derived.d.ts +5 -0
package/dist/types/registry/firepass.d.ts +16 -0
package/dist/types/registry/fireworks.d.ts +7 -0
package/dist/types/registry/github-copilot.d.ts +7 -0
package/dist/types/registry/gitlab-duo.d.ts +9 -0
package/dist/types/registry/google-antigravity.d.ts +9 -0
package/dist/types/registry/google-gemini-cli.d.ts +9 -0
package/dist/types/registry/google-vertex.d.ts +5 -0
package/dist/types/registry/google.d.ts +4 -0
package/dist/types/registry/groq.d.ts +4 -0
package/dist/types/registry/huggingface.d.ts +7 -0
package/dist/types/registry/index.d.ts +4 -0
package/dist/types/registry/kagi.d.ts +14 -0
package/dist/types/registry/kilo.d.ts +7 -0
package/dist/types/registry/kimi-code.d.ts +7 -0
package/dist/types/registry/litellm.d.ts +13 -0
package/dist/types/registry/lm-studio.d.ts +8 -0
package/dist/types/registry/minimax-code-cn.d.ts +6 -0
package/dist/types/registry/minimax-code.d.ts +6 -0
package/dist/types/registry/minimax.d.ts +4 -0
package/dist/types/registry/mistral.d.ts +4 -0
package/dist/types/registry/moonshot.d.ts +7 -0
package/dist/types/registry/nanogpt.d.ts +7 -0
package/dist/types/registry/nvidia.d.ts +7 -0
package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
package/dist/types/registry/ollama-cloud.d.ts +7 -0
package/dist/types/registry/ollama.d.ts +12 -0
package/dist/types/registry/openai-codex-device.d.ts +8 -0
package/dist/types/registry/openai-codex.d.ts +9 -0
package/dist/types/registry/openai.d.ts +4 -0
package/dist/types/registry/opencode-go.d.ts +6 -0
package/dist/types/registry/opencode-zen.d.ts +6 -0
package/dist/types/registry/openrouter.d.ts +13 -0
package/dist/types/registry/parallel.d.ts +14 -0
package/dist/types/registry/perplexity.d.ts +7 -0
package/dist/types/registry/qianfan.d.ts +7 -0
package/dist/types/registry/qwen-portal.d.ts +7 -0
package/dist/types/registry/registry.d.ts +272 -0
package/dist/types/registry/synthetic.d.ts +6 -0
package/dist/types/registry/tavily.d.ts +14 -0
package/dist/types/registry/together.d.ts +6 -0
package/dist/types/registry/types.d.ts +51 -0
package/dist/types/registry/venice.d.ts +13 -0
package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
package/dist/types/registry/vllm.d.ts +7 -0
package/dist/types/registry/wafer-pass.d.ts +6 -0
package/dist/types/registry/wafer-serverless.d.ts +6 -0
package/dist/types/registry/xai-oauth.d.ts +7 -0
package/dist/types/registry/xai.d.ts +4 -0
package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
package/dist/types/registry/xiaomi.d.ts +6 -0
package/dist/types/registry/zai.d.ts +7 -0
package/dist/types/registry/zenmux.d.ts +7 -0
package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
package/dist/types/stream.d.ts +9 -1
package/dist/types/types.d.ts +56 -295
package/dist/types/usage/google-antigravity.d.ts +15 -1
package/dist/types/usage/openai-codex-reset.d.ts +79 -0
package/dist/types/usage/openai-codex.d.ts +1 -0
package/dist/types/usage.d.ts +77 -4
package/dist/types/utils/abort.d.ts +6 -0
package/dist/types/utils/event-stream.d.ts +2 -0
package/dist/types/utils/http-inspector.d.ts +0 -1
package/dist/types/utils/idle-iterator.d.ts +35 -0
package/dist/types/utils/openai-http.d.ts +58 -0
package/dist/types/utils/request-debug.d.ts +3 -0
package/dist/types/utils/retry-after.d.ts +1 -0
package/dist/types/utils/schema/fields.d.ts +5 -0
package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
package/dist/types/utils/schema/stamps.d.ts +7 -15
package/dist/types/utils/sse-debug.d.ts +0 -5
package/dist/types/utils/stream-markup-healing.d.ts +2 -0
package/dist/types/utils.d.ts +1 -5
package/package.json +17 -29
package/src/auth-broker/remote-store.ts +10 -1
package/src/auth-broker/snapshot-cache.ts +1 -1
package/src/auth-broker/wire-schemas.ts +1 -1
package/src/auth-gateway/http.ts +1 -1
package/src/auth-gateway/server.ts +95 -30
package/src/auth-gateway/types.ts +10 -2
package/src/auth-retry.ts +238 -0
package/src/auth-storage.ts +935 -430
package/src/errors.ts +32 -0
package/src/index.ts +9 -14
package/src/provider-details.ts +1 -1
package/src/providers/__tests__/google-auth.test.ts +144 -0
package/src/providers/amazon-bedrock.ts +70 -40
package/src/providers/anthropic-client.ts +15 -13
package/src/providers/anthropic-messages-server-schema.ts +17 -7
package/src/providers/anthropic-messages-server.ts +88 -20
package/src/providers/anthropic-wire.ts +4 -3
package/src/providers/anthropic.ts +1234 -621
package/src/providers/aws-credentials.ts +47 -5
package/src/providers/aws-eventstream.ts +5 -0
package/src/providers/azure-openai-responses.ts +117 -67
package/src/providers/cursor.ts +30 -30
package/src/providers/github-copilot-headers.ts +1 -1
package/src/providers/gitlab-duo.ts +36 -29
package/src/providers/google-auth.ts +71 -8
package/src/providers/google-gemini-cli.ts +118 -22
package/src/providers/google-shared.ts +163 -43
package/src/providers/google-types.ts +10 -1
package/src/providers/kimi.ts +1 -1
package/src/providers/mock.ts +11 -3
package/src/providers/ollama.ts +64 -7
package/src/providers/openai-anthropic-shim.ts +17 -8
package/src/providers/openai-chat-server-schema.ts +9 -3
package/src/providers/openai-chat-server.ts +82 -16
package/src/providers/openai-chat-wire.ts +847 -0
package/src/providers/openai-codex/request-transformer.ts +129 -34
package/src/providers/openai-codex/response-handler.ts +22 -1
package/src/providers/openai-codex-responses.ts +699 -247
package/src/providers/openai-completions-compat.ts +8 -308
package/src/providers/openai-completions.ts +416 -267
package/src/providers/openai-responses-server-schema.ts +15 -9
package/src/providers/openai-responses-server.ts +162 -114
package/src/providers/openai-responses-shared.ts +320 -82
package/src/providers/openai-responses-wire.ts +6391 -0
package/src/providers/openai-responses.ts +382 -176
package/src/providers/prometheus-native-client.ts +27 -11
package/src/providers/prometheus-native-server.ts +44 -17
package/src/providers/transform-messages.ts +311 -120
package/src/providers/vision-guard.ts +5 -3
package/src/rate-limit-utils.ts +13 -3
package/src/registry/aimlapi.ts +6 -0
package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
package/src/registry/amazon-bedrock.ts +22 -0
package/src/registry/anthropic.ts +26 -0
package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
package/src/{utils/oauth → registry}/cerebras.ts +8 -1
package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
package/src/registry/cursor.ts +20 -0
package/src/{utils/oauth → registry}/deepseek.ts +9 -17
package/src/registry/derived.ts +9 -0
package/src/{utils/oauth → registry}/firepass.ts +10 -2
package/src/{utils/oauth → registry}/fireworks.ts +8 -1
package/src/registry/github-copilot.ts +22 -0
package/src/registry/gitlab-duo.ts +19 -0
package/src/registry/google-antigravity.ts +21 -0
package/src/registry/google-gemini-cli.ts +21 -0
package/src/registry/google-vertex.ts +38 -0
package/src/registry/google.ts +6 -0
package/src/registry/groq.ts +6 -0
package/src/{utils/oauth → registry}/huggingface.ts +8 -19
package/src/registry/index.ts +4 -0
package/src/{utils/oauth → registry}/kagi.ts +9 -11
package/src/{utils/oauth → registry}/kilo.ts +11 -6
package/src/registry/kimi-code.ts +17 -0
package/src/{utils/oauth → registry}/litellm.ts +8 -12
package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
package/src/registry/minimax-code-cn.ts +12 -0
package/src/registry/minimax-code.ts +12 -0
package/src/registry/minimax.ts +6 -0
package/src/registry/mistral.ts +6 -0
package/src/{utils/oauth → registry}/moonshot.ts +8 -9
package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
package/src/{utils/oauth → registry}/nvidia.ts +8 -18
package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
package/src/{utils → registry}/oauth/anthropic.ts +38 -17
package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
package/src/registry/oauth/gitlab-duo.ts +198 -0
package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
package/src/registry/oauth/index.ts +164 -0
package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
package/src/{utils → registry}/oauth/types.ts +7 -51
package/src/{utils → registry}/oauth/wafer.ts +1 -1
package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
package/src/{utils/oauth → registry}/ollama.ts +8 -13
package/src/registry/openai-codex-device.ts +18 -0
package/src/registry/openai-codex.ts +19 -0
package/src/registry/openai.ts +6 -0
package/src/registry/opencode-go.ts +12 -0
package/src/registry/opencode-zen.ts +12 -0
package/src/{utils/oauth → registry}/openrouter.ts +10 -2
package/src/{utils/oauth → registry}/parallel.ts +9 -11
package/src/registry/perplexity.ts +13 -0
package/src/{utils/oauth → registry}/qianfan.ts +8 -17
package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
package/src/registry/registry.ts +149 -0
package/src/{utils/oauth → registry}/synthetic.ts +7 -1
package/src/{utils/oauth → registry}/tavily.ts +10 -12
package/src/{utils/oauth → registry}/together.ts +7 -1
package/src/registry/types.ts +56 -0
package/src/{utils/oauth → registry}/venice.ts +8 -12
package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
package/src/{utils/oauth → registry}/vllm.ts +9 -16
package/src/registry/wafer-pass.ts +12 -0
package/src/registry/wafer-serverless.ts +12 -0
package/src/registry/xai-oauth.ts +17 -0
package/src/registry/xai.ts +6 -0
package/src/registry/xiaomi-token-plan-ams.ts +12 -0
package/src/registry/xiaomi-token-plan-cn.ts +12 -0
package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
package/src/registry/xiaomi.ts +12 -0
package/src/{utils/oauth → registry}/zai.ts +10 -22
package/src/{utils/oauth → registry}/zenmux.ts +8 -1
package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
package/src/stream.ts +229 -199
package/src/types.ts +63 -384
package/src/usage/claude.ts +4 -2
package/src/usage/github-copilot.ts +4 -2
package/src/usage/google-antigravity.ts +196 -28
package/src/usage/kimi.ts +1 -1
package/src/usage/minimax-code.ts +5 -6
package/src/usage/openai-codex-reset.ts +174 -0
package/src/usage/openai-codex.ts +19 -2
package/src/usage/zai.ts +2 -1
package/src/usage.ts +93 -4
package/src/utils/abort.ts +14 -0
package/src/utils/event-stream.ts +17 -0
package/src/utils/http-inspector.ts +4 -12
package/src/utils/idle-iterator.ts +250 -79
package/src/utils/openai-http.ts +157 -0
package/src/utils/request-debug.ts +67 -19
package/src/utils/retry-after.ts +1 -1
package/src/utils/retry.ts +23 -2
package/src/utils/schema/CONSTRAINTS.md +4 -2
package/src/utils/schema/fields.ts +16 -0
package/src/utils/schema/json-schema-validator.ts +19 -1
package/src/utils/schema/normalize.ts +80 -8
package/src/utils/schema/stamps.ts +22 -10
package/src/utils/schema/wire.ts +2 -2
package/src/utils/sse-debug.ts +0 -271
package/src/utils/stream-markup-healing.ts +50 -8
package/src/utils/validation.ts +49 -13
package/src/utils.ts +2 -26
package/dist/types/model-cache.d.ts +0 -17
package/dist/types/model-manager.d.ts +0 -64
package/dist/types/model-thinking.d.ts +0 -100
package/dist/types/models.d.ts +0 -12
package/dist/types/provider-models/bundled-references.d.ts +0 -4
package/dist/types/provider-models/descriptors.d.ts +0 -50
package/dist/types/provider-models/google.d.ts +0 -24
package/dist/types/provider-models/index.d.ts +0 -5
package/dist/types/provider-models/ollama.d.ts +0 -7
package/dist/types/provider-models/openai-compat.d.ts +0 -323
package/dist/types/provider-models/special.d.ts +0 -16
package/dist/types/utils/discovery/antigravity.d.ts +0 -61
package/dist/types/utils/discovery/codex.d.ts +0 -38
package/dist/types/utils/discovery/cursor.d.ts +0 -23
package/dist/types/utils/discovery/gemini.d.ts +0 -25
package/dist/types/utils/discovery/index.d.ts +0 -4
package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
package/dist/types/utils/oauth/cerebras.d.ts +0 -1
package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/deepseek.d.ts +0 -10
package/dist/types/utils/oauth/firepass.d.ts +0 -1
package/dist/types/utils/oauth/fireworks.d.ts +0 -1
package/dist/types/utils/oauth/huggingface.d.ts +0 -19
package/dist/types/utils/oauth/kagi.d.ts +0 -17
package/dist/types/utils/oauth/kilo.d.ts +0 -5
package/dist/types/utils/oauth/litellm.d.ts +0 -18
package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
package/dist/types/utils/oauth/moonshot.d.ts +0 -1
package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
package/dist/types/utils/oauth/nvidia.d.ts +0 -18
package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
package/dist/types/utils/oauth/ollama.d.ts +0 -18
package/dist/types/utils/oauth/openrouter.d.ts +0 -1
package/dist/types/utils/oauth/parallel.d.ts +0 -17
package/dist/types/utils/oauth/qianfan.d.ts +0 -17
package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
package/dist/types/utils/oauth/synthetic.d.ts +0 -1
package/dist/types/utils/oauth/tavily.d.ts +0 -17
package/dist/types/utils/oauth/together.d.ts +0 -1
package/dist/types/utils/oauth/venice.d.ts +0 -18
package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/vllm.d.ts +0 -16
package/dist/types/utils/oauth/zai.d.ts +0 -18
package/dist/types/utils/oauth/zenmux.d.ts +0 -1
package/dist/types/utils/oauth/zhipu.d.ts +0 -18
package/src/model-cache.ts +0 -129
package/src/model-manager.ts +0 -469
package/src/model-thinking.ts +0 -756
package/src/models.json +0 -60287
package/src/models.json.d.ts +0 -9
package/src/models.ts +0 -56
package/src/provider-models/bundled-references.ts +0 -38
package/src/provider-models/descriptors.ts +0 -364
package/src/provider-models/google.ts +0 -88
package/src/provider-models/index.ts +0 -5
package/src/provider-models/ollama.ts +0 -153
package/src/provider-models/openai-compat.ts +0 -2904
package/src/provider-models/special.ts +0 -67
package/src/utils/discovery/antigravity.ts +0 -261
package/src/utils/discovery/codex.ts +0 -371
package/src/utils/discovery/cursor.ts +0 -306
package/src/utils/discovery/gemini.ts +0 -248
package/src/utils/discovery/index.ts +0 -4
package/src/utils/discovery/openai-compatible.ts +0 -224
package/src/utils/oauth/gitlab-duo.ts +0 -123
package/src/utils/oauth/index.ts +0 -502
/package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
/package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
/package/src/{utils → registry}/oauth/callback-server.ts +0 -0
/package/src/{utils → registry}/oauth/cursor.ts +0 -0
/package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
/package/src/{utils → registry}/oauth/kimi.ts +0 -0
/package/src/{utils → registry}/oauth/oauth.html +0 -0
/package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
/package/src/{utils → registry}/oauth/opencode.ts +0 -0
/package/src/{utils → registry}/oauth/perplexity.ts +0 -0
/package/src/{utils → registry}/oauth/pkce.ts +0 -0

package/src/stream.ts CHANGED

@@ -1,15 +1,17 @@
-import * as fs from "node:fs";
-import * as os from "node:os";
-import * as path from "node:path";
-import { $env, $pickenv, extractHttpStatusFromError } from "@prometheus-ai/utils";
-import { getCustomApi } from "./api-registry";
-import type { Effort } from "./model-thinking";
+import type { Effort } from "@prometheus-ai/catalog/effort";
+import { isVertexExpressOpenAIUrl, isVertexRawPredictUrl } from "@prometheus-ai/catalog/hosts";
 import {
 	mapEffortToAnthropicAdaptiveEffort,
 	mapEffortToGoogleThinkingLevel,
-	modelOmitsReasoningEffort,
+	minimumSupportedEffort,
 	requireSupportedEffort,
-} from "./model-thinking";
+	resolveWireModelId,
+} from "@prometheus-ai/catalog/model-thinking";
+import { CATALOG_PROVIDERS, type ProviderCatalogEntry } from "@prometheus-ai/catalog/provider-models";
+import { $env, $pickenv, extractHttpStatusFromError } from "@prometheus-ai/utils";
+import { getCustomApi } from "./api-registry";
+import { type ApiKeyResolver, AUTH_RETRY_STEPS, isApiKeyResolver, resolveRetryKey } from "./auth-retry";
+import { ProviderHttpError } from "./errors";
 import type { BedrockOptions } from "./providers/amazon-bedrock";
 import type { AnthropicOptions } from "./providers/anthropic";
 import type { CursorOptions } from "./providers/cursor";
@@ -46,6 +48,7 @@ import {
 import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
 import { streamXAIResponses } from "./providers/xai-responses";
 import { isUsageLimitError } from "./rate-limit-utils";
+import { PROVIDER_REGISTRY } from "./registry";
 import type {
 	Api,
 	AssistantMessage,
@@ -60,29 +63,13 @@ import type {
 	ToolChoice,
 } from "./types";
 import { AssistantMessageEventStream } from "./utils/event-stream";
-import { isFoundryEnabled } from "./utils/foundry";
 import { withRequestDebugFetch } from "./utils/request-debug";
-let cachedVertexAdcCredentialsExists: boolean | null = null;
-function hasVertexAdcCredentials(): boolean {
-	if (cachedVertexAdcCredentialsExists === null) {
-		const gacPath = $env.GOOGLE_APPLICATION_CREDENTIALS;
-		if (gacPath) {
-			cachedVertexAdcCredentialsExists = fs.existsSync(gacPath);
-		} else {
-			cachedVertexAdcCredentialsExists = fs.existsSync(
-				path.join(os.homedir(), ".config", "gcloud", "application_default_credentials.json"),
-			);
-		}
-	}
-	return cachedVertexAdcCredentialsExists;
-}
 function isGoogleVertexAuthenticatedModel(model: Model<Api>): boolean {
 	return (
 		model.provider === "google-vertex" &&
-		((model.api === "openai-completions" && model.baseUrl.includes("/endpoints/openapi")) ||
-			(model.api === "anthropic-messages" && model.baseUrl.includes(":streamRawPredict")))
+		((model.api === "openai-completions" && isVertexExpressOpenAIUrl(model.baseUrl)) ||
+			(model.api === "anthropic-messages" && isVertexRawPredictUrl(model.baseUrl)))
 	);
 }
@@ -94,7 +81,7 @@ function createVertexAuthenticatedFetch(options: StreamOptions | undefined): Fet
 		headers.set("Authorization", `Bearer ${token}`);
 		const rewritten = resolveVertexRequest(input);
 		const url = rewritten instanceof Request ? rewritten.url : rewritten.toString();
-		if (isVertexAnthropicRawPredict(url)) {
+		if (isVertexRawPredictUrl(url)) {
 			const bodyText = await readVertexRequestBody(rewritten, init);
 			const transformed = transformVertexAnthropicBody(bodyText);
 			return baseFetch(url, {
@@ -109,10 +96,6 @@ function createVertexAuthenticatedFetch(options: StreamOptions | undefined): Fet
 	return Object.assign(vertexFetch, baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {});
 }
-function isVertexAnthropicRawPredict(url: string): boolean {
-	return url.includes(":streamRawPredict") || url.includes(":rawPredict");
-}
 async function readVertexRequestBody(input: string | URL | Request, init: RequestInit | undefined): Promise<string> {
 	if (input instanceof Request) return input.clone().text();
 	const body = init?.body;
@@ -174,101 +157,35 @@ function resolveVertexRequest(input: string | URL | Request): string | URL | Req
 type KeyResolver = string | (() => string | undefined);
-const serviceProviderMap: Record<string, KeyResolver> = {
-	"alibaba-coding-plan": "ALIBABA_CODING_PLAN_API_KEY",
-	openai: "OPENAI_API_KEY",
-	google: "GEMINI_API_KEY",
-	groq: "GROQ_API_KEY",
-	cerebras: "CEREBRAS_API_KEY",
-	xai: "XAI_API_KEY",
-	"xai-oauth": () => $pickenv("XAI_OAUTH_TOKEN", "XAI_API_KEY"),
-	fireworks: "FIREWORKS_API_KEY",
-	firepass: "FIREPASS_API_KEY",
-	"wafer-pass": "WAFER_PASS_API_KEY",
-	"wafer-serverless": "WAFER_SERVERLESS_API_KEY",
-	openrouter: "OPENROUTER_API_KEY",
-	kilo: "KILO_API_KEY",
-	"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
-	zai: "ZAI_API_KEY",
-	"zhipu-coding-plan": "ZHIPU_API_KEY",
-	mistral: "MISTRAL_API_KEY",
-	minimax: "MINIMAX_API_KEY",
-	"minimax-code": "MINIMAX_CODE_API_KEY",
-	"minimax-code-cn": "MINIMAX_CODE_CN_API_KEY",
-	"opencode-go": "OPENCODE_API_KEY",
-	"opencode-zen": "OPENCODE_API_KEY",
-	cursor: "CURSOR_ACCESS_TOKEN",
-	deepseek: "DEEPSEEK_API_KEY",
-	"openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
+const LEGACY_ENV_KEYS: Record<string, KeyResolver> = {
+	// Non-provider / search-tool keys and API-name keys not modeled as registry provider defs.
 	"azure-openai-responses": "AZURE_OPENAI_API_KEY",
+	"llama.cpp": "LLAMA_CPP_API_KEY",
 	exa: "EXA_API_KEY",
 	jina: "JINA_API_KEY",
 	brave: "BRAVE_API_KEY",
-	perplexity: "PERPLEXITY_API_KEY",
-	tavily: "TAVILY_API_KEY",
-	parallel: "PARALLEL_API_KEY",
-	kagi: "KAGI_API_KEY",
-	// GitHub Copilot uses GitHub personal access token
-	"github-copilot": () => $pickenv("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
-	// Foundry mode optionally switches Anthropic auth to enterprise gateway credentials.
-	anthropic: () =>
-		isFoundryEnabled()
-			? $pickenv("ANTHROPIC_FOUNDRY_API_KEY", "ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY")
-			: $pickenv("ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY"),
-	"gitlab-duo": "GITLAB_TOKEN",
-	// Vertex AI supports either GOOGLE_CLOUD_API_KEY or Application Default Credentials.
-	"google-vertex": () => {
-		if ($env.GOOGLE_CLOUD_API_KEY) {
-			return $env.GOOGLE_CLOUD_API_KEY;
-		}
-		const hasCredentials = hasVertexAdcCredentials();
-		const hasProject = !!($env.GOOGLE_CLOUD_PROJECT || $env.GCP_PROJECT || $env.GCLOUD_PROJECT);
-		const hasLocation = !!($env.GOOGLE_VERTEX_LOCATION || $env.GOOGLE_CLOUD_LOCATION || $env.VERTEX_LOCATION);
-		if (hasCredentials && hasProject && hasLocation) {
-			return "<authenticated>";
-		}
-	},
-	// Amazon Bedrock supports multiple credential sources:
-	// 1. AWS_BEARER_TOKEN_BEDROCK - Bedrock API keys (bearer token)
-	// 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY - standard IAM keys
-	// 3. AWS_PROFILE - named profile from ~/.aws/credentials
-	// 4. AWS_CONTAINER_CREDENTIALS_* - ECS/Task IAM role credentials
-	// 5. AWS_WEB_IDENTITY_TOKEN_FILE + AWS_ROLE_ARN - IRSA (EKS) web identity
-	"amazon-bedrock": () => {
-		const hasEcsCredentials =
-			!!$env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI || !!$env.AWS_CONTAINER_CREDENTIALS_FULL_URI;
-		const hasWebIdentity = !!$env.AWS_WEB_IDENTITY_TOKEN_FILE && !!$env.AWS_ROLE_ARN;
-		if (
-			$env.AWS_PROFILE ||
-			($env.AWS_ACCESS_KEY_ID && $env.AWS_SECRET_ACCESS_KEY) ||
-			$env.AWS_BEARER_TOKEN_BEDROCK ||
-			hasEcsCredentials ||
-			hasWebIdentity
-		) {
-			return "<authenticated>";
-		}
-	},
-	synthetic: "SYNTHETIC_API_KEY",
-	"cloudflare-ai-gateway": "CLOUDFLARE_AI_GATEWAY_API_KEY",
-	huggingface: () => $pickenv("HUGGINGFACE_HUB_TOKEN", "HF_TOKEN"),
-	litellm: "LITELLM_API_KEY",
-	moonshot: "MOONSHOT_API_KEY",
-	nvidia: "NVIDIA_API_KEY",
-	nanogpt: "NANO_GPT_API_KEY",
-	"lm-studio": "LM_STUDIO_API_KEY",
-	ollama: "OLLAMA_API_KEY",
-	"ollama-cloud": "OLLAMA_CLOUD_API_KEY",
-	"llama.cpp": "LLAMA_CPP_API_KEY",
-	qianfan: "QIANFAN_API_KEY",
-	"qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
-	together: "TOGETHER_API_KEY",
-	zenmux: "ZENMUX_API_KEY",
-	venice: "VENICE_API_KEY",
-	vllm: "VLLM_API_KEY",
-	xiaomi: "XIAOMI_API_KEY",
-	"xiaomi-token-plan-sgp": "XIAOMI_TOKEN_PLAN_SGP_API_KEY",
-	"xiaomi-token-plan-ams": "XIAOMI_TOKEN_PLAN_AMS_API_KEY",
-	"xiaomi-token-plan-cn": "XIAOMI_TOKEN_PLAN_CN_API_KEY",
+};
+/**
+ * Env fallbacks derived from the catalog table — the single source for plain
+ * provider env-var names. Registry defs override with computed resolvers
+ * (Foundry/ADC/Bedrock probes); legacy non-provider keys merge last.
+ */
+const CATALOG_ENTRY_ENV_KEYS = (CATALOG_PROVIDERS as readonly ProviderCatalogEntry[]).flatMap(provider => {
+	const envVars = provider.envVars;
+	if (!envVars || envVars.length === 0) return [];
+	const resolver: KeyResolver = envVars.length === 1 ? envVars[0] : () => $pickenv(...envVars);
+	return [[provider.id, resolver] as [string, KeyResolver]];
+});
+const serviceProviderMap: Record<string, KeyResolver> = {
+	...Object.fromEntries(CATALOG_ENTRY_ENV_KEYS),
+	...Object.fromEntries(
+		PROVIDER_REGISTRY.flatMap(provider =>
+			provider.envKeys != null ? [[provider.id, provider.envKeys] as [string, KeyResolver]] : [],
+		),
+	),
+	...LEGACY_ENV_KEYS,
 };
 /**
@@ -285,6 +202,18 @@ export function getEnvApiKey(provider: string): string | undefined {
 	return resolver?.();
 }
+/**
+ * Name of the environment variable that backs `getEnvApiKey` for a provider,
+ * when that provider maps to a single named variable (e.g. `github-copilot` →
+ * `COPILOT_GITHUB_TOKEN`). Returns undefined for providers whose env fallback
+ * is computed (multi-var pickers, Vertex ADC / Bedrock probes, …) since no
+ * single variable name describes the source.
+ */
+export function getEnvApiKeyName(provider: string): string | undefined {
+	const resolver = serviceProviderMap[provider];
+	return typeof resolver === "string" ? resolver : undefined;
+}
 /**
  * Enumerate every provider that has an env-var fallback for `getEnvApiKey`.
  * Used by `prometheus auth-broker migrate --include-env` to discover env-sourced keys
@@ -424,11 +353,10 @@ function isRetryableUpstreamError(error: unknown, status: number | undefined, me
 	return !!message && isUsageLimitError(message);
 }
-function createAssistantAuthError(message: AssistantMessage): Error & { status?: number } {
-	const error: Error & { status?: number } = new Error(message.errorMessage ?? "Provider authentication failed");
+function createAssistantAuthError(message: AssistantMessage): Error {
+	const text = message.errorMessage ?? "Provider authentication failed";
 	const status = extractStatusFromAssistantError(message);
-	if (status !== undefined) error.status = status;
-	return error;
+	return status === undefined ? new Error(text) : new ProviderHttpError(text, status);
 }
 function emitBufferedEvents(stream: AssistantMessageEventStream, events: AssistantMessageEvent[]): void {
@@ -437,18 +365,39 @@ function emitBufferedEvents(stream: AssistantMessageEventStream, events: Assista
 	}
 }
+function createLegacyAuthErrorResolver(
+	provider: string,
+	initialKey: string,
+	onAuthError: NonNullable<SimpleStreamOptions["onAuthError"]>,
+): ApiKeyResolver {
+	let currentKey = initialKey;
+	return async ({ error }) => {
+		if (error === undefined) return currentKey;
+		const nextKey = (await onAuthError(provider, currentKey, error)) || undefined;
+		if (nextKey !== undefined) currentKey = nextKey;
+		return nextKey;
+	};
+}
 export function streamSimple<TApi extends Api>(
 	model: Model<TApi>,
 	context: Context,
 	options?: SimpleStreamOptions,
 ): AssistantMessageEventStream {
 	const requestOptions = withRequestDebugFetch(options);
-	const retryApiKey = requestOptions?.onAuthError
-		? (requestOptions.apiKey ?? getEnvApiKey(model.provider))
-		: undefined;
-	if (retryApiKey) {
+	const legacyAuthErrorResolver =
+		typeof requestOptions?.apiKey === "string" && requestOptions.onAuthError
+			? createLegacyAuthErrorResolver(model.provider, requestOptions.apiKey, requestOptions.onAuthError)
+			: undefined;
+	const apiKeyResolver = isApiKeyResolver(requestOptions?.apiKey) ? requestOptions.apiKey : legacyAuthErrorResolver;
+	if (apiKeyResolver) {
 		const outer = new AssistantMessageEventStream();
-		const onAuthError = requestOptions!.onAuthError!;
+		const signal = requestOptions?.signal;
+		// One inner attempt against a resolved string key. When
+		// `captureAuthFailure` is set, a retryable auth error that arrives before
+		// any replay-unsafe event is buffered and returned (so the caller can
+		// retry with a fresh key) instead of surfaced. The terminal attempt
+		// clears the flag and emits whatever it gets.
 		const runAttempt = async (apiKey: string, captureAuthFailure: boolean): Promise<AuthRetryFailure | undefined> => {
 			const bufferedEvents: AssistantMessageEvent[] = [];
 			let emittedReplayUnsafeEvent = false;
@@ -458,7 +407,8 @@ export function streamSimple<TApi extends Api>(
 			};
 			try {
-				const inner = streamSimple(model, context, { ...requestOptions, apiKey, onAuthError: undefined });
+				const innerOptions: SimpleStreamOptions = { ...requestOptions, apiKey, onAuthError: undefined };
+				const inner = streamSimple(model, context, innerOptions);
 				for await (const event of inner) {
 					if (!emittedReplayUnsafeEvent && event.type === "start") {
 						bufferedEvents.push(event);
@@ -510,19 +460,43 @@ export function streamSimple<TApi extends Api>(
 		};
 		void (async () => {
-			const failure = await runAttempt(retryApiKey, true);
-			if (!failure) return;
-			let nextKey: string | undefined;
+			let lastKey: string | undefined;
 			try {
-				nextKey = await onAuthError(model.provider, retryApiKey, failure.error);
-			} catch {
-				nextKey = undefined;
+				lastKey = (await apiKeyResolver({ lastChance: false, error: undefined, signal })) || undefined;
+			} catch (error) {
+				// A thrown resolver is a broker/OAuth/network failure, not a missing
+				// key — surface the cause instead of masking it as "No API key".
+				outer.fail(
+					new Error(
+						`Failed to resolve API key for provider ${model.provider}: ${error instanceof Error ? error.message : String(error)}`,
+						{ cause: error },
+					),
+				);
+				return;
 			}
-			if (!nextKey || nextKey === retryApiKey) {
-				emitFailure(failure);
+			if (lastKey === undefined) {
+				outer.fail(new Error(`No API key for provider: ${model.provider}`));
 				return;
 			}
-			await runAttempt(nextKey, false);
+			let failure = await runAttempt(lastKey, true);
+			if (!failure) return;
+			// a/b/c policy: refresh the same account (lastChance=false), then
+			// switch to a sibling (lastChance=true). A step is skipped when the
+			// resolver yields the same key it just tried or `undefined`; the
+			// final step's attempt clears the capture flag so it emits directly.
+			for (let step = 0; step < AUTH_RETRY_STEPS.length; step++) {
+				// Caller aborted between attempts: don't mint a fresh token or fire
+				// another doomed request — emit the captured failure instead.
+				if (signal?.aborted) break;
+				const nextKey = await resolveRetryKey(apiKeyResolver, AUTH_RETRY_STEPS[step]!, failure.error, signal);
+				if (nextKey === undefined || nextKey === lastKey) continue;
+				lastKey = nextKey;
+				const isLastStep = step === AUTH_RETRY_STEPS.length - 1;
+				const next = await runAttempt(nextKey, !isLastStep);
+				if (!next) return;
+				failure = next;
+			}
+			emitFailure(failure);
 		})();
 		return outer;
 	}
@@ -553,7 +527,10 @@ export function streamSimple<TApi extends Api>(
 		return stream(model, context, providerOptions);
 	}
-	const apiKey = requestOptions?.apiKey || getEnvApiKey(model.provider);
+	// The resolver form is handled by the wrapper above; only a static string
+	// key reaches this point.
+	const apiKey =
+		(typeof requestOptions?.apiKey === "string" ? requestOptions.apiKey : undefined) || getEnvApiKey(model.provider);
 	if (!apiKey) {
 		throw new Error(`No API key for provider: ${model.provider}`);
 	}
@@ -599,6 +576,16 @@ export async function completeSimple<TApi extends Api>(
 }
 const MIN_OUTPUT_TOKENS = 1024;
+// Fallback total output cap for models whose catalog entry has no maxTokens.
+const OUTPUT_CAP_WHEN_UNKNOWN = 64_000;
+function maxTokensWithThinkingBudget( // Computes the total token cap (output plus thinking) for a budget-based thinking request.
+	baseMaxTokens: number | undefined, // desired output tokens before thinking is added; undefined when no cap is known
+	modelMaxTokens: number | null, // model limit used as the upper clamp; null disables the clamp
+	thinkingBudget: number, // tokens reserved for thinking, added on top of baseMaxTokens
+): number {
+	const uncappedMaxTokens = baseMaxTokens === undefined ? OUTPUT_CAP_WHEN_UNKNOWN : baseMaxTokens + thinkingBudget; // the fallback is already a total, so thinkingBudget is not added to it
+	return Math.min(uncappedMaxTokens, modelMaxTokens ?? Number.POSITIVE_INFINITY); // never exceed the model limit when one is known
+}
 export const OUTPUT_FALLBACK_BUFFER = 4000;
 const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PROMETHEUS_NO_INTERLEAVED_THINKING !== "1";
@@ -697,24 +684,53 @@ function resolveOpenAiReasoningEffort<TApi extends Api>(
 ): Effort | undefined {
 	const reasoning = options?.reasoning;
 	if (!reasoning || !model.reasoning) return undefined;
-	// Models with compat.supportsReasoningEffort: false reason natively but
-	// reject the wire effort param. The wire-side omitReasoningEffort gate
-	// (providers/xai-responses.ts:78) is the actual strip; returning
-	// undefined here avoids a redundant requireSupportedEffort throw that
-	// would defeat the gate and surface a confusing
-	// "Compaction failed: Thinking effort high is not supported by..." to
-	// the user.
-	if (modelOmitsReasoningEffort(model)) return undefined;
+	// Models that reason natively but expose no effort dial carry
+	// `thinking: undefined` (baked at build time from
+	// `compat.supportsReasoningEffort: false` on openai-responses*). The
+	// wire-side omitReasoningEffort gate (providers/xai-responses.ts:78) is the
+	// actual strip; returning undefined here avoids a redundant
+	// requireSupportedEffort throw that would defeat the gate and surface a
+	// confusing "Compaction failed: Thinking effort high is not supported
+	// by..." to the user.
+	if (!model.thinking) return undefined;
 	return requireSupportedEffort(model, reasoning);
 }
 const castApi = <TApi extends Api>(api: OptionsForApi<TApi>): OptionsForApi<Api> => api as OptionsForApi<Api>;
-function mapOptionsForApi<TApi extends Api>(
+/**
+ * Mandatory-reasoning endpoints (`thinking.requiresEffort`) reject disabled
+ * or omitted thinking ("Reasoning is mandatory for this endpoint and cannot
+ * be disabled") — clamp to the lowest supported effort instead.
+ * `suppressWhenOff` models handle off provider-side via explicit wire
+ * suppression. Collapsed pairs interplay: pair derivation strips member
+ * flags (off routes to a bare SKU that CAN disable), while identity backfill
+ * re-flags pairs whose logical id is itself mandatory (Gemini 3.x) — there
+ * the clamp wins and the floored effort routes to the thinking SKU.
+ *
+ * @param model - Model whose `reasoning` and `thinking` flags decide whether
+ * the clamp applies.
+ * @param options - Caller-supplied stream options; never mutated.
+ * @returns `options` itself when no clamp applies or when
+ * `minimumSupportedEffort(model)` yields `undefined`; otherwise a shallow
+ * copy with `reasoning` set to that minimum effort and `disableReasoning`
+ * reset to `undefined`.
+ */
+function normalizeMandatoryReasoningOptions<TApi extends Api>(
+	model: Model<TApi>,
+	options?: SimpleStreamOptions,
+): SimpleStreamOptions | undefined {
+	if (
+		!model.reasoning || // model does not reason at all
+		!model.thinking?.requiresEffort || // reasoning is not mandatory for this model
+		model.thinking.suppressWhenOff || // "off" is handled by explicit wire suppression instead
+		(options?.reasoning !== undefined && !options.disableReasoning) // caller set an effort and did not disable reasoning
+	) {
+		return options; // nothing to clamp: pass the caller's options through untouched
+	}
+	const floor = minimumSupportedEffort(model);
+	if (floor === undefined) return options; // no minimum effort available: leave the options untouched
+	return { ...options, reasoning: floor, disableReasoning: undefined }; // with undefined options the spread adds nothing, leaving only these two fields
+}
+function mapOptionsForApi<TApi extends Api>(
+	model: Model<TApi>,
+	rawOptions?: SimpleStreamOptions,
 	apiKey?: string,
 ): OptionsForApi<TApi> {
+	const options = normalizeMandatoryReasoningOptions(model, rawOptions);
 	const base = {
 		temperature: options?.temperature,
 		topP: options?.topP,
@@ -722,9 +738,9 @@ function mapOptionsForApi<TApi extends Api>(
 		minP: options?.minP,
 		presencePenalty: options?.presencePenalty,
 		repetitionPenalty: options?.repetitionPenalty,
-		maxTokens: options?.maxTokens ?? model.maxTokens,
+		maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
 		signal: options?.signal,
-		apiKey: apiKey || options?.apiKey,
+		apiKey: apiKey ?? (typeof options?.apiKey === "string" ? options.apiKey : undefined),
 		cacheRetention: options?.cacheRetention,
 		headers: options?.headers,
 		initiatorOverride: options?.initiatorOverride,
@@ -750,6 +766,7 @@ function mapOptionsForApi<TApi extends Api>(
 			if (!reasoning || !model.reasoning) {
 				return castApi<"anthropic-messages">({
 					...base,
+					requestModelId: resolveWireModelId(model, undefined),
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
@@ -761,6 +778,7 @@ function mapOptionsForApi<TApi extends Api>(
 			if (thinkingBudget <= 0) {
 				return castApi<"anthropic-messages">({
 					...base,
+					requestModelId: resolveWireModelId(model, undefined),
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
@@ -774,6 +792,7 @@ function mapOptionsForApi<TApi extends Api>(
 				const effort = mapEffortToAnthropicAdaptiveEffort(model, reasoning);
 				return castApi<"anthropic-messages">({
 					...base,
+					requestModelId: resolveWireModelId(model, reasoning),
 					thinkingEnabled: true,
 					effort,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
@@ -785,6 +804,7 @@ function mapOptionsForApi<TApi extends Api>(
 			if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
 				return castApi<"anthropic-messages">({
 					...base,
+					requestModelId: resolveWireModelId(model, reasoning),
 					thinkingEnabled: true,
 					thinkingBudgetTokens: thinkingBudget,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
@@ -793,8 +813,8 @@ function mapOptionsForApi<TApi extends Api>(
 				});
 			}
-			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
-			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
+			// Caller's maxTokens is desired output, so add thinking budget on top. With no caller/model cap, use a finite total fallback.
+			const maxTokens = maxTokensWithThinkingBudget(base.maxTokens, model.maxTokens, thinkingBudget);
 			// If not enough room for thinking + output, reduce thinking budget
 			if (maxTokens <= thinkingBudget) {
@@ -805,6 +825,7 @@ function mapOptionsForApi<TApi extends Api>(
 			if (thinkingBudget <= 0) {
 				return castApi<"anthropic-messages">({
 					...base,
+					requestModelId: resolveWireModelId(model, undefined),
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
@@ -814,6 +835,7 @@ function mapOptionsForApi<TApi extends Api>(
 				return castApi<"anthropic-messages">({
 					...base,
 					maxTokens,
+					requestModelId: resolveWireModelId(model, reasoning),
 					thinkingEnabled: true,
 					thinkingBudgetTokens: thinkingBudget,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
@@ -837,10 +859,13 @@ function mapOptionsForApi<TApi extends Api>(
 			}
 			const budgetInfo = resolveBedrockThinkingBudget(model as Model<"bedrock-converse-stream">, options);
 			if (!budgetInfo) return bedrockBase as OptionsForApi<TApi>;
-			let maxTokens = bedrockBase.maxTokens ?? model.maxTokens;
+			let maxTokens = bedrockBase.maxTokens ?? model.maxTokens ?? OUTPUT_CAP_WHEN_UNKNOWN;
 			let thinkingBudgets = bedrockBase.thinkingBudgets;
 			if (maxTokens <= budgetInfo.budget) {
-				const desiredMaxTokens = Math.min(model.maxTokens, budgetInfo.budget + MIN_OUTPUT_TOKENS);
+				const desiredMaxTokens = Math.min(
+					model.maxTokens ?? Number.POSITIVE_INFINITY,
+					budgetInfo.budget + MIN_OUTPUT_TOKENS,
+				);
 				if (desiredMaxTokens > maxTokens) {
 					maxTokens = desiredMaxTokens;
 				}
@@ -912,7 +937,7 @@ function mapOptionsForApi<TApi extends Api>(
 					...base,
 					thinking: {
 						enabled: true,
-						level: mapEffortToGoogleThinkingLevel(googleModel, effort),
+						level: mapEffortToGoogleThinkingLevel(effort),
 					},
 					toolChoice: mapGoogleToolChoice(options?.toolChoice),
 				});
@@ -930,53 +955,57 @@ function mapOptionsForApi<TApi extends Api>(
 		case "google-gemini-cli": {
 			const reasoning = options?.reasoning;
-			if (!reasoning || !model.reasoning) {
-				return castApi<"google-gemini-cli">({
-					...base,
-					thinking: { enabled: false },
-					toolChoice: mapGoogleToolChoice(options?.toolChoice),
-				});
-			}
-			const effort = requireSupportedEffort(model, reasoning);
+			const toolChoice = mapGoogleToolChoice(options?.toolChoice);
+			if (reasoning && model.reasoning) {
+				const effort = requireSupportedEffort(model, reasoning);
+				// Gemini 3+ models use thinkingLevel instead of thinkingBudget
+				if (model.thinking?.mode === "google-level") {
+					return castApi<"google-gemini-cli">({
+						...base,
+						requestModelId: resolveWireModelId(model, effort),
+						thinking: {
+							enabled: true,
+							level: mapEffortToGoogleThinkingLevel(effort),
+						},
+						toolChoice,
+					});
+				}
-			// Gemini 3+ models use thinkingLevel instead of thinkingBudget
-			if (model.thinking?.mode === "google-level") {
-				return castApi<"google-gemini-cli">({
-					...base,
-					thinking: {
-						enabled: true,
-						level: mapEffortToGoogleThinkingLevel(model, effort),
-					},
-					toolChoice: mapGoogleToolChoice(options?.toolChoice),
-				});
-			}
+				let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
-			let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
+				// Caller's maxTokens is desired output, so add thinking budget on top. With no caller/model cap, use a finite total fallback.
+				const maxTokens = maxTokensWithThinkingBudget(base.maxTokens, model.maxTokens, thinkingBudget);
-			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
-			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
+				// If not enough room for thinking + output, reduce thinking budget
+				if (maxTokens <= thinkingBudget) {
+					thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
+				}
-			// If not enough room for thinking + output, reduce thinking budget
-			if (maxTokens <= thinkingBudget) {
-				thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS) ?? 0;
+				if (thinkingBudget > 0) {
+					return castApi<"google-gemini-cli">({
+						...base,
+						maxTokens,
+						requestModelId: resolveWireModelId(model, effort),
+						thinking: { enabled: true, budgetTokens: thinkingBudget },
+						toolChoice,
+					});
+				}
+				// Budget clamped to zero — fall through to the thinking-off path.
 			}
-			// If thinking budget is too low, disable thinking
-			if (thinkingBudget <= 0) {
-				return castApi<"google-gemini-cli">({
-					...base,
-					thinking: { enabled: false },
-					toolChoice: mapGoogleToolChoice(options?.toolChoice),
-				});
-			} else {
-				return castApi<"google-gemini-cli">({
-					...base,
-					maxTokens,
-					thinking: { enabled: true, budgetTokens: thinkingBudget },
-					toolChoice: mapGoogleToolChoice(options?.toolChoice),
-				});
+			const thinking: GoogleGeminiCliOptions["thinking"] = { enabled: false };
+			if (model.reasoning && model.thinking?.suppressWhenOff) {
+				// CCA re-applies the per-id baked server default when the config
+				// is omitted; suppression must be explicit on the wire.
+				thinking.suppress = model.thinking.mode === "google-level" ? { level: "MINIMAL" } : { budget: 0 };
 			}
+			return castApi<"google-gemini-cli">({
+				...base,
+				requestModelId: resolveWireModelId(model, undefined),
+				thinking,
+				toolChoice,
+			});
 		}
 		case "google-vertex": {
@@ -999,7 +1028,7 @@ function mapOptionsForApi<TApi extends Api>(
 					...base,
 					thinking: {
 						enabled: true,
-						level: mapEffortToGoogleThinkingLevel(geminiModel, effort),
+						level: mapEffortToGoogleThinkingLevel(effort),
 					},
 					toolChoice: mapGoogleToolChoice(options?.toolChoice),
 				});
@@ -1019,6 +1048,7 @@ function mapOptionsForApi<TApi extends Api>(
 			return castApi<"ollama-chat">({
 				...base,
 				reasoning: resolveOpenAiReasoningEffort(model, options),
+				disableReasoning: options?.disableReasoning,
 				toolChoice: options?.toolChoice,
 			});