npm - @gajae-code/ai - Versions diffs - 0.1.1 - Mend

@gajae-code/ai 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (349)

package/CHANGELOG.md +2644 -0
package/README.md +1181 -0
package/dist/types/api-registry.d.ts +30 -0
package/dist/types/auth-broker/client.d.ts +66 -0
package/dist/types/auth-broker/index.d.ts +5 -0
package/dist/types/auth-broker/refresher.d.ts +25 -0
package/dist/types/auth-broker/remote-store.d.ts +96 -0
package/dist/types/auth-broker/server.d.ts +32 -0
package/dist/types/auth-broker/types.d.ts +105 -0
package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
package/dist/types/auth-gateway/http.d.ts +39 -0
package/dist/types/auth-gateway/index.d.ts +3 -0
package/dist/types/auth-gateway/server.d.ts +17 -0
package/dist/types/auth-gateway/types.d.ts +115 -0
package/dist/types/auth-storage.d.ts +641 -0
package/dist/types/cli.d.ts +2 -0
package/dist/types/index.d.ts +49 -0
package/dist/types/model-cache.d.ts +17 -0
package/dist/types/model-manager.d.ts +62 -0
package/dist/types/model-thinking.d.ts +71 -0
package/dist/types/models.d.ts +12 -0
package/dist/types/provider-details.d.ts +24 -0
package/dist/types/provider-models/bundled-references.d.ts +4 -0
package/dist/types/provider-models/descriptors.d.ts +48 -0
package/dist/types/provider-models/google.d.ts +20 -0
package/dist/types/provider-models/index.d.ts +5 -0
package/dist/types/provider-models/ollama.d.ts +7 -0
package/dist/types/provider-models/openai-compat.d.ts +237 -0
package/dist/types/provider-models/special.d.ts +16 -0
package/dist/types/providers/amazon-bedrock.d.ts +36 -0
package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
package/dist/types/providers/anthropic.d.ts +188 -0
package/dist/types/providers/aws-credentials.d.ts +43 -0
package/dist/types/providers/aws-eventstream.d.ts +38 -0
package/dist/types/providers/aws-sigv4.d.ts +55 -0
package/dist/types/providers/azure-openai-responses.d.ts +15 -0
package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
package/dist/types/providers/cursor.d.ts +42 -0
package/dist/types/providers/error-message.d.ts +27 -0
package/dist/types/providers/github-copilot-headers.d.ts +40 -0
package/dist/types/providers/gitlab-duo.d.ts +27 -0
package/dist/types/providers/google-auth.d.ts +24 -0
package/dist/types/providers/google-gemini-cli.d.ts +72 -0
package/dist/types/providers/google-gemini-headers.d.ts +18 -0
package/dist/types/providers/google-shared.d.ts +163 -0
package/dist/types/providers/google-types.d.ts +138 -0
package/dist/types/providers/google-vertex.d.ts +7 -0
package/dist/types/providers/google.d.ts +4 -0
package/dist/types/providers/grammar.d.ts +1 -0
package/dist/types/providers/kimi.d.ts +27 -0
package/dist/types/providers/mock.d.ts +175 -0
package/dist/types/providers/ollama.d.ts +6 -0
package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
package/dist/types/providers/openai-chat-server.d.ts +16 -0
package/dist/types/providers/openai-codex/constants.d.ts +26 -0
package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
package/dist/types/providers/openai-codex-responses.d.ts +67 -0
package/dist/types/providers/openai-completions-compat.d.ts +25 -0
package/dist/types/providers/openai-completions.d.ts +33 -0
package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
package/dist/types/providers/openai-responses-server.d.ts +17 -0
package/dist/types/providers/openai-responses-shared.d.ts +89 -0
package/dist/types/providers/openai-responses.d.ts +32 -0
package/dist/types/providers/pi-native-client.d.ts +13 -0
package/dist/types/providers/pi-native-server.d.ts +68 -0
package/dist/types/providers/register-builtins.d.ts +31 -0
package/dist/types/providers/synthetic.d.ts +26 -0
package/dist/types/providers/transform-messages.d.ts +12 -0
package/dist/types/providers/vision-guard.d.ts +8 -0
package/dist/types/rate-limit-utils.d.ts +19 -0
package/dist/types/stream.d.ts +24 -0
package/dist/types/types.d.ts +746 -0
package/dist/types/usage/claude.d.ts +3 -0
package/dist/types/usage/gemini.d.ts +2 -0
package/dist/types/usage/github-copilot.d.ts +7 -0
package/dist/types/usage/google-antigravity.d.ts +2 -0
package/dist/types/usage/kimi.d.ts +2 -0
package/dist/types/usage/minimax-code.d.ts +2 -0
package/dist/types/usage/openai-codex.d.ts +3 -0
package/dist/types/usage/shared.d.ts +1 -0
package/dist/types/usage/zai.d.ts +2 -0
package/dist/types/usage.d.ts +258 -0
package/dist/types/utils/abort.d.ts +19 -0
package/dist/types/utils/anthropic-auth.d.ts +31 -0
package/dist/types/utils/discovery/antigravity.d.ts +61 -0
package/dist/types/utils/discovery/codex.d.ts +38 -0
package/dist/types/utils/discovery/cursor.d.ts +23 -0
package/dist/types/utils/discovery/gemini.d.ts +25 -0
package/dist/types/utils/discovery/index.d.ts +4 -0
package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
package/dist/types/utils/event-stream.d.ts +28 -0
package/dist/types/utils/fireworks-model-id.d.ts +10 -0
package/dist/types/utils/foundry.d.ts +1 -0
package/dist/types/utils/h2-fetch.d.ts +22 -0
package/dist/types/utils/http-inspector.d.ts +31 -0
package/dist/types/utils/idle-iterator.d.ts +67 -0
package/dist/types/utils/json-parse.d.ts +10 -0
package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
package/dist/types/utils/oauth/anthropic.d.ts +22 -0
package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
package/dist/types/utils/oauth/callback-server.d.ts +57 -0
package/dist/types/utils/oauth/cerebras.d.ts +1 -0
package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
package/dist/types/utils/oauth/cursor.d.ts +15 -0
package/dist/types/utils/oauth/deepseek.d.ts +10 -0
package/dist/types/utils/oauth/firepass.d.ts +1 -0
package/dist/types/utils/oauth/fireworks.d.ts +1 -0
package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
package/dist/types/utils/oauth/huggingface.d.ts +19 -0
package/dist/types/utils/oauth/index.d.ts +38 -0
package/dist/types/utils/oauth/kagi.d.ts +17 -0
package/dist/types/utils/oauth/kilo.d.ts +5 -0
package/dist/types/utils/oauth/kimi.d.ts +21 -0
package/dist/types/utils/oauth/litellm.d.ts +18 -0
package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
package/dist/types/utils/oauth/moonshot.d.ts +1 -0
package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
package/dist/types/utils/oauth/nvidia.d.ts +18 -0
package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
package/dist/types/utils/oauth/ollama.d.ts +18 -0
package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
package/dist/types/utils/oauth/opencode.d.ts +18 -0
package/dist/types/utils/oauth/parallel.d.ts +17 -0
package/dist/types/utils/oauth/perplexity.d.ts +9 -0
package/dist/types/utils/oauth/pkce.d.ts +8 -0
package/dist/types/utils/oauth/qianfan.d.ts +17 -0
package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
package/dist/types/utils/oauth/synthetic.d.ts +1 -0
package/dist/types/utils/oauth/tavily.d.ts +17 -0
package/dist/types/utils/oauth/together.d.ts +1 -0
package/dist/types/utils/oauth/types.d.ts +44 -0
package/dist/types/utils/oauth/venice.d.ts +18 -0
package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
package/dist/types/utils/oauth/vllm.d.ts +16 -0
package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
package/dist/types/utils/oauth/zai.d.ts +18 -0
package/dist/types/utils/oauth/zenmux.d.ts +1 -0
package/dist/types/utils/overflow.d.ts +54 -0
package/dist/types/utils/parse-bind.d.ts +23 -0
package/dist/types/utils/provider-response.d.ts +3 -0
package/dist/types/utils/retry-after.d.ts +3 -0
package/dist/types/utils/retry.d.ts +26 -0
package/dist/types/utils/schema/adapt.d.ts +24 -0
package/dist/types/utils/schema/compatibility.d.ts +30 -0
package/dist/types/utils/schema/dereference.d.ts +11 -0
package/dist/types/utils/schema/draft.d.ts +10 -0
package/dist/types/utils/schema/equality.d.ts +4 -0
package/dist/types/utils/schema/fields.d.ts +49 -0
package/dist/types/utils/schema/index.d.ts +13 -0
package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
package/dist/types/utils/schema/meta-validator.d.ts +2 -0
package/dist/types/utils/schema/normalize.d.ts +93 -0
package/dist/types/utils/schema/spill.d.ts +8 -0
package/dist/types/utils/schema/stamps.d.ts +25 -0
package/dist/types/utils/schema/types.d.ts +4 -0
package/dist/types/utils/schema/wire.d.ts +54 -0
package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
package/dist/types/utils/sse-debug.d.ts +10 -0
package/dist/types/utils/tool-call-healing.d.ts +71 -0
package/dist/types/utils/tool-choice.d.ts +50 -0
package/dist/types/utils/validation.d.ts +17 -0
package/dist/types/utils.d.ts +28 -0
package/package.json +146 -0
package/src/api-registry.ts +96 -0
package/src/auth-broker/client.ts +358 -0
package/src/auth-broker/index.ts +5 -0
package/src/auth-broker/refresher.ts +127 -0
package/src/auth-broker/remote-store.ts +623 -0
package/src/auth-broker/server.ts +644 -0
package/src/auth-broker/types.ts +127 -0
package/src/auth-broker/wire-schemas.ts +200 -0
package/src/auth-gateway/http.ts +194 -0
package/src/auth-gateway/index.ts +3 -0
package/src/auth-gateway/server.ts +717 -0
package/src/auth-gateway/types.ts +134 -0
package/src/auth-storage.ts +4104 -0
package/src/cli.ts +262 -0
package/src/index.ts +54 -0
package/src/model-cache.ts +129 -0
package/src/model-manager.ts +450 -0
package/src/model-thinking.ts +691 -0
package/src/models.json +73853 -0
package/src/models.json.d.ts +9 -0
package/src/models.ts +56 -0
package/src/prompts/turn-aborted-guidance.md +4 -0
package/src/provider-details.ts +90 -0
package/src/provider-models/bundled-references.ts +38 -0
package/src/provider-models/descriptors.ts +308 -0
package/src/provider-models/google.ts +91 -0
package/src/provider-models/index.ts +5 -0
package/src/provider-models/ollama.ts +153 -0
package/src/provider-models/openai-compat.ts +2275 -0
package/src/provider-models/special.ts +67 -0
package/src/providers/amazon-bedrock.ts +849 -0
package/src/providers/anthropic-messages-server-schema.ts +229 -0
package/src/providers/anthropic-messages-server.ts +677 -0
package/src/providers/anthropic.ts +2696 -0
package/src/providers/aws-credentials.ts +501 -0
package/src/providers/aws-eventstream.ts +185 -0
package/src/providers/aws-sigv4.ts +218 -0
package/src/providers/azure-openai-responses.ts +337 -0
package/src/providers/cursor/gen/agent_pb.ts +15274 -0
package/src/providers/cursor/proto/agent.proto +3526 -0
package/src/providers/cursor/proto/buf.gen.yaml +6 -0
package/src/providers/cursor/proto/buf.yaml +17 -0
package/src/providers/cursor.ts +2561 -0
package/src/providers/error-message.ts +21 -0
package/src/providers/github-copilot-headers.ts +140 -0
package/src/providers/gitlab-duo.ts +372 -0
package/src/providers/google-auth.ts +252 -0
package/src/providers/google-gemini-cli.ts +795 -0
package/src/providers/google-gemini-headers.ts +41 -0
package/src/providers/google-shared.ts +902 -0
package/src/providers/google-types.ts +167 -0
package/src/providers/google-vertex.ts +88 -0
package/src/providers/google.ts +41 -0
package/src/providers/grammar.ts +70 -0
package/src/providers/kimi.ts +52 -0
package/src/providers/mock.ts +500 -0
package/src/providers/ollama.ts +544 -0
package/src/providers/openai-anthropic-shim.ts +138 -0
package/src/providers/openai-chat-server-schema.ts +243 -0
package/src/providers/openai-chat-server.ts +628 -0
package/src/providers/openai-codex/constants.ts +43 -0
package/src/providers/openai-codex/request-transformer.ts +161 -0
package/src/providers/openai-codex/response-handler.ts +81 -0
package/src/providers/openai-codex-responses.ts +2598 -0
package/src/providers/openai-completions-compat.ts +279 -0
package/src/providers/openai-completions.ts +1853 -0
package/src/providers/openai-responses-server-schema.ts +290 -0
package/src/providers/openai-responses-server.ts +1183 -0
package/src/providers/openai-responses-shared.ts +800 -0
package/src/providers/openai-responses.ts +621 -0
package/src/providers/pi-native-client.ts +228 -0
package/src/providers/pi-native-server.ts +210 -0
package/src/providers/register-builtins.ts +412 -0
package/src/providers/synthetic.ts +50 -0
package/src/providers/transform-messages.ts +309 -0
package/src/providers/vision-guard.ts +31 -0
package/src/rate-limit-utils.ts +84 -0
package/src/stream.ts +895 -0
package/src/types.ts +884 -0
package/src/usage/claude.ts +431 -0
package/src/usage/gemini.ts +250 -0
package/src/usage/github-copilot.ts +421 -0
package/src/usage/google-antigravity.ts +201 -0
package/src/usage/kimi.ts +271 -0
package/src/usage/minimax-code.ts +31 -0
package/src/usage/openai-codex.ts +503 -0
package/src/usage/shared.ts +10 -0
package/src/usage/zai.ts +247 -0
package/src/usage.ts +183 -0
package/src/utils/abort.ts +51 -0
package/src/utils/anthropic-auth.ts +87 -0
package/src/utils/discovery/antigravity.ts +261 -0
package/src/utils/discovery/codex.ts +371 -0
package/src/utils/discovery/cursor.ts +306 -0
package/src/utils/discovery/gemini.ts +248 -0
package/src/utils/discovery/index.ts +4 -0
package/src/utils/discovery/openai-compatible.ts +224 -0
package/src/utils/event-stream.ts +142 -0
package/src/utils/fireworks-model-id.ts +30 -0
package/src/utils/foundry.ts +8 -0
package/src/utils/h2-fetch.ts +60 -0
package/src/utils/http-inspector.ts +176 -0
package/src/utils/idle-iterator.ts +250 -0
package/src/utils/json-parse.ts +148 -0
package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
package/src/utils/oauth/anthropic.ts +200 -0
package/src/utils/oauth/api-key-login.ts +87 -0
package/src/utils/oauth/api-key-validation.ts +92 -0
package/src/utils/oauth/callback-server.ts +276 -0
package/src/utils/oauth/cerebras.ts +16 -0
package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
package/src/utils/oauth/cursor.ts +157 -0
package/src/utils/oauth/deepseek.ts +53 -0
package/src/utils/oauth/firepass.ts +24 -0
package/src/utils/oauth/fireworks.ts +15 -0
package/src/utils/oauth/github-copilot.ts +362 -0
package/src/utils/oauth/gitlab-duo.ts +123 -0
package/src/utils/oauth/google-antigravity.ts +200 -0
package/src/utils/oauth/google-gemini-cli.ts +256 -0
package/src/utils/oauth/google-oauth-shared.ts +110 -0
package/src/utils/oauth/huggingface.ts +62 -0
package/src/utils/oauth/index.ts +444 -0
package/src/utils/oauth/kagi.ts +47 -0
package/src/utils/oauth/kilo.ts +87 -0
package/src/utils/oauth/kimi.ts +254 -0
package/src/utils/oauth/litellm.ts +47 -0
package/src/utils/oauth/lm-studio.ts +38 -0
package/src/utils/oauth/minimax-code.ts +78 -0
package/src/utils/oauth/moonshot.ts +16 -0
package/src/utils/oauth/nanogpt.ts +15 -0
package/src/utils/oauth/nvidia.ts +70 -0
package/src/utils/oauth/oauth.html +199 -0
package/src/utils/oauth/ollama-cloud.ts +28 -0
package/src/utils/oauth/ollama.ts +47 -0
package/src/utils/oauth/openai-codex.ts +299 -0
package/src/utils/oauth/opencode.ts +49 -0
package/src/utils/oauth/parallel.ts +46 -0
package/src/utils/oauth/perplexity.ts +206 -0
package/src/utils/oauth/pkce.ts +18 -0
package/src/utils/oauth/qianfan.ts +58 -0
package/src/utils/oauth/qwen-portal.ts +60 -0
package/src/utils/oauth/synthetic.ts +16 -0
package/src/utils/oauth/tavily.ts +46 -0
package/src/utils/oauth/together.ts +16 -0
package/src/utils/oauth/types.ts +94 -0
package/src/utils/oauth/venice.ts +59 -0
package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
package/src/utils/oauth/vllm.ts +40 -0
package/src/utils/oauth/xiaomi.ts +137 -0
package/src/utils/oauth/zai.ts +60 -0
package/src/utils/oauth/zenmux.ts +15 -0
package/src/utils/overflow.ts +137 -0
package/src/utils/parse-bind.ts +54 -0
package/src/utils/provider-response.ts +30 -0
package/src/utils/retry-after.ts +110 -0
package/src/utils/retry.ts +54 -0
package/src/utils/schema/CONSTRAINTS.md +164 -0
package/src/utils/schema/adapt.ts +36 -0
package/src/utils/schema/compatibility.ts +435 -0
package/src/utils/schema/dereference.ts +98 -0
package/src/utils/schema/draft.ts +341 -0
package/src/utils/schema/equality.ts +97 -0
package/src/utils/schema/fields.ts +190 -0
package/src/utils/schema/index.ts +13 -0
package/src/utils/schema/json-schema-validator.ts +577 -0
package/src/utils/schema/meta-validator.ts +167 -0
package/src/utils/schema/normalize.ts +1588 -0
package/src/utils/schema/spill.ts +43 -0
package/src/utils/schema/stamps.ts +97 -0
package/src/utils/schema/types.ts +11 -0
package/src/utils/schema/wire.ts +213 -0
package/src/utils/schema/zod-decontaminate.ts +331 -0
package/src/utils/sse-debug.ts +289 -0
package/src/utils/tool-call-healing.ts +271 -0
package/src/utils/tool-choice.ts +99 -0
package/src/utils/validation.ts +1019 -0
package/src/utils.ts +166 -0

package/src/stream.ts ADDED Viewed

@@ -0,0 +1,895 @@
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { $env, $pickenv, extractHttpStatusFromError } from "@gajae-code/utils";
+import { getCustomApi } from "./api-registry";
+import type { Effort } from "./model-thinking";
+import {
+	mapEffortToAnthropicAdaptiveEffort,
+	mapEffortToGoogleThinkingLevel,
+	requireSupportedEffort,
+} from "./model-thinking";
+import type { BedrockOptions } from "./providers/amazon-bedrock";
+import type { AnthropicOptions } from "./providers/anthropic";
+import type { CursorOptions } from "./providers/cursor";
+import { isGitLabDuoModel, streamGitLabDuo } from "./providers/gitlab-duo";
+import type { GoogleOptions } from "./providers/google";
+import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli";
+import type { GoogleVertexOptions } from "./providers/google-vertex";
+import { isKimiModel, streamKimi } from "./providers/kimi";
+import type { OllamaChatOptions } from "./providers/ollama";
+import type { OpenAICompletionsOptions } from "./providers/openai-completions";
+import { streamPiNative } from "./providers/pi-native-client";
+// Heavy provider stream functions are imported lazily via register-builtins,
+// which wraps each provider module in a dynamic import. This keeps the
+// AWS SDK, google-auth-library, @google/genai, @bufbuild/protobuf, and
+// other provider SDKs out of the CLI startup parse graph. The
+// gitlab-duo / kimi / synthetic providers stay eager because their modules
+// export routing predicates (isGitLabDuoModel, isKimiModel, isSyntheticModel)
+// that must be callable synchronously before streaming begins, and their
+// modules are thin wrappers with no heavy SDK dependencies.
+import {
+	streamAnthropic,
+	streamAzureOpenAIResponses,
+	streamBedrock,
+	streamCursor,
+	streamGoogle,
+	streamGoogleGeminiCli,
+	streamGoogleVertex,
+	streamOllama,
+	streamOpenAICodexResponses,
+	streamOpenAICompletions,
+	streamOpenAIResponses,
+} from "./providers/register-builtins";
+import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
+import type {
+	Api,
+	AssistantMessage,
+	AssistantMessageEvent,
+	Context,
+	Model,
+	OptionsForApi,
+	SimpleStreamOptions,
+	StreamOptions,
+	ThinkingBudgets,
+	ToolChoice,
+} from "./types";
+import { AssistantMessageEventStream } from "./utils/event-stream";
+import { isFoundryEnabled } from "./utils/foundry";
+// Memoized result of the ADC credential-file probe; stays null until the first lookup.
+let cachedVertexAdcCredentialsExists: boolean | null = null;
+/**
+ * Report whether a Google Application Default Credentials (ADC) file exists on disk.
+ *
+ * When GOOGLE_APPLICATION_CREDENTIALS is set, only that path is probed. Otherwise the
+ * gcloud well-known locations are probed: `%APPDATA%\gcloud\...` on Windows (when
+ * APPDATA is set), followed by `~/.config/gcloud/...`.
+ *
+ * The filesystem is consulted only once per process; later calls return the cached
+ * answer even if the environment or the files change afterwards.
+ *
+ * @returns true when a credentials file was found on the first probe.
+ */
+function hasVertexAdcCredentials(): boolean {
+	if (cachedVertexAdcCredentialsExists !== null) {
+		return cachedVertexAdcCredentialsExists;
+	}
+	const explicitPath = $env.GOOGLE_APPLICATION_CREDENTIALS;
+	let found: boolean;
+	if (explicitPath) {
+		found = fs.existsSync(explicitPath);
+	} else {
+		const wellKnownFile = "application_default_credentials.json";
+		const candidates: string[] = [];
+		// gcloud stores ADC under %APPDATA% on Windows rather than under ~/.config.
+		const appData = os.platform() === "win32" ? $env.APPDATA : undefined;
+		if (appData) {
+			candidates.push(path.join(appData, "gcloud", wellKnownFile));
+		}
+		// Legacy/default location; always probed so previous detections keep working.
+		candidates.push(path.join(os.homedir(), ".config", "gcloud", wellKnownFile));
+		found = candidates.some(candidate => fs.existsSync(candidate));
+	}
+	cachedVertexAdcCredentialsExists = found;
+	return found;
+}
+/** Either the name of an environment variable, or a function that computes the credential. */
+type KeyResolver = string | (() => string | undefined);
+/**
+ * Provider id -> how to obtain its credential from the environment.
+ * String entries name a single environment variable; function entries implement
+ * multi-variable fallbacks or presence checks. Key order is preserved because
+ * the provider list is derived from it.
+ */
+const serviceProviderMap: Record<string, KeyResolver> = {
+	"alibaba-coding-plan": "ALIBABA_CODING_PLAN_API_KEY",
+	openai: "OPENAI_API_KEY",
+	google: "GEMINI_API_KEY",
+	groq: "GROQ_API_KEY",
+	cerebras: "CEREBRAS_API_KEY",
+	xai: "XAI_API_KEY",
+	fireworks: "FIREWORKS_API_KEY",
+	firepass: "FIREPASS_API_KEY",
+	openrouter: "OPENROUTER_API_KEY",
+	kilo: "KILO_API_KEY",
+	"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
+	zai: "ZAI_API_KEY",
+	mistral: "MISTRAL_API_KEY",
+	minimax: "MINIMAX_API_KEY",
+	"minimax-code": "MINIMAX_CODE_API_KEY",
+	"minimax-code-cn": "MINIMAX_CODE_CN_API_KEY",
+	"opencode-go": "OPENCODE_API_KEY",
+	"opencode-zen": "OPENCODE_API_KEY",
+	cursor: "CURSOR_ACCESS_TOKEN",
+	deepseek: "DEEPSEEK_API_KEY",
+	"openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
+	"azure-openai-responses": "AZURE_OPENAI_API_KEY",
+	exa: "EXA_API_KEY",
+	jina: "JINA_API_KEY",
+	brave: "BRAVE_API_KEY",
+	perplexity: "PERPLEXITY_API_KEY",
+	tavily: "TAVILY_API_KEY",
+	parallel: "PARALLEL_API_KEY",
+	kagi: "KAGI_API_KEY",
+	// GitHub Copilot: first match among the Copilot-specific and generic GitHub token variables.
+	"github-copilot": () => $pickenv("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
+	// Anthropic: in Foundry mode the Foundry key is consulted ahead of the regular credentials.
+	anthropic: () => {
+		if (isFoundryEnabled()) {
+			return $pickenv("ANTHROPIC_FOUNDRY_API_KEY", "ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY");
+		}
+		return $pickenv("ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY");
+	},
+	"gitlab-duo": "GITLAB_TOKEN",
+	// Vertex AI: an explicit GOOGLE_CLOUD_API_KEY wins; otherwise ADC + project + location
+	// together yield a placeholder marker instead of a real key.
+	"google-vertex": () => {
+		const cloudApiKey = $env.GOOGLE_CLOUD_API_KEY;
+		if (cloudApiKey) {
+			return cloudApiKey;
+		}
+		const adcPresent = hasVertexAdcCredentials();
+		const projectSet = Boolean($env.GOOGLE_CLOUD_PROJECT || $env.GCLOUD_PROJECT);
+		const locationSet = Boolean($env.GOOGLE_CLOUD_LOCATION);
+		return adcPresent && projectSet && locationSet ? "<authenticated>" : undefined;
+	},
+	// Amazon Bedrock: any one of the following credential sources yields the placeholder marker.
+	//   - AWS_PROFILE (named profile)
+	//   - AWS_ACCESS_KEY_ID together with AWS_SECRET_ACCESS_KEY (static IAM keys)
+	//   - AWS_BEARER_TOKEN_BEDROCK (Bedrock bearer token)
+	//   - AWS_CONTAINER_CREDENTIALS_RELATIVE_URI / _FULL_URI (ECS/task role)
+	//   - AWS_WEB_IDENTITY_TOKEN_FILE together with AWS_ROLE_ARN (web identity / IRSA)
+	"amazon-bedrock": () => {
+		const profileSet = Boolean($env.AWS_PROFILE);
+		const staticKeysSet = Boolean($env.AWS_ACCESS_KEY_ID && $env.AWS_SECRET_ACCESS_KEY);
+		const bearerSet = Boolean($env.AWS_BEARER_TOKEN_BEDROCK);
+		const ecsRoleSet = Boolean(
+			$env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI || $env.AWS_CONTAINER_CREDENTIALS_FULL_URI,
+		);
+		const webIdentitySet = Boolean($env.AWS_WEB_IDENTITY_TOKEN_FILE && $env.AWS_ROLE_ARN);
+		const anySource = profileSet || staticKeysSet || bearerSet || ecsRoleSet || webIdentitySet;
+		return anySource ? "<authenticated>" : undefined;
+	},
+	synthetic: "SYNTHETIC_API_KEY",
+	"cloudflare-ai-gateway": "CLOUDFLARE_AI_GATEWAY_API_KEY",
+	huggingface: () => $pickenv("HUGGINGFACE_HUB_TOKEN", "HF_TOKEN"),
+	litellm: "LITELLM_API_KEY",
+	moonshot: "MOONSHOT_API_KEY",
+	nvidia: "NVIDIA_API_KEY",
+	nanogpt: "NANO_GPT_API_KEY",
+	"lm-studio": "LM_STUDIO_API_KEY",
+	ollama: "OLLAMA_API_KEY",
+	"ollama-cloud": "OLLAMA_CLOUD_API_KEY",
+	"llama.cpp": "LLAMA_CPP_API_KEY",
+	qianfan: "QIANFAN_API_KEY",
+	"qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
+	together: "TOGETHER_API_KEY",
+	zenmux: "ZENMUX_API_KEY",
+	venice: "VENICE_API_KEY",
+	vllm: "VLLM_API_KEY",
+	xiaomi: "XIAOMI_API_KEY",
+};
+/**
+ * Resolve the credential for `provider` from the environment, e.g. OPENAI_API_KEY.
+ *
+ * String entries of the provider table are read from `$env`; function entries are
+ * invoked and may return a real key/token or the `"<authenticated>"` placeholder
+ * used by providers whose credentials are sourced elsewhere (Vertex ADC, AWS).
+ *
+ * @param provider Provider id as used in the provider table (e.g. "openai").
+ * @returns The resolved value, or undefined when the provider is unknown or nothing is configured.
+ */
+export function getEnvApiKey(provider: string): string | undefined {
+	// Only own entries count: without this guard, ids such as "toString" or
+	// "constructor" would resolve to Object.prototype members and be returned
+	// (or invoked) as if they were credentials.
+	if (!Object.prototype.hasOwnProperty.call(serviceProviderMap, provider)) {
+		return undefined;
+	}
+	const resolver = serviceProviderMap[provider];
+	if (typeof resolver === "string") {
+		return $env[resolver];
+	}
+	return resolver?.();
+}
+/**
+ * List the id of every provider that `getEnvApiKey` knows how to resolve from the
+ * environment. `gjc auth-broker migrate --include-env` uses this to find
+ * env-sourced keys that should be uploaded to the broker.
+ *
+ * @returns Provider ids, in the order they are declared in the provider table.
+ */
+export function listProvidersWithEnvKey(): string[] {
+	const providerIds = Object.keys(serviceProviderMap);
+	return providerIds;
+}
+/**
+ * Start a streaming request for `model`, dispatching on the model's API.
+ *
+ * Resolution order: custom API registry, GitLab Duo, the credential-less
+ * Vertex / Bedrock paths, then the key-based built-in providers.
+ *
+ * @param model Model descriptor; `model.api` selects the provider implementation.
+ * @param context Conversation context forwarded unchanged to the provider.
+ * @param options Provider-specific options; `apiKey` overrides the environment lookup.
+ * @returns The provider's assistant-message event stream.
+ * @throws Error when a key-based provider has no API key, or when `model.api` is not handled.
+ */
+export function stream<TApi extends Api>(
+	model: Model<TApi>,
+	context: Context,
+	options?: OptionsForApi<TApi>,
+): AssistantMessageEventStream {
+	// Extension-registered APIs take precedence over every built-in route.
+	const registered = getCustomApi(model.api);
+	if (registered) {
+		return registered.stream(model, context, options as StreamOptions);
+	}
+
+	if (isGitLabDuoModel(model)) {
+		const duoKey = (options as StreamOptions | undefined)?.apiKey || getEnvApiKey(model.provider);
+		if (!duoKey) {
+			throw new Error(`No API key for provider: ${model.provider}`);
+		}
+		return streamGitLabDuo(model, context, {
+			...(options as SimpleStreamOptions | undefined),
+			apiKey: duoKey,
+		});
+	}
+
+	// Vertex AI is dispatched without an API-key check.
+	if (model.api === "google-vertex") {
+		return streamGoogleVertex(model as Model<"google-vertex">, context, options as GoogleVertexOptions);
+	}
+	// Bedrock is likewise dispatched without an API-key check; it always receives an options object.
+	if (model.api === "bedrock-converse-stream") {
+		return streamBedrock(model as Model<"bedrock-converse-stream">, context, (options || {}) as BedrockOptions);
+	}
+
+	// Everything below requires a key: explicit option first, environment second.
+	const resolvedKey = options?.apiKey || getEnvApiKey(model.provider);
+	if (!resolvedKey) {
+		throw new Error(`No API key for provider: ${model.provider}`);
+	}
+	const keyedOptions = { ...options, apiKey: resolvedKey };
+
+	const api: Api = model.api;
+	switch (api) {
+		case "anthropic-messages": {
+			const anthropicOptions = keyedOptions as AnthropicOptions;
+			// An explicit isOAuth option wins; otherwise inherit the flag from the model.
+			const isOAuth = anthropicOptions.isOAuth ?? model.isOAuth;
+			return streamAnthropic(model as Model<"anthropic-messages">, context, { ...anthropicOptions, isOAuth });
+		}
+		case "openai-completions":
+			return streamOpenAICompletions(model as Model<"openai-completions">, context, keyedOptions as any);
+		case "openai-responses":
+			return streamOpenAIResponses(model as Model<"openai-responses">, context, keyedOptions as any);
+		case "azure-openai-responses":
+			return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, keyedOptions as any);
+		case "openai-codex-responses":
+			return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, keyedOptions as any);
+		case "google-generative-ai":
+			return streamGoogle(model as Model<"google-generative-ai">, context, keyedOptions);
+		case "google-gemini-cli":
+			return streamGoogleGeminiCli(
+				model as Model<"google-gemini-cli">,
+				context,
+				keyedOptions as GoogleGeminiCliOptions,
+			);
+		case "ollama-chat":
+			return streamOllama(model as Model<"ollama-chat">, context, keyedOptions as OllamaChatOptions);
+		case "cursor-agent":
+			return streamCursor(model as Model<"cursor-agent">, context, keyedOptions as CursorOptions);
+		default:
+			throw new Error(`Unhandled API: ${api}`);
+	}
+}
+/**
+ * Run `stream` and resolve with the stream's final result.
+ *
+ * Because the function is async, an error thrown synchronously by `stream`
+ * surfaces as a rejected promise.
+ *
+ * @returns The assistant message produced by the stream's `result()`.
+ */
+export async function complete<TApi extends Api>(
+	model: Model<TApi>,
+	context: Context,
+	options?: OptionsForApi<TApi>,
+): Promise<AssistantMessage> {
+	return stream(model, context, options).result();
+}
+/** Outcome of a streaming attempt that failed with an authentication (401) error before anything was forwarded. */
+interface AuthRetryFailure {
+	/** The thrown error, or an Error built from the 401 error event. */
+	error: unknown;
+	/** Events that were held back and not yet forwarded to the outer stream. */
+	bufferedEvents: AssistantMessageEvent[];
+	/** The error event that reported the failure; absent when the attempt threw instead. */
+	terminalEvent?: Extract<AssistantMessageEvent, { type: "error" }>;
+}
+/**
+ * Determine the HTTP status associated with a failed assistant message.
+ *
+ * Uses `errorStatus` when it is set; otherwise tries to derive a status from
+ * `errorMessage` via `extractHttpStatusFromError`.
+ *
+ * @returns The status code, or undefined when neither source yields one.
+ */
+function extractStatusFromAssistantError(message: AssistantMessage): number | undefined {
+	const { errorStatus, errorMessage } = message;
+	if (errorStatus !== undefined) {
+		return errorStatus;
+	}
+	return errorMessage ? extractHttpStatusFromError({ message: errorMessage }) : undefined;
+}
+/**
+ * Build an Error from a failed assistant message.
+ *
+ * The error text is the message's `errorMessage`, or a generic fallback when it is
+ * null/undefined. A `status` property is attached only when a status can be determined.
+ */
+function createAssistantAuthError(message: AssistantMessage): Error & { status?: number } {
+	const text = message.errorMessage ?? "Provider authentication failed";
+	const authError: Error & { status?: number } = new Error(text);
+	const httpStatus = extractStatusFromAssistantError(message);
+	if (httpStatus === undefined) {
+		return authError;
+	}
+	authError.status = httpStatus;
+	return authError;
+}
+/** Push every event in `events`, in order, onto `stream`. The array itself is left untouched. */
+function emitBufferedEvents(stream: AssistantMessageEventStream, events: AssistantMessageEvent[]): void {
+	for (let index = 0; index < events.length; index++) {
+		stream.push(events[index]);
+	}
+}
+export function streamSimple<TApi extends Api>(
+	model: Model<TApi>,
+	context: Context,
+	options?: SimpleStreamOptions,
+): AssistantMessageEventStream {
+	const retryApiKey = options?.onAuthError ? (options.apiKey ?? getEnvApiKey(model.provider)) : undefined;
+	if (retryApiKey) {
+		const outer = new AssistantMessageEventStream();
+		const onAuthError = options!.onAuthError!;
+		const runAttempt = async (apiKey: string, captureAuthFailure: boolean): Promise<AuthRetryFailure | undefined> => {
+			const bufferedEvents: AssistantMessageEvent[] = [];
+			let emittedReplayUnsafeEvent = false;
+			const flushBuffered = (): void => {
+				emitBufferedEvents(outer, bufferedEvents);
+				bufferedEvents.length = 0;
+			};
+			try {
+				const inner = streamSimple(model, context, { ...options, apiKey, onAuthError: undefined });
+				for await (const event of inner) {
+					if (!emittedReplayUnsafeEvent && event.type === "start") {
+						bufferedEvents.push(event);
+						continue;
+					}
+					if (
+						!emittedReplayUnsafeEvent &&
+						captureAuthFailure &&
+						event.type === "error" &&
+						extractStatusFromAssistantError(event.error) === 401
+					) {
+						return { error: createAssistantAuthError(event.error), bufferedEvents, terminalEvent: event };
+					}
+					flushBuffered();
+					emittedReplayUnsafeEvent = true;
+					outer.push(event);
+					if (outer.done) return undefined;
+				}
+				flushBuffered();
+				if (!outer.done) outer.end(await inner.result());
+			} catch (error) {
+				if (!emittedReplayUnsafeEvent && captureAuthFailure && extractHttpStatusFromError(error) === 401) {
+					return { error, bufferedEvents };
+				}
+				flushBuffered();
+				outer.fail(error);
+			}
+			return undefined;
+		};
+		const emitFailure = (failure: AuthRetryFailure): void => {
+			emitBufferedEvents(outer, failure.bufferedEvents);
+			if (failure.terminalEvent) {
+				outer.push(failure.terminalEvent);
+			} else {
+				outer.fail(failure.error);
+			}
+		};
+		void (async () => {
+			const failure = await runAttempt(retryApiKey, true);
+			if (!failure) return;
+			let nextKey: string | undefined;
+			try {
+				nextKey = await onAuthError(model.provider, retryApiKey, failure.error);
+			} catch {
+				nextKey = undefined;
+			}
+			if (!nextKey || nextKey === retryApiKey) {
+				emitFailure(failure);
+				return;
+			}
+			await runAttempt(nextKey, false);
+		})();
+		return outer;
+	}
+	// Pi-native transport short-circuits the per-provider dispatch entirely:
+	// the gateway resolves provider + credential server-side, so we don't
+	// need an `apiKey` from `getEnvApiKey` here — `options.apiKey` carries
+	// the gateway bearer instead. Comes BEFORE the custom-API check so
+	// extension-registered APIs can't accidentally override a configured
+	// pi-native transport.
+	if (model.transport === "pi-native") {
+		return streamPiNative(model, context, options);
+	}
+	// Check custom API registry (extension-provided APIs)
+	const customApiProvider = getCustomApi(model.api);
+	if (customApiProvider) {
+		return customApiProvider.streamSimple(model, context, options);
+	}
+	// Vertex AI uses Application Default Credentials, not API keys
+	if (model.api === "google-vertex") {
+		const providerOptions = mapOptionsForApi(model, options, undefined);
+		return stream(model, context, providerOptions);
+	} else if (model.api === "bedrock-converse-stream") {
+		// Bedrock does not use API keys; instead, it sources credentials from the standard AWS environment variables or from a given AWS profile.
+		const providerOptions = mapOptionsForApi(model, options, undefined);
+		return stream(model, context, providerOptions);
+	}
+	const apiKey = options?.apiKey || getEnvApiKey(model.provider);
+	if (!apiKey) {
+		throw new Error(`No API key for provider: ${model.provider}`);
+	}
+	// GitLab Duo - wraps Anthropic/OpenAI behind GitLab AI Gateway direct access tokens
+	if (isGitLabDuoModel(model)) {
+		return streamGitLabDuo(model, context, {
+			...options,
+			apiKey,
+		});
+	}
+	// Kimi Code - route to dedicated handler that wraps OpenAI or Anthropic API
+	if (isKimiModel(model)) {
+		// Pass raw SimpleStreamOptions - streamKimi handles mapping internally
+		return streamKimi(model as Model<"openai-completions">, context, {
+			...options,
+			apiKey,
+			format: options?.kimiApiFormat ?? "anthropic",
+		});
+	}
+	// Synthetic - route to dedicated handler that wraps OpenAI or Anthropic API
+	if (isSyntheticModel(model)) {
+		// Pass raw SimpleStreamOptions - streamSynthetic handles mapping internally
+		return streamSynthetic(model as Model<"openai-completions">, context, {
+			...options,
+			apiKey,
+			format: options?.syntheticApiFormat ?? "openai", // Default to OpenAI format
+		});
+	}
+	const providerOptions = mapOptionsForApi(model, options, apiKey);
+	return stream(model, context, providerOptions);
+}
+/**
+ * Non-streaming convenience wrapper: starts `streamSimple` and resolves with
+ * whatever the returned stream's `result()` yields.
+ *
+ * @param model - Model to call; forwarded unchanged to `streamSimple`.
+ * @param context - Conversation context; forwarded unchanged to `streamSimple`.
+ * @param options - Optional stream options; forwarded unchanged to `streamSimple`.
+ * @returns The final assistant message produced by the stream.
+ */
+export async function completeSimple<TApi extends Api>(
+	model: Model<TApi>,
+	context: Context,
+	options?: SimpleStreamOptions,
+): Promise<AssistantMessage> {
+	return streamSimple(model, context, options).result();
+}
+// Output-token headroom subtracted from the token limit whenever a thinking
+// budget has to be shrunk to fit (see the budget handling in mapOptionsForApi).
+const MIN_OUTPUT_TOKENS = 1024;
+// Exported buffer size; not referenced in this part of the file.
+// NOTE(review): confirm the intended use at the import sites.
+export const OUTPUT_FALLBACK_BUFFER = 4000;
+// True unless the environment variable PI_NO_INTERLEAVED_THINKING is exactly "1".
+// Evaluated once at module load. When true, the Anthropic branch of
+// mapOptionsForApi passes the thinking budget through without inflating maxTokens.
+const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PI_NO_INTERLEAVED_THINKING !== "1";
+// Default thinking budget (in tokens) per effort level for "anthropic-messages";
+// used when the caller supplies no `thinkingBudgets` entry for that level.
+export const ANTHROPIC_THINKING: Record<Effort, number> = {
+	minimal: 1024,
+	low: 4096,
+	medium: 8192,
+	high: 16384,
+	xhigh: 32768,
+};
+// Default thinking budget (in tokens) per effort level for the
+// "google-gemini-cli" branch of mapOptionsForApi.
+const GOOGLE_THINKING: Record<Effort, number> = {
+	minimal: 1024,
+	low: 4096,
+	medium: 8192,
+	high: 16384,
+	xhigh: 24575,
+};
+// Default thinking budget per effort level consulted by
+// resolveBedrockThinkingBudget when no caller override exists.
+const BEDROCK_CLAUDE_THINKING: Record<Effort, number> = {
+	minimal: 1024,
+	low: 2048,
+	medium: 8192,
+	high: 16384,
+	xhigh: 16384,
+};
+/**
+ * Works out the thinking budget that applies to a Bedrock request.
+ *
+ * @param model - Bedrock model; its `reasoning` flag must be truthy for a budget to apply.
+ * @param options - Caller options; `reasoning` selects the effort and `thinkingBudgets` may override the default.
+ * @returns The effort level returned by `requireSupportedEffort` together with its budget,
+ * or `null` when no reasoning was requested or the model does not reason.
+ */
+function resolveBedrockThinkingBudget(
+	model: Model<"bedrock-converse-stream">,
+	options?: SimpleStreamOptions,
+): { budget: number; level: Effort } | null {
+	if (!(options?.reasoning && model.reasoning)) return null;
+	const level = requireSupportedEffort(model, options.reasoning);
+	// A caller-supplied budget for this level wins over the built-in table.
+	const override = options.thinkingBudgets?.[level];
+	return { budget: override ?? BEDROCK_CLAUDE_THINKING[level], level };
+}
+/**
+ * Converts a generic tool choice into the Anthropic `toolChoice` form.
+ *
+ * String choices: `"required"` becomes `"any"`; `"auto"`, `"none"` and `"any"` pass
+ * through; anything else yields `undefined`. Object choices of type `"tool"` or
+ * `"function"` become `{ type: "tool", name }` when a non-empty name is present.
+ *
+ * @param choice - Generic tool choice, possibly absent.
+ * @returns The Anthropic tool choice, or `undefined` when nothing maps.
+ */
+export function mapAnthropicToolChoice(choice?: ToolChoice): AnthropicOptions["toolChoice"] {
+	if (!choice) return undefined;
+	if (typeof choice === "string") {
+		switch (choice) {
+			case "required":
+				return "any";
+			case "auto":
+			case "none":
+			case "any":
+				return choice;
+			default:
+				return undefined;
+		}
+	}
+	// Both object forms reduce to "force this named tool"; only the location of the name differs.
+	let toolName: string | undefined;
+	if (choice.type === "tool") {
+		toolName = choice.name;
+	} else if (choice.type === "function") {
+		toolName = "function" in choice ? choice.function?.name : choice.name;
+	}
+	return toolName ? { type: "tool", name: toolName } : undefined;
+}
+/**
+ * Converts a generic tool choice into the string form shared by the Google option types.
+ *
+ * String choices: `"required"` becomes `"any"`; `"auto"`, `"none"` and `"any"` pass
+ * through; anything else yields `undefined`. Every object-shaped choice maps to `"any"`.
+ *
+ * @param choice - Generic tool choice, possibly absent.
+ * @returns The Google tool choice, or `undefined` when nothing maps.
+ */
+function mapGoogleToolChoice(
+	choice?: ToolChoice,
+): GoogleOptions["toolChoice"] | GoogleGeminiCliOptions["toolChoice"] | GoogleVertexOptions["toolChoice"] {
+	if (!choice) return undefined;
+	// Object-shaped choices (a specific tool/function) are all reported as "any".
+	if (typeof choice !== "string") return "any";
+	switch (choice) {
+		case "required":
+			return "any";
+		case "auto":
+		case "none":
+		case "any":
+			return choice;
+		default:
+			return undefined;
+	}
+}
+/**
+ * Converts a generic tool choice into the OpenAI `toolChoice` form.
+ *
+ * String choices: `"any"` becomes `"required"`; `"auto"`, `"none"` and `"required"`
+ * pass through; anything else yields `undefined`. Object choices of type `"tool"` or
+ * `"function"` become `{ type: "function", function: { name } }` when a non-empty
+ * name is present.
+ *
+ * @param choice - Generic tool choice, possibly absent.
+ * @returns The OpenAI tool choice, or `undefined` when nothing maps.
+ */
+function mapOpenAiToolChoice(choice?: ToolChoice): OpenAICompletionsOptions["toolChoice"] {
+	if (!choice) return undefined;
+	if (typeof choice === "string") {
+		switch (choice) {
+			case "any":
+				return "required";
+			case "auto":
+			case "none":
+			case "required":
+				return choice;
+			default:
+				return undefined;
+		}
+	}
+	// Both object forms name a single function; only the location of the name differs.
+	let functionName: string | undefined;
+	if (choice.type === "tool") {
+		functionName = choice.name;
+	} else if (choice.type === "function") {
+		functionName = "function" in choice ? choice.function?.name : choice.name;
+	}
+	return functionName ? { type: "function", function: { name: functionName } } : undefined;
+}
+/**
+ * Picks the reasoning effort to send for the OpenAI-style (and Ollama) option shapes.
+ *
+ * @param model - Model being called; its `reasoning` flag must be truthy.
+ * @param options - Caller options; `reasoning` carries the requested effort.
+ * @returns The value returned by `requireSupportedEffort`, or `undefined` when no
+ * reasoning was requested or the model does not reason.
+ */
+function resolveOpenAiReasoningEffort<TApi extends Api>(
+	model: Model<TApi>,
+	options?: SimpleStreamOptions,
+): Effort | undefined {
+	const requested = options?.reasoning;
+	return requested && model.reasoning ? requireSupportedEffort(model, requested) : undefined;
+}
+/**
+ * Type-level helper: checks `apiOptions` against the option shape of one concrete API
+ * and returns the same object typed as the general `OptionsForApi<Api>`.
+ * The value itself is returned untouched.
+ */
+const castApi = <TApi extends Api>(apiOptions: OptionsForApi<TApi>): OptionsForApi<Api> =>
+	apiOptions as OptionsForApi<Api>;
+/**
+ * Translates provider-agnostic `SimpleStreamOptions` into the option shape expected by
+ * the API named in `model.api`.
+ *
+ * A common `base` object is built from the caller's options first; each API case then
+ * layers its own reasoning/thinking and tool-choice fields on top.
+ *
+ * @param model - Model whose `api` selects the mapping; `maxTokens`, `reasoning` and
+ * `thinking` are also consulted.
+ * @param options - Caller-supplied options; every field is optional.
+ * @param apiKey - Key to place in the result; `options.apiKey` is used when this is empty.
+ * @returns Options for the model's API.
+ * @throws Error when `model.api` has no case in the switch below.
+ */
+function mapOptionsForApi<TApi extends Api>(
+	model: Model<TApi>,
+	options?: SimpleStreamOptions,
+	apiKey?: string,
+): OptionsForApi<TApi> {
+	const base = {
+		temperature: options?.temperature,
+		topP: options?.topP,
+		topK: options?.topK,
+		minP: options?.minP,
+		presencePenalty: options?.presencePenalty,
+		repetitionPenalty: options?.repetitionPenalty,
+		// Without a caller value, default to the model limit capped at 32000.
+		maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
+		signal: options?.signal,
+		apiKey: apiKey || options?.apiKey,
+		cacheRetention: options?.cacheRetention,
+		headers: options?.headers,
+		initiatorOverride: options?.initiatorOverride,
+		maxRetryDelayMs: options?.maxRetryDelayMs,
+		metadata: options?.metadata,
+		sessionId: options?.sessionId,
+		providerSessionState: options?.providerSessionState,
+		onPayload: options?.onPayload,
+		onResponse: options?.onResponse,
+		onSseEvent: options?.onSseEvent,
+		execHandlers: options?.execHandlers,
+	};
+	switch (model.api) {
+		case "anthropic-messages": {
+			// Fields every Anthropic variant below carries unchanged. They are spread last
+			// so the resulting key order matches a hand-written literal.
+			const shared = {
+				toolChoice: mapAnthropicToolChoice(options?.toolChoice),
+				thinkingDisplay: options?.hideThinkingSummary ? ("omitted" as const) : undefined,
+				serviceTier: options?.serviceTier,
+			};
+			const withoutThinking = () =>
+				castApi<"anthropic-messages">({
+					...base,
+					thinkingEnabled: false,
+					...shared,
+				});
+			// Explicitly disable thinking when reasoning is not specified or model doesn't support it
+			const reasoning = options?.reasoning;
+			if (!reasoning || !model.reasoning) {
+				return withoutThinking();
+			}
+			let thinkingBudget = options.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
+			if (thinkingBudget <= 0) {
+				return withoutThinking();
+			}
+			// For Opus 4.6+ and Sonnet 4.6+: use adaptive thinking with effort level
+			// For older models: use budget-based thinking
+			if (model.thinking?.mode === "anthropic-adaptive") {
+				const effort = mapEffortToAnthropicAdaptiveEffort(model, reasoning);
+				return castApi<"anthropic-messages">({
+					...base,
+					thinkingEnabled: true,
+					effort,
+					...shared,
+				});
+			}
+			if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
+				// Budget is passed through as-is; maxTokens is left at the base value.
+				return castApi<"anthropic-messages">({
+					...base,
+					thinkingEnabled: true,
+					thinkingBudgetTokens: thinkingBudget,
+					...shared,
+				});
+			}
+			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
+			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
+			// If not enough room for thinking + output, reduce thinking budget
+			if (maxTokens <= thinkingBudget) {
+				thinkingBudget = maxTokens - MIN_OUTPUT_TOKENS;
+			}
+			// If thinking budget is too low, disable thinking
+			if (thinkingBudget <= 0) {
+				return withoutThinking();
+			}
+			return castApi<"anthropic-messages">({
+				...base,
+				maxTokens,
+				thinkingEnabled: true,
+				thinkingBudgetTokens: thinkingBudget,
+				...shared,
+			});
+		}
+		case "bedrock-converse-stream": {
+			const bedrockBase: BedrockOptions = {
+				...base,
+				reasoning: options?.reasoning,
+				thinkingBudgets: options?.thinkingBudgets,
+				toolChoice: mapAnthropicToolChoice(options?.toolChoice),
+				thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+			};
+			// Adaptive mode sends effort directly, no budget_tokens — skip budget inflation.
+			if (model.thinking?.mode === "anthropic-adaptive") {
+				return castApi<"bedrock-converse-stream">(bedrockBase);
+			}
+			const budgetInfo = resolveBedrockThinkingBudget(model as Model<"bedrock-converse-stream">, options);
+			// No reasoning requested (or unsupported): nothing to adjust.
+			if (!budgetInfo) return castApi<"bedrock-converse-stream">(bedrockBase);
+			let maxTokens = bedrockBase.maxTokens ?? model.maxTokens;
+			let thinkingBudgets = bedrockBase.thinkingBudgets;
+			// First try to make room by raising maxTokens (never above the model limit).
+			if (maxTokens <= budgetInfo.budget) {
+				const desiredMaxTokens = Math.min(model.maxTokens, budgetInfo.budget + MIN_OUTPUT_TOKENS);
+				if (desiredMaxTokens > maxTokens) {
+					maxTokens = desiredMaxTokens;
+				}
+			}
+			// Still no room: shrink the budget for this level instead, never below zero.
+			if (maxTokens <= budgetInfo.budget) {
+				const adjustedBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
+				thinkingBudgets = { ...(thinkingBudgets ?? {}), [budgetInfo.level]: adjustedBudget };
+			}
+			return castApi<"bedrock-converse-stream">({ ...bedrockBase, maxTokens, thinkingBudgets });
+		}
+		case "openai-completions":
+			return castApi<"openai-completions">({
+				...base,
+				reasoning: resolveOpenAiReasoningEffort(model, options),
+				disableReasoning: options?.disableReasoning,
+				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
+				serviceTier: options?.serviceTier,
+			});
+		case "openai-responses":
+			return castApi<"openai-responses">({
+				...base,
+				reasoning: resolveOpenAiReasoningEffort(model, options),
+				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
+				serviceTier: options?.serviceTier,
+				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
+			});
+		case "azure-openai-responses":
+			return castApi<"azure-openai-responses">({
+				...base,
+				reasoning: resolveOpenAiReasoningEffort(model, options),
+				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
+				serviceTier: options?.serviceTier,
+				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
+			});
+		case "openai-codex-responses":
+			return castApi<"openai-codex-responses">({
+				...base,
+				reasoning: resolveOpenAiReasoningEffort(model, options),
+				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
+				serviceTier: options?.serviceTier,
+				preferWebsockets: options?.preferWebsockets,
+				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
+			});
+		case "google-generative-ai": {
+			// Explicitly disable thinking when reasoning is not specified or model doesn't support it
+			// This is needed because Gemini has "dynamic thinking" enabled by default
+			const reasoning = options?.reasoning;
+			if (!reasoning || !model.reasoning) {
+				return castApi<"google-generative-ai">({
+					...base,
+					thinking: { enabled: false },
+					toolChoice: mapGoogleToolChoice(options?.toolChoice),
+				});
+			}
+			const googleModel = model as Model<"google-generative-ai">;
+			const effort = requireSupportedEffort(googleModel, reasoning);
+			// Gemini 3+ models use thinkingLevel exclusively instead of thinkingBudget.
+			// https://ai.google.dev/gemini-api/docs/thinking#set-budget
+			if (googleModel.thinking?.mode === "google-level") {
+				return castApi<"google-generative-ai">({
+					...base,
+					thinking: {
+						enabled: true,
+						level: mapEffortToGoogleThinkingLevel(googleModel, effort),
+					},
+					toolChoice: mapGoogleToolChoice(options?.toolChoice),
+				});
+			}
+			// Tagged with this case's own API so the literal is checked against the right option shape.
+			return castApi<"google-generative-ai">({
+				...base,
+				thinking: {
+					enabled: true,
+					budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
+				},
+				toolChoice: mapGoogleToolChoice(options?.toolChoice),
+			});
+		}
+		case "google-gemini-cli": {
+			const reasoning = options?.reasoning;
+			if (!reasoning || !model.reasoning) {
+				return castApi<"google-gemini-cli">({
+					...base,
+					thinking: { enabled: false },
+					toolChoice: mapGoogleToolChoice(options?.toolChoice),
+				});
+			}
+			const effort = requireSupportedEffort(model, reasoning);
+			// Gemini 3+ models use thinkingLevel instead of thinkingBudget
+			if (model.thinking?.mode === "google-level") {
+				return castApi<"google-gemini-cli">({
+					...base,
+					thinking: {
+						enabled: true,
+						level: mapEffortToGoogleThinkingLevel(model, effort),
+					},
+					toolChoice: mapGoogleToolChoice(options?.toolChoice),
+				});
+			}
+			let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
+			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
+			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
+			// If not enough room for thinking + output, reduce thinking budget (never below zero)
+			if (maxTokens <= thinkingBudget) {
+				thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
+			}
+			// If thinking budget is too low, disable thinking
+			if (thinkingBudget <= 0) {
+				return castApi<"google-gemini-cli">({
+					...base,
+					thinking: { enabled: false },
+					toolChoice: mapGoogleToolChoice(options?.toolChoice),
+				});
+			}
+			return castApi<"google-gemini-cli">({
+				...base,
+				maxTokens,
+				thinking: { enabled: true, budgetTokens: thinkingBudget },
+				toolChoice: mapGoogleToolChoice(options?.toolChoice),
+			});
+		}
+		case "google-vertex": {
+			// Explicitly disable thinking when reasoning is not specified or model doesn't support it
+			const reasoning = options?.reasoning;
+			if (!reasoning || !model.reasoning) {
+				return castApi<"google-vertex">({
+					...base,
+					thinking: { enabled: false },
+					toolChoice: mapGoogleToolChoice(options?.toolChoice),
+				});
+			}
+			const vertexModel = model as Model<"google-vertex">;
+			const effort = requireSupportedEffort(vertexModel, reasoning);
+			// The Gemini helpers are typed for "google-generative-ai" models, hence the re-typed view.
+			const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
+			if (geminiModel.thinking?.mode === "google-level") {
+				return castApi<"google-vertex">({
+					...base,
+					thinking: {
+						enabled: true,
+						level: mapEffortToGoogleThinkingLevel(geminiModel, effort),
+					},
+					toolChoice: mapGoogleToolChoice(options?.toolChoice),
+				});
+			}
+			return castApi<"google-vertex">({
+				...base,
+				thinking: {
+					enabled: true,
+					budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
+				},
+				toolChoice: mapGoogleToolChoice(options?.toolChoice),
+			});
+		}
+		case "ollama-chat":
+			return castApi<"ollama-chat">({
+				...base,
+				reasoning: resolveOpenAiReasoningEffort(model, options),
+				toolChoice: options?.toolChoice,
+			});
+		case "cursor-agent": {
+			// Cursor-specific handlers take precedence over the generic ones.
+			const execHandlers = options?.cursorExecHandlers ?? options?.execHandlers;
+			const onToolResult = options?.cursorOnToolResult ?? execHandlers?.onToolResult;
+			return castApi<"cursor-agent">({
+				...base,
+				execHandlers,
+				onToolResult,
+			});
+		}
+		default:
+			throw new Error(`Unhandled API in mapOptionsForApi: ${model.api}`);
+	}
+}
+/**
+ * Chooses the Gemini thinking budget for an effort level.
+ *
+ * Order of precedence: a caller-supplied budget for this level, then the fixed
+ * table for models whose id contains "2.5-", then `-1` for any other model.
+ *
+ * @param model - Google model; only its `id` is inspected here.
+ * @param effort - Requested effort level.
+ * @param customBudgets - Optional per-level overrides.
+ * @returns The budget to send, or `-1` for models without a known table.
+ */
+function getGoogleBudget(
+	model: Model<"google-generative-ai">,
+	effort: Effort,
+	customBudgets?: ThinkingBudgets,
+): number {
+	// Return value intentionally unused; presumably rejects unsupported efforts — confirm in requireSupportedEffort.
+	requireSupportedEffort(model, effort);
+	// Custom budgets take precedence if provided for this level
+	const override = customBudgets?.[effort];
+	if (override !== undefined) return override;
+	// Unknown model - use dynamic
+	if (!model.id.includes("2.5-")) return -1;
+	// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
+	if (effort === "minimal") return 128;
+	if (effort === "low") return 2048;
+	if (effort === "medium") return 8192;
+	// Every remaining level gets the model's largest budget.
+	return model.id.includes("2.5-flash") ? 24576 : 32768;
+}