@prometheus-ai/ai 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
package/src/stream.ts
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import * as path from "node:path";
|
|
4
|
-
import { $env, $pickenv, extractHttpStatusFromError } from "@prometheus-ai/utils";
|
|
5
|
-
import { getCustomApi } from "./api-registry";
|
|
6
|
-
import type { Effort } from "./model-thinking";
|
|
1
|
+
import type { Effort } from "@prometheus-ai/catalog/effort";
|
|
2
|
+
import { isVertexExpressOpenAIUrl, isVertexRawPredictUrl } from "@prometheus-ai/catalog/hosts";
|
|
7
3
|
import {
|
|
8
4
|
mapEffortToAnthropicAdaptiveEffort,
|
|
9
5
|
mapEffortToGoogleThinkingLevel,
|
|
10
|
-
|
|
6
|
+
minimumSupportedEffort,
|
|
11
7
|
requireSupportedEffort,
|
|
12
|
-
|
|
8
|
+
resolveWireModelId,
|
|
9
|
+
} from "@prometheus-ai/catalog/model-thinking";
|
|
10
|
+
import { CATALOG_PROVIDERS, type ProviderCatalogEntry } from "@prometheus-ai/catalog/provider-models";
|
|
11
|
+
import { $env, $pickenv, extractHttpStatusFromError } from "@prometheus-ai/utils";
|
|
12
|
+
import { getCustomApi } from "./api-registry";
|
|
13
|
+
import { type ApiKeyResolver, AUTH_RETRY_STEPS, isApiKeyResolver, resolveRetryKey } from "./auth-retry";
|
|
14
|
+
import { ProviderHttpError } from "./errors";
|
|
13
15
|
import type { BedrockOptions } from "./providers/amazon-bedrock";
|
|
14
16
|
import type { AnthropicOptions } from "./providers/anthropic";
|
|
15
17
|
import type { CursorOptions } from "./providers/cursor";
|
|
@@ -46,6 +48,7 @@ import {
|
|
|
46
48
|
import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
|
|
47
49
|
import { streamXAIResponses } from "./providers/xai-responses";
|
|
48
50
|
import { isUsageLimitError } from "./rate-limit-utils";
|
|
51
|
+
import { PROVIDER_REGISTRY } from "./registry";
|
|
49
52
|
import type {
|
|
50
53
|
Api,
|
|
51
54
|
AssistantMessage,
|
|
@@ -60,29 +63,13 @@ import type {
|
|
|
60
63
|
ToolChoice,
|
|
61
64
|
} from "./types";
|
|
62
65
|
import { AssistantMessageEventStream } from "./utils/event-stream";
|
|
63
|
-
import { isFoundryEnabled } from "./utils/foundry";
|
|
64
66
|
import { withRequestDebugFetch } from "./utils/request-debug";
|
|
65
67
|
|
|
66
|
-
let cachedVertexAdcCredentialsExists: boolean | null = null;
|
|
67
|
-
|
|
68
|
-
function hasVertexAdcCredentials(): boolean {
|
|
69
|
-
if (cachedVertexAdcCredentialsExists === null) {
|
|
70
|
-
const gacPath = $env.GOOGLE_APPLICATION_CREDENTIALS;
|
|
71
|
-
if (gacPath) {
|
|
72
|
-
cachedVertexAdcCredentialsExists = fs.existsSync(gacPath);
|
|
73
|
-
} else {
|
|
74
|
-
cachedVertexAdcCredentialsExists = fs.existsSync(
|
|
75
|
-
path.join(os.homedir(), ".config", "gcloud", "application_default_credentials.json"),
|
|
76
|
-
);
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
return cachedVertexAdcCredentialsExists;
|
|
80
|
-
}
|
|
81
68
|
function isGoogleVertexAuthenticatedModel(model: Model<Api>): boolean {
|
|
82
69
|
return (
|
|
83
70
|
model.provider === "google-vertex" &&
|
|
84
|
-
((model.api === "openai-completions" && model.baseUrl
|
|
85
|
-
(model.api === "anthropic-messages" && model.baseUrl
|
|
71
|
+
((model.api === "openai-completions" && isVertexExpressOpenAIUrl(model.baseUrl)) ||
|
|
72
|
+
(model.api === "anthropic-messages" && isVertexRawPredictUrl(model.baseUrl)))
|
|
86
73
|
);
|
|
87
74
|
}
|
|
88
75
|
|
|
@@ -94,7 +81,7 @@ function createVertexAuthenticatedFetch(options: StreamOptions | undefined): Fet
|
|
|
94
81
|
headers.set("Authorization", `Bearer ${token}`);
|
|
95
82
|
const rewritten = resolveVertexRequest(input);
|
|
96
83
|
const url = rewritten instanceof Request ? rewritten.url : rewritten.toString();
|
|
97
|
-
if (
|
|
84
|
+
if (isVertexRawPredictUrl(url)) {
|
|
98
85
|
const bodyText = await readVertexRequestBody(rewritten, init);
|
|
99
86
|
const transformed = transformVertexAnthropicBody(bodyText);
|
|
100
87
|
return baseFetch(url, {
|
|
@@ -109,10 +96,6 @@ function createVertexAuthenticatedFetch(options: StreamOptions | undefined): Fet
|
|
|
109
96
|
return Object.assign(vertexFetch, baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {});
|
|
110
97
|
}
|
|
111
98
|
|
|
112
|
-
function isVertexAnthropicRawPredict(url: string): boolean {
|
|
113
|
-
return url.includes(":streamRawPredict") || url.includes(":rawPredict");
|
|
114
|
-
}
|
|
115
|
-
|
|
116
99
|
async function readVertexRequestBody(input: string | URL | Request, init: RequestInit | undefined): Promise<string> {
|
|
117
100
|
if (input instanceof Request) return input.clone().text();
|
|
118
101
|
const body = init?.body;
|
|
@@ -174,101 +157,35 @@ function resolveVertexRequest(input: string | URL | Request): string | URL | Req
|
|
|
174
157
|
|
|
175
158
|
type KeyResolver = string | (() => string | undefined);
|
|
176
159
|
|
|
177
|
-
const
|
|
178
|
-
|
|
179
|
-
openai: "OPENAI_API_KEY",
|
|
180
|
-
google: "GEMINI_API_KEY",
|
|
181
|
-
groq: "GROQ_API_KEY",
|
|
182
|
-
cerebras: "CEREBRAS_API_KEY",
|
|
183
|
-
xai: "XAI_API_KEY",
|
|
184
|
-
"xai-oauth": () => $pickenv("XAI_OAUTH_TOKEN", "XAI_API_KEY"),
|
|
185
|
-
fireworks: "FIREWORKS_API_KEY",
|
|
186
|
-
firepass: "FIREPASS_API_KEY",
|
|
187
|
-
"wafer-pass": "WAFER_PASS_API_KEY",
|
|
188
|
-
"wafer-serverless": "WAFER_SERVERLESS_API_KEY",
|
|
189
|
-
openrouter: "OPENROUTER_API_KEY",
|
|
190
|
-
kilo: "KILO_API_KEY",
|
|
191
|
-
"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
|
|
192
|
-
zai: "ZAI_API_KEY",
|
|
193
|
-
"zhipu-coding-plan": "ZHIPU_API_KEY",
|
|
194
|
-
mistral: "MISTRAL_API_KEY",
|
|
195
|
-
minimax: "MINIMAX_API_KEY",
|
|
196
|
-
"minimax-code": "MINIMAX_CODE_API_KEY",
|
|
197
|
-
"minimax-code-cn": "MINIMAX_CODE_CN_API_KEY",
|
|
198
|
-
"opencode-go": "OPENCODE_API_KEY",
|
|
199
|
-
"opencode-zen": "OPENCODE_API_KEY",
|
|
200
|
-
cursor: "CURSOR_ACCESS_TOKEN",
|
|
201
|
-
deepseek: "DEEPSEEK_API_KEY",
|
|
202
|
-
"openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
|
|
160
|
+
const LEGACY_ENV_KEYS: Record<string, KeyResolver> = {
|
|
161
|
+
// Non-provider / search-tool keys and API-name keys not modeled as registry provider defs.
|
|
203
162
|
"azure-openai-responses": "AZURE_OPENAI_API_KEY",
|
|
163
|
+
"llama.cpp": "LLAMA_CPP_API_KEY",
|
|
204
164
|
exa: "EXA_API_KEY",
|
|
205
165
|
jina: "JINA_API_KEY",
|
|
206
166
|
brave: "BRAVE_API_KEY",
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
}
|
|
230
|
-
},
|
|
231
|
-
// Amazon Bedrock supports multiple credential sources:
|
|
232
|
-
// 1. AWS_BEARER_TOKEN_BEDROCK - Bedrock API keys (bearer token)
|
|
233
|
-
// 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY - standard IAM keys
|
|
234
|
-
// 3. AWS_PROFILE - named profile from ~/.aws/credentials
|
|
235
|
-
// 4. AWS_CONTAINER_CREDENTIALS_* - ECS/Task IAM role credentials
|
|
236
|
-
// 5. AWS_WEB_IDENTITY_TOKEN_FILE + AWS_ROLE_ARN - IRSA (EKS) web identity
|
|
237
|
-
"amazon-bedrock": () => {
|
|
238
|
-
const hasEcsCredentials =
|
|
239
|
-
!!$env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI || !!$env.AWS_CONTAINER_CREDENTIALS_FULL_URI;
|
|
240
|
-
const hasWebIdentity = !!$env.AWS_WEB_IDENTITY_TOKEN_FILE && !!$env.AWS_ROLE_ARN;
|
|
241
|
-
if (
|
|
242
|
-
$env.AWS_PROFILE ||
|
|
243
|
-
($env.AWS_ACCESS_KEY_ID && $env.AWS_SECRET_ACCESS_KEY) ||
|
|
244
|
-
$env.AWS_BEARER_TOKEN_BEDROCK ||
|
|
245
|
-
hasEcsCredentials ||
|
|
246
|
-
hasWebIdentity
|
|
247
|
-
) {
|
|
248
|
-
return "<authenticated>";
|
|
249
|
-
}
|
|
250
|
-
},
|
|
251
|
-
synthetic: "SYNTHETIC_API_KEY",
|
|
252
|
-
"cloudflare-ai-gateway": "CLOUDFLARE_AI_GATEWAY_API_KEY",
|
|
253
|
-
huggingface: () => $pickenv("HUGGINGFACE_HUB_TOKEN", "HF_TOKEN"),
|
|
254
|
-
litellm: "LITELLM_API_KEY",
|
|
255
|
-
moonshot: "MOONSHOT_API_KEY",
|
|
256
|
-
nvidia: "NVIDIA_API_KEY",
|
|
257
|
-
nanogpt: "NANO_GPT_API_KEY",
|
|
258
|
-
"lm-studio": "LM_STUDIO_API_KEY",
|
|
259
|
-
ollama: "OLLAMA_API_KEY",
|
|
260
|
-
"ollama-cloud": "OLLAMA_CLOUD_API_KEY",
|
|
261
|
-
"llama.cpp": "LLAMA_CPP_API_KEY",
|
|
262
|
-
qianfan: "QIANFAN_API_KEY",
|
|
263
|
-
"qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
|
|
264
|
-
together: "TOGETHER_API_KEY",
|
|
265
|
-
zenmux: "ZENMUX_API_KEY",
|
|
266
|
-
venice: "VENICE_API_KEY",
|
|
267
|
-
vllm: "VLLM_API_KEY",
|
|
268
|
-
xiaomi: "XIAOMI_API_KEY",
|
|
269
|
-
"xiaomi-token-plan-sgp": "XIAOMI_TOKEN_PLAN_SGP_API_KEY",
|
|
270
|
-
"xiaomi-token-plan-ams": "XIAOMI_TOKEN_PLAN_AMS_API_KEY",
|
|
271
|
-
"xiaomi-token-plan-cn": "XIAOMI_TOKEN_PLAN_CN_API_KEY",
|
|
167
|
+
};
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Env fallbacks derived from the catalog table — the single source for plain
|
|
171
|
+
* provider env-var names. Registry defs override with computed resolvers
|
|
172
|
+
* (Foundry/ADC/Bedrock probes); legacy non-provider keys merge last.
|
|
173
|
+
*/
|
|
174
|
+
const CATALOG_ENTRY_ENV_KEYS = (CATALOG_PROVIDERS as readonly ProviderCatalogEntry[]).flatMap(provider => {
|
|
175
|
+
const envVars = provider.envVars;
|
|
176
|
+
if (!envVars || envVars.length === 0) return [];
|
|
177
|
+
const resolver: KeyResolver = envVars.length === 1 ? envVars[0] : () => $pickenv(...envVars);
|
|
178
|
+
return [[provider.id, resolver] as [string, KeyResolver]];
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
const serviceProviderMap: Record<string, KeyResolver> = {
|
|
182
|
+
...Object.fromEntries(CATALOG_ENTRY_ENV_KEYS),
|
|
183
|
+
...Object.fromEntries(
|
|
184
|
+
PROVIDER_REGISTRY.flatMap(provider =>
|
|
185
|
+
provider.envKeys != null ? [[provider.id, provider.envKeys] as [string, KeyResolver]] : [],
|
|
186
|
+
),
|
|
187
|
+
),
|
|
188
|
+
...LEGACY_ENV_KEYS,
|
|
272
189
|
};
|
|
273
190
|
|
|
274
191
|
/**
|
|
@@ -285,6 +202,18 @@ export function getEnvApiKey(provider: string): string | undefined {
|
|
|
285
202
|
return resolver?.();
|
|
286
203
|
}
|
|
287
204
|
|
|
205
|
+
/**
|
|
206
|
+
* Name of the environment variable that backs `getEnvApiKey` for a provider,
|
|
207
|
+
* when that provider maps to a single named variable (e.g. `github-copilot` →
|
|
208
|
+
* `COPILOT_GITHUB_TOKEN`). Returns undefined for providers whose env fallback
|
|
209
|
+
* is computed (multi-var pickers, Vertex ADC / Bedrock probes, …) since no
|
|
210
|
+
* single variable name describes the source.
|
|
211
|
+
*/
|
|
212
|
+
export function getEnvApiKeyName(provider: string): string | undefined {
|
|
213
|
+
const resolver = serviceProviderMap[provider];
|
|
214
|
+
return typeof resolver === "string" ? resolver : undefined;
|
|
215
|
+
}
|
|
216
|
+
|
|
288
217
|
/**
|
|
289
218
|
* Enumerate every provider that has an env-var fallback for `getEnvApiKey`.
|
|
290
219
|
* Used by `prometheus auth-broker migrate --include-env` to discover env-sourced keys
|
|
@@ -424,11 +353,10 @@ function isRetryableUpstreamError(error: unknown, status: number | undefined, me
|
|
|
424
353
|
return !!message && isUsageLimitError(message);
|
|
425
354
|
}
|
|
426
355
|
|
|
427
|
-
function createAssistantAuthError(message: AssistantMessage): Error
|
|
428
|
-
const
|
|
356
|
+
function createAssistantAuthError(message: AssistantMessage): Error {
|
|
357
|
+
const text = message.errorMessage ?? "Provider authentication failed";
|
|
429
358
|
const status = extractStatusFromAssistantError(message);
|
|
430
|
-
|
|
431
|
-
return error;
|
|
359
|
+
return status === undefined ? new Error(text) : new ProviderHttpError(text, status);
|
|
432
360
|
}
|
|
433
361
|
|
|
434
362
|
function emitBufferedEvents(stream: AssistantMessageEventStream, events: AssistantMessageEvent[]): void {
|
|
@@ -437,18 +365,39 @@ function emitBufferedEvents(stream: AssistantMessageEventStream, events: Assista
|
|
|
437
365
|
}
|
|
438
366
|
}
|
|
439
367
|
|
|
368
|
+
/**
 * Adapt a static string key plus an `onAuthError` callback to the
 * `ApiKeyResolver` call shape.
 *
 * The returned resolver remembers the key it most recently handed out,
 * starting with `initialKey`:
 * - called with `error === undefined`, it returns the remembered key as-is;
 * - called with an error, it awaits `onAuthError(provider, rememberedKey, error)`.
 *   A falsy answer is reported as `undefined` and leaves the remembered key
 *   untouched; any other answer replaces the remembered key and is returned.
 *
 * @param provider - Provider id forwarded to `onAuthError`.
 * @param initialKey - Key returned until `onAuthError` supplies a new one.
 * @param onAuthError - Callback asked for a replacement key after a failure.
 */
function createLegacyAuthErrorResolver(
	provider: string,
	initialKey: string,
	onAuthError: NonNullable<SimpleStreamOptions["onAuthError"]>,
): ApiKeyResolver {
	let activeKey = initialKey;
	return async ({ error: authError }) => {
		if (authError === undefined) return activeKey;
		const replacement = await onAuthError(provider, activeKey, authError);
		// Empty string / null / undefined all mean "no replacement available".
		if (!replacement) return undefined;
		activeKey = replacement;
		return replacement;
	};
}
|
|
381
|
+
|
|
440
382
|
export function streamSimple<TApi extends Api>(
|
|
441
383
|
model: Model<TApi>,
|
|
442
384
|
context: Context,
|
|
443
385
|
options?: SimpleStreamOptions,
|
|
444
386
|
): AssistantMessageEventStream {
|
|
445
387
|
const requestOptions = withRequestDebugFetch(options);
|
|
446
|
-
const
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
388
|
+
const legacyAuthErrorResolver =
|
|
389
|
+
typeof requestOptions?.apiKey === "string" && requestOptions.onAuthError
|
|
390
|
+
? createLegacyAuthErrorResolver(model.provider, requestOptions.apiKey, requestOptions.onAuthError)
|
|
391
|
+
: undefined;
|
|
392
|
+
const apiKeyResolver = isApiKeyResolver(requestOptions?.apiKey) ? requestOptions.apiKey : legacyAuthErrorResolver;
|
|
393
|
+
if (apiKeyResolver) {
|
|
450
394
|
const outer = new AssistantMessageEventStream();
|
|
451
|
-
const
|
|
395
|
+
const signal = requestOptions?.signal;
|
|
396
|
+
// One inner attempt against a resolved string key. When
|
|
397
|
+
// `captureAuthFailure` is set, a retryable auth error that arrives before
|
|
398
|
+
// any replay-unsafe event is buffered and returned (so the caller can
|
|
399
|
+
// retry with a fresh key) instead of surfaced. The terminal attempt
|
|
400
|
+
// clears the flag and emits whatever it gets.
|
|
452
401
|
const runAttempt = async (apiKey: string, captureAuthFailure: boolean): Promise<AuthRetryFailure | undefined> => {
|
|
453
402
|
const bufferedEvents: AssistantMessageEvent[] = [];
|
|
454
403
|
let emittedReplayUnsafeEvent = false;
|
|
@@ -458,7 +407,8 @@ export function streamSimple<TApi extends Api>(
|
|
|
458
407
|
};
|
|
459
408
|
|
|
460
409
|
try {
|
|
461
|
-
const
|
|
410
|
+
const innerOptions: SimpleStreamOptions = { ...requestOptions, apiKey, onAuthError: undefined };
|
|
411
|
+
const inner = streamSimple(model, context, innerOptions);
|
|
462
412
|
for await (const event of inner) {
|
|
463
413
|
if (!emittedReplayUnsafeEvent && event.type === "start") {
|
|
464
414
|
bufferedEvents.push(event);
|
|
@@ -510,19 +460,43 @@ export function streamSimple<TApi extends Api>(
|
|
|
510
460
|
};
|
|
511
461
|
|
|
512
462
|
void (async () => {
|
|
513
|
-
|
|
514
|
-
if (!failure) return;
|
|
515
|
-
let nextKey: string | undefined;
|
|
463
|
+
let lastKey: string | undefined;
|
|
516
464
|
try {
|
|
517
|
-
|
|
518
|
-
} catch {
|
|
519
|
-
|
|
465
|
+
lastKey = (await apiKeyResolver({ lastChance: false, error: undefined, signal })) || undefined;
|
|
466
|
+
} catch (error) {
|
|
467
|
+
// A thrown resolver is a broker/OAuth/network failure, not a missing
|
|
468
|
+
// key — surface the cause instead of masking it as "No API key".
|
|
469
|
+
outer.fail(
|
|
470
|
+
new Error(
|
|
471
|
+
`Failed to resolve API key for provider ${model.provider}: ${error instanceof Error ? error.message : String(error)}`,
|
|
472
|
+
{ cause: error },
|
|
473
|
+
),
|
|
474
|
+
);
|
|
475
|
+
return;
|
|
520
476
|
}
|
|
521
|
-
if (
|
|
522
|
-
|
|
477
|
+
if (lastKey === undefined) {
|
|
478
|
+
outer.fail(new Error(`No API key for provider: ${model.provider}`));
|
|
523
479
|
return;
|
|
524
480
|
}
|
|
525
|
-
await runAttempt(
|
|
481
|
+
let failure = await runAttempt(lastKey, true);
|
|
482
|
+
if (!failure) return;
|
|
483
|
+
// a/b/c policy: refresh the same account (lastChance=false), then
|
|
484
|
+
// switch to a sibling (lastChance=true). A step is skipped when the
|
|
485
|
+
// resolver yields the same key it just tried or `undefined`; the
|
|
486
|
+
// final step's attempt clears the capture flag so it emits directly.
|
|
487
|
+
for (let step = 0; step < AUTH_RETRY_STEPS.length; step++) {
|
|
488
|
+
// Caller aborted between attempts: don't mint a fresh token or fire
|
|
489
|
+
// another doomed request — emit the captured failure instead.
|
|
490
|
+
if (signal?.aborted) break;
|
|
491
|
+
const nextKey = await resolveRetryKey(apiKeyResolver, AUTH_RETRY_STEPS[step]!, failure.error, signal);
|
|
492
|
+
if (nextKey === undefined || nextKey === lastKey) continue;
|
|
493
|
+
lastKey = nextKey;
|
|
494
|
+
const isLastStep = step === AUTH_RETRY_STEPS.length - 1;
|
|
495
|
+
const next = await runAttempt(nextKey, !isLastStep);
|
|
496
|
+
if (!next) return;
|
|
497
|
+
failure = next;
|
|
498
|
+
}
|
|
499
|
+
emitFailure(failure);
|
|
526
500
|
})();
|
|
527
501
|
return outer;
|
|
528
502
|
}
|
|
@@ -553,7 +527,10 @@ export function streamSimple<TApi extends Api>(
|
|
|
553
527
|
return stream(model, context, providerOptions);
|
|
554
528
|
}
|
|
555
529
|
|
|
556
|
-
|
|
530
|
+
// The resolver form is handled by the wrapper above; only a static string
|
|
531
|
+
// key reaches this point.
|
|
532
|
+
const apiKey =
|
|
533
|
+
(typeof requestOptions?.apiKey === "string" ? requestOptions.apiKey : undefined) || getEnvApiKey(model.provider);
|
|
557
534
|
if (!apiKey) {
|
|
558
535
|
throw new Error(`No API key for provider: ${model.provider}`);
|
|
559
536
|
}
|
|
@@ -599,6 +576,16 @@ export async function completeSimple<TApi extends Api>(
|
|
|
599
576
|
}
|
|
600
577
|
|
|
601
578
|
// Output allowance (in tokens) kept aside when a thinking budget has to be
// shrunk or the total cap raised so the two fit together.
const MIN_OUTPUT_TOKENS = 1024;
|
|
579
|
+
// Total output cap used as a fallback for models whose catalog entry has no maxTokens.
const OUTPUT_CAP_WHEN_UNKNOWN = 64_000;

/**
 * Compute the total token cap for a request that carries a thinking budget.
 *
 * @param baseMaxTokens - Desired output tokens, or `undefined` when neither the
 * caller nor the model supplied a value.
 * @param modelMaxTokens - The model's own cap, or `null` when unknown.
 * @param thinkingBudget - Tokens reserved for thinking; added on top of
 * `baseMaxTokens` when that is defined.
 * @returns `baseMaxTokens + thinkingBudget` (or `OUTPUT_CAP_WHEN_UNKNOWN` when
 * `baseMaxTokens` is `undefined`), limited to `modelMaxTokens` when that is known.
 */
function maxTokensWithThinkingBudget(
	baseMaxTokens: number | undefined,
	modelMaxTokens: number | null,
	thinkingBudget: number,
): number {
	// An unknown model cap imposes no ceiling at all.
	const ceiling = modelMaxTokens ?? Number.POSITIVE_INFINITY;
	if (baseMaxTokens === undefined) {
		return Math.min(OUTPUT_CAP_WHEN_UNKNOWN, ceiling);
	}
	return Math.min(baseMaxTokens + thinkingBudget, ceiling);
}
|
|
602
589
|
// NOTE(review): exported buffer size; presumably a token count, but no consumer
// is visible in this part of the file — confirm units and usage at the call sites.
export const OUTPUT_FALLBACK_BUFFER = 4000;
|
|
603
590
|
// Interleaved thinking is on by default; it is turned off only when the
// PROMETHEUS_NO_INTERLEAVED_THINKING environment variable is exactly "1".
// Read once, when this module is evaluated.
const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PROMETHEUS_NO_INTERLEAVED_THINKING !== "1";
|
|
604
591
|
|
|
@@ -697,24 +684,53 @@ function resolveOpenAiReasoningEffort<TApi extends Api>(
|
|
|
697
684
|
): Effort | undefined {
|
|
698
685
|
const reasoning = options?.reasoning;
|
|
699
686
|
if (!reasoning || !model.reasoning) return undefined;
|
|
700
|
-
// Models
|
|
701
|
-
//
|
|
702
|
-
//
|
|
703
|
-
//
|
|
704
|
-
//
|
|
705
|
-
//
|
|
706
|
-
//
|
|
707
|
-
|
|
687
|
+
// Models that reason natively but expose no effort dial carry
|
|
688
|
+
// `thinking: undefined` (baked at build time from
|
|
689
|
+
// `compat.supportsReasoningEffort: false` on openai-responses*). The
|
|
690
|
+
// wire-side omitReasoningEffort gate (providers/xai-responses.ts:78) is the
|
|
691
|
+
// actual strip; returning undefined here avoids a redundant
|
|
692
|
+
// requireSupportedEffort throw that would defeat the gate and surface a
|
|
693
|
+
// confusing "Compaction failed: Thinking effort high is not supported
|
|
694
|
+
// by..." to the user.
|
|
695
|
+
if (!model.thinking) return undefined;
|
|
708
696
|
return requireSupportedEffort(model, reasoning);
|
|
709
697
|
}
|
|
710
698
|
|
|
711
699
|
// Type-level helper: re-types an options object built for one specific API as
// the options type over the whole `Api` union. It is a bare `as` assertion and
// returns its argument unchanged at runtime.
const castApi = <TApi extends Api>(api: OptionsForApi<TApi>): OptionsForApi<Api> => api as OptionsForApi<Api>;
|
|
712
700
|
|
|
713
|
-
|
|
701
|
+
/**
 * Clamp reasoning options for mandatory-reasoning endpoints.
 *
 * Endpoints flagged `thinking.requiresEffort` reject disabled or omitted
 * thinking ("Reasoning is mandatory for this endpoint and cannot be
 * disabled"), so for those models a missing or disabled effort is replaced by
 * the lowest supported effort. Models flagged `suppressWhenOff` are left
 * alone because "off" is handled provider-side through explicit wire
 * suppression. Interplay with collapsed pairs: pair derivation strips member
 * flags (off routes to a bare SKU that CAN disable), while identity backfill
 * re-flags pairs whose logical id is itself mandatory (Gemini 3.x) — there
 * the clamp wins and the floored effort routes to the thinking SKU.
 *
 * @returns `options` unchanged when no clamp applies or no minimum effort is
 * known; otherwise a copy with `reasoning` set to that minimum and
 * `disableReasoning` cleared.
 */
function normalizeMandatoryReasoningOptions<TApi extends Api>(
	model: Model<TApi>,
	options?: SimpleStreamOptions,
): SimpleStreamOptions | undefined {
	// Only reasoning models whose endpoint insists on an effort are candidates.
	if (!model.reasoning) return options;
	if (!model.thinking?.requiresEffort) return options;
	// Provider-side suppression already covers the "off" case for these models.
	if (model.thinking.suppressWhenOff) return options;

	// The caller asked for an effort and did not disable reasoning: nothing to fix.
	const effortAlreadyRequested = options?.reasoning !== undefined && !options.disableReasoning;
	if (effortAlreadyRequested) return options;

	const lowestEffort = minimumSupportedEffort(model);
	return lowestEffort === undefined
		? options
		: { ...options, reasoning: lowestEffort, disableReasoning: undefined };
}
|
|
727
|
+
|
|
728
|
+
function mapOptionsForApi<TApi extends Api>(
|
|
729
|
+
model: Model<TApi>,
|
|
730
|
+
rawOptions?: SimpleStreamOptions,
|
|
716
731
|
apiKey?: string,
|
|
717
732
|
): OptionsForApi<TApi> {
|
|
733
|
+
const options = normalizeMandatoryReasoningOptions(model, rawOptions);
|
|
718
734
|
const base = {
|
|
719
735
|
temperature: options?.temperature,
|
|
720
736
|
topP: options?.topP,
|
|
@@ -722,9 +738,9 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
722
738
|
minP: options?.minP,
|
|
723
739
|
presencePenalty: options?.presencePenalty,
|
|
724
740
|
repetitionPenalty: options?.repetitionPenalty,
|
|
725
|
-
maxTokens: options?.maxTokens ?? model.maxTokens,
|
|
741
|
+
maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
|
|
726
742
|
signal: options?.signal,
|
|
727
|
-
apiKey: apiKey
|
|
743
|
+
apiKey: apiKey ?? (typeof options?.apiKey === "string" ? options.apiKey : undefined),
|
|
728
744
|
cacheRetention: options?.cacheRetention,
|
|
729
745
|
headers: options?.headers,
|
|
730
746
|
initiatorOverride: options?.initiatorOverride,
|
|
@@ -750,6 +766,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
750
766
|
if (!reasoning || !model.reasoning) {
|
|
751
767
|
return castApi<"anthropic-messages">({
|
|
752
768
|
...base,
|
|
769
|
+
requestModelId: resolveWireModelId(model, undefined),
|
|
753
770
|
thinkingEnabled: false,
|
|
754
771
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
755
772
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
@@ -761,6 +778,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
761
778
|
if (thinkingBudget <= 0) {
|
|
762
779
|
return castApi<"anthropic-messages">({
|
|
763
780
|
...base,
|
|
781
|
+
requestModelId: resolveWireModelId(model, undefined),
|
|
764
782
|
thinkingEnabled: false,
|
|
765
783
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
766
784
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
@@ -774,6 +792,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
774
792
|
const effort = mapEffortToAnthropicAdaptiveEffort(model, reasoning);
|
|
775
793
|
return castApi<"anthropic-messages">({
|
|
776
794
|
...base,
|
|
795
|
+
requestModelId: resolveWireModelId(model, reasoning),
|
|
777
796
|
thinkingEnabled: true,
|
|
778
797
|
effort,
|
|
779
798
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
@@ -785,6 +804,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
785
804
|
if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
|
|
786
805
|
return castApi<"anthropic-messages">({
|
|
787
806
|
...base,
|
|
807
|
+
requestModelId: resolveWireModelId(model, reasoning),
|
|
788
808
|
thinkingEnabled: true,
|
|
789
809
|
thinkingBudgetTokens: thinkingBudget,
|
|
790
810
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
@@ -793,8 +813,8 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
793
813
|
});
|
|
794
814
|
}
|
|
795
815
|
|
|
796
|
-
// Caller's maxTokens is
|
|
797
|
-
const maxTokens =
|
|
816
|
+
// Caller's maxTokens is desired output, so add thinking budget on top. With no caller/model cap, use a finite total fallback.
|
|
817
|
+
const maxTokens = maxTokensWithThinkingBudget(base.maxTokens, model.maxTokens, thinkingBudget);
|
|
798
818
|
|
|
799
819
|
// If not enough room for thinking + output, reduce thinking budget
|
|
800
820
|
if (maxTokens <= thinkingBudget) {
|
|
@@ -805,6 +825,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
805
825
|
if (thinkingBudget <= 0) {
|
|
806
826
|
return castApi<"anthropic-messages">({
|
|
807
827
|
...base,
|
|
828
|
+
requestModelId: resolveWireModelId(model, undefined),
|
|
808
829
|
thinkingEnabled: false,
|
|
809
830
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
810
831
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
@@ -814,6 +835,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
814
835
|
return castApi<"anthropic-messages">({
|
|
815
836
|
...base,
|
|
816
837
|
maxTokens,
|
|
838
|
+
requestModelId: resolveWireModelId(model, reasoning),
|
|
817
839
|
thinkingEnabled: true,
|
|
818
840
|
thinkingBudgetTokens: thinkingBudget,
|
|
819
841
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
@@ -837,10 +859,13 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
837
859
|
}
|
|
838
860
|
const budgetInfo = resolveBedrockThinkingBudget(model as Model<"bedrock-converse-stream">, options);
|
|
839
861
|
if (!budgetInfo) return bedrockBase as OptionsForApi<TApi>;
|
|
840
|
-
let maxTokens = bedrockBase.maxTokens ?? model.maxTokens;
|
|
862
|
+
let maxTokens = bedrockBase.maxTokens ?? model.maxTokens ?? OUTPUT_CAP_WHEN_UNKNOWN;
|
|
841
863
|
let thinkingBudgets = bedrockBase.thinkingBudgets;
|
|
842
864
|
if (maxTokens <= budgetInfo.budget) {
|
|
843
|
-
const desiredMaxTokens = Math.min(
|
|
865
|
+
const desiredMaxTokens = Math.min(
|
|
866
|
+
model.maxTokens ?? Number.POSITIVE_INFINITY,
|
|
867
|
+
budgetInfo.budget + MIN_OUTPUT_TOKENS,
|
|
868
|
+
);
|
|
844
869
|
if (desiredMaxTokens > maxTokens) {
|
|
845
870
|
maxTokens = desiredMaxTokens;
|
|
846
871
|
}
|
|
@@ -912,7 +937,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
912
937
|
...base,
|
|
913
938
|
thinking: {
|
|
914
939
|
enabled: true,
|
|
915
|
-
level: mapEffortToGoogleThinkingLevel(
|
|
940
|
+
level: mapEffortToGoogleThinkingLevel(effort),
|
|
916
941
|
},
|
|
917
942
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
918
943
|
});
|
|
@@ -930,53 +955,57 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
930
955
|
|
|
931
956
|
case "google-gemini-cli": {
|
|
932
957
|
const reasoning = options?.reasoning;
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
958
|
+
const toolChoice = mapGoogleToolChoice(options?.toolChoice);
|
|
959
|
+
if (reasoning && model.reasoning) {
|
|
960
|
+
const effort = requireSupportedEffort(model, reasoning);
|
|
961
|
+
|
|
962
|
+
// Gemini 3+ models use thinkingLevel instead of thinkingBudget
|
|
963
|
+
if (model.thinking?.mode === "google-level") {
|
|
964
|
+
return castApi<"google-gemini-cli">({
|
|
965
|
+
...base,
|
|
966
|
+
requestModelId: resolveWireModelId(model, effort),
|
|
967
|
+
thinking: {
|
|
968
|
+
enabled: true,
|
|
969
|
+
level: mapEffortToGoogleThinkingLevel(effort),
|
|
970
|
+
},
|
|
971
|
+
toolChoice,
|
|
972
|
+
});
|
|
973
|
+
}
|
|
942
974
|
|
|
943
|
-
|
|
944
|
-
if (model.thinking?.mode === "google-level") {
|
|
945
|
-
return castApi<"google-gemini-cli">({
|
|
946
|
-
...base,
|
|
947
|
-
thinking: {
|
|
948
|
-
enabled: true,
|
|
949
|
-
level: mapEffortToGoogleThinkingLevel(model, effort),
|
|
950
|
-
},
|
|
951
|
-
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
952
|
-
});
|
|
953
|
-
}
|
|
975
|
+
let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
|
|
954
976
|
|
|
955
|
-
|
|
977
|
+
// Caller's maxTokens is desired output, so add thinking budget on top. With no caller/model cap, use a finite total fallback.
|
|
978
|
+
const maxTokens = maxTokensWithThinkingBudget(base.maxTokens, model.maxTokens, thinkingBudget);
|
|
956
979
|
|
|
957
|
-
|
|
958
|
-
|
|
980
|
+
// If not enough room for thinking + output, reduce thinking budget
|
|
981
|
+
if (maxTokens <= thinkingBudget) {
|
|
982
|
+
thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
|
|
983
|
+
}
|
|
959
984
|
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
985
|
+
if (thinkingBudget > 0) {
|
|
986
|
+
return castApi<"google-gemini-cli">({
|
|
987
|
+
...base,
|
|
988
|
+
maxTokens,
|
|
989
|
+
requestModelId: resolveWireModelId(model, effort),
|
|
990
|
+
thinking: { enabled: true, budgetTokens: thinkingBudget },
|
|
991
|
+
toolChoice,
|
|
992
|
+
});
|
|
993
|
+
}
|
|
994
|
+
// Budget clamped to zero — fall through to the thinking-off path.
|
|
963
995
|
}
|
|
964
996
|
|
|
965
|
-
|
|
966
|
-
if (
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
971
|
-
});
|
|
972
|
-
} else {
|
|
973
|
-
return castApi<"google-gemini-cli">({
|
|
974
|
-
...base,
|
|
975
|
-
maxTokens,
|
|
976
|
-
thinking: { enabled: true, budgetTokens: thinkingBudget },
|
|
977
|
-
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
978
|
-
});
|
|
997
|
+
const thinking: GoogleGeminiCliOptions["thinking"] = { enabled: false };
|
|
998
|
+
if (model.reasoning && model.thinking?.suppressWhenOff) {
|
|
999
|
+
// CCA re-applies the per-id baked server default when the config
|
|
1000
|
+
// is omitted; suppression must be explicit on the wire.
|
|
1001
|
+
thinking.suppress = model.thinking.mode === "google-level" ? { level: "MINIMAL" } : { budget: 0 };
|
|
979
1002
|
}
|
|
1003
|
+
return castApi<"google-gemini-cli">({
|
|
1004
|
+
...base,
|
|
1005
|
+
requestModelId: resolveWireModelId(model, undefined),
|
|
1006
|
+
thinking,
|
|
1007
|
+
toolChoice,
|
|
1008
|
+
});
|
|
980
1009
|
}
|
|
981
1010
|
|
|
982
1011
|
case "google-vertex": {
|
|
@@ -999,7 +1028,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
999
1028
|
...base,
|
|
1000
1029
|
thinking: {
|
|
1001
1030
|
enabled: true,
|
|
1002
|
-
level: mapEffortToGoogleThinkingLevel(
|
|
1031
|
+
level: mapEffortToGoogleThinkingLevel(effort),
|
|
1003
1032
|
},
|
|
1004
1033
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
1005
1034
|
});
|
|
@@ -1019,6 +1048,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
1019
1048
|
return castApi<"ollama-chat">({
|
|
1020
1049
|
...base,
|
|
1021
1050
|
reasoning: resolveOpenAiReasoningEffort(model, options),
|
|
1051
|
+
disableReasoning: options?.disableReasoning,
|
|
1022
1052
|
toolChoice: options?.toolChoice,
|
|
1023
1053
|
});
|
|
1024
1054
|
|