@prometheus-ai/ai 0.5.4 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -1,27 +1,24 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
ChatCompletionMessageParam,
|
|
10
|
-
ChatCompletionToolMessageParam,
|
|
11
|
-
} from "openai/resources/chat/completions";
|
|
1
|
+
import type { Effort } from "@prometheus-ai/catalog/effort";
|
|
2
|
+
import { toFirepassWireModelId, toFireworksWireModelId } from "@prometheus-ai/catalog/fireworks-model-id";
|
|
3
|
+
import { isDeepseekModelIdOrName } from "@prometheus-ai/catalog/identity";
|
|
4
|
+
import { getSupportedEfforts, resolveWireModelId } from "@prometheus-ai/catalog/model-thinking";
|
|
5
|
+
import { calculateCost } from "@prometheus-ai/catalog/models";
|
|
6
|
+
import type { ResolvedOpenAICompat } from "@prometheus-ai/catalog/types";
|
|
7
|
+
import { parseGitHubCopilotApiKey } from "@prometheus-ai/catalog/wire/github-copilot";
|
|
8
|
+
import { $env, extractHttpStatusFromError } from "@prometheus-ai/utils";
|
|
12
9
|
import packageJson from "../../package.json" with { type: "json" };
|
|
13
|
-
import {
|
|
14
|
-
import { calculateCost } from "../models";
|
|
10
|
+
import { getKimiCommonHeaders } from "../registry/oauth/kimi";
|
|
15
11
|
import { getEnvApiKey } from "../stream";
|
|
16
12
|
import {
|
|
17
13
|
type AssistantMessage,
|
|
18
14
|
type Context,
|
|
19
|
-
type FetchImpl,
|
|
20
15
|
type Message,
|
|
21
16
|
type MessageAttribution,
|
|
22
17
|
type Model,
|
|
18
|
+
OPENAI_MAX_OUTPUT_TOKENS,
|
|
23
19
|
type OpenAICompat,
|
|
24
20
|
type ProviderSessionState,
|
|
21
|
+
type RawSseEvent,
|
|
25
22
|
resolveServiceTier,
|
|
26
23
|
type ServiceTier,
|
|
27
24
|
type StopReason,
|
|
@@ -38,7 +35,6 @@ import {
|
|
|
38
35
|
import { normalizeSystemPrompts } from "../utils";
|
|
39
36
|
import { createAbortSourceTracker } from "../utils/abort";
|
|
40
37
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
41
|
-
import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
|
|
42
38
|
import {
|
|
43
39
|
type CapturedHttpErrorResponse,
|
|
44
40
|
finalizeErrorMessage,
|
|
@@ -49,27 +45,38 @@ import {
|
|
|
49
45
|
getOpenAIStreamFirstEventTimeoutMs,
|
|
50
46
|
getOpenAIStreamIdleTimeoutMs,
|
|
51
47
|
iterateWithIdleTimeout,
|
|
48
|
+
iterateWithTerminalGrace,
|
|
52
49
|
} from "../utils/idle-iterator";
|
|
53
50
|
import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
|
|
54
|
-
import {
|
|
55
|
-
import { getKimiCommonHeaders } from "../utils/oauth/kimi";
|
|
51
|
+
import { OpenAIHttpError, postOpenAIStream } from "../utils/openai-http";
|
|
56
52
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
57
53
|
import { callWithCopilotModelRetry } from "../utils/retry";
|
|
58
54
|
import { adaptSchemaForStrict, NO_STRICT, toolWireSchema } from "../utils/schema";
|
|
59
|
-
import { wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
60
55
|
import {
|
|
61
56
|
getStreamMarkupHealingPattern,
|
|
62
57
|
type HealedToolCall,
|
|
58
|
+
modelMayLeakThinkingTags,
|
|
63
59
|
StreamMarkupHealing,
|
|
64
60
|
type StreamMarkupHealingEvent,
|
|
65
61
|
} from "../utils/stream-markup-healing";
|
|
66
62
|
import { isForcedToolChoice, mapToOpenAICompletionsToolChoice } from "../utils/tool-choice";
|
|
63
|
+
import { parseAzureDeploymentNameMap } from "./azure-openai-responses";
|
|
67
64
|
import {
|
|
68
65
|
buildCopilotDynamicHeaders,
|
|
69
66
|
hasCopilotVisionInput,
|
|
70
67
|
resolveGitHubCopilotBaseUrl,
|
|
71
68
|
} from "./github-copilot-headers";
|
|
72
|
-
import
|
|
69
|
+
import type {
|
|
70
|
+
ChatCompletionAssistantMessageParam,
|
|
71
|
+
ChatCompletionChunk,
|
|
72
|
+
ChatCompletionContentPart,
|
|
73
|
+
ChatCompletionContentPartImage,
|
|
74
|
+
ChatCompletionContentPartText,
|
|
75
|
+
ChatCompletionCreateParamsStreaming,
|
|
76
|
+
ChatCompletionMessageParam,
|
|
77
|
+
ChatCompletionTool,
|
|
78
|
+
ChatCompletionToolMessageParam,
|
|
79
|
+
} from "./openai-chat-wire";
|
|
73
80
|
import { createInitialResponsesAssistantMessage } from "./openai-responses-shared";
|
|
74
81
|
import { transformMessages } from "./transform-messages";
|
|
75
82
|
import {
|
|
@@ -107,10 +114,16 @@ function resolveOpenAICompletionsModelId(
|
|
|
107
114
|
model: Model<"openai-completions">,
|
|
108
115
|
options: OpenAICompletionsOptions | undefined,
|
|
109
116
|
): string {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
117
|
+
// Effort-tier variants route per request effort (off → bare id, efforts →
|
|
118
|
+
// the thinking backing id); catalog variants (Copilot long-context `-1m`
|
|
119
|
+
// entries) pin via `requestModelId`; everything else serializes `model.id`.
|
|
120
|
+
const effort =
|
|
121
|
+
options?.reasoning && !options.disableReasoning && model.reasoning ? (options.reasoning as Effort) : undefined;
|
|
122
|
+
const wireId = resolveWireModelId(model, effort);
|
|
123
|
+
if (model.provider === "firepass") return toFirepassWireModelId(wireId);
|
|
124
|
+
if (model.provider === "fireworks") return toFireworksWireModelId(wireId);
|
|
125
|
+
if (model.provider === "openrouter") return applyOpenRouterRoutingVariant(wireId, options?.openrouterVariant);
|
|
126
|
+
return wireId;
|
|
114
127
|
}
|
|
115
128
|
|
|
116
129
|
/**
|
|
@@ -255,7 +268,7 @@ export interface OpenAICompletionsOptions extends StreamOptions {
|
|
|
255
268
|
openrouterVariant?: string;
|
|
256
269
|
}
|
|
257
270
|
|
|
258
|
-
type OpenAICompletionsParams =
|
|
271
|
+
type OpenAICompletionsParams = ChatCompletionCreateParamsStreaming & {
|
|
259
272
|
top_k?: number;
|
|
260
273
|
min_p?: number;
|
|
261
274
|
repetition_penalty?: number;
|
|
@@ -271,8 +284,10 @@ type AppliedToolStrictMode = "mixed" | "all_strict" | "none";
|
|
|
271
284
|
type ToolStrictModeOverride = Exclude<ResolvedOpenAICompat["toolStrictMode"], "mixed"> | undefined;
|
|
272
285
|
|
|
273
286
|
type BuiltOpenAICompletionTools = {
|
|
274
|
-
tools:
|
|
287
|
+
tools: ChatCompletionTool[];
|
|
275
288
|
toolStrictMode: AppliedToolStrictMode;
|
|
289
|
+
/** True when at least one wire tool was sent with `strict: true`. */
|
|
290
|
+
strictToolsApplied: boolean;
|
|
276
291
|
};
|
|
277
292
|
|
|
278
293
|
const OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX = "openai-completions:";
|
|
@@ -385,25 +400,13 @@ function getTrailingPartialDeepseekToken(text: string): string {
|
|
|
385
400
|
}
|
|
386
401
|
const OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE =
|
|
387
402
|
"OpenAI completions stream timed out while waiting for the first event";
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
): number | undefined {
|
|
396
|
-
if (!GLM_CODING_PLAN_MODEL_PATTERN.test(model.id)) return undefined;
|
|
397
|
-
if (model.provider === "zhipu-coding-plan" || model.provider === "zai")
|
|
398
|
-
return GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS;
|
|
399
|
-
|
|
400
|
-
const baseUrl = model.baseUrl.toLowerCase();
|
|
401
|
-
if (baseUrl.includes("open.bigmodel.cn") || baseUrl.includes("api.z.ai")) {
|
|
402
|
-
return GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS;
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
return undefined;
|
|
406
|
-
}
|
|
403
|
+
// How long to keep draining the stream after a `finish_reason` chunk arrived.
|
|
404
|
+
// Compliant hosts follow it (almost) immediately with an optional usage-only
|
|
405
|
+
// chunk and the `[DONE]` sentinel, so the window only ever elapses on hosts
|
|
406
|
+
// that hold the connection open after the response logically completed —
|
|
407
|
+
// without it the turn parks on `iterator.next()` until the idle watchdog
|
|
408
|
+
// converts the already-successful response into a timeout error.
|
|
409
|
+
const OPENAI_COMPLETIONS_POST_FINISH_GRACE_MS = 2_500;
|
|
407
410
|
|
|
408
411
|
export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
409
412
|
model: Model<"openai-completions">,
|
|
@@ -415,41 +418,55 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
415
418
|
(async () => {
|
|
416
419
|
const startTime = Date.now();
|
|
417
420
|
let firstTokenTime: number | undefined;
|
|
418
|
-
let getCapturedErrorResponse: (() => CapturedHttpErrorResponse | undefined) | undefined;
|
|
419
421
|
|
|
420
422
|
const output: AssistantMessage = createInitialResponsesAssistantMessage(model.api, model.provider, model.id);
|
|
421
423
|
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
422
424
|
const abortTracker = createAbortSourceTracker(options?.signal);
|
|
423
425
|
const firstEventTimeoutAbortError = new Error(OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE);
|
|
424
426
|
const { requestAbortController, requestSignal } = abortTracker;
|
|
427
|
+
const onSseEvent = options?.onSseEvent;
|
|
428
|
+
const rawSseObserver = onSseEvent
|
|
429
|
+
? (event: RawSseEvent) => {
|
|
430
|
+
if (!event.event && event.data && event.data !== "[DONE]") {
|
|
431
|
+
try {
|
|
432
|
+
const parsed = JSON.parse(event.data);
|
|
433
|
+
const resolvedEvent =
|
|
434
|
+
typeof parsed.type === "string"
|
|
435
|
+
? parsed.type
|
|
436
|
+
: typeof parsed.object === "string"
|
|
437
|
+
? parsed.object
|
|
438
|
+
: null;
|
|
439
|
+
if (resolvedEvent) {
|
|
440
|
+
event.event = resolvedEvent;
|
|
441
|
+
event.raw = [`event: ${resolvedEvent}`, ...event.raw];
|
|
442
|
+
}
|
|
443
|
+
} catch {}
|
|
444
|
+
}
|
|
445
|
+
onSseEvent(event, model);
|
|
446
|
+
}
|
|
447
|
+
: undefined;
|
|
448
|
+
// Assigned once the block helpers exist (they are scoped to the `try`);
|
|
449
|
+
// the catch handler uses it to close any open blocks before emitting the
|
|
450
|
+
// terminal error so both exit paths obey the same block lifecycle.
|
|
451
|
+
let finishOpenBlocksOnError: () => void = () => {};
|
|
425
452
|
|
|
426
453
|
try {
|
|
427
454
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
|
|
428
|
-
const idleTimeoutFallbackMs =
|
|
455
|
+
const idleTimeoutFallbackMs = model.compat.streamIdleTimeoutMs;
|
|
429
456
|
const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs(idleTimeoutFallbackMs);
|
|
430
457
|
const firstEventTimeoutMs =
|
|
431
458
|
options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
|
|
432
459
|
const requestTimeoutMs =
|
|
433
460
|
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
|
|
434
|
-
const {
|
|
435
|
-
client,
|
|
436
|
-
copilotPremiumRequests,
|
|
437
|
-
baseUrl,
|
|
438
|
-
requestHeaders,
|
|
439
|
-
getCapturedErrorResponse: captureErrorResponse,
|
|
440
|
-
clearCapturedErrorResponse,
|
|
441
|
-
} = await createClient(
|
|
461
|
+
const { copilotPremiumRequests, baseUrl, headers, query, requestHeaders } = await createRequestSetup(
|
|
442
462
|
model,
|
|
443
463
|
context,
|
|
444
464
|
apiKey,
|
|
445
465
|
options?.headers,
|
|
446
466
|
options?.initiatorOverride,
|
|
447
|
-
options?.onSseEvent,
|
|
448
|
-
options?.fetch,
|
|
449
467
|
);
|
|
450
468
|
const premiumRequestsTotal = copilotPremiumRequests;
|
|
451
|
-
|
|
452
|
-
let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
|
|
469
|
+
let appliedStrictTools = false;
|
|
453
470
|
const providerSessionState = getOpenAICompletionsProviderSessionState(
|
|
454
471
|
model,
|
|
455
472
|
baseUrl,
|
|
@@ -457,31 +474,29 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
457
474
|
);
|
|
458
475
|
let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
|
|
459
476
|
let strictFallbackErrorMessage: string | undefined;
|
|
477
|
+
const trimmedBaseUrl = baseUrl.replace(/\/+$/, "");
|
|
478
|
+
const completionsUrl = query
|
|
479
|
+
? `${trimmedBaseUrl}/chat/completions?${new URLSearchParams(query)}`
|
|
480
|
+
: `${trimmedBaseUrl}/chat/completions`;
|
|
460
481
|
const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
|
|
461
|
-
clearCapturedErrorResponse();
|
|
462
482
|
const effectiveToolStrictModeOverride = disableStrictTools ? "none" : toolStrictModeOverride;
|
|
463
|
-
const { params,
|
|
483
|
+
const { params, strictToolsApplied } = buildParams(
|
|
464
484
|
model,
|
|
465
485
|
context,
|
|
466
486
|
options,
|
|
467
|
-
baseUrl,
|
|
468
487
|
effectiveToolStrictModeOverride,
|
|
469
488
|
);
|
|
470
|
-
|
|
489
|
+
appliedStrictTools = strictToolsApplied;
|
|
471
490
|
options?.onPayload?.(params);
|
|
472
491
|
rawRequestDump = {
|
|
473
492
|
provider: model.provider,
|
|
474
493
|
api: output.api,
|
|
475
494
|
model: model.id,
|
|
476
495
|
method: "POST",
|
|
477
|
-
url:
|
|
496
|
+
url: completionsUrl,
|
|
478
497
|
headers: requestHeaders,
|
|
479
498
|
body: params,
|
|
480
499
|
};
|
|
481
|
-
const requestOptions =
|
|
482
|
-
requestTimeoutMs === undefined
|
|
483
|
-
? { signal: requestSignal }
|
|
484
|
-
: { signal: requestSignal, timeout: requestTimeoutMs };
|
|
485
500
|
let requestTimeout: NodeJS.Timeout | undefined;
|
|
486
501
|
if (requestTimeoutMs !== undefined) {
|
|
487
502
|
requestTimeout = setTimeout(
|
|
@@ -490,17 +505,26 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
490
505
|
);
|
|
491
506
|
}
|
|
492
507
|
try {
|
|
493
|
-
const
|
|
494
|
-
|
|
495
|
-
.
|
|
496
|
-
await notifyProviderResponse(options, response, model, request_id);
|
|
497
|
-
return data;
|
|
498
|
-
} catch (error) {
|
|
499
|
-
if (error instanceof OpenAIConnectionTimeoutError && !abortTracker.wasCallerAbort()) {
|
|
500
|
-
throw firstEventTimeoutAbortError;
|
|
508
|
+
const headersWithTimeout = { ...headers };
|
|
509
|
+
if (requestTimeoutMs !== undefined) {
|
|
510
|
+
headersWithTimeout["X-Stainless-Timeout"] = Math.floor(requestTimeoutMs / 1000).toString();
|
|
501
511
|
}
|
|
502
|
-
|
|
512
|
+
const { events, response, requestId } = await postOpenAIStream<ChatCompletionChunk>({
|
|
513
|
+
url: completionsUrl,
|
|
514
|
+
headers: headersWithTimeout,
|
|
515
|
+
body: params,
|
|
516
|
+
signal: requestSignal,
|
|
517
|
+
fetch: options?.fetch,
|
|
518
|
+
// With a first-event watchdog armed, transport retries must
|
|
519
|
+
// not silently extend the deadline (old SDK `maxRetries: 0`).
|
|
520
|
+
maxAttempts: requestTimeoutMs === undefined ? undefined : 1,
|
|
521
|
+
onSseEvent: rawSseObserver,
|
|
522
|
+
});
|
|
523
|
+
await notifyProviderResponse(options, response, model, requestId);
|
|
524
|
+
return events;
|
|
503
525
|
} finally {
|
|
526
|
+
// Headers arrived (or the request failed); from here the
|
|
527
|
+
// first-event deadline is enforced by `iterateWithIdleTimeout`.
|
|
504
528
|
if (requestTimeout !== undefined) clearTimeout(requestTimeout);
|
|
505
529
|
}
|
|
506
530
|
};
|
|
@@ -511,7 +535,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
511
535
|
signal: requestSignal,
|
|
512
536
|
});
|
|
513
537
|
} catch (error) {
|
|
514
|
-
const capturedErrorResponse =
|
|
538
|
+
const capturedErrorResponse = error instanceof OpenAIHttpError ? error.captured : undefined;
|
|
515
539
|
if (
|
|
516
540
|
isOpenRouterAnthropicModel(model) &&
|
|
517
541
|
!disableStrictTools &&
|
|
@@ -525,9 +549,15 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
525
549
|
disableStrictTools = true;
|
|
526
550
|
openaiStream = await createCompletionsStream("none");
|
|
527
551
|
} else {
|
|
528
|
-
if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse,
|
|
552
|
+
if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedStrictTools, context.tools)) {
|
|
529
553
|
throw error;
|
|
530
554
|
}
|
|
555
|
+
// Remember the rejection for the rest of the session so every
|
|
556
|
+
// subsequent request doesn't pay a strict-400 + retry round-trip.
|
|
557
|
+
if (providerSessionState) {
|
|
558
|
+
providerSessionState.strictToolsDisabled = true;
|
|
559
|
+
}
|
|
560
|
+
disableStrictTools = true;
|
|
531
561
|
openaiStream = await createCompletionsStream("none");
|
|
532
562
|
}
|
|
533
563
|
}
|
|
@@ -536,13 +566,12 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
536
566
|
}
|
|
537
567
|
stream.push({ type: "start", partial: output });
|
|
538
568
|
|
|
539
|
-
const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
|
|
540
569
|
// Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
|
|
541
570
|
// native API) leak chat-template tool-call markers in `delta.content` even
|
|
542
571
|
// though tool calls are also surfaced structurally. Strip the leaked markers
|
|
543
572
|
// so users don't see raw `<|...|>` tokens.
|
|
544
573
|
const stripDeepseekChatTemplateTokens =
|
|
545
|
-
|
|
574
|
+
isDeepseekModelIdOrName(model.id) && (model.provider === "nvidia" || model.provider === "deepseek");
|
|
546
575
|
type ToolCallStreamBlock = ToolCall & {
|
|
547
576
|
partialArgs?: string | Record<string, unknown>;
|
|
548
577
|
streamIndex?: number;
|
|
@@ -560,6 +589,20 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
560
589
|
if (block.partialArgs === undefined) return;
|
|
561
590
|
const contentIndex = blockIndex(block);
|
|
562
591
|
if (contentIndex < 0) return;
|
|
592
|
+
// Object-shaped `partialArgs` came from MiniMax-compatible hosts that stream
|
|
593
|
+
// `function.arguments` as an object. The per-chunk handler holds them with an
|
|
594
|
+
// empty wire delta (see the object branch below) because emitting each chunk's
|
|
595
|
+
// `JSON.stringify(rawArgs)` would feed concat-based downstream consumers
|
|
596
|
+
// (proxy.ts, openai-chat-server, openai-responses-server, anthropic-messages-server)
|
|
597
|
+
// an invalid concatenation like `{"input":"a"}{"input":"b"}`. Flush the final
|
|
598
|
+
// merged object as one concat-safe delta now so those consumers reconstruct the
|
|
599
|
+
// args correctly before observing `toolcall_end`.
|
|
600
|
+
if (typeof block.partialArgs === "object" && !Array.isArray(block.partialArgs)) {
|
|
601
|
+
const fullJson = JSON.stringify(block.partialArgs);
|
|
602
|
+
if (fullJson.length > 0 && fullJson !== "{}") {
|
|
603
|
+
stream.push({ type: "toolcall_delta", contentIndex, delta: fullJson, partial: output });
|
|
604
|
+
}
|
|
605
|
+
}
|
|
563
606
|
block.arguments =
|
|
564
607
|
typeof block.partialArgs === "string" ? parseStreamingJson(block.partialArgs) : block.partialArgs;
|
|
565
608
|
delete block.partialArgs;
|
|
@@ -591,13 +634,21 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
591
634
|
}
|
|
592
635
|
finishToolCallBlock(block);
|
|
593
636
|
};
|
|
637
|
+
finishOpenBlocksOnError = () => {
|
|
638
|
+
if (currentBlock?.type !== "toolCall") finishCurrentBlock(currentBlock);
|
|
639
|
+
finishPendingToolCallBlocks();
|
|
640
|
+
};
|
|
594
641
|
const appendText = (
|
|
595
642
|
message: AssistantMessage,
|
|
596
643
|
eventStream: AssistantMessageEventStream,
|
|
597
644
|
text: string,
|
|
598
645
|
): void => {
|
|
599
646
|
if (currentBlock?.type !== "text") {
|
|
600
|
-
|
|
647
|
+
// Leave toolCall blocks pending across text transitions: chunks after
|
|
648
|
+
// the first typically carry only `index`, so a finished (de-registered)
|
|
649
|
+
// call would be reborn as a nameless phantom block when its arguments
|
|
650
|
+
// resume. The stream-end sweep finalizes pending calls.
|
|
651
|
+
if (currentBlock?.type !== "toolCall") finishCurrentBlock(currentBlock);
|
|
601
652
|
currentBlock = { type: "text", text: "" };
|
|
602
653
|
message.content.push(currentBlock);
|
|
603
654
|
eventStream.push({ type: "text_start", contentIndex: blockIndex(currentBlock), partial: message });
|
|
@@ -620,7 +671,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
620
671
|
currentBlock?.type !== "thinking" ||
|
|
621
672
|
(signature !== undefined && currentBlock.thinkingSignature !== signature)
|
|
622
673
|
) {
|
|
623
|
-
|
|
674
|
+
// Same as appendText: leave toolCall blocks pending so index-only
|
|
675
|
+
// continuation deltas can still find them.
|
|
676
|
+
if (currentBlock?.type !== "toolCall") finishCurrentBlock(currentBlock);
|
|
624
677
|
currentBlock = { type: "thinking", thinking: "", thinkingSignature: signature };
|
|
625
678
|
message.content.push(currentBlock);
|
|
626
679
|
eventStream.push({
|
|
@@ -646,10 +699,32 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
646
699
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
647
700
|
appendText(output, stream, text);
|
|
648
701
|
};
|
|
649
|
-
|
|
702
|
+
// Tracks the last full cumulative reasoning snapshot per signature (the
|
|
703
|
+
// reasoning field name) so dedup survives block transitions. Required
|
|
704
|
+
// for MiniMax-M3: once `</think>` and visible text arrive, currentBlock
|
|
705
|
+
// flips to "text", but later chunks keep carrying the same cumulative
|
|
706
|
+
// `reasoning_content` snapshot. Without an external tracker the guard
|
|
707
|
+
// below misses and the snapshot gets re-emitted as a fresh thinking
|
|
708
|
+
// block after the answer has started.
|
|
709
|
+
const lastCumulativeReasoningBySignature = new Map<string, string>();
|
|
710
|
+
const appendThinkingDelta = (
|
|
711
|
+
thinking: string,
|
|
712
|
+
signature?: string,
|
|
713
|
+
source: "delta" | "cumulative" = "delta",
|
|
714
|
+
): void => {
|
|
650
715
|
if (!thinking) return;
|
|
716
|
+
let emittedThinking = thinking;
|
|
717
|
+
if (source === "cumulative") {
|
|
718
|
+
const key = signature ?? "";
|
|
719
|
+
const lastSnapshot = lastCumulativeReasoningBySignature.get(key) ?? "";
|
|
720
|
+
if (thinking.startsWith(lastSnapshot)) {
|
|
721
|
+
emittedThinking = thinking.slice(lastSnapshot.length);
|
|
722
|
+
}
|
|
723
|
+
lastCumulativeReasoningBySignature.set(key, thinking);
|
|
724
|
+
if (!emittedThinking) return;
|
|
725
|
+
}
|
|
651
726
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
652
|
-
appendThinking(output, stream,
|
|
727
|
+
appendThinking(output, stream, emittedThinking, signature);
|
|
653
728
|
};
|
|
654
729
|
|
|
655
730
|
let deepseekStripBuffer = "";
|
|
@@ -676,13 +751,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
676
751
|
appendTextDelta(processedText);
|
|
677
752
|
}
|
|
678
753
|
};
|
|
679
|
-
|
|
680
|
-
const streamMarkupHealingPattern = getStreamMarkupHealingPattern(model.provider, model.id, {
|
|
681
|
-
parseThinkingTags: parseMiniMaxThinkTags,
|
|
682
|
-
});
|
|
754
|
+
const streamMarkupHealingPattern = getStreamMarkupHealingPattern(model.provider, model.id);
|
|
683
755
|
const streamMarkupHealing = streamMarkupHealingPattern
|
|
684
756
|
? new StreamMarkupHealing({ pattern: streamMarkupHealingPattern })
|
|
685
757
|
: undefined;
|
|
758
|
+
const explicitReasoningDeltasMayBeCumulative = modelMayLeakThinkingTags(model.provider, model.id);
|
|
686
759
|
let healedToolCallEmitted = false;
|
|
687
760
|
const emitHealedToolCall = (call: HealedToolCall): void => {
|
|
688
761
|
finishCurrentBlock(currentBlock);
|
|
@@ -722,7 +795,12 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
722
795
|
for (const call of calls) emitHealedToolCall(call);
|
|
723
796
|
};
|
|
724
797
|
|
|
725
|
-
for
|
|
798
|
+
// Terminal-chunk bookkeeping for the post-finish grace window below.
|
|
799
|
+
// `streamFinishedAt` flips when a chunk carries `finish_reason`;
|
|
800
|
+
// `sawUsagePayload` flips when any usage payload was parsed.
|
|
801
|
+
let streamFinishedAt: number | undefined;
|
|
802
|
+
let sawUsagePayload = false;
|
|
803
|
+
const timedOpenaiStream = iterateWithIdleTimeout(openaiStream, {
|
|
726
804
|
idleTimeoutMs,
|
|
727
805
|
firstItemTimeoutMs: firstEventTimeoutMs,
|
|
728
806
|
firstItemErrorMessage: OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE,
|
|
@@ -731,24 +809,48 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
731
809
|
onFirstItemTimeout: () => abortTracker.abortLocally(firstEventTimeoutAbortError),
|
|
732
810
|
abortSignal: options?.signal,
|
|
733
811
|
isProgressItem: isOpenAICompletionsProgressChunk,
|
|
734
|
-
})
|
|
812
|
+
});
|
|
813
|
+
const terminalAwareStream = iterateWithTerminalGrace(timedOpenaiStream, {
|
|
814
|
+
finishedAtMs: () => streamFinishedAt,
|
|
815
|
+
graceMs: OPENAI_COMPLETIONS_POST_FINISH_GRACE_MS,
|
|
816
|
+
// The inner idle-timeout generator is parked mid-`next()` when the
|
|
817
|
+
// grace window closes, so abort the transport to settle that read
|
|
818
|
+
// and release the socket immediately (a queued `.return()` alone
|
|
819
|
+
// would wait on the never-arriving next chunk).
|
|
820
|
+
onGraceEnd: () => requestAbortController.abort(),
|
|
821
|
+
});
|
|
822
|
+
for await (const chunk of terminalAwareStream) {
|
|
735
823
|
if (!chunk || typeof chunk !== "object") continue;
|
|
736
824
|
|
|
737
825
|
// OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
|
|
738
826
|
// and each chunk in a streamed completion carries the same id.
|
|
739
827
|
output.responseId ||= chunk.id;
|
|
740
828
|
|
|
829
|
+
// Aggregators (OpenRouter, Vercel AI Gateway, …) report the upstream
|
|
830
|
+
// provider that actually served the request via a top-level `provider`
|
|
831
|
+
// field present on every chunk. Capture the first non-empty value so
|
|
832
|
+
// callers can attribute routing without re-parsing the raw stream.
|
|
833
|
+
output.upstreamProvider ||= getOptionalStringProperty(chunk, "provider");
|
|
834
|
+
|
|
741
835
|
if (chunk.usage) {
|
|
742
836
|
output.usage = parseChunkUsage(chunk.usage, model, premiumRequestsTotal);
|
|
837
|
+
sawUsagePayload = true;
|
|
743
838
|
}
|
|
744
839
|
|
|
745
840
|
const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
|
|
746
|
-
if (!choice)
|
|
841
|
+
if (!choice) {
|
|
842
|
+
// Trailing usage-only chunk (`stream_options.include_usage`) after
|
|
843
|
+
// `finish_reason`: the response is complete — stop pulling instead
|
|
844
|
+
// of waiting for `[DONE]`/close from hosts that never send either.
|
|
845
|
+
if (streamFinishedAt !== undefined && sawUsagePayload) break;
|
|
846
|
+
continue;
|
|
847
|
+
}
|
|
747
848
|
|
|
748
849
|
if (!chunk.usage) {
|
|
749
850
|
const choiceUsage = getChoiceUsage(choice);
|
|
750
851
|
if (choiceUsage) {
|
|
751
852
|
output.usage = parseChunkUsage(choiceUsage, model, premiumRequestsTotal);
|
|
853
|
+
sawUsagePayload = true;
|
|
752
854
|
}
|
|
753
855
|
}
|
|
754
856
|
|
|
@@ -758,14 +860,42 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
758
860
|
if (finishReasonResult.errorMessage) {
|
|
759
861
|
output.errorMessage = finishReasonResult.errorMessage;
|
|
760
862
|
}
|
|
863
|
+
streamFinishedAt ??= Date.now();
|
|
761
864
|
}
|
|
762
865
|
|
|
763
866
|
if (choice.delta) {
|
|
867
|
+
// Some endpoints return reasoning in reasoning_content (llama.cpp),
|
|
868
|
+
// or reasoning (other openai compatible endpoints). Use the first
|
|
869
|
+
// non-empty reasoning field to avoid duplication when a chunk carries
|
|
870
|
+
// multiple aliases for the same reasoning text.
|
|
871
|
+
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
872
|
+
const deltaRecord = choice.delta as Record<string, unknown>;
|
|
873
|
+
let foundReasoningField: string | undefined;
|
|
874
|
+
let foundReasoningDelta = "";
|
|
875
|
+
for (const field of reasoningFields) {
|
|
876
|
+
const reasoningDelta = deltaRecord[field];
|
|
877
|
+
if (typeof reasoningDelta === "string" && reasoningDelta.length > 0) {
|
|
878
|
+
foundReasoningField = field;
|
|
879
|
+
foundReasoningDelta = reasoningDelta;
|
|
880
|
+
break;
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
if (foundReasoningField) {
|
|
885
|
+
appendThinkingDelta(
|
|
886
|
+
foundReasoningDelta,
|
|
887
|
+
foundReasoningField,
|
|
888
|
+
explicitReasoningDeltasMayBeCumulative ? "cumulative" : "delta",
|
|
889
|
+
);
|
|
890
|
+
}
|
|
891
|
+
|
|
764
892
|
const normalizedDeltaText = normalizeStreamingContentText(choice.delta.content);
|
|
765
893
|
if (normalizedDeltaText.length > 0) {
|
|
766
894
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
767
895
|
const hasStructuredToolCalls =
|
|
768
896
|
Array.isArray(choice.delta.tool_calls) && choice.delta.tool_calls.length > 0;
|
|
897
|
+
const suppressContentThinking =
|
|
898
|
+
foundReasoningField !== undefined && streamMarkupHealing?.pattern === "thinking";
|
|
769
899
|
|
|
770
900
|
if (streamMarkupHealing) {
|
|
771
901
|
if (hasStructuredToolCalls) {
|
|
@@ -776,6 +906,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
776
906
|
appendProcessedText(streamMarkupHealing.consumeWithoutCalls(normalizedDeltaText));
|
|
777
907
|
} else {
|
|
778
908
|
for (const event of streamMarkupHealing.feedEvents(normalizedDeltaText)) {
|
|
909
|
+
if (suppressContentThinking && event.type === "thinking") continue;
|
|
779
910
|
emitHealingEvent(event);
|
|
780
911
|
}
|
|
781
912
|
}
|
|
@@ -784,30 +915,6 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
784
915
|
}
|
|
785
916
|
}
|
|
786
917
|
|
|
787
|
-
// Some endpoints return reasoning in reasoning_content (llama.cpp),
|
|
788
|
-
// or reasoning (other openai compatible endpoints)
|
|
789
|
-
// Use the first non-empty reasoning field to avoid duplication
|
|
790
|
-
// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
|
|
791
|
-
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
792
|
-
let foundReasoningField: string | null = null;
|
|
793
|
-
for (const field of reasoningFields) {
|
|
794
|
-
if (
|
|
795
|
-
(choice.delta as any)[field] !== null &&
|
|
796
|
-
(choice.delta as any)[field] !== undefined &&
|
|
797
|
-
(choice.delta as any)[field].length > 0
|
|
798
|
-
) {
|
|
799
|
-
if (!foundReasoningField) {
|
|
800
|
-
foundReasoningField = field;
|
|
801
|
-
break;
|
|
802
|
-
}
|
|
803
|
-
}
|
|
804
|
-
}
|
|
805
|
-
|
|
806
|
-
if (foundReasoningField) {
|
|
807
|
-
const delta = (choice.delta as any)[foundReasoningField];
|
|
808
|
-
appendThinkingDelta(delta, foundReasoningField);
|
|
809
|
-
}
|
|
810
|
-
|
|
811
918
|
if (choice?.delta?.tool_calls && choice.delta.tool_calls.length > 0) {
|
|
812
919
|
for (const toolCall of choice.delta.tool_calls) {
|
|
813
920
|
const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
|
|
@@ -845,6 +952,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
845
952
|
partial: output,
|
|
846
953
|
});
|
|
847
954
|
} else {
|
|
955
|
+
// Resuming a pending call after interleaved text/thinking:
|
|
956
|
+
// close the text/thinking block we drifted into.
|
|
957
|
+
if (currentBlock !== block && currentBlock && currentBlock.type !== "toolCall") {
|
|
958
|
+
finishCurrentBlock(currentBlock);
|
|
959
|
+
}
|
|
848
960
|
currentBlock = block;
|
|
849
961
|
if (streamIndex !== undefined && block.streamIndex === undefined) {
|
|
850
962
|
block.streamIndex = streamIndex;
|
|
@@ -871,13 +983,37 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
871
983
|
}
|
|
872
984
|
}
|
|
873
985
|
} else if (rawArgs && typeof rawArgs === "object" && !Array.isArray(rawArgs)) {
|
|
874
|
-
// MiniMax-compatible hosts stream `function.arguments` as
|
|
875
|
-
//
|
|
876
|
-
//
|
|
877
|
-
// the
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
delta
|
|
986
|
+
// MiniMax-compatible hosts stream `function.arguments` as an object instead of the
|
|
987
|
+
// OpenAI JSON-string contract. Most chunks carry the complete object in one delta,
|
|
988
|
+
// but cannot rely on that: replacing per-chunk drops earlier keys (and earlier
|
|
989
|
+
// string content for the same key) when the host fragments the args across deltas.
|
|
990
|
+
// Shallow-merge into the accumulated object; for shared string keys, detect
|
|
991
|
+
// cumulative-vs-delta semantics with `startsWith` so we neither duplicate cumulative
|
|
992
|
+
// payloads nor lose delta fragments. Degenerates to the previous "last wins"
|
|
993
|
+
// behaviour for the common single-chunk shape (no prior value to merge with).
|
|
994
|
+
//
|
|
995
|
+
// `delta` stays empty here: emitting `JSON.stringify(rawArgs)` per chunk feeds
|
|
996
|
+
// downstream concat-based accumulators (proxy.ts, openai-chat-server,
|
|
997
|
+
// openai-responses-server, anthropic-messages-server) an invalid sequence like
|
|
998
|
+
// `{"input":"a"}{"input":"b"}`. The merged object is flushed as a single
|
|
999
|
+
// concat-safe delta in `finishToolCallBlock` before `toolcall_end` instead.
|
|
1000
|
+
const prev =
|
|
1001
|
+
block.partialArgs &&
|
|
1002
|
+
typeof block.partialArgs === "object" &&
|
|
1003
|
+
!Array.isArray(block.partialArgs)
|
|
1004
|
+
? (block.partialArgs as Record<string, unknown>)
|
|
1005
|
+
: undefined;
|
|
1006
|
+
const merged: Record<string, unknown> = prev ? { ...prev } : {};
|
|
1007
|
+
for (const [key, value] of Object.entries(rawArgs)) {
|
|
1008
|
+
const prevValue = merged[key];
|
|
1009
|
+
if (typeof prevValue === "string" && typeof value === "string") {
|
|
1010
|
+
merged[key] = value.startsWith(prevValue) ? value : prevValue + value;
|
|
1011
|
+
} else {
|
|
1012
|
+
merged[key] = value;
|
|
1013
|
+
}
|
|
1014
|
+
}
|
|
1015
|
+
block.partialArgs = merged;
|
|
1016
|
+
block.arguments = merged;
|
|
881
1017
|
}
|
|
882
1018
|
stream.push({
|
|
883
1019
|
type: "toolcall_delta",
|
|
@@ -902,6 +1038,12 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
902
1038
|
}
|
|
903
1039
|
}
|
|
904
1040
|
}
|
|
1041
|
+
|
|
1042
|
+
// `finish_reason` + usage both observed: the chat-completions
|
|
1043
|
+
// contract has nothing left to deliver. Break instead of waiting
|
|
1044
|
+
// for `[DONE]`/connection close so hosts that hold the socket open
|
|
1045
|
+
// can't park the turn until the idle watchdog errors it out.
|
|
1046
|
+
if (streamFinishedAt !== undefined && sawUsagePayload) break;
|
|
905
1047
|
}
|
|
906
1048
|
|
|
907
1049
|
if (streamMarkupHealing) {
|
|
@@ -962,13 +1104,20 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
962
1104
|
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
963
1105
|
stream.end();
|
|
964
1106
|
} catch (error) {
|
|
1107
|
+
// Close open blocks first so consumers tracking text_/thinking_/toolcall_
|
|
1108
|
+
// lifecycles never see orphaned starts on the error path. Best-effort: a
|
|
1109
|
+
// throw here must not prevent the terminal error event below.
|
|
1110
|
+
try {
|
|
1111
|
+
finishOpenBlocksOnError();
|
|
1112
|
+
} catch {}
|
|
965
1113
|
for (const block of output.content) delete (block as any).index;
|
|
966
1114
|
const firstEventTimeoutError = abortTracker.getLocalAbortReason();
|
|
967
1115
|
output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
|
|
968
|
-
|
|
1116
|
+
const capturedErrorResponse = error instanceof OpenAIHttpError ? error.captured : undefined;
|
|
1117
|
+
output.errorStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
|
|
969
1118
|
output.errorMessage =
|
|
970
1119
|
firstEventTimeoutError?.message ??
|
|
971
|
-
(await finalizeErrorMessage(error, rawRequestDump,
|
|
1120
|
+
(await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse));
|
|
972
1121
|
// Some providers via OpenRouter include extra details here.
|
|
973
1122
|
const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
|
|
974
1123
|
if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
|
|
@@ -983,21 +1132,21 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
983
1132
|
return stream;
|
|
984
1133
|
};
|
|
985
1134
|
|
|
986
|
-
async function
|
|
1135
|
+
async function createRequestSetup(
|
|
987
1136
|
model: Model<"openai-completions">,
|
|
988
1137
|
context: Context,
|
|
989
1138
|
apiKey?: string,
|
|
990
1139
|
extraHeaders?: Record<string, string>,
|
|
991
1140
|
initiatorOverride?: MessageAttribution,
|
|
992
|
-
onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
|
|
993
|
-
fetchOverride?: FetchImpl,
|
|
994
1141
|
): Promise<{
|
|
995
|
-
client: OpenAI;
|
|
996
1142
|
copilotPremiumRequests: number | undefined;
|
|
997
|
-
baseUrl: string
|
|
1143
|
+
baseUrl: string;
|
|
1144
|
+
/** Headers sent on the wire, including `Authorization`. */
|
|
1145
|
+
headers: Record<string, string>;
|
|
1146
|
+
/** Query params appended to the request URL (Azure `api-version`). */
|
|
1147
|
+
query: Record<string, string> | undefined;
|
|
1148
|
+
/** Headers recorded in `rawRequestDump` (sans `Authorization`). */
|
|
998
1149
|
requestHeaders: Record<string, string>;
|
|
999
|
-
getCapturedErrorResponse: () => CapturedHttpErrorResponse | undefined;
|
|
1000
|
-
clearCapturedErrorResponse: () => void;
|
|
1001
1150
|
}> {
|
|
1002
1151
|
if (!apiKey) {
|
|
1003
1152
|
if (!$env.OPENAI_API_KEY) {
|
|
@@ -1015,12 +1164,12 @@ async function createClient(
|
|
|
1015
1164
|
// analytics. `HTTP-Referer` is the unique app identifier; without it nothing is
|
|
1016
1165
|
// tracked. `X-OpenRouter-Title` is the display name (`X-Title` is the legacy
|
|
1017
1166
|
// alias kept for back-compat). `X-OpenRouter-Categories` slots us into the
|
|
1018
|
-
// `cli-agent` marketplace category. `User-Agent`
|
|
1019
|
-
//
|
|
1167
|
+
// `cli-agent` marketplace category. `User-Agent` makes our traffic
|
|
1168
|
+
// identifiable in upstream provider logs.
|
|
1020
1169
|
// https://openrouter.ai/docs/app-attribution
|
|
1021
|
-
headers["User-Agent"] =
|
|
1022
|
-
headers["HTTP-Referer"] = "https://prometheus.
|
|
1023
|
-
headers["X-OpenRouter-Title"] =
|
|
1170
|
+
headers["User-Agent"] = `Prometheus/${packageJson.version}`;
|
|
1171
|
+
headers["HTTP-Referer"] = "https://prometheus.sh/";
|
|
1172
|
+
headers["X-OpenRouter-Title"] = "Prometheus";
|
|
1024
1173
|
headers["X-OpenRouter-Categories"] = "cli-agent";
|
|
1025
1174
|
// Always-on response caching: identical requests return cached responses for free.
|
|
1026
1175
|
// TTL 1h; first call hits the provider, every identical call within the window
|
|
@@ -1055,114 +1204,68 @@ async function createClient(
|
|
|
1055
1204
|
if (baseUrl?.includes(".openai.azure.com")) {
|
|
1056
1205
|
const apiVersion = $env.AZURE_OPENAI_API_VERSION || "2024-10-21";
|
|
1057
1206
|
if (!baseUrl.includes("/deployments/")) {
|
|
1058
|
-
|
|
1207
|
+
// Honor AZURE_OPENAI_DEPLOYMENT_NAME_MAP like the responses provider:
|
|
1208
|
+
// deployment names routinely differ from catalog model ids.
|
|
1209
|
+
const deploymentName =
|
|
1210
|
+
parseAzureDeploymentNameMap($env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP).get(model.id) ?? model.id;
|
|
1211
|
+
baseUrl = `${baseUrl}/deployments/${deploymentName}`;
|
|
1059
1212
|
}
|
|
1060
1213
|
azureDefaultQuery = { "api-version": apiVersion };
|
|
1061
1214
|
}
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
const response = await baseFetch(input, init);
|
|
1067
|
-
if (response.ok) {
|
|
1068
|
-
capturedErrorResponse = undefined;
|
|
1069
|
-
return response;
|
|
1070
|
-
}
|
|
1071
|
-
let bodyText: string | undefined;
|
|
1072
|
-
let bodyJson: unknown;
|
|
1073
|
-
try {
|
|
1074
|
-
bodyText = await response.clone().text();
|
|
1075
|
-
if (bodyText.trim().length > 0) {
|
|
1076
|
-
try {
|
|
1077
|
-
bodyJson = JSON.parse(bodyText);
|
|
1078
|
-
} catch {}
|
|
1079
|
-
}
|
|
1080
|
-
} catch {}
|
|
1081
|
-
capturedErrorResponse = {
|
|
1082
|
-
status: response.status,
|
|
1083
|
-
headers: response.headers,
|
|
1084
|
-
bodyText,
|
|
1085
|
-
bodyJson,
|
|
1086
|
-
};
|
|
1087
|
-
return response;
|
|
1088
|
-
},
|
|
1089
|
-
baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
|
|
1090
|
-
);
|
|
1091
|
-
const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;
|
|
1215
|
+
// The removed SDK client resolved its base URL as
|
|
1216
|
+
// `baseURL ?? $OPENAI_BASE_URL ?? https://api.openai.com/v1`; keep that
|
|
1217
|
+
// resolution explicit now that we build the request URL ourselves.
|
|
1218
|
+
const resolvedBaseUrl = baseUrl ?? ($env.OPENAI_BASE_URL?.trim() || "https://api.openai.com/v1");
|
|
1092
1219
|
return {
|
|
1093
|
-
client: new OpenAI({
|
|
1094
|
-
apiKey,
|
|
1095
|
-
baseURL: baseUrl,
|
|
1096
|
-
dangerouslyAllowBrowser: true,
|
|
1097
|
-
maxRetries: 5,
|
|
1098
|
-
defaultHeaders: headers,
|
|
1099
|
-
defaultQuery: azureDefaultQuery,
|
|
1100
|
-
fetch: debugFetch,
|
|
1101
|
-
}),
|
|
1102
1220
|
copilotPremiumRequests,
|
|
1103
|
-
baseUrl,
|
|
1221
|
+
baseUrl: resolvedBaseUrl,
|
|
1222
|
+
headers: { Authorization: `Bearer ${apiKey}`, ...headers },
|
|
1223
|
+
query: azureDefaultQuery,
|
|
1104
1224
|
requestHeaders: headers,
|
|
1105
|
-
getCapturedErrorResponse: () => capturedErrorResponse,
|
|
1106
|
-
clearCapturedErrorResponse: () => {
|
|
1107
|
-
capturedErrorResponse = undefined;
|
|
1108
|
-
},
|
|
1109
1225
|
};
|
|
1110
1226
|
}
|
|
1111
1227
|
|
|
1228
|
+
function getForcedCompletionsToolName(toolChoice: OpenAICompletionsParams["tool_choice"]): string | undefined {
|
|
1229
|
+
if (typeof toolChoice !== "object" || toolChoice === null || !("function" in toolChoice)) return undefined;
|
|
1230
|
+
return toolChoice.function.name;
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1112
1233
|
function buildParams(
|
|
1113
1234
|
model: Model<"openai-completions">,
|
|
1114
1235
|
context: Context,
|
|
1115
1236
|
options: OpenAICompletionsOptions | undefined,
|
|
1116
|
-
resolvedBaseUrl?: string,
|
|
1117
1237
|
toolStrictModeOverride?: ToolStrictModeOverride,
|
|
1118
|
-
): { params: OpenAICompletionsParams; toolStrictMode: AppliedToolStrictMode } {
|
|
1119
|
-
|
|
1120
|
-
// Opencode Zen's gateway (https://opencode.ai/zen/go/v1) gates
|
|
1121
|
-
// `reasoning_content` on the request's thinking state for every model it
|
|
1122
|
-
// fronts (Kimi K2.x, DeepSeek V4, GLM-5.x, Qwen3.x, MiMo, MiniMax, …): it
|
|
1123
|
-
// 400s with `Extra inputs are not permitted` when thinking is off but the
|
|
1124
|
-
// field is supplied (#1071), and 400s with `thinking is enabled but
|
|
1125
|
-
// reasoning_content is missing in assistant tool call message at index N`
|
|
1126
|
-
// (#1484) when thinking is on and the field is absent. `detectOpenAICompat`
|
|
1127
|
-
// only set `requiresReasoningContentForToolCalls` for the DeepSeek family
|
|
1128
|
-
// (and previously for Kimi until #1071 carved out opencode); reactivate it
|
|
1129
|
-
// per request for every opencode model whenever this turn is in thinking
|
|
1130
|
-
// mode so prior tool-call turns replay reasoning_content. Forced-tool
|
|
1131
|
-
// turns are excluded because the later `disableReasoningOnForcedToolChoice`
|
|
1132
|
-
// guard at the bottom of `buildParams` strips thinking from the wire body
|
|
1133
|
-
// for Kimi-style models — keeping the replay on under those conditions
|
|
1134
|
-
// would resurrect the #1071 failure.
|
|
1135
|
-
//
|
|
1136
|
-
// `allowsSyntheticReasoningContentForToolCalls` is forced to `false` on
|
|
1137
|
-
// the same path: the gateway specifically requires `reasoning_content`,
|
|
1138
|
-
// and the default synthetic-friendly behavior would echo whichever field
|
|
1139
|
-
// the upstream streamed (e.g. `reasoning` for many opencode turns),
|
|
1140
|
-
// landing the replay in the wrong key and re-triggering the 400.
|
|
1141
|
-
const isOpenCodeProvider = model.provider === "opencode-go" || model.provider === "opencode-zen";
|
|
1238
|
+
): { params: OpenAICompletionsParams; toolStrictMode: AppliedToolStrictMode; strictToolsApplied: boolean } {
|
|
1239
|
+
let compat = model.compat;
|
|
1142
1240
|
const thinkingEnabledForRequest =
|
|
1143
1241
|
Boolean(options?.reasoning) && !options?.disableReasoning && Boolean(model.reasoning);
|
|
1144
1242
|
const forcedToolChoiceSuppressesThinking =
|
|
1145
1243
|
compat.disableReasoningOnForcedToolChoice &&
|
|
1244
|
+
compat.supportsForcedToolChoice &&
|
|
1146
1245
|
isForcedToolChoice(mapToOpenAICompletionsToolChoice(options?.toolChoice));
|
|
1147
|
-
if (
|
|
1148
|
-
compat
|
|
1149
|
-
compat.allowsSyntheticReasoningContentForToolCalls = false;
|
|
1150
|
-
compat.reasoningContentField = "reasoning_content";
|
|
1246
|
+
if (compat.whenThinking && thinkingEnabledForRequest && !forcedToolChoiceSuppressesThinking) {
|
|
1247
|
+
compat = compat.whenThinking; // precomputed at model build — pointer swap, no allocation
|
|
1151
1248
|
}
|
|
1152
|
-
const isKimiModelId = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
|
|
1153
1249
|
const messages = convertMessages(model, context, compat);
|
|
1154
1250
|
maybeAddAnthropicCacheControl(compat, messages);
|
|
1155
|
-
const supportsReasoningParams =
|
|
1156
|
-
|
|
1157
|
-
// Kimi
|
|
1158
|
-
//
|
|
1159
|
-
//
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
//
|
|
1163
|
-
//
|
|
1164
|
-
//
|
|
1165
|
-
|
|
1251
|
+
const supportsReasoningParams = compat.supportsReasoningParams;
|
|
1252
|
+
|
|
1253
|
+
// Kimi-family models calculate TPM rate limits from max_tokens (not actual
|
|
1254
|
+
// output) and the official guidance requires sending it on every call —
|
|
1255
|
+
// `compat.alwaysSendMaxTokens` carries that detection.
|
|
1256
|
+
const requestedMaxTokens =
|
|
1257
|
+
options?.maxTokens ?? (compat.alwaysSendMaxTokens ? (model.maxTokens ?? OPENAI_MAX_OUTPUT_TOKENS) : undefined);
|
|
1258
|
+
// OpenRouter fans out to upstreams whose output caps differ from the catalog
|
|
1259
|
+
// value (which tracks the highest-cap provider). A max_tokens above the routed
|
|
1260
|
+
// upstream's cap makes OpenRouter silently skip that provider (e.g. Cerebras
|
|
1261
|
+
// GLM-4.7, ~40k) for a higher-cap one, defeating `provider.order`/`only`. Omit
|
|
1262
|
+
// it for OpenRouter so each upstream self-caps and routing is honored — unless
|
|
1263
|
+
// the model always requires max_tokens (Kimi TPM accounting, see above).
|
|
1264
|
+
const omitMaxTokensForRouting = compat.isOpenRouterHost && !compat.alwaysSendMaxTokens;
|
|
1265
|
+
const effectiveMaxTokens =
|
|
1266
|
+
requestedMaxTokens === undefined || omitMaxTokensForRouting
|
|
1267
|
+
? undefined
|
|
1268
|
+
: Math.min(requestedMaxTokens, model.maxTokens ?? Number.POSITIVE_INFINITY, OPENAI_MAX_OUTPUT_TOKENS);
|
|
1166
1269
|
|
|
1167
1270
|
const requestModelId = resolveOpenAICompletionsModelId(model, options);
|
|
1168
1271
|
const params: OpenAICompletionsParams = {
|
|
@@ -1171,6 +1274,7 @@ function buildParams(
|
|
|
1171
1274
|
stream: true,
|
|
1172
1275
|
};
|
|
1173
1276
|
let toolStrictMode: AppliedToolStrictMode = "none";
|
|
1277
|
+
let strictToolsApplied = false;
|
|
1174
1278
|
|
|
1175
1279
|
if (compat.supportsUsageInStreaming !== false) {
|
|
1176
1280
|
params.stream_options = { include_usage: true };
|
|
@@ -1224,6 +1328,7 @@ function buildParams(
|
|
|
1224
1328
|
const builtTools = convertTools(context.tools, compat, toolStrictModeOverride);
|
|
1225
1329
|
params.tools = builtTools.tools;
|
|
1226
1330
|
toolStrictMode = builtTools.toolStrictMode;
|
|
1331
|
+
strictToolsApplied = builtTools.strictToolsApplied;
|
|
1227
1332
|
} else if (context.tools === undefined && hasToolHistory(context.messages)) {
|
|
1228
1333
|
// Anthropic (via LiteLLM/proxy) requires the `tools` param when the conversation
|
|
1229
1334
|
// contains tool_calls/tool_results, even when no tools are offered this turn.
|
|
@@ -1238,6 +1343,12 @@ function buildParams(
|
|
|
1238
1343
|
if (options?.toolChoice && compat.supportsToolChoice) {
|
|
1239
1344
|
params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
|
|
1240
1345
|
}
|
|
1346
|
+
if (isForcedToolChoice(params.tool_choice) && !compat.supportsForcedToolChoice) {
|
|
1347
|
+
// Some thinking-required OpenAI-compatible models reject forced
|
|
1348
|
+
// `tool_choice` while still accepting tools with the default auto
|
|
1349
|
+
// selector. Keep the tool available and let the model choose it.
|
|
1350
|
+
params.tool_choice = "auto";
|
|
1351
|
+
}
|
|
1241
1352
|
|
|
1242
1353
|
if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
|
|
1243
1354
|
// `tool_choice: "none"` with no tools to gate is redundant and also
|
|
@@ -1251,6 +1362,19 @@ function buildParams(
|
|
|
1251
1362
|
delete params.tool_choice;
|
|
1252
1363
|
}
|
|
1253
1364
|
|
|
1365
|
+
const forcedToolName = getForcedCompletionsToolName(params.tool_choice);
|
|
1366
|
+
if (
|
|
1367
|
+
forcedToolName !== undefined &&
|
|
1368
|
+
(!Array.isArray(params.tools) ||
|
|
1369
|
+
!params.tools.some(tool => tool.type === "function" && tool.function.name === forcedToolName))
|
|
1370
|
+
) {
|
|
1371
|
+
// A forced named tool_choice is only valid when the same request offers
|
|
1372
|
+
// that function in `tools`. Active-tool filtering normally enforces this
|
|
1373
|
+
// before provider dispatch; this guard keeps raw provider callers from
|
|
1374
|
+
// emitting a self-inconsistent OpenAI-compatible payload.
|
|
1375
|
+
delete params.tool_choice;
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1254
1378
|
if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
|
|
1255
1379
|
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
|
|
1256
1380
|
// Must explicitly disable since z.ai defaults to thinking enabled.
|
|
@@ -1278,7 +1402,10 @@ function buildParams(
|
|
|
1278
1402
|
openRouterParams.reasoning = { enabled: false };
|
|
1279
1403
|
} else if (options?.reasoning) {
|
|
1280
1404
|
openRouterParams.reasoning = {
|
|
1281
|
-
effort:
|
|
1405
|
+
effort:
|
|
1406
|
+
compat.reasoningEffortMap?.[options.reasoning] ??
|
|
1407
|
+
model.thinking?.effortMap?.[options.reasoning] ??
|
|
1408
|
+
options.reasoning,
|
|
1282
1409
|
};
|
|
1283
1410
|
}
|
|
1284
1411
|
} else if (
|
|
@@ -1289,7 +1416,9 @@ function buildParams(
|
|
|
1289
1416
|
compat.supportsReasoningEffort
|
|
1290
1417
|
) {
|
|
1291
1418
|
// OpenAI-style reasoning_effort
|
|
1292
|
-
params.reasoning_effort =
|
|
1419
|
+
params.reasoning_effort = (compat.reasoningEffortMap?.[options.reasoning] ??
|
|
1420
|
+
model.thinking?.effortMap?.[options.reasoning] ??
|
|
1421
|
+
options.reasoning) as Effort;
|
|
1293
1422
|
} else if (
|
|
1294
1423
|
supportsReasoningParams &&
|
|
1295
1424
|
options?.disableReasoning &&
|
|
@@ -1304,7 +1433,9 @@ function buildParams(
|
|
|
1304
1433
|
if (minEffort === undefined) {
|
|
1305
1434
|
throw new Error(`Model ${model.provider}/${model.id} has no supported reasoning efforts`);
|
|
1306
1435
|
}
|
|
1307
|
-
params.reasoning_effort =
|
|
1436
|
+
params.reasoning_effort = (compat.reasoningEffortMap?.[minEffort] ??
|
|
1437
|
+
model.thinking?.effortMap?.[minEffort] ??
|
|
1438
|
+
minEffort) as Effort;
|
|
1308
1439
|
}
|
|
1309
1440
|
|
|
1310
1441
|
if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
|
|
@@ -1327,13 +1458,13 @@ function buildParams(
|
|
|
1327
1458
|
}
|
|
1328
1459
|
|
|
1329
1460
|
// OpenRouter provider routing preferences
|
|
1330
|
-
if (
|
|
1461
|
+
if (compat.isOpenRouterHost && compat.openRouterRouting) {
|
|
1331
1462
|
params.provider = compat.openRouterRouting;
|
|
1332
1463
|
}
|
|
1333
1464
|
|
|
1334
1465
|
// Vercel AI Gateway provider routing preferences
|
|
1335
|
-
if (
|
|
1336
|
-
const routing =
|
|
1466
|
+
if (compat.isVercelGatewayHost && compat.vercelGatewayRouting) {
|
|
1467
|
+
const routing = compat.vercelGatewayRouting;
|
|
1337
1468
|
if (routing.only || routing.order) {
|
|
1338
1469
|
const gatewayOptions: Record<string, string[]> = {};
|
|
1339
1470
|
if (routing.only) gatewayOptions.only = routing.only;
|
|
@@ -1344,9 +1475,14 @@ function buildParams(
|
|
|
1344
1475
|
|
|
1345
1476
|
if (compat.extraBody) {
|
|
1346
1477
|
Object.assign(params, compat.extraBody);
|
|
1478
|
+
if (model.provider === "fireworks" && params.reasoning_effort !== undefined) {
|
|
1479
|
+
// Fireworks rejects simultaneous DeepSeek-style `thinking` toggles and
|
|
1480
|
+
// OpenAI-style `reasoning_effort`; the effort field carries the user's level.
|
|
1481
|
+
delete params.thinking;
|
|
1482
|
+
}
|
|
1347
1483
|
}
|
|
1348
1484
|
|
|
1349
|
-
return { params, toolStrictMode };
|
|
1485
|
+
return { params, toolStrictMode, strictToolsApplied };
|
|
1350
1486
|
}
|
|
1351
1487
|
|
|
1352
1488
|
function getOptionalNumberProperty(value: object, key: string): number | undefined {
|
|
@@ -1354,6 +1490,11 @@ function getOptionalNumberProperty(value: object, key: string): number | undefin
|
|
|
1354
1490
|
return typeof property === "number" ? property : undefined;
|
|
1355
1491
|
}
|
|
1356
1492
|
|
|
1493
|
+
function getOptionalStringProperty(value: object, key: string): string | undefined {
|
|
1494
|
+
const property = Reflect.get(value, key);
|
|
1495
|
+
return typeof property === "string" && property.length > 0 ? property : undefined;
|
|
1496
|
+
}
|
|
1497
|
+
|
|
1357
1498
|
function getOptionalObjectProperty(value: object, key: string): object | undefined {
|
|
1358
1499
|
const property = Reflect.get(value, key);
|
|
1359
1500
|
return typeof property === "object" && property !== null ? property : undefined;
|
|
@@ -1430,13 +1571,6 @@ export function parseChunkUsage(
|
|
|
1430
1571
|
return usage;
|
|
1431
1572
|
}
|
|
1432
1573
|
|
|
1433
|
-
function mapReasoningEffort(
|
|
1434
|
-
effort: NonNullable<OpenAICompletionsOptions["reasoning"]>,
|
|
1435
|
-
reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoning"]>, string>>,
|
|
1436
|
-
): string {
|
|
1437
|
-
return reasoningEffortMap[effort] ?? effort;
|
|
1438
|
-
}
|
|
1439
|
-
|
|
1440
1574
|
function maybeAddAnthropicCacheControl(compat: ResolvedOpenAICompat, messages: ChatCompletionMessageParam[]): void {
|
|
1441
1575
|
if (compat.cacheControlFormat !== "anthropic") return;
|
|
1442
1576
|
// Anthropic-style caching requires cache_control on a text part. Add a breakpoint
|
|
@@ -1447,6 +1581,7 @@ function maybeAddAnthropicCacheControl(compat: ResolvedOpenAICompat, messages: C
|
|
|
1447
1581
|
|
|
1448
1582
|
const content = msg.content;
|
|
1449
1583
|
if (typeof content === "string") {
|
|
1584
|
+
if (content.trim().length === 0) continue;
|
|
1450
1585
|
msg.content = [
|
|
1451
1586
|
Object.assign({ type: "text" as const, text: content }, { cache_control: { type: "ephemeral" } }),
|
|
1452
1587
|
];
|
|
@@ -1455,10 +1590,12 @@ function maybeAddAnthropicCacheControl(compat: ResolvedOpenAICompat, messages: C
|
|
|
1455
1590
|
|
|
1456
1591
|
if (!Array.isArray(content)) continue;
|
|
1457
1592
|
|
|
1458
|
-
// Find last text part and add cache_control
|
|
1593
|
+
// Find last non-empty text part and add cache_control. Empty assistant
|
|
1594
|
+
// content is valid for tool-call replay, but Anthropic/OpenRouter reject
|
|
1595
|
+
// empty text blocks once cache_control turns it into structured content.
|
|
1459
1596
|
for (let j = content.length - 1; j >= 0; j--) {
|
|
1460
1597
|
const part = content[j];
|
|
1461
|
-
if (part?.type === "text") {
|
|
1598
|
+
if (part?.type === "text" && part.text.trim().length > 0) {
|
|
1462
1599
|
Object.assign(part, { cache_control: { type: "ephemeral" } });
|
|
1463
1600
|
return;
|
|
1464
1601
|
}
|
|
@@ -1473,6 +1610,12 @@ export function convertMessages(
|
|
|
1473
1610
|
): ChatCompletionMessageParam[] {
|
|
1474
1611
|
const params: ChatCompletionMessageParam[] = [];
|
|
1475
1612
|
|
|
1613
|
+
const maxNormalizedToolCallIdLength = compat.requiresMistralToolIds
|
|
1614
|
+
? 9
|
|
1615
|
+
: model.provider === "openai"
|
|
1616
|
+
? 40
|
|
1617
|
+
: undefined;
|
|
1618
|
+
const duplicateToolCallIdSuffixPrefix = compat.requiresMistralToolIds ? "dup" : undefined;
|
|
1476
1619
|
const normalizeToolCallId = (id: string): string => {
|
|
1477
1620
|
if (compat.requiresMistralToolIds) return normalizeMistralToolId(id, true);
|
|
1478
1621
|
|
|
@@ -1489,7 +1632,13 @@ export function convertMessages(
|
|
|
1489
1632
|
if (model.provider === "openai") return id.length > 40 ? id.slice(0, 40) : id;
|
|
1490
1633
|
return id;
|
|
1491
1634
|
};
|
|
1492
|
-
const transformedMessages = transformMessages(
|
|
1635
|
+
const transformedMessages = transformMessages(
|
|
1636
|
+
context.messages,
|
|
1637
|
+
model,
|
|
1638
|
+
id => normalizeToolCallId(id),
|
|
1639
|
+
maxNormalizedToolCallIdLength,
|
|
1640
|
+
duplicateToolCallIdSuffixPrefix,
|
|
1641
|
+
);
|
|
1493
1642
|
|
|
1494
1643
|
const remappedToolCallIds = new Map<string, string[]>();
|
|
1495
1644
|
let generatedToolCallIdCounter = 0;
|
|
@@ -1586,6 +1735,8 @@ export function convertMessages(
|
|
|
1586
1735
|
type: "image_url",
|
|
1587
1736
|
image_url: {
|
|
1588
1737
|
url: `data:${item.mimeType};base64,${item.data}`,
|
|
1738
|
+
// Chat Completions has no "original"; omit it (provider default).
|
|
1739
|
+
...(item.detail && item.detail !== "original" ? { detail: item.detail } : {}),
|
|
1589
1740
|
},
|
|
1590
1741
|
} satisfies ChatCompletionContentPartImage);
|
|
1591
1742
|
} else {
|
|
@@ -1628,12 +1779,12 @@ export function convertMessages(
|
|
|
1628
1779
|
if (compat.requiresThinkingAsText) {
|
|
1629
1780
|
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
|
1630
1781
|
const thinkingText = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n\n");
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1782
|
+
// `content` is a plain string at this point (set above) or null —
|
|
1783
|
+
// never an array. Prepend the thinking text to the string form.
|
|
1784
|
+
assistantMsg.content =
|
|
1785
|
+
typeof assistantMsg.content === "string" && assistantMsg.content.length > 0
|
|
1786
|
+
? `${thinkingText}\n\n${assistantMsg.content}`
|
|
1787
|
+
: thinkingText;
|
|
1637
1788
|
} else if (compat.requiresReasoningContentForToolCalls) {
|
|
1638
1789
|
// Use the streamed signature when the backend accepts whichever
|
|
1639
1790
|
// recognized field name was emitted (allowsSynthetic=true). Backends
|
|
@@ -1934,16 +2085,19 @@ function convertTools(
|
|
|
1934
2085
|
};
|
|
1935
2086
|
}),
|
|
1936
2087
|
toolStrictMode,
|
|
2088
|
+
strictToolsApplied:
|
|
2089
|
+
tools.length > 0 &&
|
|
2090
|
+
(toolStrictMode === "all_strict" || (toolStrictMode === "mixed" && adaptedTools.some(tool => tool.strict))),
|
|
1937
2091
|
};
|
|
1938
2092
|
}
|
|
1939
2093
|
|
|
1940
2094
|
function shouldRetryWithoutStrictTools(
|
|
1941
2095
|
error: unknown,
|
|
1942
2096
|
capturedErrorResponse: CapturedHttpErrorResponse | undefined,
|
|
1943
|
-
|
|
2097
|
+
strictToolsApplied: boolean,
|
|
1944
2098
|
tools: Tool[] | undefined,
|
|
1945
2099
|
): boolean {
|
|
1946
|
-
if (!tools || tools.length === 0 ||
|
|
2100
|
+
if (!tools || tools.length === 0 || !strictToolsApplied) {
|
|
1947
2101
|
return false;
|
|
1948
2102
|
}
|
|
1949
2103
|
const status = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
|
|
@@ -1953,7 +2107,14 @@ function shouldRetryWithoutStrictTools(
|
|
|
1953
2107
|
const messageParts = [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText]
|
|
1954
2108
|
.filter((value): value is string => typeof value === "string" && value.trim().length > 0)
|
|
1955
2109
|
.join("\n");
|
|
1956
|
-
|
|
2110
|
+
// Last two alternatives catch upstream tool-schema validators rejecting our
|
|
2111
|
+
// strictified schemas outright (e.g. OpenRouter DeepSeek's "Invalid tool
|
|
2112
|
+
// parameters schema : field `anyOf`: missing field `type`", #2270, and
|
|
2113
|
+
// OpenAI's own "Invalid schema for function 'x'"). Retrying non-strict sends
|
|
2114
|
+
// the unmodified base schemas, which those validators accept.
|
|
2115
|
+
return /wrong_api_format|mixed values for 'strict'|tool[s]?\b.*strict|\bstrict\b.*tool|tool parameters? schema|invalid schema for function/i.test(
|
|
2116
|
+
messageParts,
|
|
2117
|
+
);
|
|
1957
2118
|
}
|
|
1958
2119
|
|
|
1959
2120
|
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
|
|
@@ -1974,6 +2135,13 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | str
|
|
|
1974
2135
|
return { stopReason: "error", errorMessage: "Provider finish_reason: content_filter" };
|
|
1975
2136
|
case "network_error":
|
|
1976
2137
|
return { stopReason: "error", errorMessage: "Provider finish_reason: network_error" };
|
|
2138
|
+
case "error":
|
|
2139
|
+
// Gateways (OpenRouter, Vercel AI Gateway, …) report upstream model
|
|
2140
|
+
// failures as a bare `finish_reason: "error"` with no detail. These are
|
|
2141
|
+
// almost always transient (e.g. Gemini MALFORMED_FUNCTION_CALL), so word
|
|
2142
|
+
// the message to match the session retry classifier's transient-transport
|
|
2143
|
+
// pattern (`provider.?returned.?error`) and get the turn auto-retried.
|
|
2144
|
+
return { stopReason: "error", errorMessage: "Provider returned error finish_reason" };
|
|
1977
2145
|
default:
|
|
1978
2146
|
return {
|
|
1979
2147
|
stopReason: "error",
|
|
@@ -1981,22 +2149,3 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | str
|
|
|
1981
2149
|
};
|
|
1982
2150
|
}
|
|
1983
2151
|
}
|
|
1984
|
-
|
|
1985
|
-
/**
|
|
1986
|
-
* Detect compatibility settings from provider and baseUrl for known providers.
|
|
1987
|
-
* Provider takes precedence over URL-based detection since it's explicitly configured.
|
|
1988
|
-
* Returns a fully resolved OpenAICompat object with all fields set.
|
|
1989
|
-
*/
|
|
1990
|
-
export function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
|
|
1991
|
-
return detectOpenAICompat(model);
|
|
1992
|
-
}
|
|
1993
|
-
|
|
1994
|
-
/**
|
|
1995
|
-
* Get resolved compatibility settings for a model.
|
|
1996
|
-
* Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.
|
|
1997
|
-
* @param model - The model configuration
|
|
1998
|
-
* @param resolvedBaseUrl - Optional resolved base URL (e.g., after GitHub Copilot proxy-ep resolution).
|
|
1999
|
-
*/
|
|
2000
|
-
function getCompat(model: Model<"openai-completions">, resolvedBaseUrl?: string): ResolvedOpenAICompat {
|
|
2001
|
-
return resolveOpenAICompat(model, resolvedBaseUrl);
|
|
2002
|
-
}
|