@aryee337/aery-ai 0.2.28 → 0.2.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2914 -0
- package/README.md +614 -813
- package/package.json +140 -105
- package/src/api-registry.ts +96 -0
- package/src/auth-broker/client.ts +358 -0
- package/src/auth-broker/index.ts +5 -0
- package/src/auth-broker/refresher.ts +117 -0
- package/src/auth-broker/remote-store.ts +623 -0
- package/src/auth-broker/server.ts +644 -0
- package/src/auth-broker/types.ts +127 -0
- package/src/auth-broker/wire-schemas.ts +200 -0
- package/src/auth-gateway/http.ts +194 -0
- package/src/auth-gateway/index.ts +3 -0
- package/src/auth-gateway/server.ts +818 -0
- package/src/auth-gateway/types.ts +143 -0
- package/src/auth-storage.ts +4422 -0
- package/src/index.ts +54 -0
- package/src/model-cache.ts +129 -0
- package/src/model-manager.ts +469 -0
- package/src/model-thinking.ts +782 -0
- package/src/models.json +83530 -0
- package/src/models.json.d.ts +9 -0
- package/src/models.ts +56 -0
- package/src/prompts/turn-aborted-guidance.md +4 -0
- package/src/provider-details.ts +90 -0
- package/src/provider-models/bundled-references.ts +38 -0
- package/src/provider-models/descriptors.ts +355 -0
- package/src/provider-models/google.ts +88 -0
- package/src/provider-models/index.ts +5 -0
- package/src/provider-models/ollama.ts +153 -0
- package/src/provider-models/openai-compat.ts +2817 -0
- package/src/provider-models/special.ts +67 -0
- package/src/providers/aery-native-client.ts +228 -0
- package/src/providers/aery-native-server.ts +212 -0
- package/src/providers/amazon-bedrock.ts +873 -0
- package/src/providers/anthropic-client.ts +318 -0
- package/src/providers/anthropic-messages-server-schema.ts +243 -0
- package/src/providers/anthropic-messages-server.ts +683 -0
- package/src/providers/anthropic-wire.ts +268 -0
- package/src/providers/anthropic.ts +3094 -0
- package/src/providers/aws-credentials.ts +501 -0
- package/src/providers/aws-eventstream.ts +185 -0
- package/src/providers/aws-sigv4.ts +218 -0
- package/src/providers/azure-openai-responses.ts +361 -0
- package/src/providers/cursor/gen/agent_pb.ts +15274 -0
- package/src/providers/cursor/proto/agent.proto +3526 -0
- package/src/providers/cursor/proto/buf.gen.yaml +6 -0
- package/src/providers/cursor/proto/buf.yaml +17 -0
- package/src/providers/cursor.ts +2621 -0
- package/src/providers/error-message.ts +21 -0
- package/src/providers/github-copilot-headers.ts +140 -0
- package/src/providers/gitlab-duo.ts +372 -0
- package/src/providers/google-auth.ts +252 -0
- package/src/providers/google-gemini-cli.ts +809 -0
- package/src/providers/google-gemini-headers.ts +41 -0
- package/src/providers/google-shared.ts +917 -0
- package/src/providers/google-types.ts +167 -0
- package/src/providers/google-vertex.ts +91 -0
- package/src/providers/google.ts +41 -0
- package/src/providers/grammar.ts +70 -0
- package/src/providers/kimi.ts +52 -0
- package/src/providers/mock.ts +496 -0
- package/src/providers/ollama.ts +644 -0
- package/src/providers/openai-anthropic-shim.ts +138 -0
- package/src/providers/openai-chat-server-schema.ts +252 -0
- package/src/providers/openai-chat-server.ts +647 -0
- package/src/providers/openai-codex/constants.ts +43 -0
- package/src/providers/openai-codex/request-transformer.ts +161 -0
- package/src/providers/openai-codex/response-handler.ts +81 -0
- package/src/providers/openai-codex-responses.ts +3018 -0
- package/src/providers/openai-completions-compat.ts +300 -0
- package/src/providers/openai-completions.ts +1979 -0
- package/src/providers/openai-responses-server-schema.ts +290 -0
- package/src/providers/openai-responses-server.ts +1183 -0
- package/src/providers/openai-responses-shared.ts +873 -0
- package/src/providers/openai-responses.ts +679 -0
- package/src/providers/register-builtins.ts +436 -0
- package/src/providers/synthetic.ts +50 -0
- package/src/providers/transform-messages.ts +382 -0
- package/src/providers/vision-guard.ts +31 -0
- package/src/providers/xai-responses.ts +82 -0
- package/src/rate-limit-utils.ts +84 -0
- package/src/stream.ts +1065 -0
- package/src/types.ts +944 -0
- package/src/usage/claude.ts +482 -0
- package/src/usage/gemini.ts +250 -0
- package/src/usage/github-copilot.ts +421 -0
- package/src/usage/google-antigravity.ts +201 -0
- package/src/usage/kimi.ts +271 -0
- package/src/usage/minimax-code.ts +31 -0
- package/src/usage/openai-codex.ts +503 -0
- package/src/usage/shared.ts +10 -0
- package/src/usage/zai.ts +247 -0
- package/src/usage.ts +185 -0
- package/src/utils/abort.ts +51 -0
- package/src/utils/abortable-iterator.ts +69 -0
- package/src/utils/anthropic-auth.ts +93 -0
- package/src/utils/discovery/antigravity.ts +261 -0
- package/src/utils/discovery/codex.ts +371 -0
- package/src/utils/discovery/cursor.ts +306 -0
- package/src/utils/discovery/gemini.ts +248 -0
- package/src/utils/discovery/index.ts +4 -0
- package/src/utils/discovery/openai-compatible.ts +224 -0
- package/src/utils/event-stream.ts +142 -0
- package/src/utils/fireworks-model-id.ts +30 -0
- package/src/utils/foundry.ts +8 -0
- package/src/utils/http-inspector.ts +176 -0
- package/src/utils/idle-iterator.ts +267 -0
- package/src/utils/json-parse.ts +182 -0
- package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
- package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
- package/src/utils/oauth/anthropic.ts +273 -0
- package/src/utils/oauth/api-key-login.ts +87 -0
- package/src/utils/oauth/api-key-validation.ts +92 -0
- package/src/utils/oauth/callback-server.ts +276 -0
- package/src/utils/oauth/cerebras.ts +16 -0
- package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
- package/src/utils/oauth/cursor.ts +157 -0
- package/src/utils/oauth/deepseek.ts +53 -0
- package/src/utils/oauth/firepass.ts +24 -0
- package/src/utils/oauth/fireworks.ts +15 -0
- package/src/utils/oauth/github-copilot.ts +362 -0
- package/src/utils/oauth/gitlab-duo.ts +123 -0
- package/src/utils/oauth/google-antigravity.ts +200 -0
- package/src/utils/oauth/google-gemini-cli.ts +256 -0
- package/src/utils/oauth/google-oauth-shared.ts +110 -0
- package/src/utils/oauth/huggingface.ts +62 -0
- package/src/utils/oauth/index.ts +484 -0
- package/src/utils/oauth/kagi.ts +47 -0
- package/src/utils/oauth/kilo.ts +87 -0
- package/src/utils/oauth/kimi.ts +254 -0
- package/src/utils/oauth/litellm.ts +47 -0
- package/src/utils/oauth/lm-studio.ts +38 -0
- package/src/utils/oauth/minimax-code.ts +78 -0
- package/src/utils/oauth/moonshot.ts +23 -0
- package/src/utils/oauth/nanogpt.ts +15 -0
- package/src/utils/oauth/nvidia.ts +70 -0
- package/src/utils/oauth/oauth.html +203 -0
- package/src/utils/oauth/ollama-cloud.ts +28 -0
- package/src/utils/oauth/ollama.ts +47 -0
- package/src/utils/oauth/openai-codex.ts +299 -0
- package/src/utils/oauth/opencode.ts +49 -0
- package/src/utils/oauth/openrouter.ts +20 -0
- package/src/utils/oauth/parallel.ts +46 -0
- package/src/utils/oauth/perplexity.ts +206 -0
- package/src/utils/oauth/pkce.ts +18 -0
- package/src/utils/oauth/qianfan.ts +58 -0
- package/src/utils/oauth/qwen-portal.ts +60 -0
- package/src/utils/oauth/synthetic.ts +15 -0
- package/src/utils/oauth/tavily.ts +46 -0
- package/src/utils/oauth/together.ts +16 -0
- package/src/utils/oauth/types.ts +99 -0
- package/src/utils/oauth/venice.ts +59 -0
- package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
- package/src/utils/oauth/vllm.ts +40 -0
- package/src/utils/oauth/wafer.ts +50 -0
- package/src/utils/oauth/xai-oauth.ts +342 -0
- package/src/utils/oauth/xiaomi.ts +139 -0
- package/src/utils/oauth/zai.ts +60 -0
- package/src/utils/oauth/zenmux.ts +15 -0
- package/src/utils/oauth/zhipu.ts +60 -0
- package/src/utils/overflow.ts +137 -0
- package/src/utils/parse-bind.ts +54 -0
- package/src/utils/provider-response.ts +30 -0
- package/src/utils/request-debug.ts +336 -0
- package/src/utils/retry-after.ts +110 -0
- package/src/utils/retry.ts +54 -0
- package/src/utils/schema/CONSTRAINTS.md +164 -0
- package/src/utils/schema/adapt.ts +36 -0
- package/src/utils/schema/compatibility.ts +435 -0
- package/src/utils/schema/dereference.ts +98 -0
- package/src/utils/schema/draft.ts +341 -0
- package/src/utils/schema/equality.ts +97 -0
- package/src/utils/schema/fields.ts +191 -0
- package/src/utils/schema/index.ts +13 -0
- package/src/utils/schema/json-schema-validator.ts +577 -0
- package/src/utils/schema/meta-validator.ts +167 -0
- package/src/utils/schema/normalize.ts +1588 -0
- package/src/utils/schema/spill.ts +43 -0
- package/src/utils/schema/stamps.ts +97 -0
- package/src/utils/schema/types.ts +10 -0
- package/src/utils/schema/wire.ts +293 -0
- package/src/utils/schema/zod-decontaminate.ts +331 -0
- package/src/utils/sdk-stream-timeout.ts +43 -0
- package/src/utils/sse-debug.ts +289 -0
- package/src/utils/stream-markup-healing.ts +612 -0
- package/src/utils/tool-choice.ts +99 -0
- package/src/utils/validation.ts +1024 -0
- package/src/utils.ts +166 -0
- package/dist/api-registry.d.ts +0 -20
- package/dist/api-registry.d.ts.map +0 -1
- package/dist/api-registry.js +0 -44
- package/dist/api-registry.js.map +0 -1
- package/dist/bedrock-provider.d.ts +0 -5
- package/dist/bedrock-provider.d.ts.map +0 -1
- package/dist/bedrock-provider.js +0 -6
- package/dist/bedrock-provider.js.map +0 -1
- package/dist/cli.d.ts +0 -3
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -130
- package/dist/cli.js.map +0 -1
- package/dist/env-api-keys.d.ts +0 -18
- package/dist/env-api-keys.d.ts.map +0 -1
- package/dist/env-api-keys.js +0 -178
- package/dist/env-api-keys.js.map +0 -1
- package/dist/image-models.d.ts +0 -10
- package/dist/image-models.d.ts.map +0 -1
- package/dist/image-models.generated.d.ts +0 -440
- package/dist/image-models.generated.d.ts.map +0 -1
- package/dist/image-models.generated.js +0 -442
- package/dist/image-models.generated.js.map +0 -1
- package/dist/image-models.js +0 -23
- package/dist/image-models.js.map +0 -1
- package/dist/images-api-registry.d.ts +0 -14
- package/dist/images-api-registry.d.ts.map +0 -1
- package/dist/images-api-registry.js +0 -22
- package/dist/images-api-registry.js.map +0 -1
- package/dist/images.d.ts +0 -4
- package/dist/images.d.ts.map +0 -1
- package/dist/images.js +0 -14
- package/dist/images.js.map +0 -1
- package/dist/index.d.ts +0 -32
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -20
- package/dist/index.js.map +0 -1
- package/dist/models.d.ts +0 -18
- package/dist/models.d.ts.map +0 -1
- package/dist/models.generated.d.ts +0 -17707
- package/dist/models.generated.d.ts.map +0 -1
- package/dist/models.generated.js +0 -16561
- package/dist/models.generated.js.map +0 -1
- package/dist/models.js +0 -71
- package/dist/models.js.map +0 -1
- package/dist/oauth.d.ts +0 -2
- package/dist/oauth.d.ts.map +0 -1
- package/dist/oauth.js +0 -2
- package/dist/oauth.js.map +0 -1
- package/dist/providers/aery-error-formatting.d.ts +0 -13
- package/dist/providers/aery-error-formatting.d.ts.map +0 -1
- package/dist/providers/aery-error-formatting.js +0 -112
- package/dist/providers/aery-error-formatting.js.map +0 -1
- package/dist/providers/amazon-bedrock.d.ts +0 -38
- package/dist/providers/amazon-bedrock.d.ts.map +0 -1
- package/dist/providers/amazon-bedrock.js +0 -763
- package/dist/providers/amazon-bedrock.js.map +0 -1
- package/dist/providers/anthropic.d.ts +0 -71
- package/dist/providers/anthropic.d.ts.map +0 -1
- package/dist/providers/anthropic.js +0 -949
- package/dist/providers/anthropic.js.map +0 -1
- package/dist/providers/azure-openai-responses.d.ts +0 -15
- package/dist/providers/azure-openai-responses.d.ts.map +0 -1
- package/dist/providers/azure-openai-responses.js +0 -225
- package/dist/providers/azure-openai-responses.js.map +0 -1
- package/dist/providers/cloudflare.d.ts +0 -13
- package/dist/providers/cloudflare.d.ts.map +0 -1
- package/dist/providers/cloudflare.js +0 -26
- package/dist/providers/cloudflare.js.map +0 -1
- package/dist/providers/faux.d.ts +0 -56
- package/dist/providers/faux.d.ts.map +0 -1
- package/dist/providers/faux.js +0 -368
- package/dist/providers/faux.js.map +0 -1
- package/dist/providers/github-copilot-headers.d.ts +0 -8
- package/dist/providers/github-copilot-headers.d.ts.map +0 -1
- package/dist/providers/github-copilot-headers.js +0 -29
- package/dist/providers/github-copilot-headers.js.map +0 -1
- package/dist/providers/google-gemini-cli.d.ts +0 -74
- package/dist/providers/google-gemini-cli.d.ts.map +0 -1
- package/dist/providers/google-gemini-cli.js +0 -779
- package/dist/providers/google-gemini-cli.js.map +0 -1
- package/dist/providers/google-shared.d.ts +0 -70
- package/dist/providers/google-shared.d.ts.map +0 -1
- package/dist/providers/google-shared.js +0 -329
- package/dist/providers/google-shared.js.map +0 -1
- package/dist/providers/google-vertex.d.ts +0 -15
- package/dist/providers/google-vertex.d.ts.map +0 -1
- package/dist/providers/google-vertex.js +0 -442
- package/dist/providers/google-vertex.js.map +0 -1
- package/dist/providers/google.d.ts +0 -13
- package/dist/providers/google.d.ts.map +0 -1
- package/dist/providers/google.js +0 -400
- package/dist/providers/google.js.map +0 -1
- package/dist/providers/images/openrouter.d.ts +0 -3
- package/dist/providers/images/openrouter.d.ts.map +0 -1
- package/dist/providers/images/openrouter.js +0 -129
- package/dist/providers/images/openrouter.js.map +0 -1
- package/dist/providers/images/register-builtins.d.ts +0 -4
- package/dist/providers/images/register-builtins.d.ts.map +0 -1
- package/dist/providers/images/register-builtins.js +0 -34
- package/dist/providers/images/register-builtins.js.map +0 -1
- package/dist/providers/mistral.d.ts +0 -25
- package/dist/providers/mistral.d.ts.map +0 -1
- package/dist/providers/mistral.js +0 -535
- package/dist/providers/mistral.js.map +0 -1
- package/dist/providers/openai-codex-responses.d.ts +0 -30
- package/dist/providers/openai-codex-responses.d.ts.map +0 -1
- package/dist/providers/openai-codex-responses.js +0 -1090
- package/dist/providers/openai-codex-responses.js.map +0 -1
- package/dist/providers/openai-completions.d.ts +0 -19
- package/dist/providers/openai-completions.d.ts.map +0 -1
- package/dist/providers/openai-completions.js +0 -950
- package/dist/providers/openai-completions.js.map +0 -1
- package/dist/providers/openai-prompt-cache.d.ts +0 -3
- package/dist/providers/openai-prompt-cache.d.ts.map +0 -1
- package/dist/providers/openai-prompt-cache.js +0 -10
- package/dist/providers/openai-prompt-cache.js.map +0 -1
- package/dist/providers/openai-responses-shared.d.ts +0 -18
- package/dist/providers/openai-responses-shared.d.ts.map +0 -1
- package/dist/providers/openai-responses-shared.js +0 -492
- package/dist/providers/openai-responses-shared.js.map +0 -1
- package/dist/providers/openai-responses.d.ts +0 -13
- package/dist/providers/openai-responses.d.ts.map +0 -1
- package/dist/providers/openai-responses.js +0 -237
- package/dist/providers/openai-responses.js.map +0 -1
- package/dist/providers/register-builtins.d.ts +0 -38
- package/dist/providers/register-builtins.d.ts.map +0 -1
- package/dist/providers/register-builtins.js +0 -278
- package/dist/providers/register-builtins.js.map +0 -1
- package/dist/providers/simple-options.d.ts +0 -8
- package/dist/providers/simple-options.d.ts.map +0 -1
- package/dist/providers/simple-options.js +0 -41
- package/dist/providers/simple-options.js.map +0 -1
- package/dist/providers/transform-messages.d.ts +0 -8
- package/dist/providers/transform-messages.d.ts.map +0 -1
- package/dist/providers/transform-messages.js +0 -184
- package/dist/providers/transform-messages.js.map +0 -1
- package/dist/session-resources.d.ts +0 -4
- package/dist/session-resources.d.ts.map +0 -1
- package/dist/session-resources.js +0 -22
- package/dist/session-resources.js.map +0 -1
- package/dist/stream.d.ts +0 -8
- package/dist/stream.d.ts.map +0 -1
- package/dist/stream.js +0 -27
- package/dist/stream.js.map +0 -1
- package/dist/types.d.ts +0 -498
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -2
- package/dist/types.js.map +0 -1
- package/dist/utils/diagnostics.d.ts +0 -19
- package/dist/utils/diagnostics.d.ts.map +0 -1
- package/dist/utils/diagnostics.js +0 -25
- package/dist/utils/diagnostics.js.map +0 -1
- package/dist/utils/event-stream.d.ts +0 -21
- package/dist/utils/event-stream.d.ts.map +0 -1
- package/dist/utils/event-stream.js +0 -81
- package/dist/utils/event-stream.js.map +0 -1
- package/dist/utils/hash.d.ts +0 -3
- package/dist/utils/hash.d.ts.map +0 -1
- package/dist/utils/hash.js +0 -14
- package/dist/utils/hash.js.map +0 -1
- package/dist/utils/headers.d.ts +0 -2
- package/dist/utils/headers.d.ts.map +0 -1
- package/dist/utils/headers.js +0 -8
- package/dist/utils/headers.js.map +0 -1
- package/dist/utils/json-parse.d.ts +0 -16
- package/dist/utils/json-parse.d.ts.map +0 -1
- package/dist/utils/json-parse.js +0 -113
- package/dist/utils/json-parse.js.map +0 -1
- package/dist/utils/node-http-proxy.d.ts +0 -10
- package/dist/utils/node-http-proxy.d.ts.map +0 -1
- package/dist/utils/node-http-proxy.js +0 -97
- package/dist/utils/node-http-proxy.js.map +0 -1
- package/dist/utils/oauth/anthropic.d.ts +0 -25
- package/dist/utils/oauth/anthropic.d.ts.map +0 -1
- package/dist/utils/oauth/anthropic.js +0 -335
- package/dist/utils/oauth/anthropic.js.map +0 -1
- package/dist/utils/oauth/device-code.d.ts +0 -19
- package/dist/utils/oauth/device-code.d.ts.map +0 -1
- package/dist/utils/oauth/device-code.js +0 -55
- package/dist/utils/oauth/device-code.js.map +0 -1
- package/dist/utils/oauth/github-copilot.d.ts +0 -30
- package/dist/utils/oauth/github-copilot.d.ts.map +0 -1
- package/dist/utils/oauth/github-copilot.js +0 -268
- package/dist/utils/oauth/github-copilot.js.map +0 -1
- package/dist/utils/oauth/google-antigravity.d.ts +0 -26
- package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
- package/dist/utils/oauth/google-antigravity.js +0 -377
- package/dist/utils/oauth/google-antigravity.js.map +0 -1
- package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
- package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
- package/dist/utils/oauth/google-gemini-cli.js +0 -482
- package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
- package/dist/utils/oauth/index.d.ts +0 -63
- package/dist/utils/oauth/index.d.ts.map +0 -1
- package/dist/utils/oauth/index.js +0 -131
- package/dist/utils/oauth/index.js.map +0 -1
- package/dist/utils/oauth/oauth-page.d.ts +0 -3
- package/dist/utils/oauth/oauth-page.d.ts.map +0 -1
- package/dist/utils/oauth/oauth-page.js +0 -105
- package/dist/utils/oauth/oauth-page.js.map +0 -1
- package/dist/utils/oauth/openai-codex.d.ts +0 -34
- package/dist/utils/oauth/openai-codex.d.ts.map +0 -1
- package/dist/utils/oauth/openai-codex.js +0 -385
- package/dist/utils/oauth/openai-codex.js.map +0 -1
- package/dist/utils/oauth/pkce.d.ts +0 -13
- package/dist/utils/oauth/pkce.d.ts.map +0 -1
- package/dist/utils/oauth/pkce.js +0 -31
- package/dist/utils/oauth/pkce.js.map +0 -1
- package/dist/utils/oauth/types.d.ts +0 -64
- package/dist/utils/oauth/types.d.ts.map +0 -1
- package/dist/utils/oauth/types.js +0 -2
- package/dist/utils/oauth/types.js.map +0 -1
- package/dist/utils/overflow.d.ts +0 -56
- package/dist/utils/overflow.d.ts.map +0 -1
- package/dist/utils/overflow.js +0 -151
- package/dist/utils/overflow.js.map +0 -1
- package/dist/utils/sanitize-unicode.d.ts +0 -22
- package/dist/utils/sanitize-unicode.d.ts.map +0 -1
- package/dist/utils/sanitize-unicode.js +0 -26
- package/dist/utils/sanitize-unicode.js.map +0 -1
- package/dist/utils/typebox-helpers.d.ts +0 -17
- package/dist/utils/typebox-helpers.d.ts.map +0 -1
- package/dist/utils/typebox-helpers.js +0 -21
- package/dist/utils/typebox-helpers.js.map +0 -1
- package/dist/utils/validation.d.ts +0 -18
- package/dist/utils/validation.d.ts.map +0 -1
- package/dist/utils/validation.js +0 -281
- package/dist/utils/validation.js.map +0 -1
|
@@ -0,0 +1,1979 @@
|
|
|
1
|
+
import { $env, extractHttpStatusFromError } from "@aryee337/aery-utils";
|
|
2
|
+
import OpenAI, { APIConnectionTimeoutError as OpenAIConnectionTimeoutError } from "openai";
|
|
3
|
+
import type {
|
|
4
|
+
ChatCompletionAssistantMessageParam,
|
|
5
|
+
ChatCompletionChunk,
|
|
6
|
+
ChatCompletionContentPart,
|
|
7
|
+
ChatCompletionContentPartImage,
|
|
8
|
+
ChatCompletionContentPartText,
|
|
9
|
+
ChatCompletionMessageParam,
|
|
10
|
+
ChatCompletionToolMessageParam,
|
|
11
|
+
} from "openai/resources/chat/completions";
|
|
12
|
+
import packageJson from "../../package.json" with { type: "json" };
|
|
13
|
+
import { type Effort, getSupportedEfforts } from "../model-thinking";
|
|
14
|
+
import { calculateCost } from "../models";
|
|
15
|
+
import { getEnvApiKey } from "../stream";
|
|
16
|
+
import {
|
|
17
|
+
type AssistantMessage,
|
|
18
|
+
type Context,
|
|
19
|
+
type FetchImpl,
|
|
20
|
+
type Message,
|
|
21
|
+
type MessageAttribution,
|
|
22
|
+
type Model,
|
|
23
|
+
type OpenAICompat,
|
|
24
|
+
type ProviderSessionState,
|
|
25
|
+
resolveServiceTier,
|
|
26
|
+
type ServiceTier,
|
|
27
|
+
type StopReason,
|
|
28
|
+
type StreamFunction,
|
|
29
|
+
type StreamOptions,
|
|
30
|
+
shouldSendServiceTier,
|
|
31
|
+
type TextContent,
|
|
32
|
+
type ThinkingContent,
|
|
33
|
+
type Tool,
|
|
34
|
+
type ToolCall,
|
|
35
|
+
type ToolChoice,
|
|
36
|
+
type ToolResultMessage,
|
|
37
|
+
} from "../types";
|
|
38
|
+
import { normalizeSystemPrompts } from "../utils";
|
|
39
|
+
import { createAbortSourceTracker } from "../utils/abort";
|
|
40
|
+
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
41
|
+
import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
|
|
42
|
+
import {
|
|
43
|
+
type CapturedHttpErrorResponse,
|
|
44
|
+
finalizeErrorMessage,
|
|
45
|
+
type RawHttpRequestDump,
|
|
46
|
+
rewriteCopilotError,
|
|
47
|
+
} from "../utils/http-inspector";
|
|
48
|
+
import {
|
|
49
|
+
getOpenAIStreamFirstEventTimeoutMs,
|
|
50
|
+
getOpenAIStreamIdleTimeoutMs,
|
|
51
|
+
iterateWithIdleTimeout,
|
|
52
|
+
} from "../utils/idle-iterator";
|
|
53
|
+
import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
|
|
54
|
+
import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
55
|
+
import { getKimiCommonHeaders } from "../utils/oauth/kimi";
|
|
56
|
+
import { notifyProviderResponse } from "../utils/provider-response";
|
|
57
|
+
import { callWithCopilotModelRetry } from "../utils/retry";
|
|
58
|
+
import { adaptSchemaForStrict, NO_STRICT, toolWireSchema } from "../utils/schema";
|
|
59
|
+
import { wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
60
|
+
import {
|
|
61
|
+
getStreamMarkupHealingPattern,
|
|
62
|
+
type HealedToolCall,
|
|
63
|
+
StreamMarkupHealing,
|
|
64
|
+
type StreamMarkupHealingEvent,
|
|
65
|
+
} from "../utils/stream-markup-healing";
|
|
66
|
+
import { isForcedToolChoice, mapToOpenAICompletionsToolChoice } from "../utils/tool-choice";
|
|
67
|
+
import {
|
|
68
|
+
buildCopilotDynamicHeaders,
|
|
69
|
+
hasCopilotVisionInput,
|
|
70
|
+
resolveGitHubCopilotBaseUrl,
|
|
71
|
+
} from "./github-copilot-headers";
|
|
72
|
+
import { detectOpenAICompat, type ResolvedOpenAICompat, resolveOpenAICompat } from "./openai-completions-compat";
|
|
73
|
+
import { createInitialResponsesAssistantMessage } from "./openai-responses-shared";
|
|
74
|
+
import { transformMessages } from "./transform-messages";
|
|
75
|
+
import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER } from "./vision-guard";
|
|
76
|
+
|
|
77
|
+
/**
 * Normalize a tool call ID for Mistral.
 * Mistral requires tool IDs to be exactly 9 alphanumeric characters (a-z, A-Z, 0-9).
 *
 * The result is a pure function of `id`, so a tool call and the tool result
 * that refers to it still line up after both sides are normalized.
 *
 * - Non-Mistral targets get `id` back untouched.
 * - IDs that are exactly 9 characters once non-alphanumerics are stripped are
 *   returned in that stripped form.
 * - Shorter IDs are right-padded from a fixed filler string (the filler does
 *   not depend on the ID).
 * - Longer IDs keep a 2-character prefix and append a 7-character base-36
 *   FNV-1a digest of the full original ID. Plain head-truncation made IDs that
 *   share a long prefix (e.g. `toolu_01…`) collapse to nearly the same value,
 *   which could pair a tool result with the wrong tool call.
 *
 * @param id - Tool call ID as produced by any provider.
 * @param isMistral - Whether the request targets Mistral.
 * @returns The ID to send on the wire.
 */
function normalizeMistralToolId(id: string, isMistral: boolean): string {
	if (!isMistral) return id;

	const requiredLength = 9;
	// Remove everything Mistral would reject.
	const stripped = id.replace(/[^a-zA-Z0-9]/g, "");
	if (stripped.length === requiredLength) return stripped;

	if (stripped.length < requiredLength) {
		const filler = "ABCDEFGHI";
		return stripped + filler.slice(0, requiredLength - stripped.length);
	}

	// 32-bit FNV-1a over the *original* id so that ids differing only in
	// stripped characters, or only past the ninth character, stay distinct.
	let hash = 0x811c9dc5;
	for (let i = 0; i < id.length; i++) {
		hash ^= id.charCodeAt(i);
		hash = Math.imul(hash, 0x01000193);
	}
	// An unsigned 32-bit value needs at most 7 base-36 digits.
	const digest = (hash >>> 0).toString(36).padStart(7, "0");
	return stripped.slice(0, requiredLength - digest.length) + digest;
}
|
|
95
|
+
/**
 * Pick the model id to send on the wire for an OpenAI-completions request.
 *
 * Firepass and Fireworks ids go through their dedicated wire-id converters,
 * OpenRouter ids may receive the routing variant from `options`, and every
 * other provider uses `model.id` verbatim.
 *
 * NanoGPT note: direct DeepSeek model ids are intentionally left on the
 * default tools-capable route, i.e. `:tools` is NOT appended. With `:tools`,
 * NanoGPT parses tool calls server-side on the upstream DeepSeek stream and
 * returns a 502 with `code: "malformed_tool_call"` for more complex tool
 * schemas (issue #1488). The default route forwards `delta.content`
 * (including any leaked DSML envelope), which `StreamMarkupHealing` turns
 * into a structured call on the client.
 */
function resolveOpenAICompletionsModelId(
	model: Model<"openai-completions">,
	options: OpenAICompletionsOptions | undefined,
): string {
	switch (model.provider) {
		case "firepass":
			return toFirepassWireModelId(model.id);
		case "fireworks":
			return toFireworksWireModelId(model.id);
		case "openrouter":
			return applyOpenRouterRoutingVariant(model.id, options?.openrouterVariant);
		default:
			return model.id;
	}
}
|
|
111
|
+
|
|
112
|
+
/**
 * Flatten an OpenAI-compatible streaming `delta.content` value into plain text.
 *
 * Providers usually stream `delta.content` as a string, but some (notably
 * Mistral Medium 3.5 / `mistral-medium-2604`) send an array of typed parts
 * such as `[{ type: "text", text: "Hello" }]`. Appending such an array to a
 * string coerces it and yields the literal `[object Object]` text reported in
 * issue #911.
 *
 * Accepted shapes:
 * - a string, returned as-is;
 * - an array whose string entries and text parts are concatenated in order;
 * - a single text part object.
 *
 * A part contributes only when its `type` is absent or `"text"` and its `text`
 * is a string. Everything else is dropped so no object sigil ever reaches
 * visible output.
 *
 * @param content - Raw `delta.content` from a stream chunk.
 * @returns The concatenated text, or `""` when nothing textual is present.
 */
function normalizeStreamingContentText(content: unknown): string {
	// Text held by one typed part, or "" when the value is not a text part.
	const readTextPart = (candidate: unknown): string => {
		if (typeof candidate !== "object" || candidate === null) return "";
		const { type, text } = candidate as { type?: unknown; text?: unknown };
		const isTextual = type === undefined || type === "text";
		return isTextual && typeof text === "string" ? text : "";
	};

	if (typeof content === "string") return content;
	if (!Array.isArray(content)) return readTextPart(content);
	return content.map(entry => (typeof entry === "string" ? entry : readTextPart(entry))).join("");
}
|
|
147
|
+
|
|
148
|
+
/**
 * Serialize tool-call arguments to JSON text that always encodes an object.
 *
 * - A non-array object is stringified.
 * - A string is trimmed and parsed; it is re-serialized only when it parses
 *   to a non-array object.
 * - Anything else (arrays, primitives, `null`, blank or unparseable strings,
 *   values that cannot be stringified) yields `"{}"`.
 *
 * The stringified result is also checked: a custom `toJSON` can make
 * `JSON.stringify` return a non-object encoding (a `Date` becomes a quoted
 * string) or `undefined`, and neither may leak out as tool arguments.
 *
 * @param value - Arguments as an object or as JSON text.
 * @returns JSON object text, `"{}"` as the fallback.
 */
function serializeToolArguments(value: unknown): string {
	const emptyObject = "{}";

	let candidate: unknown = value;
	if (typeof value === "string") {
		const trimmed = value.trim();
		if (trimmed.length === 0) return emptyObject;
		try {
			candidate = JSON.parse(trimmed);
		} catch {
			// Not JSON at all: fall back rather than forwarding raw text.
			return emptyObject;
		}
	}

	if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) return emptyObject;

	try {
		// Typed wider than the lib signature: stringify really can return undefined.
		const json: string | undefined = JSON.stringify(candidate);
		return typeof json === "string" && json.startsWith("{") ? json : emptyObject;
	} catch {
		// Cycles, BigInt members, throwing toJSON, ...
		return emptyObject;
	}
}
|
|
171
|
+
|
|
172
|
+
/**
 * Report whether the conversation already contains tool traffic.
 *
 * Needed because Anthropic (via proxy) requires the `tools` param to be
 * present whenever the messages include tool calls or tool-role messages.
 *
 * @param messages - Conversation history to scan.
 * @returns `true` if any message is a tool result, or any assistant message
 *   carries at least one `toolCall` block; `false` otherwise.
 */
function hasToolHistory(messages: Message[]): boolean {
	return messages.some(message => {
		switch (message.role) {
			case "toolResult":
				return true;
			case "assistant":
				return message.content.some(part => part.type === "toolCall");
			default:
				return false;
		}
	});
}
|
|
190
|
+
/**
 * Tell "real progress" stream chunks apart from keepalives, role-only
 * preambles, and empty `{choices:[]}` no-ops that some OpenAI-compatible
 * endpoints emit.
 *
 * Without this filter every keepalive resets `iterateWithIdleTimeout`'s
 * deadline, so a provider that streams nothing but pings keeps the watchdog
 * asleep indefinitely — observed against z.ai/GLM via OpenRouter, where a
 * subagent stalled for hours with no error surfaced.
 *
 * A chunk is progress when it carries any of:
 * - top-level `usage`, or `usage` / `finish_reason` on the first choice;
 * - non-empty `delta.content`: a non-empty string, a non-empty array of parts,
 *   or a single text part object with non-empty `text` (the same single-part
 *   shape `normalizeStreamingContentText` accepts);
 * - a non-empty `delta.tool_calls` array;
 * - a non-empty string in `delta.reasoning`, `delta.reasoning_content`,
 *   `delta.reasoning_text`, or `delta.refusal`.
 *
 * Role-only `delta: { role: "assistant" }` preambles do NOT count, so the
 * (longer) first-event timeout keeps governing until real output appears.
 * Only the first entry of `choices` is inspected.
 *
 * @param chunk - One parsed stream chunk of unknown shape.
 * @returns `true` when the chunk represents model progress.
 */
export function isOpenAICompletionsProgressChunk(chunk: unknown): boolean {
	if (!chunk || typeof chunk !== "object") return false;
	const record = chunk as {
		usage?: unknown;
		choices?: ReadonlyArray<{
			finish_reason?: unknown;
			usage?: unknown;
			delta?: {
				content?: unknown;
				tool_calls?: unknown;
				reasoning?: unknown;
				reasoning_content?: unknown;
				reasoning_text?: unknown;
				refusal?: unknown;
			};
		}>;
	};
	if (record.usage) return true;

	const choice = Array.isArray(record.choices) ? record.choices[0] : undefined;
	if (!choice) return false;
	if (choice.finish_reason || choice.usage) return true;

	const delta = choice.delta;
	if (!delta) return false;

	const hasText = (candidate: unknown): boolean => typeof candidate === "string" && candidate.length > 0;

	const content = delta.content;
	if (typeof content === "string") {
		if (content.length > 0) return true;
	} else if (Array.isArray(content)) {
		if (content.length > 0) return true;
	} else if (content && typeof content === "object") {
		// Single typed part, e.g. `{ type: "text", text: "Hello" }`.
		const part = content as { type?: unknown; text?: unknown };
		if ((part.type === undefined || part.type === "text") && hasText(part.text)) return true;
	}

	if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) return true;

	return [delta.reasoning, delta.reasoning_content, delta.reasoning_text, delta.refusal].some(hasText);
}
|
|
236
|
+
|
|
237
|
+
export interface OpenAICompletionsOptions extends StreamOptions {
	/**
	 * OpenRouter routing-variant suffix (commonly `"nitro"`, `"floor"`, `"online"`,
	 * `"exacto"`). It is appended to the model id only when the id carries no variant
	 * yet, e.g. `anthropic/claude-haiku-latest` becomes `…:nitro`. An id that already
	 * has a colon-suffix after its last provider segment — typed explicitly in the
	 * selector or baked into a catalog entry — is left alone and this option is ignored.
	 */
	openrouterVariant?: string;
	/** Requested reasoning effort; one of the five levels listed in the type. */
	reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
	/**
	 * Force reasoning off where the endpoint supports that; on generic effort-based
	 * endpoints, request the lowest effort instead.
	 */
	disableReasoning?: boolean;
	/** Tool-choice setting for the request. */
	toolChoice?: ToolChoice;
	/** Service tier for the request. */
	serviceTier?: ServiceTier;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Streaming chat-completion request params from the OpenAI SDK, intersected with a
 * set of additional optional fields.
 * NOTE(review): the extra fields look like vendor-specific request extensions filled
 * in by the param builder — confirm against `buildParams`.
 */
type OpenAICompletionsParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
	// Reasoning / thinking switches, in the several shapes declared for them.
	reasoning?: { effort?: string } | { enabled: false };
	thinking?: { type: "enabled" | "disabled" };
	enable_thinking?: boolean;
	chat_template_kwargs?: { enable_thinking: boolean };

	// Numeric sampling fields.
	top_k?: number;
	min_p?: number;
	repetition_penalty?: number;

	// Routing fields.
	provider?: OpenAICompat["openRouterRouting"];
	providerOptions?: { gateway?: { only?: string[]; order?: string[] } };
};
|
|
265
|
+
|
|
266
|
+
/** Tool strict mode reported for a built tool list (see `BuiltOpenAICompletionTools.toolStrictMode`). */
type AppliedToolStrictMode = "mixed" | "all_strict" | "none";
|
|
267
|
+
/** Strict-mode override: any `ResolvedOpenAICompat["toolStrictMode"]` value except `"mixed"`, or `undefined`. */
type ToolStrictModeOverride = Exclude<ResolvedOpenAICompat["toolStrictMode"], "mixed"> | undefined;
|
|
268
|
+
|
|
269
|
+
/** A built tool list together with the strict mode reported for it. */
type BuiltOpenAICompletionTools = {
	/** Strict mode reported alongside the tool definitions. */
	toolStrictMode: AppliedToolStrictMode;
	/** Tool definitions typed as the OpenAI SDK's chat-completion tools. */
	tools: OpenAI.Chat.Completions.ChatCompletionTool[];
};
|
|
273
|
+
|
|
274
|
+
/** Key prefix for this provider's entries in the provider-session-state map. */
const OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX = "openai-completions:";
|
|
275
|
+
|
|
276
|
+
/** Base provider session state extended with a flag that turns strict tools off. */
type OpenAICompletionsProviderSessionState = ProviderSessionState & {
	/** When true, later requests in the session are built with tool strict mode `"none"`. */
	strictToolsDisabled: boolean;
};

/**
 * Builds a fresh session-state record with strict tools enabled.
 *
 * @returns A state object whose `close()` resets `strictToolsDisabled` back to `false`.
 */
function createOpenAICompletionsProviderSessionState(): OpenAICompletionsProviderSessionState {
	const sessionState: OpenAICompletionsProviderSessionState = {
		strictToolsDisabled: false,
		close() {
			// Refers to the closed-over record rather than `this`, so a detached call still works.
			sessionState.strictToolsDisabled = false;
		},
	};
	return sessionState;
}
|
|
289
|
+
|
|
290
|
+
/**
 * Looks up — or lazily creates and registers — the session state for one
 * provider / base URL / model combination.
 *
 * @param model Model whose `provider` and `id` form part of the map key.
 * @param baseUrl Base URL component of the key; `undefined` is keyed as an empty string.
 * @param providerSessionState Shared state map; when absent no state is tracked.
 * @returns The existing or newly stored state, or `undefined` when no map was supplied.
 */
function getOpenAICompletionsProviderSessionState(
	model: Model<"openai-completions">,
	baseUrl: string | undefined,
	providerSessionState: Map<string, ProviderSessionState> | undefined,
): OpenAICompletionsProviderSessionState | undefined {
	if (!providerSessionState) return undefined;

	const urlPart = baseUrl ?? "";
	const cacheKey = `${OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX}${model.provider}:${urlPart}:${model.id}`;

	let sessionState = providerSessionState.get(cacheKey) as OpenAICompletionsProviderSessionState | undefined;
	if (!sessionState) {
		// First request for this key: register a fresh record so later calls share it.
		sessionState = createOpenAICompletionsProviderSessionState();
		providerSessionState.set(cacheKey, sessionState);
	}
	return sessionState;
}
|
|
303
|
+
|
|
304
|
+
/** True when the model's provider is `openrouter` and its id starts with `anthropic/` (case-insensitive). */
function isOpenRouterAnthropicModel(model: Model<"openai-completions">): boolean {
	if (model.provider !== "openrouter") return false;
	const normalizedId = model.id.toLowerCase();
	return normalizedId.startsWith("anthropic/");
}
|
|
307
|
+
|
|
308
|
+
/**
 * Append an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`, `:exacto`)
 * to a model id when no explicit variant is already present. A variant is considered
 * "already present" when `modelId` contains a colon after the last `/` separator —
 * which covers both user-typed selectors (`anthropic/claude-haiku:nitro`) and catalog
 * entries that bake the variant in (`deepseek/deepseek-v3.1-terminus:exacto`).
 *
 * The variant is normalized before use: surrounding whitespace and any leading colons
 * are dropped, so `"nitro"`, `" nitro "` and `":nitro"` all yield `model:nitro` rather
 * than a malformed `model::nitro`. A variant that is empty after normalization is
 * treated like `undefined`.
 *
 * Exported for unit testing.
 *
 * @param modelId Model id, optionally with provider segments separated by `/`.
 * @param variant Variant name to append, with or without a leading colon.
 * @returns `modelId` unchanged, or `modelId` with `:<variant>` appended.
 */
export function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string {
	const suffix = variant?.trim().replace(/^:+/, "").trim();
	if (!suffix) return modelId;
	const lastSlash = modelId.lastIndexOf("/");
	const lastColon = modelId.lastIndexOf(":");
	// Existing `:suffix` after the last path segment — leave the id untouched.
	// A colon that only appears before the last `/` belongs to an earlier segment.
	if (lastColon > lastSlash) return modelId;
	return `${modelId}:${suffix}`;
}
|
|
325
|
+
|
|
326
|
+
/**
 * Detects an HTTP 400 whose text mentions `invalid_request_error`, `compiled grammar`
 * and `too large` (all three, case-insensitive).
 *
 * @param error The thrown value; its `message` is searched when it is an `Error`.
 * @param capturedErrorResponse Captured HTTP error response, used as a fallback for the
 *   status and as an additional text source via `bodyText`.
 * @returns `true` only when the status is 400 and every marker is present.
 */
function isCompiledGrammarTooLargeStrictError(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
): boolean {
	// The status extracted from the error wins; the captured response is only a fallback.
	if ((extractHttpStatusFromError(error) ?? capturedErrorResponse?.status) !== 400) return false;

	const fragments: string[] = [];
	for (const candidate of [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText]) {
		// Skip missing and whitespace-only sources.
		if (typeof candidate === "string" && candidate.trim().length > 0) fragments.push(candidate);
	}
	const haystack = fragments.join("\n");

	const requiredMarkers = [/invalid_request_error/i, /compiled grammar/i, /too large/i];
	return requiredMarkers.every(marker => marker.test(haystack));
}
|
|
341
|
+
|
|
342
|
+
// DeepSeek models leak chat-template special tokens (e.g. `<|tool_calls_begin|>`,
// `<|DSML|tool_calls|>`) into visible `content` deltas when hosted behind providers
// (such as NVIDIA NIM) that don't strip them server-side. The structured `tool_calls`
// payload is still emitted correctly — we only need to filter the leaked markers from
// user-visible text.
//
// Tokens are delimited by either fullwidth vertical lines (U+FF5C) or ASCII pipes
// (U+007C). The fullwidth form is written as a `\uFF5C` escape everywhere below so the
// two delimiters stay distinguishable even if this file passes through tooling that
// folds fullwidth characters into ASCII: with both alternatives spelled `|`, the
// delimiter group would also match the empty string and plain `<identifier>` text
// would be stripped.
//
// Body is restricted to identifier-like chars (with the DeepSeek tokenizer's `▁`,
// U+2581), capped at a sane length to avoid swallowing legitimate angle-bracket text.
const DEEPSEEK_SPECIAL_TOKEN_REGEX = /<(?:\uFF5C|\|)[A-Za-z0-9_.\uFF5C|\u2581]{1,64}(?:\uFF5C|\|)>/g;
const DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX = /^\s*<(?:\uFF5C|\|)[A-Za-z0-9_.\uFF5C|\u2581]{1,64}(?:\uFF5C|\|)>/;
const DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX = /<(?:\uFF5C|\|)[A-Za-z0-9_.\uFF5C|\u2581]{1,64}(?:\uFF5C|\|)>\s*$/;
const DEEPSEEK_OPEN_DELIMS = ["<\uFF5C", "<|"] as const;

/**
 * Removes every complete DeepSeek special token from `text`.
 *
 * When the input began with a token (optionally preceded by whitespace) the result's
 * leading whitespace is trimmed as well; likewise for a token at the end. Whitespace
 * around tokens in the middle of the text is left as-is.
 *
 * @param text Text that may contain leaked special tokens.
 * @returns `text` itself when nothing matched, otherwise the cleaned text.
 */
function stripDeepseekSpecialTokens(text: string): string {
	const stripped = text.replace(DEEPSEEK_SPECIAL_TOKEN_REGEX, "");
	if (stripped === text) return text;

	let normalized = stripped;
	// The edge checks run against the ORIGINAL text: only whitespace that bordered a
	// removed token is trimmed, never whitespace the model emitted on its own.
	if (DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX.test(text)) normalized = normalized.replace(/^\s+/u, "");
	if (DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX.test(text)) normalized = normalized.replace(/\s+$/u, "");
	return normalized;
}

/**
 * Finds the trailing part of `text` that might be the beginning of a special token
 * and therefore should be held back until the next chunk arrives.
 *
 * - If the last opening delimiter (`<` + fullwidth or ASCII pipe) has not been closed
 *   by a matching pipe + `>`, everything from that delimiter on is returned, unless it
 *   is longer than 256 characters (a stray opener in normal prose must not grow the
 *   held-back buffer without bound).
 * - Otherwise a solo trailing `<` is returned in case it is the start of a new token.
 *   This also applies when an earlier token in the text is already closed or the
 *   opener was given up on as too long.
 *
 * @param text Buffered text to inspect.
 * @returns The suffix to hold back, or `""` when everything can be flushed.
 */
function getTrailingPartialDeepseekToken(text: string): string {
	const soloAngle = text.endsWith("<") ? "<" : "";

	let openIdx = -1;
	for (const delim of DEEPSEEK_OPEN_DELIMS) {
		openIdx = Math.max(openIdx, text.lastIndexOf(delim));
	}
	if (openIdx === -1) return soloAngle;

	const tail = text.slice(openIdx);
	// The last opener is already closed, so only a dangling `<` can still be partial.
	if (tail.includes("\uFF5C>") || tail.includes("|>")) return soloAngle;
	// Cap the held-back length so a stray `<|` in normal prose can't grow unboundedly.
	if (tail.length > 256) return soloAngle;
	return tail;
}
|
|
382
|
+
/** Error message used when the stream produces no event before the first-event timeout. */
const OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE = "OpenAI completions stream timed out while waiting for the first event";
|
|
384
|
+
|
|
385
|
+
/** Stream idle-timeout fallback for GLM coding-plan models: 600 000 ms (10 minutes). */
const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
|
|
386
|
+
/** Matches model ids starting with `glm-5` followed by `.`, `-`, or end of string (case-insensitive). */
const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;
|
|
387
|
+
|
|
388
|
+
/**
 * Returns the widened OpenAI stream watchdog floor for slow GLM coding-plan reasoning models.
 *
 * The widened value applies only when the model id matches `GLM_CODING_PLAN_MODEL_PATTERN`
 * and the model is served either by the `zhipu-coding-plan` / `zai` providers or from a
 * base URL containing `open.bigmodel.cn` or `api.z.ai`.
 *
 * @param model Model whose `id`, `provider` and `baseUrl` are inspected.
 * @returns `GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS` for a match, otherwise `undefined`.
 */
export function getOpenAICompletionsStreamIdleTimeoutFallbackMs(
	model: Model<"openai-completions">,
): number | undefined {
	const isGlmCodingPlanModel = GLM_CODING_PLAN_MODEL_PATTERN.test(model.id);
	if (!isGlmCodingPlanModel) return undefined;

	// Provider match is checked first; the base URL is only read when it fails.
	const codingPlanProviders: readonly string[] = ["zhipu-coding-plan", "zai"];
	if (codingPlanProviders.includes(model.provider)) {
		return GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS;
	}

	const normalizedBaseUrl = model.baseUrl.toLowerCase();
	const codingPlanHosts = ["open.bigmodel.cn", "api.z.ai"];
	const servedFromCodingPlanHost = codingPlanHosts.some(host => normalizedBaseUrl.includes(host));
	return servedFromCodingPlanHost ? GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS : undefined;
}
|
|
403
|
+
|
|
404
|
+
export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
405
|
+
model: Model<"openai-completions">,
|
|
406
|
+
context: Context,
|
|
407
|
+
options?: OpenAICompletionsOptions,
|
|
408
|
+
): AssistantMessageEventStream => {
|
|
409
|
+
const stream = new AssistantMessageEventStream();
|
|
410
|
+
|
|
411
|
+
(async () => {
|
|
412
|
+
const startTime = Date.now();
|
|
413
|
+
let firstTokenTime: number | undefined;
|
|
414
|
+
let getCapturedErrorResponse: (() => CapturedHttpErrorResponse | undefined) | undefined;
|
|
415
|
+
|
|
416
|
+
const output: AssistantMessage = createInitialResponsesAssistantMessage(model.api, model.provider, model.id);
|
|
417
|
+
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
418
|
+
const abortTracker = createAbortSourceTracker(options?.signal);
|
|
419
|
+
const firstEventTimeoutAbortError = new Error(OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE);
|
|
420
|
+
const { requestAbortController, requestSignal } = abortTracker;
|
|
421
|
+
|
|
422
|
+
try {
|
|
423
|
+
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
|
|
424
|
+
const idleTimeoutFallbackMs = getOpenAICompletionsStreamIdleTimeoutFallbackMs(model);
|
|
425
|
+
const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs(idleTimeoutFallbackMs);
|
|
426
|
+
const firstEventTimeoutMs =
|
|
427
|
+
options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
|
|
428
|
+
const requestTimeoutMs =
|
|
429
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
|
|
430
|
+
const {
|
|
431
|
+
client,
|
|
432
|
+
copilotPremiumRequests,
|
|
433
|
+
baseUrl,
|
|
434
|
+
requestHeaders,
|
|
435
|
+
getCapturedErrorResponse: captureErrorResponse,
|
|
436
|
+
clearCapturedErrorResponse,
|
|
437
|
+
} = await createClient(
|
|
438
|
+
model,
|
|
439
|
+
context,
|
|
440
|
+
apiKey,
|
|
441
|
+
options?.headers,
|
|
442
|
+
options?.initiatorOverride,
|
|
443
|
+
options?.onSseEvent,
|
|
444
|
+
options?.fetch,
|
|
445
|
+
);
|
|
446
|
+
const premiumRequestsTotal = copilotPremiumRequests;
|
|
447
|
+
getCapturedErrorResponse = captureErrorResponse;
|
|
448
|
+
let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
|
|
449
|
+
const providerSessionState = getOpenAICompletionsProviderSessionState(
|
|
450
|
+
model,
|
|
451
|
+
baseUrl,
|
|
452
|
+
options?.providerSessionState,
|
|
453
|
+
);
|
|
454
|
+
let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
|
|
455
|
+
let strictFallbackErrorMessage: string | undefined;
|
|
456
|
+
const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
|
|
457
|
+
clearCapturedErrorResponse();
|
|
458
|
+
const effectiveToolStrictModeOverride = disableStrictTools ? "none" : toolStrictModeOverride;
|
|
459
|
+
const { params, toolStrictMode } = buildParams(
|
|
460
|
+
model,
|
|
461
|
+
context,
|
|
462
|
+
options,
|
|
463
|
+
baseUrl,
|
|
464
|
+
effectiveToolStrictModeOverride,
|
|
465
|
+
);
|
|
466
|
+
appliedToolStrictMode = toolStrictMode;
|
|
467
|
+
options?.onPayload?.(params);
|
|
468
|
+
rawRequestDump = {
|
|
469
|
+
provider: model.provider,
|
|
470
|
+
api: output.api,
|
|
471
|
+
model: model.id,
|
|
472
|
+
method: "POST",
|
|
473
|
+
url: `${baseUrl}/chat/completions`,
|
|
474
|
+
headers: requestHeaders,
|
|
475
|
+
body: params,
|
|
476
|
+
};
|
|
477
|
+
const requestOptions =
|
|
478
|
+
requestTimeoutMs === undefined
|
|
479
|
+
? { signal: requestSignal }
|
|
480
|
+
: { signal: requestSignal, timeout: requestTimeoutMs };
|
|
481
|
+
let requestTimeout: NodeJS.Timeout | undefined;
|
|
482
|
+
if (requestTimeoutMs !== undefined) {
|
|
483
|
+
requestTimeout = setTimeout(
|
|
484
|
+
() => abortTracker.abortLocally(firstEventTimeoutAbortError),
|
|
485
|
+
requestTimeoutMs,
|
|
486
|
+
);
|
|
487
|
+
}
|
|
488
|
+
try {
|
|
489
|
+
const { data, response, request_id } = await client.chat.completions
|
|
490
|
+
.create(params, requestOptions)
|
|
491
|
+
.withResponse();
|
|
492
|
+
await notifyProviderResponse(options, response, model, request_id);
|
|
493
|
+
return data;
|
|
494
|
+
} catch (error) {
|
|
495
|
+
if (error instanceof OpenAIConnectionTimeoutError && !abortTracker.wasCallerAbort()) {
|
|
496
|
+
throw firstEventTimeoutAbortError;
|
|
497
|
+
}
|
|
498
|
+
throw error;
|
|
499
|
+
} finally {
|
|
500
|
+
if (requestTimeout !== undefined) clearTimeout(requestTimeout);
|
|
501
|
+
}
|
|
502
|
+
};
|
|
503
|
+
let openaiStream: AsyncIterable<ChatCompletionChunk>;
|
|
504
|
+
try {
|
|
505
|
+
openaiStream = await callWithCopilotModelRetry(() => createCompletionsStream(), {
|
|
506
|
+
provider: model.provider,
|
|
507
|
+
signal: requestSignal,
|
|
508
|
+
});
|
|
509
|
+
} catch (error) {
|
|
510
|
+
const capturedErrorResponse = getCapturedErrorResponse();
|
|
511
|
+
if (
|
|
512
|
+
isOpenRouterAnthropicModel(model) &&
|
|
513
|
+
!disableStrictTools &&
|
|
514
|
+
isCompiledGrammarTooLargeStrictError(error, capturedErrorResponse)
|
|
515
|
+
) {
|
|
516
|
+
strictFallbackErrorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
|
|
517
|
+
output.errorMessage = strictFallbackErrorMessage;
|
|
518
|
+
if (providerSessionState) {
|
|
519
|
+
providerSessionState.strictToolsDisabled = true;
|
|
520
|
+
}
|
|
521
|
+
disableStrictTools = true;
|
|
522
|
+
openaiStream = await createCompletionsStream("none");
|
|
523
|
+
} else {
|
|
524
|
+
if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
|
|
525
|
+
throw error;
|
|
526
|
+
}
|
|
527
|
+
openaiStream = await createCompletionsStream("none");
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
if (premiumRequestsTotal !== undefined) {
|
|
531
|
+
output.usage.premiumRequests = premiumRequestsTotal;
|
|
532
|
+
}
|
|
533
|
+
stream.push({ type: "start", partial: output });
|
|
534
|
+
|
|
535
|
+
const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
|
|
536
|
+
// Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
|
|
537
|
+
// native API) leak chat-template tool-call markers in `delta.content` even
|
|
538
|
+
// though tool calls are also surfaced structurally. Strip the leaked markers
|
|
539
|
+
// so users don't see raw `<|...|>` tokens.
|
|
540
|
+
const stripDeepseekChatTemplateTokens =
|
|
541
|
+
/deepseek/i.test(model.id) && (model.provider === "nvidia" || model.provider === "deepseek");
|
|
542
|
+
type ToolCallStreamBlock = ToolCall & { partialArgs?: string; streamIndex?: number; lastParseLen?: number };
|
|
543
|
+
type OpenAIStreamBlock = TextContent | ThinkingContent | ToolCallStreamBlock;
|
|
544
|
+
const pendingToolCallBlocks: ToolCallStreamBlock[] = [];
|
|
545
|
+
const toolCallBlockByIndex = new Map<number, ToolCallStreamBlock>();
|
|
546
|
+
let currentBlock: OpenAIStreamBlock | undefined;
|
|
547
|
+
const blockIndex = (block: OpenAIStreamBlock | undefined): number => {
|
|
548
|
+
if (!block) return Math.max(0, output.content.length - 1);
|
|
549
|
+
return output.content.indexOf(block);
|
|
550
|
+
};
|
|
551
|
+
const finishToolCallBlock = (block: ToolCallStreamBlock): void => {
|
|
552
|
+
if (block.partialArgs === undefined) return;
|
|
553
|
+
const contentIndex = blockIndex(block);
|
|
554
|
+
if (contentIndex < 0) return;
|
|
555
|
+
block.arguments = parseStreamingJson(block.partialArgs);
|
|
556
|
+
delete block.partialArgs;
|
|
557
|
+
delete block.lastParseLen;
|
|
558
|
+
if (block.streamIndex !== undefined) {
|
|
559
|
+
toolCallBlockByIndex.delete(block.streamIndex);
|
|
560
|
+
delete block.streamIndex;
|
|
561
|
+
}
|
|
562
|
+
const pendingIndex = pendingToolCallBlocks.indexOf(block);
|
|
563
|
+
if (pendingIndex >= 0) pendingToolCallBlocks.splice(pendingIndex, 1);
|
|
564
|
+
stream.push({ type: "toolcall_end", contentIndex, toolCall: block, partial: output });
|
|
565
|
+
};
|
|
566
|
+
const finishPendingToolCallBlocks = (): void => {
|
|
567
|
+
for (const block of [...pendingToolCallBlocks]) {
|
|
568
|
+
finishToolCallBlock(block);
|
|
569
|
+
}
|
|
570
|
+
};
|
|
571
|
+
const finishCurrentBlock = (block: OpenAIStreamBlock | undefined): void => {
|
|
572
|
+
if (!block) return;
|
|
573
|
+
const contentIndex = blockIndex(block);
|
|
574
|
+
if (contentIndex < 0) return;
|
|
575
|
+
if (block.type === "text") {
|
|
576
|
+
stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
|
|
577
|
+
return;
|
|
578
|
+
}
|
|
579
|
+
if (block.type === "thinking") {
|
|
580
|
+
stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
|
|
581
|
+
return;
|
|
582
|
+
}
|
|
583
|
+
finishToolCallBlock(block);
|
|
584
|
+
};
|
|
585
|
+
const appendText = (
|
|
586
|
+
message: AssistantMessage,
|
|
587
|
+
eventStream: AssistantMessageEventStream,
|
|
588
|
+
text: string,
|
|
589
|
+
): void => {
|
|
590
|
+
if (currentBlock?.type !== "text") {
|
|
591
|
+
finishCurrentBlock(currentBlock);
|
|
592
|
+
currentBlock = { type: "text", text: "" };
|
|
593
|
+
message.content.push(currentBlock);
|
|
594
|
+
eventStream.push({ type: "text_start", contentIndex: blockIndex(currentBlock), partial: message });
|
|
595
|
+
}
|
|
596
|
+
currentBlock.text += text;
|
|
597
|
+
eventStream.push({
|
|
598
|
+
type: "text_delta",
|
|
599
|
+
contentIndex: blockIndex(currentBlock),
|
|
600
|
+
delta: text,
|
|
601
|
+
partial: message,
|
|
602
|
+
});
|
|
603
|
+
};
|
|
604
|
+
const appendThinking = (
|
|
605
|
+
message: AssistantMessage,
|
|
606
|
+
eventStream: AssistantMessageEventStream,
|
|
607
|
+
thinking: string,
|
|
608
|
+
signature?: string,
|
|
609
|
+
): void => {
|
|
610
|
+
if (
|
|
611
|
+
currentBlock?.type !== "thinking" ||
|
|
612
|
+
(signature !== undefined && currentBlock.thinkingSignature !== signature)
|
|
613
|
+
) {
|
|
614
|
+
finishCurrentBlock(currentBlock);
|
|
615
|
+
currentBlock = { type: "thinking", thinking: "", thinkingSignature: signature };
|
|
616
|
+
message.content.push(currentBlock);
|
|
617
|
+
eventStream.push({
|
|
618
|
+
type: "thinking_start",
|
|
619
|
+
contentIndex: blockIndex(currentBlock),
|
|
620
|
+
partial: message,
|
|
621
|
+
});
|
|
622
|
+
}
|
|
623
|
+
if (signature !== undefined && !currentBlock.thinkingSignature) {
|
|
624
|
+
currentBlock.thinkingSignature = signature;
|
|
625
|
+
}
|
|
626
|
+
currentBlock.thinking += thinking;
|
|
627
|
+
eventStream.push({
|
|
628
|
+
type: "thinking_delta",
|
|
629
|
+
contentIndex: blockIndex(currentBlock),
|
|
630
|
+
delta: thinking,
|
|
631
|
+
partial: message,
|
|
632
|
+
});
|
|
633
|
+
};
|
|
634
|
+
|
|
635
|
+
const appendTextDelta = (text: string): void => {
|
|
636
|
+
if (!text) return;
|
|
637
|
+
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
638
|
+
appendText(output, stream, text);
|
|
639
|
+
};
|
|
640
|
+
const appendThinkingDelta = (thinking: string, signature?: string): void => {
|
|
641
|
+
if (!thinking) return;
|
|
642
|
+
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
643
|
+
appendThinking(output, stream, thinking, signature);
|
|
644
|
+
};
|
|
645
|
+
|
|
646
|
+
let deepseekStripBuffer = "";
|
|
647
|
+
const flushDeepseekStripBuffer = (final: boolean): void => {
|
|
648
|
+
if (deepseekStripBuffer.length === 0) return;
|
|
649
|
+
let flushable: string;
|
|
650
|
+
if (final) {
|
|
651
|
+
flushable = deepseekStripBuffer;
|
|
652
|
+
deepseekStripBuffer = "";
|
|
653
|
+
} else {
|
|
654
|
+
const trailing = getTrailingPartialDeepseekToken(deepseekStripBuffer);
|
|
655
|
+
flushable = deepseekStripBuffer.slice(0, deepseekStripBuffer.length - trailing.length);
|
|
656
|
+
deepseekStripBuffer = trailing;
|
|
657
|
+
}
|
|
658
|
+
const stripped = stripDeepseekSpecialTokens(flushable);
|
|
659
|
+
if (stripped && (stripped === flushable || stripped.trim().length > 0)) appendTextDelta(stripped);
|
|
660
|
+
};
|
|
661
|
+
const appendProcessedText = (processedText: string): void => {
|
|
662
|
+
if (processedText.length === 0) return;
|
|
663
|
+
if (stripDeepseekChatTemplateTokens) {
|
|
664
|
+
deepseekStripBuffer += processedText;
|
|
665
|
+
flushDeepseekStripBuffer(false);
|
|
666
|
+
} else {
|
|
667
|
+
appendTextDelta(processedText);
|
|
668
|
+
}
|
|
669
|
+
};
|
|
670
|
+
|
|
671
|
+
const streamMarkupHealingPattern = getStreamMarkupHealingPattern(model.provider, model.id, {
|
|
672
|
+
parseThinkingTags: parseMiniMaxThinkTags,
|
|
673
|
+
});
|
|
674
|
+
const streamMarkupHealing = streamMarkupHealingPattern
|
|
675
|
+
? new StreamMarkupHealing({ pattern: streamMarkupHealingPattern })
|
|
676
|
+
: undefined;
|
|
677
|
+
let healedToolCallEmitted = false;
|
|
678
|
+
const emitHealedToolCall = (call: HealedToolCall): void => {
|
|
679
|
+
finishCurrentBlock(currentBlock);
|
|
680
|
+
const block: ToolCall & { partialArgs: string } = {
|
|
681
|
+
type: "toolCall",
|
|
682
|
+
id: call.id,
|
|
683
|
+
name: call.name,
|
|
684
|
+
arguments: {},
|
|
685
|
+
partialArgs: call.arguments,
|
|
686
|
+
};
|
|
687
|
+
block.arguments = parseStreamingJson(call.arguments);
|
|
688
|
+
currentBlock = block;
|
|
689
|
+
output.content.push(block);
|
|
690
|
+
stream.push({ type: "toolcall_start", contentIndex: blockIndex(block), partial: output });
|
|
691
|
+
stream.push({
|
|
692
|
+
type: "toolcall_delta",
|
|
693
|
+
contentIndex: blockIndex(block),
|
|
694
|
+
delta: call.arguments,
|
|
695
|
+
partial: output,
|
|
696
|
+
});
|
|
697
|
+
finishCurrentBlock(block);
|
|
698
|
+
currentBlock = undefined;
|
|
699
|
+
healedToolCallEmitted = true;
|
|
700
|
+
};
|
|
701
|
+
const emitHealingEvent = (event: StreamMarkupHealingEvent): void => {
|
|
702
|
+
if (event.type === "text") {
|
|
703
|
+
appendProcessedText(event.text);
|
|
704
|
+
} else if (event.type === "thinking") {
|
|
705
|
+
appendThinkingDelta(event.thinking);
|
|
706
|
+
} else {
|
|
707
|
+
emitHealedToolCall(event.call);
|
|
708
|
+
}
|
|
709
|
+
};
|
|
710
|
+
const flushHealedToolCalls = (): void => {
|
|
711
|
+
if (!streamMarkupHealing) return;
|
|
712
|
+
const calls = streamMarkupHealing.drainCompleted();
|
|
713
|
+
for (const call of calls) emitHealedToolCall(call);
|
|
714
|
+
};
|
|
715
|
+
|
|
716
|
+
for await (const chunk of iterateWithIdleTimeout(openaiStream, {
|
|
717
|
+
idleTimeoutMs,
|
|
718
|
+
firstItemTimeoutMs: firstEventTimeoutMs,
|
|
719
|
+
firstItemErrorMessage: OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE,
|
|
720
|
+
errorMessage: "OpenAI completions stream stalled while waiting for the next event",
|
|
721
|
+
onIdle: () => requestAbortController.abort(),
|
|
722
|
+
onFirstItemTimeout: () => abortTracker.abortLocally(firstEventTimeoutAbortError),
|
|
723
|
+
abortSignal: options?.signal,
|
|
724
|
+
isProgressItem: isOpenAICompletionsProgressChunk,
|
|
725
|
+
})) {
|
|
726
|
+
if (!chunk || typeof chunk !== "object") continue;
|
|
727
|
+
|
|
728
|
+
// OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
|
|
729
|
+
// and each chunk in a streamed completion carries the same id.
|
|
730
|
+
output.responseId ||= chunk.id;
|
|
731
|
+
|
|
732
|
+
if (chunk.usage) {
|
|
733
|
+
output.usage = parseChunkUsage(chunk.usage, model, premiumRequestsTotal);
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
|
|
737
|
+
if (!choice) continue;
|
|
738
|
+
|
|
739
|
+
if (!chunk.usage) {
|
|
740
|
+
const choiceUsage = getChoiceUsage(choice);
|
|
741
|
+
if (choiceUsage) {
|
|
742
|
+
output.usage = parseChunkUsage(choiceUsage, model, premiumRequestsTotal);
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
if (choice.finish_reason) {
|
|
747
|
+
const finishReasonResult = mapStopReason(choice.finish_reason);
|
|
748
|
+
output.stopReason = finishReasonResult.stopReason;
|
|
749
|
+
if (finishReasonResult.errorMessage) {
|
|
750
|
+
output.errorMessage = finishReasonResult.errorMessage;
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
if (choice.delta) {
|
|
755
|
+
const normalizedDeltaText = normalizeStreamingContentText(choice.delta.content);
|
|
756
|
+
if (normalizedDeltaText.length > 0) {
|
|
757
|
+
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
758
|
+
const hasStructuredToolCalls =
|
|
759
|
+
Array.isArray(choice.delta.tool_calls) && choice.delta.tool_calls.length > 0;
|
|
760
|
+
|
|
761
|
+
if (streamMarkupHealing) {
|
|
762
|
+
if (hasStructuredToolCalls) {
|
|
763
|
+
// Same chunk leaks markers AND carries structured tool_calls.
|
|
764
|
+
// Strip the marker text from visible output, but drop any
|
|
765
|
+
// synthesized calls so the structured payload stays the
|
|
766
|
+
// single source of truth (avoids double-dispatch).
|
|
767
|
+
appendProcessedText(streamMarkupHealing.consumeWithoutCalls(normalizedDeltaText));
|
|
768
|
+
} else {
|
|
769
|
+
for (const event of streamMarkupHealing.feedEvents(normalizedDeltaText)) {
|
|
770
|
+
emitHealingEvent(event);
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
} else {
|
|
774
|
+
appendProcessedText(normalizedDeltaText);
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
// Some endpoints return reasoning in reasoning_content (llama.cpp),
|
|
779
|
+
// or reasoning (other openai compatible endpoints)
|
|
780
|
+
// Use the first non-empty reasoning field to avoid duplication
|
|
781
|
+
// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
|
|
782
|
+
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
783
|
+
let foundReasoningField: string | null = null;
|
|
784
|
+
for (const field of reasoningFields) {
|
|
785
|
+
if (
|
|
786
|
+
(choice.delta as any)[field] !== null &&
|
|
787
|
+
(choice.delta as any)[field] !== undefined &&
|
|
788
|
+
(choice.delta as any)[field].length > 0
|
|
789
|
+
) {
|
|
790
|
+
if (!foundReasoningField) {
|
|
791
|
+
foundReasoningField = field;
|
|
792
|
+
break;
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
if (foundReasoningField) {
|
|
798
|
+
const delta = (choice.delta as any)[foundReasoningField];
|
|
799
|
+
appendThinkingDelta(delta, foundReasoningField);
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
if (choice?.delta?.tool_calls && choice.delta.tool_calls.length > 0) {
|
|
803
|
+
for (const toolCall of choice.delta.tool_calls) {
|
|
804
|
+
const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
|
|
805
|
+
let block = streamIndex !== undefined ? toolCallBlockByIndex.get(streamIndex) : undefined;
|
|
806
|
+
if (!block && toolCall.id) {
|
|
807
|
+
block = pendingToolCallBlocks.find(candidate => candidate.id === toolCall.id);
|
|
808
|
+
}
|
|
809
|
+
if (
|
|
810
|
+
!block &&
|
|
811
|
+
currentBlock?.type === "toolCall" &&
|
|
812
|
+
(!toolCall.id || currentBlock.id === toolCall.id)
|
|
813
|
+
) {
|
|
814
|
+
block = currentBlock;
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
if (!block) {
|
|
818
|
+
if (currentBlock?.type !== "toolCall") {
|
|
819
|
+
finishCurrentBlock(currentBlock);
|
|
820
|
+
}
|
|
821
|
+
block = {
|
|
822
|
+
type: "toolCall",
|
|
823
|
+
id: toolCall.id || "",
|
|
824
|
+
name: toolCall.function?.name || "",
|
|
825
|
+
arguments: {},
|
|
826
|
+
partialArgs: "",
|
|
827
|
+
streamIndex,
|
|
828
|
+
};
|
|
829
|
+
if (streamIndex !== undefined) toolCallBlockByIndex.set(streamIndex, block);
|
|
830
|
+
pendingToolCallBlocks.push(block);
|
|
831
|
+
currentBlock = block;
|
|
832
|
+
output.content.push(block);
|
|
833
|
+
stream.push({
|
|
834
|
+
type: "toolcall_start",
|
|
835
|
+
contentIndex: blockIndex(block),
|
|
836
|
+
partial: output,
|
|
837
|
+
});
|
|
838
|
+
} else {
|
|
839
|
+
currentBlock = block;
|
|
840
|
+
if (streamIndex !== undefined && block.streamIndex === undefined) {
|
|
841
|
+
block.streamIndex = streamIndex;
|
|
842
|
+
toolCallBlockByIndex.set(streamIndex, block);
|
|
843
|
+
}
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
if (toolCall.id) block.id = toolCall.id;
|
|
847
|
+
if (toolCall.function?.name) block.name = toolCall.function.name;
|
|
848
|
+
let delta = "";
|
|
849
|
+
if (toolCall.function?.arguments) {
|
|
850
|
+
delta = toolCall.function.arguments;
|
|
851
|
+
block.partialArgs = (block.partialArgs ?? "") + toolCall.function.arguments;
|
|
852
|
+
const throttled = parseStreamingJsonThrottled(block.partialArgs, block.lastParseLen ?? 0);
|
|
853
|
+
if (throttled) {
|
|
854
|
+
block.arguments = throttled.value;
|
|
855
|
+
block.lastParseLen = throttled.parsedLen;
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
stream.push({
|
|
859
|
+
type: "toolcall_delta",
|
|
860
|
+
contentIndex: blockIndex(block),
|
|
861
|
+
delta,
|
|
862
|
+
partial: output,
|
|
863
|
+
});
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
const reasoningDetails = (choice.delta as any).reasoning_details;
|
|
868
|
+
if (reasoningDetails && Array.isArray(reasoningDetails)) {
|
|
869
|
+
for (const detail of reasoningDetails) {
|
|
870
|
+
if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
|
|
871
|
+
const matchingToolCall = output.content.find(
|
|
872
|
+
b => b.type === "toolCall" && b.id === detail.id,
|
|
873
|
+
) as ToolCall | undefined;
|
|
874
|
+
if (matchingToolCall) {
|
|
875
|
+
matchingToolCall.thoughtSignature = JSON.stringify(detail);
|
|
876
|
+
}
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
if (streamMarkupHealing) {
|
|
884
|
+
for (const event of streamMarkupHealing.flushEvents()) {
|
|
885
|
+
emitHealingEvent(event);
|
|
886
|
+
}
|
|
887
|
+
flushHealedToolCalls();
|
|
888
|
+
if (healedToolCallEmitted && output.stopReason === "stop") {
|
|
889
|
+
// Hosts that leak tool-call templates often still report
|
|
890
|
+
// `finish_reason: stop` for the surrounding turn. Promote
|
|
891
|
+
// only that natural-completion finish — leave `error`,
|
|
892
|
+
// `length`, `aborted`, etc. untouched.
|
|
893
|
+
output.stopReason = "toolUse";
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
if (stripDeepseekChatTemplateTokens) {
|
|
898
|
+
flushDeepseekStripBuffer(true);
|
|
899
|
+
}
|
|
900
|
+
|
|
901
|
+
if (currentBlock?.type === "toolCall") {
|
|
902
|
+
finishPendingToolCallBlocks();
|
|
903
|
+
} else {
|
|
904
|
+
finishCurrentBlock(currentBlock);
|
|
905
|
+
finishPendingToolCallBlocks();
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
// Some OpenAI-compatible hosts stream structured `tool_calls` but report
|
|
909
|
+
// `finish_reason: "stop"` instead of `"tool_calls"`. In the OpenAI contract a
|
|
910
|
+
// tool call always means "execute and continue", so promote that
|
|
911
|
+
// natural-completion finish to `toolUse` whenever the turn produced tool-call
|
|
912
|
+
// blocks — the agent loop gates execution on the stop reason. `error`,
|
|
913
|
+
// `length`, and `aborted` are intentionally left untouched. (Anthropic's
|
|
914
|
+
// distinct `end_turn`-with-tool-calls "abandon" semantics live in its own
|
|
915
|
+
// provider and correctly keep `stop`.)
|
|
916
|
+
if (output.stopReason === "stop" && output.content.some(b => b.type === "toolCall")) {
|
|
917
|
+
output.stopReason = "toolUse";
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
const firstEventTimeoutError = abortTracker.getLocalAbortReason();
|
|
921
|
+
if (firstEventTimeoutError) {
|
|
922
|
+
throw firstEventTimeoutError;
|
|
923
|
+
}
|
|
924
|
+
if (abortTracker.wasCallerAbort()) {
|
|
925
|
+
throw new Error("Request was aborted");
|
|
926
|
+
}
|
|
927
|
+
|
|
928
|
+
if (output.stopReason === "aborted") {
|
|
929
|
+
throw new Error("Request was aborted");
|
|
930
|
+
}
|
|
931
|
+
if (output.stopReason === "error") {
|
|
932
|
+
throw new Error(output.errorMessage || "Provider returned an error stop reason");
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
output.errorMessage = strictFallbackErrorMessage;
|
|
936
|
+
output.duration = Date.now() - startTime;
|
|
937
|
+
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
938
|
+
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
939
|
+
stream.end();
|
|
940
|
+
} catch (error) {
|
|
941
|
+
for (const block of output.content) delete (block as any).index;
|
|
942
|
+
const firstEventTimeoutError = abortTracker.getLocalAbortReason();
|
|
943
|
+
output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
|
|
944
|
+
output.errorStatus = extractHttpStatusFromError(error) ?? getCapturedErrorResponse?.()?.status;
|
|
945
|
+
output.errorMessage =
|
|
946
|
+
firstEventTimeoutError?.message ??
|
|
947
|
+
(await finalizeErrorMessage(error, rawRequestDump, getCapturedErrorResponse?.()));
|
|
948
|
+
// Some providers via OpenRouter include extra details here.
|
|
949
|
+
const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
|
|
950
|
+
if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
|
|
951
|
+
output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
|
|
952
|
+
output.duration = Date.now() - startTime;
|
|
953
|
+
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
954
|
+
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
955
|
+
stream.end();
|
|
956
|
+
}
|
|
957
|
+
})();
|
|
958
|
+
|
|
959
|
+
return stream;
|
|
960
|
+
};
|
|
961
|
+
|
|
962
|
+
/**
 * Builds the OpenAI SDK client for one chat-completions request, together with
 * the request metadata the caller needs afterwards.
 *
 * What this does, in order:
 * 1. Resolves the API key: the explicit argument, else `$env.OPENAI_API_KEY`.
 * 2. Assembles default headers. Precedence (later wins): `model.headers`,
 *    OpenRouter attribution/caching headers, caller `extraHeaders`, Copilot
 *    dynamic headers. Kimi common headers are merged *underneath* whatever is
 *    already present.
 * 3. Resolves the base URL (GitHub Copilot endpoint, Azure deployment path and
 *    `api-version` query).
 * 4. Wraps `fetch` so the most recent non-OK HTTP response (status, headers,
 *    raw body and, when parseable, JSON body) is captured for error reporting.
 *
 * @param model - Model descriptor; supplies provider, id, headers and base URL.
 * @param context - Conversation context; its messages feed the Copilot header builder.
 * @param apiKey - Optional API key; falls back to `OPENAI_API_KEY`.
 * @param extraHeaders - Caller headers merged over the provider defaults.
 * @param initiatorOverride - Forwarded to the Copilot dynamic header builder.
 * @param onSseEvent - Optional SSE debug hook; when set, fetch is wrapped with `wrapFetchForSseDebug`.
 * @param fetchOverride - Optional fetch implementation used instead of the global `fetch`.
 * @returns The client plus the resolved base URL, the headers sent by default,
 *   the Copilot premium-request count (Copilot only), and accessors for the
 *   captured error response.
 * @throws Error when no API key is passed and `OPENAI_API_KEY` is unset.
 */
async function createClient(
	model: Model<"openai-completions">,
	context: Context,
	apiKey?: string,
	extraHeaders?: Record<string, string>,
	initiatorOverride?: MessageAttribution,
	onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
	fetchOverride?: FetchImpl,
): Promise<{
	client: OpenAI;
	copilotPremiumRequests: number | undefined;
	baseUrl: string | undefined;
	requestHeaders: Record<string, string>;
	getCapturedErrorResponse: () => CapturedHttpErrorResponse | undefined;
	clearCapturedErrorResponse: () => void;
}> {
	if (!apiKey) {
		if (!$env.OPENAI_API_KEY) {
			throw new Error(
				"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
			);
		}
		apiKey = $env.OPENAI_API_KEY;
	}
	// Keep the untouched key: the Copilot branch below replaces `apiKey` with the
	// parsed access token but still needs the raw value to resolve the base URL.
	const rawApiKey = apiKey;

	let headers = { ...model.headers };
	if (model.provider === "openrouter") {
		// App attribution — opts the agent into OpenRouter's public rankings and per-app
		// analytics. `HTTP-Referer` is the unique app identifier; without it nothing is
		// tracked. `X-OpenRouter-Title` is the display name (`X-Title` is the legacy
		// alias kept for back-compat). `X-OpenRouter-Categories` slots us into the
		// `cli-agent` marketplace category. `User-Agent` overrides the default OpenAI
		// SDK UA so traffic is identifiable in upstream provider logs.
		// https://openrouter.ai/docs/app-attribution
		headers["User-Agent"] = `Aery/${packageJson.version}`;
		headers["HTTP-Referer"] = "https://aery.dev/";
		headers["X-OpenRouter-Title"] = "Aery";
		headers["X-OpenRouter-Categories"] = "cli-agent";
		// Always-on response caching: identical requests return cached responses for free.
		// TTL 1h; first call hits the provider, every identical call within the window
		// replays from OpenRouter's edge cache. https://openrouter.ai/docs/features/response-caching
		headers["X-OpenRouter-Cache"] = "true";
		headers["X-OpenRouter-Cache-TTL"] = "3600";
	}
	Object.assign(headers, extraHeaders);
	if (model.provider === "kimi-code") {
		// Kimi common headers act as defaults: anything already set above wins.
		headers = { ...getKimiCommonHeaders(), ...headers };
	}
	let copilotPremiumRequests: number | undefined;

	let baseUrl = model.baseUrl;
	if (model.provider === "github-copilot") {
		apiKey = parseGitHubCopilotApiKey(rawApiKey).accessToken;
		const hasImages = hasCopilotVisionInput(context.messages);
		const copilot = buildCopilotDynamicHeaders({
			messages: context.messages,
			hasImages,
			premiumMultiplier: model.premiumMultiplier,
			headers,
			initiatorOverride,
		});
		Object.assign(headers, copilot.headers);
		copilotPremiumRequests = copilot.premiumRequests;
		baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
	}
	// Azure OpenAI requires /deployments/{id}/chat/completions?api-version=YYYY-MM-DD.
	// The generic openai-completions path adds neither, producing silent 404s.
	let azureDefaultQuery: Record<string, string> | undefined;
	if (baseUrl?.includes(".openai.azure.com")) {
		const apiVersion = $env.AZURE_OPENAI_API_VERSION || "2024-10-21";
		if (!baseUrl.includes("/deployments/")) {
			// Trim trailing slashes first so a configured `…/openai/` does not
			// become `…/openai//deployments/{id}`.
			baseUrl = `${baseUrl.replace(/\/+$/, "")}/deployments/${model.id}`;
		}
		azureDefaultQuery = { "api-version": apiVersion };
	}
	let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
	const baseFetch = fetchOverride ?? fetch;
	const wrappedFetch = Object.assign(
		async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
			const response = await baseFetch(input, init);
			if (response.ok) {
				// A successful response supersedes any earlier captured failure.
				capturedErrorResponse = undefined;
				return response;
			}
			let bodyText: string | undefined;
			let bodyJson: unknown;
			// Best-effort capture: read from a clone so the SDK can still consume the
			// original body, and ignore read/parse failures rather than masking the
			// HTTP error itself.
			try {
				bodyText = await response.clone().text();
				if (bodyText.trim().length > 0) {
					try {
						bodyJson = JSON.parse(bodyText);
					} catch {}
				}
			} catch {}
			capturedErrorResponse = {
				status: response.status,
				headers: response.headers,
				bodyText,
				bodyJson,
			};
			return response;
		},
		// Carry over `preconnect` when the underlying fetch exposes it.
		baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
	);
	const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;
	return {
		client: new OpenAI({
			apiKey,
			baseURL: baseUrl,
			dangerouslyAllowBrowser: true,
			maxRetries: 5,
			defaultHeaders: headers,
			defaultQuery: azureDefaultQuery,
			fetch: debugFetch,
		}),
		copilotPremiumRequests,
		baseUrl,
		requestHeaders: headers,
		getCapturedErrorResponse: () => capturedErrorResponse,
		clearCapturedErrorResponse: () => {
			capturedErrorResponse = undefined;
		},
	};
}
|
|
1087
|
+
|
|
1088
|
+
/**
 * Builds the chat-completions request body for one turn.
 *
 * Starts from `{ model, messages, stream: true }` and layers on, in order:
 * streaming usage, `store`, token limit, sampling options, service tier,
 * tools / tool choice, provider-specific thinking controls, routing
 * preferences, and finally `compat.extraBody` (which is assigned last and can
 * therefore override any earlier field).
 *
 * @param model - Model descriptor for the request.
 * @param context - Conversation context (system prompt, messages, tools).
 * @param options - Per-request options; may be `undefined`.
 * @param resolvedBaseUrl - Base URL actually used for the request, passed to `getCompat`.
 * @param toolStrictModeOverride - Forwarded to `convertTools`.
 * @returns The request params and the tool strict mode reported by `convertTools`
 *   (`"none"` when no tools were converted).
 * @throws Error when reasoning must be disabled via `reasoning_effort` but the
 *   model reports no supported efforts.
 */
function buildParams(
	model: Model<"openai-completions">,
	context: Context,
	options: OpenAICompletionsOptions | undefined,
	resolvedBaseUrl?: string,
	toolStrictModeOverride?: ToolStrictModeOverride,
): { params: OpenAICompletionsParams; toolStrictMode: AppliedToolStrictMode } {
	// Work on a shallow copy: the opencode branch below overrides compat flags for
	// this request only. Writing them onto the object returned by `getCompat`
	// would leak the overrides into later requests if that object is ever shared
	// or cached.
	const compat = { ...getCompat(model, resolvedBaseUrl) };
	// Opencode Zen's gateway (https://opencode.ai/zen/go/v1) gates
	// `reasoning_content` on the request's thinking state for every model it
	// fronts (Kimi K2.x, DeepSeek V4, GLM-5.x, Qwen3.x, MiMo, MiniMax, …): it
	// 400s with `Extra inputs are not permitted` when thinking is off but the
	// field is supplied (#1071), and 400s with `thinking is enabled but
	// reasoning_content is missing in assistant tool call message at index N`
	// (#1484) when thinking is on and the field is absent. `detectOpenAICompat`
	// only set `requiresReasoningContentForToolCalls` for the DeepSeek family
	// (and previously for Kimi until #1071 carved out opencode); reactivate it
	// per request for every opencode model whenever this turn is in thinking
	// mode so prior tool-call turns replay reasoning_content. Forced-tool
	// turns are excluded because the later `disableReasoningOnForcedToolChoice`
	// guard at the bottom of `buildParams` strips thinking from the wire body
	// for Kimi-style models — keeping the replay on under those conditions
	// would resurrect the #1071 failure.
	//
	// `allowsSyntheticReasoningContentForToolCalls` is forced to `false` on
	// the same path: the gateway specifically requires `reasoning_content`,
	// and the default synthetic-friendly behavior would echo whichever field
	// the upstream streamed (e.g. `reasoning` for many opencode turns),
	// landing the replay in the wrong key and re-triggering the 400.
	const isOpenCodeProvider = model.provider === "opencode-go" || model.provider === "opencode-zen";
	const thinkingEnabledForRequest =
		Boolean(options?.reasoning) && !options?.disableReasoning && Boolean(model.reasoning);
	const forcedToolChoiceSuppressesThinking =
		compat.disableReasoningOnForcedToolChoice &&
		isForcedToolChoice(mapToOpenAICompletionsToolChoice(options?.toolChoice));
	if (isOpenCodeProvider && thinkingEnabledForRequest && !forcedToolChoiceSuppressesThinking) {
		compat.requiresReasoningContentForToolCalls = true;
		compat.allowsSyntheticReasoningContentForToolCalls = false;
		compat.reasoningContentField = "reasoning_content";
	}
	const isKimiModelId = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
	const messages = convertMessages(model, context, compat);
	maybeAddOpenRouterAnthropicCacheControl(model, messages);
	// GitHub Copilot requests never carry any of the reasoning controls below.
	const supportsReasoningParams = model.provider !== "github-copilot";

	// Kimi (including via OpenRouter and Fireworks router-form IDs such as
	// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
	// max_tokens, not actual output. The official Kimi K2 model guidance
	// (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
	// every call since the family can otherwise emit very long reasoning traces
	// before the final answer. Always send max_tokens — match the same
	// Kimi-family regex used by the compat detector.
	// Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
	const effectiveMaxTokens = options?.maxTokens ?? (isKimiModelId ? model.maxTokens : undefined);

	const requestModelId = resolveOpenAICompletionsModelId(model, options);
	const params: OpenAICompletionsParams = {
		model: requestModelId,
		messages,
		stream: true,
	};
	let toolStrictMode: AppliedToolStrictMode = "none";

	if (compat.supportsUsageInStreaming !== false) {
		params.stream_options = { include_usage: true };
	}

	if (compat.supportsStore) {
		params.store = false;
	}

	// Truthiness check: a limit of 0 (or undefined) sends no token field at all.
	if (effectiveMaxTokens) {
		if (compat.maxTokensField === "max_tokens") {
			params.max_tokens = effectiveMaxTokens;
		} else {
			params.max_completion_tokens = effectiveMaxTokens;
		}
	}

	if (options?.temperature !== undefined) {
		params.temperature = options.temperature;
	}
	if (options?.topP !== undefined) {
		params.top_p = options.topP;
	}
	if (options?.topK !== undefined) {
		params.top_k = options.topK;
	}
	if (options?.minP !== undefined) {
		params.min_p = options.minP;
	}
	if (options?.presencePenalty !== undefined) {
		params.presence_penalty = options.presencePenalty;
	}
	if (options?.repetitionPenalty !== undefined) {
		params.repetition_penalty = options.repetitionPenalty;
	}
	if (options?.stopSequences?.length) {
		const seqs = options.stopSequences;
		// A single sequence is sent as a bare string; otherwise at most the first
		// four sequences are sent.
		params.stop = seqs.length === 1 ? seqs[0] : seqs.slice(0, 4);
	}
	if (options?.frequencyPenalty !== undefined) {
		params.frequency_penalty = options.frequencyPenalty;
	}
	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
		const resolved = resolveServiceTier(options?.serviceTier, model.provider);
		if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
			params.service_tier = resolved;
		}
	}

	if (context.tools?.length) {
		const builtTools = convertTools(context.tools, compat, toolStrictModeOverride);
		params.tools = builtTools.tools;
		toolStrictMode = builtTools.toolStrictMode;
	} else if (context.tools === undefined && hasToolHistory(context.messages)) {
		// Anthropic (via LiteLLM/proxy) requires the `tools` param when the conversation
		// contains tool_calls/tool_results, even when no tools are offered this turn.
		// Only inject the sentinel when the caller passed `context.tools = undefined`
		// (i.e. tools were not specified at all). An explicit `context.tools = []` means
		// the caller opted out of tools for this turn (as /btw and IRC background replies
		// do via AgentSession.runEphemeralTurn) — honour that intent and emit nothing,
		// so LiteLLM → Bedrock never sees an empty `toolConfig` block.
		params.tools = [];
	}

	if (options?.toolChoice && compat.supportsToolChoice) {
		params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
	}

	if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
		// `tool_choice: "none"` with no tools to gate is redundant and also
		// trips LiteLLM → Bedrock: the proxy serializes the directive into a
		// `toolConfig` block, and Bedrock requires `toolConfig.tools` to be
		// non-empty whenever the conversation already holds `toolUse`/`toolResult`
		// content. Drop it whenever the resolved tools list is missing or empty.
		// Side-channel turns hit this: `/btw` and IRC background replies route
		// through `AgentSession.runEphemeralTurn`, which sets `context.tools = []`
		// and `toolChoice: "none"` (see packages/coding-agent/src/session/agent-session.ts).
		delete params.tool_choice;
	}

	if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
		// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
		// Must explicitly disable since z.ai defaults to thinking enabled.
		const enabled = options?.reasoning && !options?.disableReasoning;
		params.thinking = { type: enabled ? "enabled" : "disabled" };
	} else if (supportsReasoningParams && compat.thinkingFormat === "qwen" && model.reasoning) {
		// Qwen uses top-level enable_thinking: boolean
		params.enable_thinking = !!options?.reasoning && !options?.disableReasoning;
	} else if (supportsReasoningParams && compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
		// Same boolean as the `qwen` format, but nested under chat_template_kwargs.
		params.chat_template_kwargs = {
			enable_thinking: !!options?.reasoning && !options?.disableReasoning,
		};
	} else if (supportsReasoningParams && compat.thinkingFormat === "openrouter" && model.reasoning) {
		// OpenRouter normalizes reasoning across providers via a nested reasoning object.
		// Without an explicit signal, OpenRouter defaults reasoning models to thinking, which
		// silently consumes the entire output budget on small `max_tokens` requests (e.g.
		// title generation). Honor `disableReasoning` to opt out cleanly.
		const openRouterParams = params as typeof params & {
			reasoning?: { effort?: string } | { enabled: false };
		};
		if (options?.disableReasoning) {
			openRouterParams.reasoning = { enabled: false };
		} else if (options?.reasoning) {
			openRouterParams.reasoning = {
				effort: mapReasoningEffort(options.reasoning, compat.reasoningEffortMap),
			};
		}
	} else if (
		supportsReasoningParams &&
		options?.reasoning &&
		!options?.disableReasoning &&
		model.reasoning &&
		compat.supportsReasoningEffort
	) {
		// OpenAI-style reasoning_effort
		params.reasoning_effort = mapReasoningEffort(options.reasoning, compat.reasoningEffortMap) as Effort;
	} else if (
		supportsReasoningParams &&
		options?.disableReasoning &&
		!options?.reasoning &&
		model.reasoning &&
		compat.supportsReasoningEffort
	) {
		// Generic OpenAI-compatible effort endpoints do not expose a true off
		// switch. Use the model's lowest supported effort as the closest
		// transport-level approximation when callers request disabled reasoning.
		const minEffort = getSupportedEfforts(model)[0];
		if (minEffort === undefined) {
			throw new Error(`Model ${model.provider}/${model.id} has no supported reasoning efforts`);
		}
		params.reasoning_effort = mapReasoningEffort(minEffort, compat.reasoningEffortMap) as Effort;
	}

	if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
		// DeepSeek reasoning models accept tools/tool_choice, but reject that
		// control field while thinking is enabled. Keep the tool-selection
		// contract and suppress reasoning for this single request.
		delete params.reasoning_effort;
		delete params.reasoning;
	}

	if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
		// Backends like Kimi 400 with `tool_choice 'specified' is incompatible
		// with thinking enabled`. Suppress thinking for this single forced-tool
		// turn while keeping the tool-selection contract intact.
		delete params.reasoning_effort;
		delete params.reasoning;
		if (compat.thinkingFormat === "zai") {
			params.thinking = { type: "disabled" };
		}
	}

	// OpenRouter provider routing preferences
	if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
		params.provider = compat.openRouterRouting;
	}

	// Vercel AI Gateway provider routing preferences
	if (model.baseUrl.includes("ai-gateway.vercel.sh") && model.compat?.vercelGatewayRouting) {
		const routing = model.compat.vercelGatewayRouting;
		if (routing.only || routing.order) {
			const gatewayOptions: Record<string, string[]> = {};
			if (routing.only) gatewayOptions.only = routing.only;
			if (routing.order) gatewayOptions.order = routing.order;
			params.providerOptions = { gateway: gatewayOptions };
		}
	}

	// Applied last, so extraBody entries override anything set above.
	if (compat.extraBody) {
		Object.assign(params, compat.extraBody);
	}

	return { params, toolStrictMode };
}
|
|
1324
|
+
|
|
1325
|
+
/**
 * Reads `key` from `value` and returns it only when it is a `number`.
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property value when `typeof` reports `"number"` (this includes
 *   `NaN` and infinities); otherwise `undefined`.
 */
function getOptionalNumberProperty(value: object, key: string): number | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (typeof candidate !== "number") {
		return undefined;
	}
	return candidate;
}
|
|
1329
|
+
|
|
1330
|
+
/**
 * Reads `key` from `value` and returns it only when it is a non-null object.
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property value when `typeof` reports `"object"` and it is not
 *   `null` (arrays qualify, functions do not); otherwise `undefined`.
 */
function getOptionalObjectProperty(value: object, key: string): object | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (candidate === null || typeof candidate !== "object") {
		return undefined;
	}
	return candidate;
}
|
|
1334
|
+
|
|
1335
|
+
/**
 * Returns the `usage` property of a streamed choice when it is a non-null
 * object, otherwise `undefined`.
 *
 * @param choice - One choice from a chat-completion chunk.
 */
function getChoiceUsage(choice: ChatCompletionChunk.Choice): object | undefined {
	const usage = getOptionalObjectProperty(choice, "usage");
	return usage;
}
|
|
1338
|
+
|
|
1339
|
+
/**
 * Converts a raw usage payload from a chat-completions chunk into the
 * normalized `AssistantMessage["usage"]` shape and fills in its cost.
 *
 * Two cache-accounting dialects are recognized:
 * - OpenRouter reports cache writes in `prompt_tokens_details.cache_write_tokens`
 *   and INCLUDES them in `prompt_tokens`; they are subtracted from the input
 *   and reported as `cacheWrite`.
 *   Ref: https://openrouter.ai/docs/guides/best-practices/prompt-caching
 * - DeepSeek reports `prompt_cache_hit_tokens` / `prompt_cache_miss_tokens` at
 *   the top level, with `prompt_tokens` equal to their sum. The miss portion IS
 *   the billed input, so it is neither subtracted nor reported as `cacheWrite`.
 *   Ref: https://api-docs.deepseek.com/api/create-chat-completion
 *
 * @param rawUsage - Usage object as received from the provider.
 * @param model - Model descriptor, passed to `calculateCost`.
 * @param premiumRequests - Copilot premium-request count; included only when defined.
 * @returns The normalized usage with cost populated by `calculateCost`.
 */
export function parseChunkUsage(
	rawUsage: object,
	model: Model<"openai-completions">,
	premiumRequests: number | undefined,
): AssistantMessage["usage"] {
	// Numeric lookup that tolerates a missing container object.
	const readNumber = (source: object | undefined, key: string): number | undefined =>
		source === undefined ? undefined : getOptionalNumberProperty(source, key);

	const promptDetails = getOptionalObjectProperty(rawUsage, "prompt_tokens_details");
	const completionDetails = getOptionalObjectProperty(rawUsage, "completion_tokens_details");

	const deepSeekHit = readNumber(rawUsage, "prompt_cache_hit_tokens");
	const deepSeekMiss = readNumber(rawUsage, "prompt_cache_miss_tokens");
	const openRouterCacheWrite = readNumber(promptDetails, "cache_write_tokens");

	// Cache reads: top-level `cached_tokens`, then DeepSeek's hit count, then the
	// nested `prompt_tokens_details.cached_tokens`.
	const cacheRead =
		readNumber(rawUsage, "cached_tokens") ?? deepSeekHit ?? readNumber(promptDetails, "cached_tokens") ?? 0;
	// Prefer OpenRouter's cache-write figure; fall back to DeepSeek's miss count.
	const cacheWriteCandidate = openRouterCacheWrite ?? deepSeekMiss ?? 0;
	const reasoningTokens = readNumber(completionDetails, "reasoning_tokens") ?? 0;
	const promptTokens = readNumber(rawUsage, "prompt_tokens") ?? 0;
	// Per OpenAI's CompletionUsage spec, `reasoning_tokens` is a subset of
	// `completion_tokens` (the total billed output), so they are not added together.
	const output = readNumber(rawUsage, "completion_tokens") ?? 0;

	// DeepSeek accounting applies only when both native fields are present, the
	// miss count is positive, and OpenRouter did not supply its own cache-write
	// figure (DeepSeek models routed through OpenRouter may carry both).
	const missIsBilledInput =
		deepSeekHit !== undefined &&
		deepSeekMiss !== undefined &&
		openRouterCacheWrite === undefined &&
		deepSeekMiss > 0;
	// For DeepSeek the miss tokens already are the input; reporting them as
	// cacheWrite as well would make downstream consumers double-count them.
	const cacheWrite = missIsBilledInput ? 0 : cacheWriteCandidate;
	const input = Math.max(0, promptTokens - cacheRead - cacheWrite);

	const usage: AssistantMessage["usage"] = {
		input,
		output,
		cacheRead,
		cacheWrite,
		totalTokens: input + output + cacheRead + cacheWrite,
		...(reasoningTokens > 0 ? { reasoningTokens } : {}),
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
		...(premiumRequests !== undefined ? { premiumRequests } : {}),
	};
	calculateCost(model, usage);
	return usage;
}
|
|
1405
|
+
|
|
1406
|
+
/**
 * Translates a reasoning effort into the provider-specific wire value.
 *
 * @param effort - The requested effort level.
 * @param reasoningEffortMap - Optional per-effort overrides.
 * @returns The mapped value when the map has a non-nullish entry for `effort`,
 *   otherwise `effort` itself.
 */
function mapReasoningEffort(
	effort: NonNullable<OpenAICompletionsOptions["reasoning"]>,
	reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoning"]>, string>>,
): string {
	const override = reasoningEffortMap[effort];
	return override ?? effort;
}
|
|
1412
|
+
|
|
1413
|
+
/**
 * Adds one Anthropic-style `cache_control: { type: "ephemeral" }` breakpoint to
 * the conversation when the request targets an `anthropic/*` model on OpenRouter.
 *
 * Walks the messages from last to first, looking only at user, assistant and
 * developer messages, and marks the last non-empty text it finds:
 * - string content is rewritten in place into a single text part carrying the marker;
 * - array content gets the marker on its last non-empty text part.
 *
 * Empty or whitespace-only text is skipped, because a breakpoint on an empty
 * text block cannot be cached; the search continues with earlier
 * parts/messages instead. Mutates `messages` in place and stops after the
 * first breakpoint; does nothing for other providers or models.
 *
 * @param model - Model descriptor; only `provider` and `id` are inspected.
 * @param messages - Converted chat messages; mutated in place.
 */
function maybeAddOpenRouterAnthropicCacheControl(
	model: Model<"openai-completions">,
	messages: ChatCompletionMessageParam[],
): void {
	if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;

	// Anthropic-style caching requires cache_control on a text part. Add a breakpoint
	// on the last user/assistant/developer message (walking backwards until we find
	// non-empty text content).
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg.role !== "user" && msg.role !== "assistant" && msg.role !== "developer") continue;

		const content = msg.content;
		if (typeof content === "string") {
			// Nothing cacheable in an empty message; keep looking further back.
			if (content.trim().length === 0) continue;
			msg.content = [
				Object.assign({ type: "text" as const, text: content }, { cache_control: { type: "ephemeral" } }),
			];
			return;
		}

		if (!Array.isArray(content)) continue;

		// Find last non-empty text part and add cache_control
		for (let j = content.length - 1; j >= 0; j--) {
			const part = content[j];
			if (part?.type !== "text") continue;
			if (typeof part.text === "string" && part.text.trim().length === 0) continue;
			Object.assign(part, { cache_control: { type: "ephemeral" } });
			return;
		}
	}
}
|
|
1445
|
+
|
|
1446
|
+
export function convertMessages(
|
|
1447
|
+
model: Model<"openai-completions">,
|
|
1448
|
+
context: Context,
|
|
1449
|
+
compat: ResolvedOpenAICompat,
|
|
1450
|
+
): ChatCompletionMessageParam[] {
|
|
1451
|
+
const params: ChatCompletionMessageParam[] = [];
|
|
1452
|
+
|
|
1453
|
+
const normalizeToolCallId = (id: string): string => {
|
|
1454
|
+
if (compat.requiresMistralToolIds) return normalizeMistralToolId(id, true);
|
|
1455
|
+
|
|
1456
|
+
// Handle pipe-separated IDs from OpenAI Responses API
|
|
1457
|
+
// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)
|
|
1458
|
+
// These come from providers like github-copilot, openai-codex, opencode
|
|
1459
|
+
// Extract just the call_id part and normalize it
|
|
1460
|
+
if (id.includes("|")) {
|
|
1461
|
+
const [callId] = id.split("|");
|
|
1462
|
+
// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
|
|
1463
|
+
return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
|
|
1464
|
+
}
|
|
1465
|
+
|
|
1466
|
+
if (model.provider === "openai") return id.length > 40 ? id.slice(0, 40) : id;
|
|
1467
|
+
return id;
|
|
1468
|
+
};
|
|
1469
|
+
const transformedMessages = transformMessages(context.messages, model, id => normalizeToolCallId(id));
|
|
1470
|
+
|
|
1471
|
+
const remappedToolCallIds = new Map<string, string[]>();
|
|
1472
|
+
let generatedToolCallIdCounter = 0;
|
|
1473
|
+
|
|
1474
|
+
const generateFallbackToolCallId = (seed: string): string => {
|
|
1475
|
+
generatedToolCallIdCounter += 1;
|
|
1476
|
+
const hash = Bun.hash(`${model.provider}:${model.id}:${seed}:${generatedToolCallIdCounter}`).toString(36);
|
|
1477
|
+
return `call_${hash}`;
|
|
1478
|
+
};
|
|
1479
|
+
|
|
1480
|
+
const rememberToolCallId = (originalId: string, normalizedId: string): void => {
|
|
1481
|
+
const queue = remappedToolCallIds.get(originalId);
|
|
1482
|
+
if (queue) {
|
|
1483
|
+
queue.push(normalizedId);
|
|
1484
|
+
return;
|
|
1485
|
+
}
|
|
1486
|
+
remappedToolCallIds.set(originalId, [normalizedId]);
|
|
1487
|
+
};
|
|
1488
|
+
|
|
1489
|
+
const consumeToolCallId = (originalId: string): string | null => {
|
|
1490
|
+
const queue = remappedToolCallIds.get(originalId);
|
|
1491
|
+
if (!queue || queue.length === 0) return null;
|
|
1492
|
+
const nextId = queue.shift() ?? null;
|
|
1493
|
+
if (queue.length === 0) remappedToolCallIds.delete(originalId);
|
|
1494
|
+
return nextId;
|
|
1495
|
+
};
|
|
1496
|
+
|
|
1497
|
+
const ensureToolCallId = (rawId: string, seed: string): string => {
|
|
1498
|
+
const normalized = normalizeToolCallId(rawId);
|
|
1499
|
+
if (normalized.trim().length > 0) return normalized;
|
|
1500
|
+
return generateFallbackToolCallId(seed);
|
|
1501
|
+
};
|
|
1502
|
+
|
|
1503
|
+
const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
|
|
1504
|
+
if (systemPrompts.length > 0) {
|
|
1505
|
+
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
|
1506
|
+
const role = useDeveloperRole ? "developer" : "system";
|
|
1507
|
+
// Default to one block per ordered system prompt so the leading prefix
|
|
1508
|
+
// stays byte-identical between turns and the provider's KV cache can
|
|
1509
|
+
// reuse it. Hosts whose chat templates reject follow-up system messages
|
|
1510
|
+
// (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
|
|
1511
|
+
// via `compat.supportsMultipleSystemMessages = false`; in that mode we
|
|
1512
|
+
// coalesce into a single message joined by `\n\n`.
|
|
1513
|
+
if (compat.supportsMultipleSystemMessages) {
|
|
1514
|
+
for (const systemPrompt of systemPrompts) {
|
|
1515
|
+
params.push({ role, content: systemPrompt });
|
|
1516
|
+
}
|
|
1517
|
+
} else {
|
|
1518
|
+
params.push({ role, content: systemPrompts.join("\n\n") });
|
|
1519
|
+
}
|
|
1520
|
+
}
|
|
1521
|
+
|
|
1522
|
+
let lastRole: string | null = null;
|
|
1523
|
+
|
|
1524
|
+
for (let i = 0; i < transformedMessages.length; i++) {
|
|
1525
|
+
const msg = transformedMessages[i];
|
|
1526
|
+
// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
|
|
1527
|
+
// Insert a synthetic assistant message to bridge the gap
|
|
1528
|
+
if (
|
|
1529
|
+
compat.requiresAssistantAfterToolResult &&
|
|
1530
|
+
lastRole === "toolResult" &&
|
|
1531
|
+
(msg.role === "user" || msg.role === "developer")
|
|
1532
|
+
) {
|
|
1533
|
+
params.push({
|
|
1534
|
+
role: "assistant",
|
|
1535
|
+
content: "I have processed the tool results.",
|
|
1536
|
+
});
|
|
1537
|
+
}
|
|
1538
|
+
|
|
1539
|
+
const devAsUser = !compat.supportsDeveloperRole;
|
|
1540
|
+
if (msg.role === "user" || msg.role === "developer") {
|
|
1541
|
+
const role = !devAsUser && msg.role === "developer" ? "developer" : "user";
|
|
1542
|
+
if (typeof msg.content === "string") {
|
|
1543
|
+
const text = msg.content.toWellFormed();
|
|
1544
|
+
if (text.trim().length === 0) continue;
|
|
1545
|
+
params.push({
|
|
1546
|
+
role: role,
|
|
1547
|
+
content: text,
|
|
1548
|
+
});
|
|
1549
|
+
} else {
|
|
1550
|
+
const supportsImages = model.input.includes("image");
|
|
1551
|
+
const content: ChatCompletionContentPart[] = [];
|
|
1552
|
+
let omittedImages = false;
|
|
1553
|
+
for (const item of msg.content) {
|
|
1554
|
+
if (item.type === "text") {
|
|
1555
|
+
const text = item.text.toWellFormed();
|
|
1556
|
+
if (text.trim().length === 0) continue;
|
|
1557
|
+
content.push({
|
|
1558
|
+
type: "text",
|
|
1559
|
+
text,
|
|
1560
|
+
} satisfies ChatCompletionContentPartText);
|
|
1561
|
+
} else if (supportsImages) {
|
|
1562
|
+
content.push({
|
|
1563
|
+
type: "image_url",
|
|
1564
|
+
image_url: {
|
|
1565
|
+
url: `data:${item.mimeType};base64,${item.data}`,
|
|
1566
|
+
},
|
|
1567
|
+
} satisfies ChatCompletionContentPartImage);
|
|
1568
|
+
} else {
|
|
1569
|
+
omittedImages = true;
|
|
1570
|
+
}
|
|
1571
|
+
}
|
|
1572
|
+
if (omittedImages) {
|
|
1573
|
+
content.push({
|
|
1574
|
+
type: "text",
|
|
1575
|
+
text: NON_VISION_IMAGE_PLACEHOLDER,
|
|
1576
|
+
} satisfies ChatCompletionContentPartText);
|
|
1577
|
+
}
|
|
1578
|
+
if (content.length === 0) continue;
|
|
1579
|
+
params.push({
|
|
1580
|
+
role: "user",
|
|
1581
|
+
content,
|
|
1582
|
+
});
|
|
1583
|
+
}
|
|
1584
|
+
} else if (msg.role === "assistant") {
|
|
1585
|
+
const assistantMsg: ChatCompletionAssistantMessageParam = {
|
|
1586
|
+
role: "assistant",
|
|
1587
|
+
content: null,
|
|
1588
|
+
};
|
|
1589
|
+
|
|
1590
|
+
const textBlocks = msg.content.filter(b => b.type === "text") as TextContent[];
|
|
1591
|
+
// Filter out empty text blocks to avoid API validation errors
|
|
1592
|
+
const nonEmptyTextBlocks = textBlocks.filter(b => b.text && b.text.trim().length > 0);
|
|
1593
|
+
if (nonEmptyTextBlocks.length > 0) {
|
|
1594
|
+
// Always send assistant content as a plain string. Some OpenAI-compatible
|
|
1595
|
+
// backends mirror array-of-text-block payloads back to the model literally,
|
|
1596
|
+
// causing recursive nested content in subsequent turns.
|
|
1597
|
+
assistantMsg.content = nonEmptyTextBlocks.map(b => b.text.toWellFormed()).join("");
|
|
1598
|
+
}
|
|
1599
|
+
|
|
1600
|
+
// Handle thinking blocks
|
|
1601
|
+
const thinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
|
|
1602
|
+
// Filter out empty thinking blocks to avoid API validation errors
|
|
1603
|
+
const nonEmptyThinkingBlocks = thinkingBlocks.filter(b => b.thinking && b.thinking.trim().length > 0);
|
|
1604
|
+
if (nonEmptyThinkingBlocks.length > 0) {
|
|
1605
|
+
if (compat.requiresThinkingAsText) {
|
|
1606
|
+
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
|
1607
|
+
const thinkingText = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n\n");
|
|
1608
|
+
const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
|
|
1609
|
+
if (textContent) {
|
|
1610
|
+
textContent.unshift({ type: "text", text: thinkingText });
|
|
1611
|
+
} else {
|
|
1612
|
+
assistantMsg.content = [{ type: "text", text: thinkingText }];
|
|
1613
|
+
}
|
|
1614
|
+
} else if (compat.requiresReasoningContentForToolCalls) {
|
|
1615
|
+
// Use the streamed signature when the backend accepts whichever
|
|
1616
|
+
// recognized field name was emitted (allowsSynthetic=true). Backends
|
|
1617
|
+
// like opencode-kimi-with-thinking and DeepSeek demand the exact
|
|
1618
|
+
// configured `reasoningContentField` instead, so honor that here
|
|
1619
|
+
// rather than echoing the upstream field name.
|
|
1620
|
+
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
|
|
1621
|
+
const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
1622
|
+
const wireField =
|
|
1623
|
+
compat.allowsSyntheticReasoningContentForToolCalls &&
|
|
1624
|
+
signature &&
|
|
1625
|
+
recognizedFields.includes(signature)
|
|
1626
|
+
? signature
|
|
1627
|
+
: signature && recognizedFields.includes(signature)
|
|
1628
|
+
? (compat.reasoningContentField ?? "reasoning_content")
|
|
1629
|
+
: undefined;
|
|
1630
|
+
if (wireField) {
|
|
1631
|
+
(assistantMsg as any)[wireField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1635
|
+
|
|
1636
|
+
if (compat.requiresReasoningContentForToolCalls) {
|
|
1637
|
+
const streamedReasoningField = nonEmptyThinkingBlocks[0]?.thinkingSignature;
|
|
1638
|
+
const reasoningField =
|
|
1639
|
+
compat.allowsSyntheticReasoningContentForToolCalls &&
|
|
1640
|
+
(streamedReasoningField === "reasoning_content" ||
|
|
1641
|
+
streamedReasoningField === "reasoning" ||
|
|
1642
|
+
streamedReasoningField === "reasoning_text")
|
|
1643
|
+
? streamedReasoningField
|
|
1644
|
+
: (compat.reasoningContentField ?? "reasoning_content");
|
|
1645
|
+
const reasoningContent = (assistantMsg as any)[reasoningField];
|
|
1646
|
+
if (!reasoningContent) {
|
|
1647
|
+
const reasoning = (assistantMsg as any).reasoning;
|
|
1648
|
+
const reasoningText = (assistantMsg as any).reasoning_text;
|
|
1649
|
+
if (reasoning && reasoningField !== "reasoning") {
|
|
1650
|
+
(assistantMsg as any)[reasoningField] = reasoning;
|
|
1651
|
+
} else if (reasoningText && reasoningField !== "reasoning_text") {
|
|
1652
|
+
(assistantMsg as any)[reasoningField] = reasoningText;
|
|
1653
|
+
} else if (nonEmptyThinkingBlocks.length > 0) {
|
|
1654
|
+
(assistantMsg as any)[reasoningField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
|
|
1655
|
+
}
|
|
1656
|
+
}
|
|
1657
|
+
}
|
|
1658
|
+
|
|
1659
|
+
const toolCalls = msg.content.filter(b => b.type === "toolCall") as ToolCall[];
|
|
1660
|
+
// Replay reasoning_content on assistant turns for backends that validate
|
|
1661
|
+
// thinking-mode history. DeepSeek V4 requires reasoning_content on EVERY
|
|
1662
|
+
// assistant turn once any prior turn included it — not just tool-call turns.
|
|
1663
|
+
// The replay logic has three tiers:
|
|
1664
|
+
// 1. Recover from thinking blocks with valid signatures (covers same-model replay
|
|
1665
|
+
// where nonEmptyThinkingBlocks may have filtered out empty-text blocks)
|
|
1666
|
+
// 2. For providers that require the field but returned no reasoning at all
|
|
1667
|
+
// (e.g. proxy-stripped reasoning_content), emit an empty string
|
|
1668
|
+
// 3. For providers that accept synthetic placeholders (Kimi, OpenRouter), emit "."
|
|
1669
|
+
// DeepSeek V4 rejects synthetic "." placeholders — it validates the exact value —
|
|
1670
|
+
// so the allowsSyntheticReasoningContentForToolCalls flag controls tier 3.
|
|
1671
|
+
const canUseSyntheticReasoningContent =
|
|
1672
|
+
compat.requiresReasoningContentForToolCalls &&
|
|
1673
|
+
compat.allowsSyntheticReasoningContentForToolCalls &&
|
|
1674
|
+
(compat.thinkingFormat === "openai" ||
|
|
1675
|
+
compat.thinkingFormat === "openrouter" ||
|
|
1676
|
+
compat.thinkingFormat === "zai");
|
|
1677
|
+
// DeepSeek-compatible reasoning models require reasoning_content on all
|
|
1678
|
+
// assistant turns. Providers that allow placeholders only need it on
|
|
1679
|
+
// tool-call turns.
|
|
1680
|
+
const needsReasoningOnAllTurns =
|
|
1681
|
+
compat.requiresReasoningContentForToolCalls && !compat.allowsSyntheticReasoningContentForToolCalls;
|
|
1682
|
+
const needsReasoningField = needsReasoningOnAllTurns || toolCalls.length > 0;
|
|
1683
|
+
let hasReasoningField =
|
|
1684
|
+
(assistantMsg as any).reasoning_content !== undefined ||
|
|
1685
|
+
(assistantMsg as any).reasoning !== undefined ||
|
|
1686
|
+
(assistantMsg as any).reasoning_text !== undefined;
|
|
1687
|
+
// Tier 1: Recover reasoning_content from ALL thinking blocks (including empty-text
|
|
1688
|
+
// ones) when the provider requires exact replay and rejects synthetic placeholders.
|
|
1689
|
+
// This covers the case where thinking blocks have valid signatures but were excluded
|
|
1690
|
+
// by the nonEmptyThinkingBlocks filter above, or where thinking text is empty but
|
|
1691
|
+
// the signature identifies the correct field name for replay.
|
|
1692
|
+
// Only recognized OpenAI-compat reasoning field names qualify — opaque signatures
|
|
1693
|
+
// from other providers (Anthropic encrypted, OpenAI Responses JSON, etc.) are not
|
|
1694
|
+
// valid property names for the wire message.
|
|
1695
|
+
if (
|
|
1696
|
+
needsReasoningField &&
|
|
1697
|
+
!hasReasoningField &&
|
|
1698
|
+
compat.requiresReasoningContentForToolCalls &&
|
|
1699
|
+
!compat.allowsSyntheticReasoningContentForToolCalls
|
|
1700
|
+
) {
|
|
1701
|
+
const allThinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
|
|
1702
|
+
if (allThinkingBlocks.length > 0) {
|
|
1703
|
+
const signature = allThinkingBlocks[0].thinkingSignature;
|
|
1704
|
+
const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
1705
|
+
if (signature && recognizedFields.includes(signature)) {
|
|
1706
|
+
const reasoningField = compat.reasoningContentField ?? "reasoning_content";
|
|
1707
|
+
(assistantMsg as any)[reasoningField] = allThinkingBlocks.map(b => b.thinking).join("\n");
|
|
1708
|
+
hasReasoningField = true;
|
|
1709
|
+
}
|
|
1710
|
+
}
|
|
1711
|
+
}
|
|
1712
|
+
// Tier 2: When the provider requires reasoning_content but there are genuinely no
|
|
1713
|
+
// thinking blocks at all (e.g. proxy stripped reasoning_content from the response),
|
|
1714
|
+
// emit an empty string. The field must be present; an empty string is the most honest
|
|
1715
|
+
// representation of "no reasoning was captured."
|
|
1716
|
+
if (
|
|
1717
|
+
needsReasoningField &&
|
|
1718
|
+
!hasReasoningField &&
|
|
1719
|
+
compat.requiresReasoningContentForToolCalls &&
|
|
1720
|
+
!compat.allowsSyntheticReasoningContentForToolCalls
|
|
1721
|
+
) {
|
|
1722
|
+
const reasoningField = compat.reasoningContentField ?? "reasoning_content";
|
|
1723
|
+
(assistantMsg as any)[reasoningField] = "";
|
|
1724
|
+
hasReasoningField = true;
|
|
1725
|
+
}
|
|
1726
|
+
// Tier 3: For providers that accept synthetic placeholders (Kimi, OpenRouter).
|
|
1727
|
+
if (toolCalls.length > 0 && canUseSyntheticReasoningContent && !hasReasoningField) {
|
|
1728
|
+
const reasoningField = compat.reasoningContentField ?? "reasoning_content";
|
|
1729
|
+
(assistantMsg as any)[reasoningField] = ".";
|
|
1730
|
+
hasReasoningField = true;
|
|
1731
|
+
}
|
|
1732
|
+
if (toolCalls.length > 0) {
|
|
1733
|
+
assistantMsg.tool_calls = toolCalls.map((tc, toolCallIndex) => {
|
|
1734
|
+
const toolCallId = ensureToolCallId(tc.id, `${i}:${toolCallIndex}:${tc.name}`);
|
|
1735
|
+
rememberToolCallId(tc.id, toolCallId);
|
|
1736
|
+
return {
|
|
1737
|
+
id: normalizeMistralToolId(toolCallId, compat.requiresMistralToolIds),
|
|
1738
|
+
type: "function" as const,
|
|
1739
|
+
function: {
|
|
1740
|
+
name: tc.name,
|
|
1741
|
+
arguments: serializeToolArguments(tc.arguments),
|
|
1742
|
+
},
|
|
1743
|
+
};
|
|
1744
|
+
});
|
|
1745
|
+
const reasoningDetails = toolCalls
|
|
1746
|
+
.filter(tc => tc.thoughtSignature)
|
|
1747
|
+
.map(tc => {
|
|
1748
|
+
try {
|
|
1749
|
+
return JSON.parse(tc.thoughtSignature!);
|
|
1750
|
+
} catch {
|
|
1751
|
+
return null;
|
|
1752
|
+
}
|
|
1753
|
+
})
|
|
1754
|
+
.filter(Boolean);
|
|
1755
|
+
if (reasoningDetails.length > 0) {
|
|
1756
|
+
(assistantMsg as any).reasoning_details = reasoningDetails;
|
|
1757
|
+
}
|
|
1758
|
+
}
|
|
1759
|
+
// Some OpenAI-compatible backends concatenate assistant content as a
|
|
1760
|
+
// string even for tool-call replay. OpenAI accepts an empty string here;
|
|
1761
|
+
// null trips strict/proxy implementations before the tool result is read.
|
|
1762
|
+
if (assistantMsg.content === null && (hasReasoningField || assistantMsg.tool_calls)) {
|
|
1763
|
+
assistantMsg.content = "";
|
|
1764
|
+
}
|
|
1765
|
+
// Skip assistant messages that have no content, no tool calls, and no reasoning payload.
|
|
1766
|
+
// Some OpenAI-compatible backends require replaying reasoning-only assistant turns
|
|
1767
|
+
// so follow-up requests preserve the provider-specific reasoning field name.
|
|
1768
|
+
const content = assistantMsg.content;
|
|
1769
|
+
const hasContent =
|
|
1770
|
+
content !== null &&
|
|
1771
|
+
content !== undefined &&
|
|
1772
|
+
(typeof content === "string" ? content.length > 0 : content.length > 0);
|
|
1773
|
+
if (!hasContent && assistantMsg.tool_calls && compat.requiresAssistantContentForToolCalls) {
|
|
1774
|
+
assistantMsg.content = ".";
|
|
1775
|
+
}
|
|
1776
|
+
if (!hasContent && !assistantMsg.tool_calls && !hasReasoningField) {
|
|
1777
|
+
continue;
|
|
1778
|
+
}
|
|
1779
|
+
params.push(assistantMsg);
|
|
1780
|
+
} else if (msg.role === "toolResult") {
|
|
1781
|
+
// Batch consecutive tool results and collect all images
|
|
1782
|
+
const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
|
|
1783
|
+
let j = i;
|
|
1784
|
+
|
|
1785
|
+
for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
|
|
1786
|
+
const toolMsg = transformedMessages[j] as ToolResultMessage;
|
|
1787
|
+
|
|
1788
|
+
// Extract text and image content
|
|
1789
|
+
const textResult = toolMsg.content
|
|
1790
|
+
.filter(c => c.type === "text")
|
|
1791
|
+
.map(c => (c as TextContent).text)
|
|
1792
|
+
.join("\n");
|
|
1793
|
+
const supportsImages = model.input.includes("image");
|
|
1794
|
+
const hasImages = toolMsg.content.some(c => c.type === "image");
|
|
1795
|
+
const omittedImages = hasImages && !supportsImages;
|
|
1796
|
+
|
|
1797
|
+
// Always send tool result with text (or placeholder if only images)
|
|
1798
|
+
const hasText = textResult.length > 0;
|
|
1799
|
+
const remappedToolCallId = consumeToolCallId(toolMsg.toolCallId);
|
|
1800
|
+
const resolvedToolCallId =
|
|
1801
|
+
remappedToolCallId ?? ensureToolCallId(toolMsg.toolCallId, `${j}:${toolMsg.toolName ?? "tool"}`);
|
|
1802
|
+
const toolResultContent = omittedImages
|
|
1803
|
+
? joinTextWithImagePlaceholder(textResult, true)
|
|
1804
|
+
: hasText
|
|
1805
|
+
? textResult
|
|
1806
|
+
: hasImages
|
|
1807
|
+
? "(see attached image)"
|
|
1808
|
+
: "";
|
|
1809
|
+
const toolResultMsg: ChatCompletionToolMessageParam = {
|
|
1810
|
+
role: "tool",
|
|
1811
|
+
content: toolResultContent.toWellFormed(),
|
|
1812
|
+
tool_call_id: normalizeMistralToolId(resolvedToolCallId, compat.requiresMistralToolIds),
|
|
1813
|
+
};
|
|
1814
|
+
if (compat.requiresToolResultName && toolMsg.toolName) {
|
|
1815
|
+
(toolResultMsg as any).name = toolMsg.toolName;
|
|
1816
|
+
}
|
|
1817
|
+
params.push(toolResultMsg);
|
|
1818
|
+
|
|
1819
|
+
if (hasImages && supportsImages) {
|
|
1820
|
+
for (const block of toolMsg.content) {
|
|
1821
|
+
if (block.type === "image") {
|
|
1822
|
+
imageBlocks.push({
|
|
1823
|
+
type: "image_url",
|
|
1824
|
+
image_url: {
|
|
1825
|
+
url: `data:${block.mimeType};base64,${block.data}`,
|
|
1826
|
+
},
|
|
1827
|
+
});
|
|
1828
|
+
}
|
|
1829
|
+
}
|
|
1830
|
+
}
|
|
1831
|
+
}
|
|
1832
|
+
|
|
1833
|
+
i = j - 1;
|
|
1834
|
+
|
|
1835
|
+
// After all consecutive tool results, add a single user message with all images
|
|
1836
|
+
if (imageBlocks.length > 0) {
|
|
1837
|
+
if (compat.requiresAssistantAfterToolResult) {
|
|
1838
|
+
params.push({
|
|
1839
|
+
role: "assistant",
|
|
1840
|
+
content: "I have processed the tool results.",
|
|
1841
|
+
});
|
|
1842
|
+
}
|
|
1843
|
+
|
|
1844
|
+
params.push({
|
|
1845
|
+
role: "user",
|
|
1846
|
+
content: [
|
|
1847
|
+
{
|
|
1848
|
+
type: "text",
|
|
1849
|
+
text: "Attached image(s) from tool result:",
|
|
1850
|
+
},
|
|
1851
|
+
...imageBlocks,
|
|
1852
|
+
],
|
|
1853
|
+
});
|
|
1854
|
+
lastRole = "user";
|
|
1855
|
+
} else {
|
|
1856
|
+
lastRole = "toolResult";
|
|
1857
|
+
}
|
|
1858
|
+
continue;
|
|
1859
|
+
}
|
|
1860
|
+
|
|
1861
|
+
lastRole =
|
|
1862
|
+
msg.role === "developer"
|
|
1863
|
+
? model.reasoning && compat.supportsDeveloperRole
|
|
1864
|
+
? "developer"
|
|
1865
|
+
: "system"
|
|
1866
|
+
: msg.role;
|
|
1867
|
+
}
|
|
1868
|
+
|
|
1869
|
+
return params;
|
|
1870
|
+
}
|
|
1871
|
+
|
|
1872
|
+
/**
 * Builds the chat-completions `tools` array together with the strict mode that
 * was actually applied to it.
 *
 * Each tool's wire schema is passed through `adaptSchemaForStrict`; a tool is a
 * strict candidate only when `NO_STRICT` is off, `compat.supportsStrictMode` is
 * not `false`, and the tool itself does not set `strict: false`.
 *
 * Applied mode (the request comes from `toolStrictModeOverride`, falling back
 * to `compat.toolStrictMode`):
 * - `"none"` requested -> `"none"`.
 * - `"all_strict"` requested -> `"all_strict"` only if every adapted tool is
 *   strict, otherwise `"none"`.
 * - anything else -> `"mixed"` (strict is emitted per tool).
 *
 * @param tools - Tool definitions to convert.
 * @param compat - Resolved compatibility settings for the target backend.
 * @param toolStrictModeOverride - Optional strict mode that replaces `compat.toolStrictMode`.
 * @returns The wire tool entries and the applied strict mode.
 */
function convertTools(
	tools: Tool[],
	compat: ResolvedOpenAICompat,
	toolStrictModeOverride?: ToolStrictModeOverride,
): BuiltOpenAICompletionTools {
	const prepared = tools.map(tool => {
		const wantsStrict = !NO_STRICT && compat.supportsStrictMode !== false && tool.strict !== false;
		const baseParameters = toolWireSchema(tool);
		const { schema, strict } = adaptSchemaForStrict(baseParameters, wantsStrict);
		return { tool, baseParameters, parameters: schema, strict };
	});

	const requested = toolStrictModeOverride ?? compat.toolStrictMode;
	let toolStrictMode: "none" | "all_strict" | "mixed";
	if (requested === "none") {
		toolStrictMode = "none";
	} else if (requested === "all_strict") {
		// All-or-nothing: a single non-strict tool downgrades the whole request.
		toolStrictMode = prepared.every(entry => entry.strict) ? "all_strict" : "none";
	} else {
		toolStrictMode = "mixed";
	}

	return {
		tools: prepared.map(entry => {
			const emitStrict = toolStrictMode === "all_strict" || (toolStrictMode === "mixed" && entry.strict);
			return {
				type: "function",
				function: {
					name: entry.tool.name,
					description: entry.tool.description || "",
					// The strict-adapted schema is only sent alongside the strict flag.
					parameters: emitStrict ? entry.parameters : entry.baseParameters,
					// Only include strict if provider supports it. Some reject unknown fields.
					...(emitStrict && { strict: true }),
				},
			};
		}),
		toolStrictMode,
	};
}
|
|
1916
|
+
|
|
1917
|
+
/**
 * Decides whether a failed request should be retried with strict tool schemas
 * turned off.
 *
 * A retry is suggested only when all of the following hold:
 * - tools were sent and the applied strict mode was `"all_strict"`;
 * - the HTTP status (from the error, else from the captured response) is 400 or 422;
 * - the error message and/or captured response body matches a strict-tools
 *   related pattern.
 *
 * @param error - The thrown value from the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if any.
 * @param toolStrictMode - Strict mode that was applied to the failed request.
 * @param tools - Tools that were sent with the request.
 * @returns `true` when the failure looks caused by strict tool schemas.
 */
function shouldRetryWithoutStrictTools(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
	toolStrictMode: AppliedToolStrictMode,
	tools: Tool[] | undefined,
): boolean {
	if (!tools?.length || toolStrictMode !== "all_strict") {
		return false;
	}

	const status = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
	const isValidationFailure = status === 400 || status === 422;
	if (!isValidationFailure) {
		return false;
	}

	// Gather whatever non-blank text is available, error message first.
	const fragments: string[] = [];
	const errorText = error instanceof Error ? error.message : undefined;
	for (const fragment of [errorText, capturedErrorResponse?.bodyText]) {
		if (typeof fragment === "string" && fragment.trim().length > 0) {
			fragments.push(fragment);
		}
	}

	const haystack = fragments.join("\n");
	return /wrong_api_format|mixed values for 'strict'|tool[s]?\b.*strict|\bstrict\b.*tool/i.test(haystack);
}
|
|
1935
|
+
|
|
1936
|
+
/**
 * Translates a provider `finish_reason` into the internal stop reason.
 *
 * - `null`, `"stop"`, `"end"` -> `stop`
 * - `"length"` -> `length`
 * - `"function_call"`, `"tool_calls"` -> `toolUse`
 * - anything else (including `"content_filter"` and `"network_error"`) ->
 *   `error`, with an `errorMessage` of `Provider finish_reason: <reason>`
 *
 * @param reason - The finish reason reported by the provider.
 * @returns The mapped stop reason, plus an error message for error outcomes.
 */
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
	stopReason: StopReason;
	errorMessage?: string;
} {
	if (reason === null || reason === "stop" || reason === "end") {
		return { stopReason: "stop" };
	}
	if (reason === "length") {
		return { stopReason: "length" };
	}
	if (reason === "function_call" || reason === "tool_calls") {
		return { stopReason: "toolUse" };
	}
	// Known failure reasons (content_filter, network_error) and unrecognized
	// values all surface the raw reason in the same message format.
	return { stopReason: "error", errorMessage: `Provider finish_reason: ${reason}` };
}
|
|
1961
|
+
|
|
1962
|
+
/**
 * Auto-detects compatibility settings for a model from its provider and base
 * URL (known providers only). An explicitly configured provider wins over
 * URL-based detection.
 *
 * Thin public wrapper around `detectOpenAICompat`.
 *
 * @param model - The model configuration to inspect.
 * @returns A fully resolved compat object with every field set.
 */
export function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
	const detected = detectOpenAICompat(model);
	return detected;
}
|
|
1970
|
+
|
|
1971
|
+
/**
 * Resolves the compatibility settings to use for a model: explicit
 * `model.compat` when provided, otherwise auto-detection from provider/URL.
 *
 * Thin wrapper around `resolveOpenAICompat`.
 *
 * @param model - The model configuration
 * @param resolvedBaseUrl - Optional resolved base URL (e.g., after GitHub Copilot proxy-ep resolution).
 * @returns The resolved compatibility settings.
 */
function getCompat(model: Model<"openai-completions">, resolvedBaseUrl?: string): ResolvedOpenAICompat {
	const resolved = resolveOpenAICompat(model, resolvedBaseUrl);
	return resolved;
}
|