@gajae-code/ai 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2644 -0
- package/README.md +1181 -0
- package/dist/types/api-registry.d.ts +30 -0
- package/dist/types/auth-broker/client.d.ts +66 -0
- package/dist/types/auth-broker/index.d.ts +5 -0
- package/dist/types/auth-broker/refresher.d.ts +25 -0
- package/dist/types/auth-broker/remote-store.d.ts +96 -0
- package/dist/types/auth-broker/server.d.ts +32 -0
- package/dist/types/auth-broker/types.d.ts +105 -0
- package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
- package/dist/types/auth-gateway/http.d.ts +39 -0
- package/dist/types/auth-gateway/index.d.ts +3 -0
- package/dist/types/auth-gateway/server.d.ts +17 -0
- package/dist/types/auth-gateway/types.d.ts +115 -0
- package/dist/types/auth-storage.d.ts +641 -0
- package/dist/types/cli.d.ts +2 -0
- package/dist/types/index.d.ts +49 -0
- package/dist/types/model-cache.d.ts +17 -0
- package/dist/types/model-manager.d.ts +62 -0
- package/dist/types/model-thinking.d.ts +71 -0
- package/dist/types/models.d.ts +12 -0
- package/dist/types/provider-details.d.ts +24 -0
- package/dist/types/provider-models/bundled-references.d.ts +4 -0
- package/dist/types/provider-models/descriptors.d.ts +48 -0
- package/dist/types/provider-models/google.d.ts +20 -0
- package/dist/types/provider-models/index.d.ts +5 -0
- package/dist/types/provider-models/ollama.d.ts +7 -0
- package/dist/types/provider-models/openai-compat.d.ts +237 -0
- package/dist/types/provider-models/special.d.ts +16 -0
- package/dist/types/providers/amazon-bedrock.d.ts +36 -0
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
- package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
- package/dist/types/providers/anthropic.d.ts +188 -0
- package/dist/types/providers/aws-credentials.d.ts +43 -0
- package/dist/types/providers/aws-eventstream.d.ts +38 -0
- package/dist/types/providers/aws-sigv4.d.ts +55 -0
- package/dist/types/providers/azure-openai-responses.d.ts +15 -0
- package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
- package/dist/types/providers/cursor.d.ts +42 -0
- package/dist/types/providers/error-message.d.ts +27 -0
- package/dist/types/providers/github-copilot-headers.d.ts +40 -0
- package/dist/types/providers/gitlab-duo.d.ts +27 -0
- package/dist/types/providers/google-auth.d.ts +24 -0
- package/dist/types/providers/google-gemini-cli.d.ts +72 -0
- package/dist/types/providers/google-gemini-headers.d.ts +18 -0
- package/dist/types/providers/google-shared.d.ts +163 -0
- package/dist/types/providers/google-types.d.ts +138 -0
- package/dist/types/providers/google-vertex.d.ts +7 -0
- package/dist/types/providers/google.d.ts +4 -0
- package/dist/types/providers/grammar.d.ts +1 -0
- package/dist/types/providers/kimi.d.ts +27 -0
- package/dist/types/providers/mock.d.ts +175 -0
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
- package/dist/types/providers/openai-chat-server.d.ts +16 -0
- package/dist/types/providers/openai-codex/constants.d.ts +26 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
- package/dist/types/providers/openai-codex-responses.d.ts +67 -0
- package/dist/types/providers/openai-completions-compat.d.ts +25 -0
- package/dist/types/providers/openai-completions.d.ts +33 -0
- package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
- package/dist/types/providers/openai-responses-server.d.ts +17 -0
- package/dist/types/providers/openai-responses-shared.d.ts +89 -0
- package/dist/types/providers/openai-responses.d.ts +32 -0
- package/dist/types/providers/pi-native-client.d.ts +13 -0
- package/dist/types/providers/pi-native-server.d.ts +68 -0
- package/dist/types/providers/register-builtins.d.ts +31 -0
- package/dist/types/providers/synthetic.d.ts +26 -0
- package/dist/types/providers/transform-messages.d.ts +12 -0
- package/dist/types/providers/vision-guard.d.ts +8 -0
- package/dist/types/rate-limit-utils.d.ts +19 -0
- package/dist/types/stream.d.ts +24 -0
- package/dist/types/types.d.ts +746 -0
- package/dist/types/usage/claude.d.ts +3 -0
- package/dist/types/usage/gemini.d.ts +2 -0
- package/dist/types/usage/github-copilot.d.ts +7 -0
- package/dist/types/usage/google-antigravity.d.ts +2 -0
- package/dist/types/usage/kimi.d.ts +2 -0
- package/dist/types/usage/minimax-code.d.ts +2 -0
- package/dist/types/usage/openai-codex.d.ts +3 -0
- package/dist/types/usage/shared.d.ts +1 -0
- package/dist/types/usage/zai.d.ts +2 -0
- package/dist/types/usage.d.ts +258 -0
- package/dist/types/utils/abort.d.ts +19 -0
- package/dist/types/utils/anthropic-auth.d.ts +31 -0
- package/dist/types/utils/discovery/antigravity.d.ts +61 -0
- package/dist/types/utils/discovery/codex.d.ts +38 -0
- package/dist/types/utils/discovery/cursor.d.ts +23 -0
- package/dist/types/utils/discovery/gemini.d.ts +25 -0
- package/dist/types/utils/discovery/index.d.ts +4 -0
- package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
- package/dist/types/utils/event-stream.d.ts +28 -0
- package/dist/types/utils/fireworks-model-id.d.ts +10 -0
- package/dist/types/utils/foundry.d.ts +1 -0
- package/dist/types/utils/h2-fetch.d.ts +22 -0
- package/dist/types/utils/http-inspector.d.ts +31 -0
- package/dist/types/utils/idle-iterator.d.ts +67 -0
- package/dist/types/utils/json-parse.d.ts +10 -0
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
- package/dist/types/utils/oauth/anthropic.d.ts +22 -0
- package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
- package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
- package/dist/types/utils/oauth/callback-server.d.ts +57 -0
- package/dist/types/utils/oauth/cerebras.d.ts +1 -0
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/cursor.d.ts +15 -0
- package/dist/types/utils/oauth/deepseek.d.ts +10 -0
- package/dist/types/utils/oauth/firepass.d.ts +1 -0
- package/dist/types/utils/oauth/fireworks.d.ts +1 -0
- package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
- package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
- package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
- package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
- package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
- package/dist/types/utils/oauth/huggingface.d.ts +19 -0
- package/dist/types/utils/oauth/index.d.ts +38 -0
- package/dist/types/utils/oauth/kagi.d.ts +17 -0
- package/dist/types/utils/oauth/kilo.d.ts +5 -0
- package/dist/types/utils/oauth/kimi.d.ts +21 -0
- package/dist/types/utils/oauth/litellm.d.ts +18 -0
- package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
- package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
- package/dist/types/utils/oauth/moonshot.d.ts +1 -0
- package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
- package/dist/types/utils/oauth/nvidia.d.ts +18 -0
- package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
- package/dist/types/utils/oauth/ollama.d.ts +18 -0
- package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
- package/dist/types/utils/oauth/opencode.d.ts +18 -0
- package/dist/types/utils/oauth/parallel.d.ts +17 -0
- package/dist/types/utils/oauth/perplexity.d.ts +9 -0
- package/dist/types/utils/oauth/pkce.d.ts +8 -0
- package/dist/types/utils/oauth/qianfan.d.ts +17 -0
- package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
- package/dist/types/utils/oauth/synthetic.d.ts +1 -0
- package/dist/types/utils/oauth/tavily.d.ts +17 -0
- package/dist/types/utils/oauth/together.d.ts +1 -0
- package/dist/types/utils/oauth/types.d.ts +44 -0
- package/dist/types/utils/oauth/venice.d.ts +18 -0
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/vllm.d.ts +16 -0
- package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
- package/dist/types/utils/oauth/zai.d.ts +18 -0
- package/dist/types/utils/oauth/zenmux.d.ts +1 -0
- package/dist/types/utils/overflow.d.ts +54 -0
- package/dist/types/utils/parse-bind.d.ts +23 -0
- package/dist/types/utils/provider-response.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +3 -0
- package/dist/types/utils/retry.d.ts +26 -0
- package/dist/types/utils/schema/adapt.d.ts +24 -0
- package/dist/types/utils/schema/compatibility.d.ts +30 -0
- package/dist/types/utils/schema/dereference.d.ts +11 -0
- package/dist/types/utils/schema/draft.d.ts +10 -0
- package/dist/types/utils/schema/equality.d.ts +4 -0
- package/dist/types/utils/schema/fields.d.ts +49 -0
- package/dist/types/utils/schema/index.d.ts +13 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
- package/dist/types/utils/schema/meta-validator.d.ts +2 -0
- package/dist/types/utils/schema/normalize.d.ts +93 -0
- package/dist/types/utils/schema/spill.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +25 -0
- package/dist/types/utils/schema/types.d.ts +4 -0
- package/dist/types/utils/schema/wire.d.ts +54 -0
- package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
- package/dist/types/utils/sse-debug.d.ts +10 -0
- package/dist/types/utils/tool-call-healing.d.ts +71 -0
- package/dist/types/utils/tool-choice.d.ts +50 -0
- package/dist/types/utils/validation.d.ts +17 -0
- package/dist/types/utils.d.ts +28 -0
- package/package.json +146 -0
- package/src/api-registry.ts +96 -0
- package/src/auth-broker/client.ts +358 -0
- package/src/auth-broker/index.ts +5 -0
- package/src/auth-broker/refresher.ts +127 -0
- package/src/auth-broker/remote-store.ts +623 -0
- package/src/auth-broker/server.ts +644 -0
- package/src/auth-broker/types.ts +127 -0
- package/src/auth-broker/wire-schemas.ts +200 -0
- package/src/auth-gateway/http.ts +194 -0
- package/src/auth-gateway/index.ts +3 -0
- package/src/auth-gateway/server.ts +717 -0
- package/src/auth-gateway/types.ts +134 -0
- package/src/auth-storage.ts +4104 -0
- package/src/cli.ts +262 -0
- package/src/index.ts +54 -0
- package/src/model-cache.ts +129 -0
- package/src/model-manager.ts +450 -0
- package/src/model-thinking.ts +691 -0
- package/src/models.json +73853 -0
- package/src/models.json.d.ts +9 -0
- package/src/models.ts +56 -0
- package/src/prompts/turn-aborted-guidance.md +4 -0
- package/src/provider-details.ts +90 -0
- package/src/provider-models/bundled-references.ts +38 -0
- package/src/provider-models/descriptors.ts +308 -0
- package/src/provider-models/google.ts +91 -0
- package/src/provider-models/index.ts +5 -0
- package/src/provider-models/ollama.ts +153 -0
- package/src/provider-models/openai-compat.ts +2275 -0
- package/src/provider-models/special.ts +67 -0
- package/src/providers/amazon-bedrock.ts +849 -0
- package/src/providers/anthropic-messages-server-schema.ts +229 -0
- package/src/providers/anthropic-messages-server.ts +677 -0
- package/src/providers/anthropic.ts +2696 -0
- package/src/providers/aws-credentials.ts +501 -0
- package/src/providers/aws-eventstream.ts +185 -0
- package/src/providers/aws-sigv4.ts +218 -0
- package/src/providers/azure-openai-responses.ts +337 -0
- package/src/providers/cursor/gen/agent_pb.ts +15274 -0
- package/src/providers/cursor/proto/agent.proto +3526 -0
- package/src/providers/cursor/proto/buf.gen.yaml +6 -0
- package/src/providers/cursor/proto/buf.yaml +17 -0
- package/src/providers/cursor.ts +2561 -0
- package/src/providers/error-message.ts +21 -0
- package/src/providers/github-copilot-headers.ts +140 -0
- package/src/providers/gitlab-duo.ts +372 -0
- package/src/providers/google-auth.ts +252 -0
- package/src/providers/google-gemini-cli.ts +795 -0
- package/src/providers/google-gemini-headers.ts +41 -0
- package/src/providers/google-shared.ts +902 -0
- package/src/providers/google-types.ts +167 -0
- package/src/providers/google-vertex.ts +88 -0
- package/src/providers/google.ts +41 -0
- package/src/providers/grammar.ts +70 -0
- package/src/providers/kimi.ts +52 -0
- package/src/providers/mock.ts +500 -0
- package/src/providers/ollama.ts +544 -0
- package/src/providers/openai-anthropic-shim.ts +138 -0
- package/src/providers/openai-chat-server-schema.ts +243 -0
- package/src/providers/openai-chat-server.ts +628 -0
- package/src/providers/openai-codex/constants.ts +43 -0
- package/src/providers/openai-codex/request-transformer.ts +161 -0
- package/src/providers/openai-codex/response-handler.ts +81 -0
- package/src/providers/openai-codex-responses.ts +2598 -0
- package/src/providers/openai-completions-compat.ts +279 -0
- package/src/providers/openai-completions.ts +1853 -0
- package/src/providers/openai-responses-server-schema.ts +290 -0
- package/src/providers/openai-responses-server.ts +1183 -0
- package/src/providers/openai-responses-shared.ts +800 -0
- package/src/providers/openai-responses.ts +621 -0
- package/src/providers/pi-native-client.ts +228 -0
- package/src/providers/pi-native-server.ts +210 -0
- package/src/providers/register-builtins.ts +412 -0
- package/src/providers/synthetic.ts +50 -0
- package/src/providers/transform-messages.ts +309 -0
- package/src/providers/vision-guard.ts +31 -0
- package/src/rate-limit-utils.ts +84 -0
- package/src/stream.ts +895 -0
- package/src/types.ts +884 -0
- package/src/usage/claude.ts +431 -0
- package/src/usage/gemini.ts +250 -0
- package/src/usage/github-copilot.ts +421 -0
- package/src/usage/google-antigravity.ts +201 -0
- package/src/usage/kimi.ts +271 -0
- package/src/usage/minimax-code.ts +31 -0
- package/src/usage/openai-codex.ts +503 -0
- package/src/usage/shared.ts +10 -0
- package/src/usage/zai.ts +247 -0
- package/src/usage.ts +183 -0
- package/src/utils/abort.ts +51 -0
- package/src/utils/anthropic-auth.ts +87 -0
- package/src/utils/discovery/antigravity.ts +261 -0
- package/src/utils/discovery/codex.ts +371 -0
- package/src/utils/discovery/cursor.ts +306 -0
- package/src/utils/discovery/gemini.ts +248 -0
- package/src/utils/discovery/index.ts +4 -0
- package/src/utils/discovery/openai-compatible.ts +224 -0
- package/src/utils/event-stream.ts +142 -0
- package/src/utils/fireworks-model-id.ts +30 -0
- package/src/utils/foundry.ts +8 -0
- package/src/utils/h2-fetch.ts +60 -0
- package/src/utils/http-inspector.ts +176 -0
- package/src/utils/idle-iterator.ts +250 -0
- package/src/utils/json-parse.ts +148 -0
- package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
- package/src/utils/oauth/anthropic.ts +200 -0
- package/src/utils/oauth/api-key-login.ts +87 -0
- package/src/utils/oauth/api-key-validation.ts +92 -0
- package/src/utils/oauth/callback-server.ts +276 -0
- package/src/utils/oauth/cerebras.ts +16 -0
- package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
- package/src/utils/oauth/cursor.ts +157 -0
- package/src/utils/oauth/deepseek.ts +53 -0
- package/src/utils/oauth/firepass.ts +24 -0
- package/src/utils/oauth/fireworks.ts +15 -0
- package/src/utils/oauth/github-copilot.ts +362 -0
- package/src/utils/oauth/gitlab-duo.ts +123 -0
- package/src/utils/oauth/google-antigravity.ts +200 -0
- package/src/utils/oauth/google-gemini-cli.ts +256 -0
- package/src/utils/oauth/google-oauth-shared.ts +110 -0
- package/src/utils/oauth/huggingface.ts +62 -0
- package/src/utils/oauth/index.ts +444 -0
- package/src/utils/oauth/kagi.ts +47 -0
- package/src/utils/oauth/kilo.ts +87 -0
- package/src/utils/oauth/kimi.ts +254 -0
- package/src/utils/oauth/litellm.ts +47 -0
- package/src/utils/oauth/lm-studio.ts +38 -0
- package/src/utils/oauth/minimax-code.ts +78 -0
- package/src/utils/oauth/moonshot.ts +16 -0
- package/src/utils/oauth/nanogpt.ts +15 -0
- package/src/utils/oauth/nvidia.ts +70 -0
- package/src/utils/oauth/oauth.html +199 -0
- package/src/utils/oauth/ollama-cloud.ts +28 -0
- package/src/utils/oauth/ollama.ts +47 -0
- package/src/utils/oauth/openai-codex.ts +299 -0
- package/src/utils/oauth/opencode.ts +49 -0
- package/src/utils/oauth/parallel.ts +46 -0
- package/src/utils/oauth/perplexity.ts +206 -0
- package/src/utils/oauth/pkce.ts +18 -0
- package/src/utils/oauth/qianfan.ts +58 -0
- package/src/utils/oauth/qwen-portal.ts +60 -0
- package/src/utils/oauth/synthetic.ts +16 -0
- package/src/utils/oauth/tavily.ts +46 -0
- package/src/utils/oauth/together.ts +16 -0
- package/src/utils/oauth/types.ts +94 -0
- package/src/utils/oauth/venice.ts +59 -0
- package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
- package/src/utils/oauth/vllm.ts +40 -0
- package/src/utils/oauth/xiaomi.ts +137 -0
- package/src/utils/oauth/zai.ts +60 -0
- package/src/utils/oauth/zenmux.ts +15 -0
- package/src/utils/overflow.ts +137 -0
- package/src/utils/parse-bind.ts +54 -0
- package/src/utils/provider-response.ts +30 -0
- package/src/utils/retry-after.ts +110 -0
- package/src/utils/retry.ts +54 -0
- package/src/utils/schema/CONSTRAINTS.md +164 -0
- package/src/utils/schema/adapt.ts +36 -0
- package/src/utils/schema/compatibility.ts +435 -0
- package/src/utils/schema/dereference.ts +98 -0
- package/src/utils/schema/draft.ts +341 -0
- package/src/utils/schema/equality.ts +97 -0
- package/src/utils/schema/fields.ts +190 -0
- package/src/utils/schema/index.ts +13 -0
- package/src/utils/schema/json-schema-validator.ts +577 -0
- package/src/utils/schema/meta-validator.ts +167 -0
- package/src/utils/schema/normalize.ts +1588 -0
- package/src/utils/schema/spill.ts +43 -0
- package/src/utils/schema/stamps.ts +97 -0
- package/src/utils/schema/types.ts +11 -0
- package/src/utils/schema/wire.ts +213 -0
- package/src/utils/schema/zod-decontaminate.ts +331 -0
- package/src/utils/sse-debug.ts +289 -0
- package/src/utils/tool-call-healing.ts +271 -0
- package/src/utils/tool-choice.ts +99 -0
- package/src/utils/validation.ts +1019 -0
- package/src/utils.ts +166 -0
|
@@ -0,0 +1,1853 @@
|
|
|
1
|
+
import { $env, extractHttpStatusFromError } from "@gajae-code/utils";
|
|
2
|
+
import OpenAI from "openai";
|
|
3
|
+
import type {
|
|
4
|
+
ChatCompletionAssistantMessageParam,
|
|
5
|
+
ChatCompletionChunk,
|
|
6
|
+
ChatCompletionContentPart,
|
|
7
|
+
ChatCompletionContentPartImage,
|
|
8
|
+
ChatCompletionContentPartText,
|
|
9
|
+
ChatCompletionMessageParam,
|
|
10
|
+
ChatCompletionToolMessageParam,
|
|
11
|
+
} from "openai/resources/chat/completions";
|
|
12
|
+
import packageJson from "../../package.json" with { type: "json" };
|
|
13
|
+
import { type Effort, getSupportedEfforts } from "../model-thinking";
|
|
14
|
+
import { calculateCost } from "../models";
|
|
15
|
+
import { getEnvApiKey } from "../stream";
|
|
16
|
+
import {
|
|
17
|
+
type AssistantMessage,
|
|
18
|
+
type Context,
|
|
19
|
+
type FetchImpl,
|
|
20
|
+
type Message,
|
|
21
|
+
type MessageAttribution,
|
|
22
|
+
type Model,
|
|
23
|
+
type OpenAICompat,
|
|
24
|
+
type ProviderSessionState,
|
|
25
|
+
resolveServiceTier,
|
|
26
|
+
type ServiceTier,
|
|
27
|
+
type StopReason,
|
|
28
|
+
type StreamFunction,
|
|
29
|
+
type StreamOptions,
|
|
30
|
+
shouldSendServiceTier,
|
|
31
|
+
type TextContent,
|
|
32
|
+
type ThinkingContent,
|
|
33
|
+
type Tool,
|
|
34
|
+
type ToolCall,
|
|
35
|
+
type ToolChoice,
|
|
36
|
+
type ToolResultMessage,
|
|
37
|
+
} from "../types";
|
|
38
|
+
import { normalizeSystemPrompts } from "../utils";
|
|
39
|
+
import { createAbortSourceTracker } from "../utils/abort";
|
|
40
|
+
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
41
|
+
import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
|
|
42
|
+
import {
|
|
43
|
+
type CapturedHttpErrorResponse,
|
|
44
|
+
finalizeErrorMessage,
|
|
45
|
+
type RawHttpRequestDump,
|
|
46
|
+
rewriteCopilotError,
|
|
47
|
+
} from "../utils/http-inspector";
|
|
48
|
+
import {
|
|
49
|
+
createWatchdog,
|
|
50
|
+
getOpenAIStreamIdleTimeoutMs,
|
|
51
|
+
getStreamFirstEventTimeoutMs,
|
|
52
|
+
iterateWithIdleTimeout,
|
|
53
|
+
} from "../utils/idle-iterator";
|
|
54
|
+
import { parseStreamingJson } from "../utils/json-parse";
|
|
55
|
+
import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
56
|
+
import { getKimiCommonHeaders } from "../utils/oauth/kimi";
|
|
57
|
+
import { notifyProviderResponse } from "../utils/provider-response";
|
|
58
|
+
import { callWithCopilotModelRetry } from "../utils/retry";
|
|
59
|
+
import { adaptSchemaForStrict, NO_STRICT, toolWireSchema } from "../utils/schema";
|
|
60
|
+
import { wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
61
|
+
import { type HealedToolCall, modelMayLeakKimiToolCalls, ToolCallHealer } from "../utils/tool-call-healing";
|
|
62
|
+
import { isForcedToolChoice, mapToOpenAICompletionsToolChoice } from "../utils/tool-choice";
|
|
63
|
+
import {
|
|
64
|
+
buildCopilotDynamicHeaders,
|
|
65
|
+
hasCopilotVisionInput,
|
|
66
|
+
resolveGitHubCopilotBaseUrl,
|
|
67
|
+
} from "./github-copilot-headers";
|
|
68
|
+
import { detectOpenAICompat, type ResolvedOpenAICompat, resolveOpenAICompat } from "./openai-completions-compat";
|
|
69
|
+
import { createInitialResponsesAssistantMessage } from "./openai-responses-shared";
|
|
70
|
+
import { transformMessages } from "./transform-messages";
|
|
71
|
+
import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER } from "./vision-guard";
|
|
72
|
+
|
|
73
|
+
/**
 * Coerce a tool-call ID into the shape Mistral accepts.
 *
 * Mistral requires tool IDs to be exactly 9 alphanumeric characters
 * (a-z, A-Z, 0-9). For every other provider the ID is returned untouched.
 *
 * The result is a pure function of `id`, so the same input always yields the
 * same output. The filler is a fixed alphabet prefix (it is not derived from
 * the ID), and IDs that share their first nine alphanumeric characters
 * normalize to the same value.
 *
 * @param id - Tool-call ID to normalize.
 * @param isMistral - When false, `id` is returned unchanged.
 * @returns `id` itself, or a 9-character alphanumeric ID when `isMistral` is true.
 */
function normalizeMistralToolId(id: string, isMistral: boolean): string {
	if (!isMistral) return id;
	const requiredLength = 9;
	// Drop everything outside [a-zA-Z0-9].
	const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, "");
	// Appending the full filler and cutting at nine characters covers both cases:
	// short IDs are padded with the filler's leading characters, long IDs are truncated.
	return (alphanumericOnly + "ABCDEFGHI").slice(0, requiredLength);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Flatten an OpenAI-compatible streaming `delta.content` value into plain text.
 *
 * Most providers stream `delta.content` as a string, but some (notably Mistral
 * Medium 3.5 / `mistral-medium-2604`) send an array of typed content parts,
 * e.g. `[{ type: "text", text: "Hello" }]`. Concatenating such a value directly
 * onto a string yields literal `[object Object]` output (issue #911), so every
 * shape is reduced to text here.
 *
 * Accepted shapes:
 * - a string: returned as-is;
 * - an array: string elements and text parts are concatenated in order;
 * - a single object: treated as one part.
 *
 * A part contributes its `text` only when `text` is a string and `type` is
 * either absent or `"text"`. Anything else contributes nothing.
 *
 * @param content - Raw `delta.content` value of unknown shape.
 * @returns The extracted text, or `""` when nothing usable is present.
 */
function normalizeStreamingContentText(content: unknown): string {
	// Text carried by one value: a bare string, or an object shaped like a text part.
	const textOf = (value: unknown): string => {
		if (typeof value === "string") return value;
		if (!value || typeof value !== "object") return "";
		const { type, text } = value as { type?: unknown; text?: unknown };
		const isTextPart = type === undefined || type === "text";
		return isTextPart && typeof text === "string" ? text : "";
	};

	if (Array.isArray(content)) {
		return content.map(part => textOf(part)).join("");
	}
	return textOf(content);
}
|
|
128
|
+
|
|
129
|
+
/**
 * Serialize tool-call arguments to a JSON object string.
 *
 * - A non-array object is stringified directly.
 * - A string is trimmed and parsed; it is re-stringified only when it decodes
 *   to a non-array object.
 * - Everything else (arrays, primitives, `null`, blank or unparsable strings,
 *   values that fail to stringify) falls back to `"{}"`.
 *
 * @param value - Arguments as an object or a JSON-encoded string.
 * @returns JSON text for the arguments object, or `"{}"` as the fallback.
 */
function serializeToolArguments(value: unknown): string {
	const emptyObjectJson = "{}";
	const isNonArrayObject = (candidate: unknown): boolean =>
		Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);

	if (isNonArrayObject(value)) {
		try {
			return JSON.stringify(value);
		} catch {
			// e.g. circular structures cannot be stringified
			return emptyObjectJson;
		}
	}

	if (typeof value !== "string") return emptyObjectJson;

	const text = value.trim();
	if (text.length === 0) return emptyObjectJson;

	try {
		const decoded: unknown = JSON.parse(text);
		if (isNonArrayObject(decoded)) return JSON.stringify(decoded);
	} catch {
		// Not valid JSON: fall through to the empty-object fallback.
	}
	return emptyObjectJson;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Report whether the conversation already contains tool activity.
 *
 * Anthropic (via proxy) requires the `tools` param to be present whenever the
 * messages include tool calls or tool-role messages, so callers need to know
 * whether any exist.
 *
 * @param messages - Conversation messages to scan.
 * @returns `true` if any message is a tool result, or any assistant message
 *   has a `toolCall` content block; otherwise `false`.
 */
function hasToolHistory(messages: Message[]): boolean {
	return messages.some(
		message =>
			message.role === "toolResult" ||
			(message.role === "assistant" && message.content.some(part => part.type === "toolCall")),
	);
}
|
|
171
|
+
|
|
172
|
+
/**
 * Decide whether a streamed chunk represents real progress, as opposed to a
 * keepalive, a role-only preamble, or an empty `{choices:[]}` no-op emitted by
 * some OpenAI-compatible endpoints.
 *
 * Without this filter every keepalive resets `iterateWithIdleTimeout`'s
 * deadline, so a provider that streams nothing but pings keeps the watchdog
 * asleep indefinitely. This was observed against z.ai/GLM via OpenRouter,
 * where a subagent stalled for hours with no error surfaced.
 *
 * A chunk is progress when any of the following holds:
 * - it carries a truthy top-level `usage`;
 * - its first choice has a truthy `finish_reason` or `usage`;
 * - its first choice's `delta` has non-empty `content` (string or array),
 *   a non-empty `tool_calls` array, or a non-empty string in `reasoning`,
 *   `reasoning_content`, `reasoning_text`, or `refusal`.
 *
 * Role-only `delta: { role: "assistant" }` preambles do NOT count, so the
 * (longer) first-event timeout keeps governing until real output appears.
 * Only the first entry of `choices` is inspected.
 *
 * @param chunk - A parsed stream chunk of unknown shape.
 * @returns `true` when the chunk carries progress, otherwise `false`.
 */
export function isOpenAICompletionsProgressChunk(chunk: unknown): boolean {
	if (typeof chunk !== "object" || !chunk) return false;

	const record = chunk as {
		usage?: unknown;
		choices?: ReadonlyArray<{
			finish_reason?: unknown;
			usage?: unknown;
			delta?: {
				content?: unknown;
				tool_calls?: unknown;
				reasoning?: unknown;
				reasoning_content?: unknown;
				reasoning_text?: unknown;
				refusal?: unknown;
			};
		}>;
	};

	const isNonEmptyString = (value: unknown): boolean => typeof value === "string" && value.length > 0;
	const isNonEmptyArray = (value: unknown): boolean => Array.isArray(value) && value.length > 0;

	// Terminal usage on the chunk itself.
	if (record.usage) return true;

	const firstChoice = Array.isArray(record.choices) ? record.choices[0] : undefined;
	if (!firstChoice) return false;
	if (firstChoice.finish_reason || firstChoice.usage) return true;

	const delta = firstChoice.delta;
	if (!delta) return false;

	// Model-produced output of any kind; evaluated left to right, stopping at the first hit.
	return (
		isNonEmptyString(delta.content) ||
		isNonEmptyArray(delta.content) ||
		isNonEmptyArray(delta.tool_calls) ||
		isNonEmptyString(delta.reasoning) ||
		isNonEmptyString(delta.reasoning_content) ||
		isNonEmptyString(delta.reasoning_text) ||
		isNonEmptyString(delta.refusal)
	);
}
|
|
218
|
+
|
|
219
|
+
/**
 * Options specific to the OpenAI chat-completions provider, layered on top of
 * the shared `StreamOptions`.
 */
export interface OpenAICompletionsOptions extends StreamOptions {
	/** Tool-choice setting for the request. */
	toolChoice?: ToolChoice;
	/** Requested reasoning effort level. */
	reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
	/** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
	disableReasoning?: boolean;
	/** Service tier to request. NOTE(review): whether it is actually sent is decided elsewhere — confirm at the call site. */
	serviceTier?: ServiceTier;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Streaming chat-completion request parameters, widened with fields that are
 * not part of the OpenAI SDK's own parameter type.
 *
 * NOTE(review): the extra fields look like vendor-specific extensions for
 * OpenAI-compatible backends; which backend consumes which field is not
 * visible here — confirm where the request is built.
 */
type OpenAICompletionsParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
	// Sampling controls beyond the standard SDK parameters.
	top_k?: number;
	min_p?: number;
	repetition_penalty?: number;
	// Alternative spellings of a thinking/reasoning on-off switch.
	thinking?: { type: "enabled" | "disabled" };
	enable_thinking?: boolean;
	chat_template_kwargs?: { enable_thinking: boolean };
	// Either an effort setting or an explicit "disabled" marker.
	reasoning?: { effort?: string } | { enabled: false };
	// Typed from the compat config's `openRouterRouting` entry.
	provider?: OpenAICompat["openRouterRouting"];
	// Gateway routing hints: an allow-list (`only`) and/or a preference order (`order`).
	providerOptions?: { gateway?: { only?: string[]; order?: string[] } };
};
|
|
238
|
+
|
|
239
|
+
/**
 * Strict-mode outcome recorded for a built tool list.
 * NOTE(review): presumably "all_strict" = every tool strict, "none" = no tool
 * strict, "mixed" = decided per tool — confirm against the tool builder.
 */
type AppliedToolStrictMode = "mixed" | "all_strict" | "none";
/** An explicit strict-mode override: any resolved `toolStrictMode` except "mixed", or `undefined` for no override. */
type ToolStrictModeOverride = Exclude<ResolvedOpenAICompat["toolStrictMode"], "mixed"> | undefined;

/** Tool definitions in chat-completions wire format, paired with the strict mode recorded for them. */
type BuiltOpenAICompletionTools = {
	tools: OpenAI.Chat.Completions.ChatCompletionTool[];
	toolStrictMode: AppliedToolStrictMode;
};
|
|
246
|
+
|
|
247
|
+
/** Prefix for this provider's keys in the provider session-state map. */
const OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX = "openai-completions:";

/**
 * Per-session state kept by this provider.
 *
 * NOTE(review): `strictToolsDisabled` presumably records that strict tool
 * schemas should no longer be sent for this session — confirm where it is set.
 */
type OpenAICompletionsProviderSessionState = ProviderSessionState & {
	strictToolsDisabled: boolean;
};
|
|
252
|
+
|
|
253
|
+
/**
 * Build a fresh session-state object for this provider.
 *
 * The state starts with `strictToolsDisabled` set to `false`; calling `close()`
 * resets that flag to `false` on the same object.
 *
 * @returns A new, independent session-state object.
 */
function createOpenAICompletionsProviderSessionState(): OpenAICompletionsProviderSessionState {
	const sessionState: OpenAICompletionsProviderSessionState = {
		strictToolsDisabled: false,
		close() {
			// Refers to the object through the closure, so it works even when detached.
			sessionState.strictToolsDisabled = false;
		},
	};
	return sessionState;
}
|
|
262
|
+
|
|
263
|
+
/**
 * Look up this provider's session state, creating and storing it on first use.
 *
 * Entries are keyed by provider, base URL (empty string when absent), and
 * model ID, all under the provider-specific key prefix.
 *
 * @param model - Model whose `provider` and `id` form part of the key.
 * @param baseUrl - Endpoint base URL, if any; part of the key.
 * @param providerSessionState - Shared state map; when absent no state is tracked.
 * @returns The existing or newly created state, or `undefined` when no map was supplied.
 */
function getOpenAICompletionsProviderSessionState(
	model: Model<"openai-completions">,
	baseUrl: string | undefined,
	providerSessionState: Map<string, ProviderSessionState> | undefined,
): OpenAICompletionsProviderSessionState | undefined {
	if (!providerSessionState) return undefined;

	const stateKey = `${OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX}${model.provider}:${baseUrl ?? ""}:${model.id}`;
	let sessionState = providerSessionState.get(stateKey) as OpenAICompletionsProviderSessionState | undefined;
	if (!sessionState) {
		// First request for this provider/endpoint/model combination.
		sessionState = createOpenAICompletionsProviderSessionState();
		providerSessionState.set(stateKey, sessionState);
	}
	return sessionState;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Check whether a model is an Anthropic model served through OpenRouter.
 *
 * @param model - Model to inspect.
 * @returns `true` when the provider is `"openrouter"` and the model ID starts
 *   with `anthropic/` (case-insensitive).
 */
function isOpenRouterAnthropicModel(model: Model<"openai-completions">): boolean {
	if (model.provider !== "openrouter") return false;
	const normalizedId = model.id.toLowerCase();
	return normalizedId.startsWith("anthropic/");
}
|
|
280
|
+
|
|
281
|
+
/**
 * Recognize the HTTP 400 "compiled grammar too large" rejection.
 *
 * The status is taken from the error when available, otherwise from the
 * captured response. The error message and the captured response body are then
 * searched together; all three markers (`invalid_request_error`,
 * `compiled grammar`, `too large`) must appear, case-insensitively.
 *
 * @param error - The thrown value from the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if any.
 * @returns `true` only for a 400 whose text contains all three markers.
 */
function isCompiledGrammarTooLargeStrictError(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
): boolean {
	const httpStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
	if (httpStatus !== 400) return false;

	const errorMessage = error instanceof Error ? error.message : undefined;
	const searchableText = [errorMessage, capturedErrorResponse?.bodyText]
		.filter((value): value is string => typeof value === "string" && value.trim().length > 0)
		.join("\n");

	const requiredMarkers = [/invalid_request_error/i, /compiled grammar/i, /too large/i];
	return requiredMarkers.every(marker => marker.test(searchableText));
}
|
|
296
|
+
|
|
297
|
+
// LIMITATION: The think tag parser uses naive string matching for <think>/<thinking> tags.
|
|
298
|
+
// If MiniMax models output these literal strings in code blocks, XML examples, or explanations,
|
|
299
|
+
// they will be incorrectly consumed as thinking delimiters, truncating visible output.
|
|
300
|
+
// A streaming parser with arbitrary chunk boundaries cannot reliably detect code block context.
|
|
301
|
+
// This is acceptable because: (1) only enabled for minimax-code providers, (2) MiniMax models
|
|
302
|
+
// use these tags as their actual thinking format, and (3) false positives are rare in practice.
|
|
303
|
+
// Literal tags that switch the MiniMax tagged-text parser into thinking mode.
const MINIMAX_THINK_OPEN_TAGS = ["<think>", "<thinking>"] as const;
// Literal tags that switch the MiniMax tagged-text parser back to visible text.
const MINIMAX_THINK_CLOSE_TAGS = ["</think>", "</thinking>"] as const;
|
|
305
|
+
|
|
306
|
+
/**
 * Locates the tag that occurs earliest in `text`.
 *
 * @param text - Text to search.
 * @param tags - Candidate literal tags; on equal positions the tag listed first wins.
 * @returns The winning tag and its index, or `undefined` when no tag occurs
 *          (or when the winning tag is the empty string).
 */
function findFirstTag(text: string, tags: readonly string[]): { index: number; tag: string } | undefined {
	let best: { index: number; tag: string } | undefined;
	for (const tag of tags) {
		const position = text.indexOf(tag);
		if (position === -1) continue;
		// Strict comparison keeps the earlier-listed tag when two tags share a position.
		if (best === undefined || position < best.index) {
			best = { index: position, tag };
		}
	}
	if (!best || !best.tag) return undefined;
	return best;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Returns the longest suffix of `text` that is a proper prefix of one of
 * `tags`, i.e. the start of a tag that may be completed by the next chunk.
 *
 * @param text - Buffered text to inspect.
 * @param tags - Candidate literal tags.
 * @returns The trailing partial tag, or `""` when the text does not end in one.
 */
function getTrailingPartialTag(text: string, tags: readonly string[]): string {
	const longest = tags.reduce((bestSoFar, tag) => {
		// Only proper prefixes count: a complete tag is handled by the full-tag search.
		let size = Math.min(tag.length - 1, text.length);
		while (size > 0 && !text.endsWith(tag.slice(0, size))) size--;
		return Math.max(bestSoFar, size);
	}, 0);
	return longest > 0 ? text.slice(-longest) : "";
}
|
|
334
|
+
|
|
335
|
+
// DeepSeek models leak chat-template special tokens (e.g. `<|tool_calls_begin|>`,
// `<|DSML|tool_calls|>`) into visible `content` deltas when hosted behind providers
// (such as NVIDIA NIM) that don't strip them server-side. The structured `tool_calls`
// payload is still emitted correctly — we only need to filter the leaked markers from
// user-visible text. Tokens use either fullwidth pipes (U+FF5C) or ASCII pipes (U+007C).
// Body is restricted to identifier-like chars (with the DeepSeek tokenizer's U+2581),
// capped at a sane length to avoid swallowing legitimate angle-bracket text.
//
// The non-ASCII characters are written as `\u` escapes on purpose: if a literal
// fullwidth pipe is normalized to ASCII, the delimiter alternation gains an empty
// branch and the patterns start matching ordinary text such as `<div>` or `<T>`.
const DEEPSEEK_SPECIAL_TOKEN_REGEX = /<[\uFF5C|][A-Za-z0-9_.\uFF5C|\u2581]{1,64}[\uFF5C|]>/g;
const DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX = /^\s*<[\uFF5C|][A-Za-z0-9_.\uFF5C|\u2581]{1,64}[\uFF5C|]>/;
const DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX = /<[\uFF5C|][A-Za-z0-9_.\uFF5C|\u2581]{1,64}[\uFF5C|]>\s*$/;
// Opening delimiters of a special token: fullwidth-pipe form first, then ASCII form.
const DEEPSEEK_OPEN_DELIMS = ["<\uFF5C", "<|"] as const;

/**
 * Removes every DeepSeek special token from `text`.
 *
 * When the original text began (ignoring leading whitespace) with a token, the
 * whitespace left at the start of the result is trimmed; likewise for the end.
 * Whitespace around tokens in the middle of the text is left untouched.
 *
 * @param text - A chunk of visible content.
 * @returns `text` unchanged when it contains no token, otherwise the stripped text.
 */
function stripDeepseekSpecialTokens(text: string): string {
	const stripped = text.replace(DEEPSEEK_SPECIAL_TOKEN_REGEX, "");
	if (stripped === text) return text;

	let normalized = stripped;
	if (DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX.test(text)) normalized = normalized.trimStart();
	if (DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX.test(text)) normalized = normalized.trimEnd();
	return normalized;
}

/**
 * Finds the trailing part of `text` that may be an incomplete special token, so
 * the caller can hold it back until the next chunk arrives.
 *
 * - If the last opening delimiter (`<` + fullwidth or ASCII pipe) has not been
 *   closed by a pipe + `>` yet, everything from that delimiter on is returned.
 * - Otherwise a solo trailing `<` is returned, because it may be the first
 *   character of a token whose pipe arrives in the next chunk. This also applies
 *   when an earlier token in the same text is already closed.
 * - An unclosed tail longer than 256 characters is not held back, so a stray
 *   opening delimiter in normal prose cannot grow the buffer unboundedly.
 *
 * @param text - Buffered, not yet emitted content.
 * @returns The suffix to keep buffered, or `""` when nothing needs holding back.
 */
function getTrailingPartialDeepseekToken(text: string): string {
	const soloOpenBracket = text.endsWith("<") ? "<" : "";

	let bestIdx = -1;
	for (const delim of DEEPSEEK_OPEN_DELIMS) {
		const idx = text.lastIndexOf(delim);
		if (idx > bestIdx) bestIdx = idx;
	}
	if (bestIdx === -1) return soloOpenBracket;

	const tail = text.slice(bestIdx);
	const isClosed = tail.includes("\uFF5C>") || tail.includes("|>");
	// Cap the held-back length so a stray opening delimiter in normal prose can't grow unboundedly.
	if (isClosed || tail.length > 256) return soloOpenBracket;
	return tail;
}
|
|
375
|
+
|
|
376
|
+
// Message of the Error that `streamOpenAICompletions` hands to the abort tracker
// when the first-event watchdog fires; it is surfaced as the stream's error message.
const OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE =
	"OpenAI completions stream timed out while waiting for the first event";
|
|
378
|
+
|
|
379
|
+
/**
 * Streams a chat completion for an `openai-completions` model and converts the
 * provider's chunk stream into assistant-message events.
 *
 * The event stream is returned immediately; the request itself runs in a
 * detached async task. That task pushes a `start` event, then text / thinking /
 * tool-call start, delta and end events, and finally either `done` or `error`
 * before ending the stream. Failures are never thrown to the caller: they are
 * reported through the `error` event with `stopReason` set to `"aborted"`
 * (caller abort) or `"error"`.
 *
 * @param model - Model to call; its provider and id also select the provider-specific content filters.
 * @param context - Conversation context passed to the client and parameter builders.
 * @param options - Optional API key, headers, abort signal, callbacks, timeouts and session state.
 * @returns The event stream that receives the translated events.
 */
export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
	model: Model<"openai-completions">,
	context: Context,
	options?: OpenAICompletionsOptions,
): AssistantMessageEventStream => {
	const stream = new AssistantMessageEventStream();

	(async () => {
		const startTime = Date.now();
		let firstTokenTime: number | undefined;
		// Assigned once the client exists; stays undefined if client creation itself throws.
		let getCapturedErrorResponse: (() => CapturedHttpErrorResponse | undefined) | undefined;

		const output: AssistantMessage = createInitialResponsesAssistantMessage(model.api, model.provider, model.id);
		// Last request sent, kept for error reporting via finalizeErrorMessage.
		let rawRequestDump: RawHttpRequestDump | undefined;
		const abortTracker = createAbortSourceTracker(options?.signal);
		const firstEventTimeoutAbortError = new Error(OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE);
		const { requestAbortController, requestSignal } = abortTracker;

		try {
			const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
			const idleTimeoutMs = getOpenAIStreamIdleTimeoutMs();
			const {
				client,
				copilotPremiumRequests,
				baseUrl,
				requestHeaders,
				getCapturedErrorResponse: captureErrorResponse,
				clearCapturedErrorResponse,
			} = await createClient(
				model,
				context,
				apiKey,
				options?.headers,
				options?.initiatorOverride,
				options?.onSseEvent,
				options?.fetch,
				options?.streamFirstEventTimeoutMs,
			);
			const premiumRequestsTotal = copilotPremiumRequests;
			getCapturedErrorResponse = captureErrorResponse;
			let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
			const providerSessionState = getOpenAICompletionsProviderSessionState(
				model,
				baseUrl,
				options?.providerSessionState,
			);
			// Strict tools may already have been disabled by an earlier request in this session.
			let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
			let strictFallbackErrorMessage: string | undefined;
			// Builds the request parameters, records the request dump and opens the chunk stream.
			// Once strict tools are disabled, every call forces the "none" strict mode.
			const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
				clearCapturedErrorResponse();
				const effectiveToolStrictModeOverride = disableStrictTools ? "none" : toolStrictModeOverride;
				const { params, toolStrictMode } = buildParams(
					model,
					context,
					options,
					baseUrl,
					effectiveToolStrictModeOverride,
				);
				appliedToolStrictMode = toolStrictMode;
				options?.onPayload?.(params);
				rawRequestDump = {
					provider: model.provider,
					api: output.api,
					model: model.id,
					method: "POST",
					url: `${baseUrl}/chat/completions`,
					headers: requestHeaders,
					body: params,
				};
				const { data, response, request_id } = await client.chat.completions
					.create(params, { signal: requestSignal })
					.withResponse();
				await notifyProviderResponse(options, response, model, request_id);
				return data;
			};
			let openaiStream: AsyncIterable<ChatCompletionChunk>;
			try {
				openaiStream = await callWithCopilotModelRetry(() => createCompletionsStream(), {
					provider: model.provider,
					signal: requestSignal,
				});
			} catch (error) {
				const capturedErrorResponse = getCapturedErrorResponse();
				if (
					isOpenRouterAnthropicModel(model) &&
					!disableStrictTools &&
					isCompiledGrammarTooLargeStrictError(error, capturedErrorResponse)
				) {
					// OpenRouter + Anthropic rejected the strict tool grammar as too large:
					// remember the error text, disable strict tools for this request and for
					// the session state (when present), then retry once.
					strictFallbackErrorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
					output.errorMessage = strictFallbackErrorMessage;
					if (providerSessionState) {
						providerSessionState.strictToolsDisabled = true;
					}
					disableStrictTools = true;
					openaiStream = await createCompletionsStream("none");
				} else {
					// Any other failure is retried without strict tools only when the helper
					// says so; otherwise it propagates to the outer catch.
					if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
						throw error;
					}
					openaiStream = await createCompletionsStream("none");
				}
			}
			// When this watchdog fires it aborts the request locally with the first-event timeout error.
			const firstEventWatchdog = createWatchdog(
				options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs),
				() => abortTracker.abortLocally(firstEventTimeoutAbortError),
			);
			if (premiumRequestsTotal !== undefined) {
				output.usage.premiumRequests = premiumRequestsTotal;
			}
			stream.push({ type: "start", partial: output });

			const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
			// Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
			// native API) leak chat-template tool-call markers in `delta.content` even
			// though tool calls are also surfaced structurally. Strip the leaked markers
			// so users don't see raw `<|...|>` tokens.
			const stripDeepseekChatTemplateTokens =
				/deepseek/i.test(model.id) && (model.provider === "nvidia" || model.provider === "deepseek");
			// Tool-call blocks carry the raw argument JSON in `partialArgs` while they are streaming.
			type OpenAIStreamBlock = TextContent | ThinkingContent | (ToolCall & { partialArgs: string });
			let currentBlock: OpenAIStreamBlock | undefined;
			// Position of a block inside `output.content`; without a block, the last
			// content index (or 0 when the content is empty).
			const blockIndex = (block: OpenAIStreamBlock | undefined): number => {
				if (!block) return Math.max(0, output.content.length - 1);
				return output.content.indexOf(block);
			};
			// Emits the matching `*_end` event for a block. For tool calls the arguments
			// are re-parsed from the accumulated JSON and `partialArgs` is removed first.
			const finishCurrentBlock = (block: OpenAIStreamBlock | undefined): void => {
				if (!block) return;
				const contentIndex = blockIndex(block);
				if (contentIndex < 0) return;
				if (block.type === "text") {
					stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
					return;
				}
				if (block.type === "thinking") {
					stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
					return;
				}
				block.arguments = parseStreamingJson(block.partialArgs);
				delete (block as { partialArgs?: string }).partialArgs;
				stream.push({ type: "toolcall_end", contentIndex, toolCall: block, partial: output });
			};
			// Appends visible text, first closing the current block and opening a new
			// text block when the current block is not a text block.
			const appendText = (
				message: AssistantMessage,
				eventStream: AssistantMessageEventStream,
				text: string,
			): void => {
				if (!currentBlock || currentBlock.type !== "text") {
					finishCurrentBlock(currentBlock);
					currentBlock = { type: "text", text: "" };
					message.content.push(currentBlock);
					eventStream.push({ type: "text_start", contentIndex: blockIndex(currentBlock), partial: message });
				}
				currentBlock.text += text;
				eventStream.push({
					type: "text_delta",
					contentIndex: blockIndex(currentBlock),
					delta: text,
					partial: message,
				});
			};
			// Appends thinking text. A new thinking block is started when the current
			// block is not a thinking block or when a given signature differs from the
			// current block's signature.
			const appendThinking = (
				message: AssistantMessage,
				eventStream: AssistantMessageEventStream,
				thinking: string,
				signature?: string,
			): void => {
				if (
					!currentBlock ||
					currentBlock.type !== "thinking" ||
					(signature !== undefined && currentBlock.thinkingSignature !== signature)
				) {
					finishCurrentBlock(currentBlock);
					currentBlock = { type: "thinking", thinking: "", thinkingSignature: signature };
					message.content.push(currentBlock);
					eventStream.push({
						type: "thinking_start",
						contentIndex: blockIndex(currentBlock),
						partial: message,
					});
				}
				if (signature !== undefined && !currentBlock.thinkingSignature) {
					currentBlock.thinkingSignature = signature;
				}
				currentBlock.thinking += thinking;
				eventStream.push({
					type: "thinking_delta",
					contentIndex: blockIndex(currentBlock),
					delta: thinking,
					partial: message,
				});
			};

			// MiniMax tag parser state: unprocessed text and whether we are inside a think tag.
			let taggedTextBuffer = "";
			let insideTaggedThinking = false;
			// The two delta helpers ignore empty strings and record the time of the first token.
			const appendTextDelta = (text: string) => {
				if (!text) return;
				if (!firstTokenTime) firstTokenTime = Date.now();
				appendText(output, stream, text);
			};
			const appendThinkingDelta = (thinking: string, signature?: string) => {
				if (!thinking) return;
				if (!firstTokenTime) firstTokenTime = Date.now();
				appendThinking(output, stream, thinking, signature);
			};

			// Consumes `taggedTextBuffer`, routing text outside think tags to text deltas
			// and text inside them to thinking deltas. A trailing partial tag is kept in
			// the buffer so it can be completed by the next chunk.
			const flushTaggedTextBuffer = () => {
				while (taggedTextBuffer.length > 0) {
					if (insideTaggedThinking) {
						const closingTag = findFirstTag(taggedTextBuffer, MINIMAX_THINK_CLOSE_TAGS);
						if (closingTag) {
							appendThinkingDelta(taggedTextBuffer.slice(0, closingTag.index));
							taggedTextBuffer = taggedTextBuffer.slice(closingTag.index + closingTag.tag.length);
							insideTaggedThinking = false;
							continue;
						}

						const trailingPartialTag = getTrailingPartialTag(taggedTextBuffer, MINIMAX_THINK_CLOSE_TAGS);
						const flushLength = taggedTextBuffer.length - trailingPartialTag.length;
						appendThinkingDelta(taggedTextBuffer.slice(0, flushLength));
						taggedTextBuffer = trailingPartialTag;
						break;
					}

					const openingTag = findFirstTag(taggedTextBuffer, MINIMAX_THINK_OPEN_TAGS);
					if (openingTag) {
						appendTextDelta(taggedTextBuffer.slice(0, openingTag.index));
						taggedTextBuffer = taggedTextBuffer.slice(openingTag.index + openingTag.tag.length);
						insideTaggedThinking = true;
						continue;
					}

					const trailingPartialTag = getTrailingPartialTag(taggedTextBuffer, MINIMAX_THINK_OPEN_TAGS);
					const flushLength = taggedTextBuffer.length - trailingPartialTag.length;
					appendTextDelta(taggedTextBuffer.slice(0, flushLength));
					taggedTextBuffer = trailingPartialTag;
					break;
				}
			};

			let deepseekStripBuffer = "";
			// Emits buffered DeepSeek content with special tokens removed. Unless `final`,
			// a trailing partial token stays buffered. Output that became whitespace-only
			// purely because tokens were stripped is dropped.
			const flushDeepseekStripBuffer = (final: boolean): void => {
				if (deepseekStripBuffer.length === 0) return;
				let flushable: string;
				if (final) {
					flushable = deepseekStripBuffer;
					deepseekStripBuffer = "";
				} else {
					const trailing = getTrailingPartialDeepseekToken(deepseekStripBuffer);
					flushable = deepseekStripBuffer.slice(0, deepseekStripBuffer.length - trailing.length);
					deepseekStripBuffer = trailing;
				}
				const stripped = stripDeepseekSpecialTokens(flushable);
				if (stripped && (stripped === flushable || stripped.trim().length > 0)) appendTextDelta(stripped);
			};

			const kimiHealer = modelMayLeakKimiToolCalls(model.provider, model.id) ? new ToolCallHealer() : undefined;
			let healedToolCallEmitted = false;
			// Emits a tool call recovered from leaked text as a complete
			// start / delta / end sequence and leaves no block open afterwards.
			const emitHealedToolCall = (call: HealedToolCall): void => {
				finishCurrentBlock(currentBlock);
				const block: ToolCall & { partialArgs: string } = {
					type: "toolCall",
					id: call.id,
					name: call.name,
					arguments: {},
					partialArgs: call.arguments,
				};
				block.arguments = parseStreamingJson(call.arguments);
				currentBlock = block;
				output.content.push(block);
				stream.push({ type: "toolcall_start", contentIndex: blockIndex(block), partial: output });
				stream.push({
					type: "toolcall_delta",
					contentIndex: blockIndex(block),
					delta: call.arguments,
					partial: output,
				});
				finishCurrentBlock(block);
				currentBlock = undefined;
				healedToolCallEmitted = true;
			};
			const flushHealedToolCalls = (): void => {
				if (!kimiHealer) return;
				const calls = kimiHealer.drainCompleted();
				for (const call of calls) emitHealedToolCall(call);
			};

			for await (const chunk of iterateWithIdleTimeout(openaiStream, {
				watchdog: firstEventWatchdog,
				idleTimeoutMs,
				errorMessage: "OpenAI completions stream stalled while waiting for the next event",
				onIdle: () => requestAbortController.abort(),
				abortSignal: options?.signal,
				isProgressItem: isOpenAICompletionsProgressChunk,
			})) {
				if (!chunk || typeof chunk !== "object") continue;

				// OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
				// and each chunk in a streamed completion carries the same id.
				output.responseId ||= chunk.id;

				if (chunk.usage) {
					output.usage = parseChunkUsage(chunk.usage, model, premiumRequestsTotal);
				}

				// Only the first choice of each chunk is processed.
				const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
				if (!choice) continue;

				// Fall back to usage reported on the choice when the chunk has none.
				if (!chunk.usage) {
					const choiceUsage = getChoiceUsage(choice);
					if (choiceUsage) {
						output.usage = parseChunkUsage(choiceUsage, model, premiumRequestsTotal);
					}
				}

				if (choice.finish_reason) {
					const finishReasonResult = mapStopReason(choice.finish_reason);
					output.stopReason = finishReasonResult.stopReason;
					if (finishReasonResult.errorMessage) {
						output.errorMessage = finishReasonResult.errorMessage;
					}
				}

				if (choice.delta) {
					const normalizedDeltaText = normalizeStreamingContentText(choice.delta.content);
					if (normalizedDeltaText.length > 0) {
						if (!firstTokenTime) firstTokenTime = Date.now();
						// Exactly one content filter applies, chosen in this order:
						// MiniMax think tags, DeepSeek token stripping, Kimi tool-call healing, none.
						if (parseMiniMaxThinkTags) {
							taggedTextBuffer += normalizedDeltaText;
							flushTaggedTextBuffer();
						} else if (stripDeepseekChatTemplateTokens) {
							deepseekStripBuffer += normalizedDeltaText;
							flushDeepseekStripBuffer(false);
						} else if (kimiHealer) {
							const hasStructuredToolCalls =
								Array.isArray(choice.delta.tool_calls) && choice.delta.tool_calls.length > 0;
							if (hasStructuredToolCalls) {
								// Same chunk leaks markers AND carries structured tool_calls.
								// Strip the marker text from visible output, but drop any
								// synthesized calls so the structured payload stays the
								// single source of truth (avoids double-dispatch).
								const clean = kimiHealer.consumeWithoutCalls(normalizedDeltaText);
								if (clean.length > 0) appendTextDelta(clean);
							} else {
								const clean = kimiHealer.feed(normalizedDeltaText);
								if (clean.length > 0) appendTextDelta(clean);
								flushHealedToolCalls();
							}
						} else {
							appendTextDelta(normalizedDeltaText);
						}
					}

					// Some endpoints return reasoning in reasoning_content (llama.cpp),
					// or reasoning (other openai compatible endpoints)
					// Use the first non-empty reasoning field to avoid duplication
					// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
					const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
					let foundReasoningField: string | null = null;
					for (const field of reasoningFields) {
						if (
							(choice.delta as any)[field] !== null &&
							(choice.delta as any)[field] !== undefined &&
							(choice.delta as any)[field].length > 0
						) {
							if (!foundReasoningField) {
								foundReasoningField = field;
								break;
							}
						}
					}

					if (foundReasoningField) {
						const delta = (choice.delta as any)[foundReasoningField];
						// The field name is passed as the thinking signature of the block.
						appendThinkingDelta(delta, foundReasoningField);
					}

					if (choice?.delta?.tool_calls && choice.delta.tool_calls.length > 0) {
						for (const toolCall of choice.delta.tool_calls) {
							// Open a new tool-call block unless this delta continues the current
							// one (same id, or no id on the delta).
							if (
								!currentBlock ||
								currentBlock.type !== "toolCall" ||
								(toolCall.id && currentBlock.id !== toolCall.id)
							) {
								finishCurrentBlock(currentBlock);
								currentBlock = {
									type: "toolCall",
									id: toolCall.id || "",
									name: toolCall.function?.name || "",
									arguments: {},
									partialArgs: "",
								};
								output.content.push(currentBlock);
								stream.push({
									type: "toolcall_start",
									contentIndex: blockIndex(currentBlock),
									partial: output,
								});
							}

							if (currentBlock.type === "toolCall") {
								if (toolCall.id) currentBlock.id = toolCall.id;
								if (toolCall.function?.name) currentBlock.name = toolCall.function.name;
								let delta = "";
								if (toolCall.function?.arguments) {
									delta = toolCall.function.arguments;
									currentBlock.partialArgs += toolCall.function.arguments;
									currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
								}
								// A delta event is pushed even when this chunk carried no argument text.
								stream.push({
									type: "toolcall_delta",
									contentIndex: blockIndex(currentBlock),
									delta,
									partial: output,
								});
							}
						}
					}

					// Attach encrypted reasoning details to the tool call with the same id.
					const reasoningDetails = (choice.delta as any).reasoning_details;
					if (reasoningDetails && Array.isArray(reasoningDetails)) {
						for (const detail of reasoningDetails) {
							if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
								const matchingToolCall = output.content.find(
									b => b.type === "toolCall" && b.id === detail.id,
								) as ToolCall | undefined;
								if (matchingToolCall) {
									matchingToolCall.thoughtSignature = JSON.stringify(detail);
								}
							}
						}
					}
				}
			}

			// End of stream: emit whatever the MiniMax parser was still holding back,
			// as thinking or text depending on the parser state.
			if (parseMiniMaxThinkTags && taggedTextBuffer.length > 0) {
				if (insideTaggedThinking) {
					appendThinkingDelta(taggedTextBuffer);
				} else {
					appendTextDelta(taggedTextBuffer);
				}
				taggedTextBuffer = "";
			}

			if (stripDeepseekChatTemplateTokens) {
				flushDeepseekStripBuffer(true);
			}

			if (kimiHealer) {
				const trailing = kimiHealer.flushPending();
				if (trailing.length > 0) appendTextDelta(trailing);
				flushHealedToolCalls();
				if (healedToolCallEmitted && output.stopReason === "stop") {
					// Hosts that leak Kimi tool tokens often still report
					// `finish_reason: stop` for the surrounding turn. Promote
					// only that natural-completion finish — leave `error`,
					// `length`, `aborted`, etc. untouched.
					output.stopReason = "toolUse";
				}
			}

			finishCurrentBlock(currentBlock);

			// A recorded local abort reason or a caller abort turns the finished loop
			// into a failure handled by the catch below.
			const firstEventTimeoutError = abortTracker.getLocalAbortReason();
			if (firstEventTimeoutError) {
				throw firstEventTimeoutError;
			}
			if (abortTracker.wasCallerAbort()) {
				throw new Error("Request was aborted");
			}

			if (output.stopReason === "aborted") {
				throw new Error("Request was aborted");
			}
			if (output.stopReason === "error") {
				throw new Error(output.errorMessage || "Provider returned an error stop reason");
			}

			// On success the message keeps only the strict-fallback error text, if any
			// (undefined otherwise).
			output.errorMessage = strictFallbackErrorMessage;
			output.duration = Date.now() - startTime;
			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Remove any `index` property from the content blocks before reporting.
			for (const block of output.content) delete (block as any).index;
			const firstEventTimeoutError = abortTracker.getLocalAbortReason();
			output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
			output.errorStatus = extractHttpStatusFromError(error) ?? getCapturedErrorResponse?.()?.status;
			// The local abort reason's message takes precedence over the generic error text.
			output.errorMessage =
				firstEventTimeoutError?.message ??
				(await finalizeErrorMessage(error, rawRequestDump, getCapturedErrorResponse?.()));
			// Some providers via OpenRouter include extra details here.
			const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
			if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
			output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
			output.duration = Date.now() - startTime;
			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();

	return stream;
};
|
|
881
|
+
|
|
882
|
+
async function createClient(
|
|
883
|
+
model: Model<"openai-completions">,
|
|
884
|
+
context: Context,
|
|
885
|
+
apiKey?: string,
|
|
886
|
+
extraHeaders?: Record<string, string>,
|
|
887
|
+
initiatorOverride?: MessageAttribution,
|
|
888
|
+
onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
|
|
889
|
+
fetchOverride?: FetchImpl,
|
|
890
|
+
streamFirstEventTimeoutOverride?: number,
|
|
891
|
+
): Promise<{
|
|
892
|
+
client: OpenAI;
|
|
893
|
+
copilotPremiumRequests: number | undefined;
|
|
894
|
+
baseUrl: string | undefined;
|
|
895
|
+
requestHeaders: Record<string, string>;
|
|
896
|
+
getCapturedErrorResponse: () => CapturedHttpErrorResponse | undefined;
|
|
897
|
+
clearCapturedErrorResponse: () => void;
|
|
898
|
+
}> {
|
|
899
|
+
if (!apiKey) {
|
|
900
|
+
if (!$env.OPENAI_API_KEY) {
|
|
901
|
+
throw new Error(
|
|
902
|
+
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
|
903
|
+
);
|
|
904
|
+
}
|
|
905
|
+
apiKey = $env.OPENAI_API_KEY;
|
|
906
|
+
}
|
|
907
|
+
const rawApiKey = apiKey;
|
|
908
|
+
|
|
909
|
+
let headers = { ...model.headers };
|
|
910
|
+
if (model.provider === "openrouter") {
|
|
911
|
+
// App attribution — opts the agent into OpenRouter's public rankings and per-app
|
|
912
|
+
// analytics. `HTTP-Referer` is the unique app identifier; without it nothing is
|
|
913
|
+
// tracked. `X-OpenRouter-Title` is the display name (`X-Title` is the legacy
|
|
914
|
+
// alias kept for back-compat). `X-OpenRouter-Categories` slots us into the
|
|
915
|
+
// `cli-agent` marketplace category. `User-Agent` overrides the default OpenAI
|
|
916
|
+
// SDK UA so traffic is identifiable in upstream provider logs.
|
|
917
|
+
// https://openrouter.ai/docs/app-attribution
|
|
918
|
+
headers["User-Agent"] = `Gajae-Code/${packageJson.version}`;
|
|
919
|
+
headers["HTTP-Referer"] = "https://gajae-code.dev/";
|
|
920
|
+
headers["X-OpenRouter-Title"] = "Gajae-Code";
|
|
921
|
+
headers["X-OpenRouter-Categories"] = "cli-agent";
|
|
922
|
+
// Always-on response caching: identical requests return cached responses for free.
|
|
923
|
+
// TTL 1h; first call hits the provider, every identical call within the window
|
|
924
|
+
// replays from OpenRouter's edge cache. https://openrouter.ai/docs/features/response-caching
|
|
925
|
+
headers["X-OpenRouter-Cache"] = "true";
|
|
926
|
+
headers["X-OpenRouter-Cache-TTL"] = "3600";
|
|
927
|
+
}
|
|
928
|
+
Object.assign(headers, extraHeaders);
|
|
929
|
+
if (model.provider === "kimi-code") {
|
|
930
|
+
headers = { ...getKimiCommonHeaders(), ...headers };
|
|
931
|
+
}
|
|
932
|
+
let copilotPremiumRequests: number | undefined;
|
|
933
|
+
|
|
934
|
+
let baseUrl = model.baseUrl;
|
|
935
|
+
if (model.provider === "github-copilot") {
|
|
936
|
+
apiKey = parseGitHubCopilotApiKey(rawApiKey).accessToken;
|
|
937
|
+
const hasImages = hasCopilotVisionInput(context.messages);
|
|
938
|
+
const copilot = buildCopilotDynamicHeaders({
|
|
939
|
+
messages: context.messages,
|
|
940
|
+
hasImages,
|
|
941
|
+
premiumMultiplier: model.premiumMultiplier,
|
|
942
|
+
headers,
|
|
943
|
+
initiatorOverride,
|
|
944
|
+
});
|
|
945
|
+
Object.assign(headers, copilot.headers);
|
|
946
|
+
copilotPremiumRequests = copilot.premiumRequests;
|
|
947
|
+
baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
|
|
948
|
+
}
|
|
949
|
+
// Azure OpenAI requires /deployments/{id}/chat/completions?api-version=YYYY-MM-DD.
|
|
950
|
+
// The generic openai-completions path adds neither, producing silent 404s.
|
|
951
|
+
let azureDefaultQuery: Record<string, string> | undefined;
|
|
952
|
+
if (baseUrl?.includes(".openai.azure.com")) {
|
|
953
|
+
const apiVersion = $env.AZURE_OPENAI_API_VERSION || "2024-10-21";
|
|
954
|
+
if (!baseUrl.includes("/deployments/")) {
|
|
955
|
+
baseUrl = `${baseUrl}/deployments/${model.id}`;
|
|
956
|
+
}
|
|
957
|
+
azureDefaultQuery = { "api-version": apiVersion };
|
|
958
|
+
}
|
|
959
|
+
let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
|
|
960
|
+
const baseFetch = fetchOverride ?? fetch;
|
|
961
|
+
const wrappedFetch = Object.assign(
|
|
962
|
+
async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
|
|
963
|
+
const response = await baseFetch(input, init);
|
|
964
|
+
if (response.ok) {
|
|
965
|
+
capturedErrorResponse = undefined;
|
|
966
|
+
return response;
|
|
967
|
+
}
|
|
968
|
+
let bodyText: string | undefined;
|
|
969
|
+
let bodyJson: unknown;
|
|
970
|
+
try {
|
|
971
|
+
bodyText = await response.clone().text();
|
|
972
|
+
if (bodyText.trim().length > 0) {
|
|
973
|
+
try {
|
|
974
|
+
bodyJson = JSON.parse(bodyText);
|
|
975
|
+
} catch {}
|
|
976
|
+
}
|
|
977
|
+
} catch {}
|
|
978
|
+
capturedErrorResponse = {
|
|
979
|
+
status: response.status,
|
|
980
|
+
headers: response.headers,
|
|
981
|
+
bodyText,
|
|
982
|
+
bodyJson,
|
|
983
|
+
};
|
|
984
|
+
return response;
|
|
985
|
+
},
|
|
986
|
+
baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
|
|
987
|
+
);
|
|
988
|
+
const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;
|
|
989
|
+
// Bound HTTP request timeout to roughly the first-event watchdog window.
|
|
990
|
+
// The OpenAI SDK's default is 10 minutes per attempt × `maxRetries`, which
|
|
991
|
+
// turns a stalled-before-headers fetch into a multi-minute hang invisible
|
|
992
|
+
// to the agent loop (the iterator watchdog only arms AFTER `create()` returns).
|
|
993
|
+
// Using the first-event timeout keeps both layers aligned: the SDK gives up
|
|
994
|
+
// before the agent watchdog would have, surfacing a real error to the catch
|
|
995
|
+
// in the IIFE.
|
|
996
|
+
// A caller may raise `StreamOptions.streamFirstEventTimeoutMs` for a slow-
|
|
997
|
+
// before-headers provider; respect it so the SDK doesn't give up before the
|
|
998
|
+
// wrapping watchdog arms. An explicit `0` disables the first-event watchdog,
|
|
999
|
+
// and the SDK treats `timeout: 0` as an immediate timeout, so do not pass a
|
|
1000
|
+
// request timeout in that case.
|
|
1001
|
+
const envSdkTimeoutMs = getStreamFirstEventTimeoutMs(getOpenAIStreamIdleTimeoutMs());
|
|
1002
|
+
const sdkTimeoutMs =
|
|
1003
|
+
streamFirstEventTimeoutOverride === 0
|
|
1004
|
+
? undefined
|
|
1005
|
+
: streamFirstEventTimeoutOverride !== undefined
|
|
1006
|
+
? Math.max(envSdkTimeoutMs ?? 0, streamFirstEventTimeoutOverride)
|
|
1007
|
+
: envSdkTimeoutMs;
|
|
1008
|
+
return {
|
|
1009
|
+
client: new OpenAI({
|
|
1010
|
+
apiKey,
|
|
1011
|
+
baseURL: baseUrl,
|
|
1012
|
+
dangerouslyAllowBrowser: true,
|
|
1013
|
+
maxRetries: 5,
|
|
1014
|
+
defaultHeaders: headers,
|
|
1015
|
+
defaultQuery: azureDefaultQuery,
|
|
1016
|
+
fetch: debugFetch,
|
|
1017
|
+
...(sdkTimeoutMs !== undefined ? { timeout: sdkTimeoutMs } : {}),
|
|
1018
|
+
}),
|
|
1019
|
+
copilotPremiumRequests,
|
|
1020
|
+
baseUrl,
|
|
1021
|
+
requestHeaders: headers,
|
|
1022
|
+
getCapturedErrorResponse: () => capturedErrorResponse,
|
|
1023
|
+
clearCapturedErrorResponse: () => {
|
|
1024
|
+
capturedErrorResponse = undefined;
|
|
1025
|
+
},
|
|
1026
|
+
};
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
/**
 * Assembles the streaming chat-completions request body for `model`.
 *
 * The body is built in a fixed order: core fields, streaming/store flags,
 * token limit, sampling options, service tier, tools, tool choice, reasoning
 * controls, gateway routing, and finally `compat.extraBody`, which is merged
 * last and therefore overrides anything set before it.
 *
 * @param model - Model descriptor; its provider, id, and base URL drive the provider-specific branches.
 * @param context - Conversation context supplying messages and (optionally) tools.
 * @param options - Per-request options; every field is optional.
 * @param resolvedBaseUrl - Base URL forwarded to `getCompat` when resolving compat flags.
 * @param toolStrictModeOverride - Forwarded to `convertTools`.
 * @returns The request params plus the strict mode reported by `convertTools` (`"none"` when no tools were converted).
 * @throws Error when reasoning must be approximated with the lowest effort but the model lists no supported efforts.
 */
function buildParams(
	model: Model<"openai-completions">,
	context: Context,
	options: OpenAICompletionsOptions | undefined,
	resolvedBaseUrl?: string,
	toolStrictModeOverride?: ToolStrictModeOverride,
): { params: OpenAICompletionsParams; toolStrictMode: AppliedToolStrictMode } {
	const compat = getCompat(model, resolvedBaseUrl);
	const messages = convertMessages(model, context, compat);
	maybeAddOpenRouterAnthropicCacheControl(model, messages);

	// Kimi (also when reached through OpenRouter or Fireworks router-form ids
	// such as `accounts/fireworks/routers/kimi-*`) derives its TPM rate limit
	// from max_tokens rather than from actual output, and the Kimi K2 guidance
	// (https://docs.fireworks.ai/models/kimi-k2) asks for `max_tokens` on every
	// call because the family can otherwise produce very long reasoning traces.
	// So for Kimi ids we fall back to the model's own limit when the caller set
	// none. The direct kimi-code provider is served by kimi.ts, not this path.
	const isKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
	const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);

	// Fireworks and Firepass ids go through their wire-id converters.
	let wireModelId: string = model.id;
	if (model.provider === "fireworks") {
		wireModelId = toFireworksWireModelId(model.id);
	} else if (model.provider === "firepass") {
		wireModelId = toFirepassWireModelId(model.id);
	}

	const params: OpenAICompletionsParams = {
		model: wireModelId,
		messages,
		stream: true,
	};
	let toolStrictMode: AppliedToolStrictMode = "none";

	// Usage reporting is on unless the compat layer explicitly says `false`.
	if (compat.supportsUsageInStreaming !== false) {
		params.stream_options = { include_usage: true };
	}
	if (compat.supportsStore) {
		params.store = false;
	}

	if (effectiveMaxTokens) {
		if (compat.maxTokensField !== "max_tokens") {
			params.max_completion_tokens = effectiveMaxTokens;
		} else {
			params.max_tokens = effectiveMaxTokens;
		}
	}

	// Sampling options are forwarded only when the caller set them.
	if (options?.temperature !== undefined) params.temperature = options.temperature;
	if (options?.topP !== undefined) params.top_p = options.topP;
	if (options?.topK !== undefined) params.top_k = options.topK;
	if (options?.minP !== undefined) params.min_p = options.minP;
	if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
	if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;

	const stopSequences = options?.stopSequences;
	if (stopSequences?.length) {
		// A single sequence is sent as a bare string; otherwise at most four are sent.
		params.stop = stopSequences.length === 1 ? stopSequences[0] : stopSequences.slice(0, 4);
	}
	if (options?.frequencyPenalty !== undefined) params.frequency_penalty = options.frequencyPenalty;

	const requestedServiceTier = options?.serviceTier;
	if (shouldSendServiceTier(requestedServiceTier, model.provider)) {
		const resolvedTier = resolveServiceTier(requestedServiceTier, model.provider);
		switch (resolvedTier) {
			case "flex":
			case "scale":
			case "priority":
				params.service_tier = resolvedTier;
				break;
			default:
				break;
		}
	}

	if (context.tools?.length) {
		const converted = convertTools(context.tools, compat, toolStrictModeOverride);
		params.tools = converted.tools;
		toolStrictMode = converted.toolStrictMode;
	} else if (context.tools === undefined && hasToolHistory(context.messages)) {
		// Anthropic behind LiteLLM/a proxy insists on a `tools` param whenever the
		// history holds tool_calls/tool_results, even if none are offered now.
		// The sentinel is injected only when tools were left unspecified
		// (`undefined`). An explicit `[]` is the caller opting out for this turn
		// (/btw and IRC background replies via AgentSession.runEphemeralTurn), and
		// we emit nothing so LiteLLM → Bedrock never sees an empty `toolConfig`.
		params.tools = [];
	}

	if (options?.toolChoice && compat.supportsToolChoice) {
		params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
	}

	// `tool_choice: "none"` without any tools is redundant, and LiteLLM → Bedrock
	// turns it into a `toolConfig` block whose `tools` must be non-empty once the
	// conversation contains `toolUse`/`toolResult` content. Ephemeral side-channel
	// turns (`context.tools = []` plus `toolChoice: "none"`, see
	// packages/coding-agent/src/session/agent-session.ts) hit exactly this, so the
	// directive is dropped whenever the resolved tool list is missing or empty.
	const hasUsableTools = Array.isArray(params.tools) && params.tools.length > 0;
	if (params.tool_choice === "none" && !hasUsableTools) {
		delete params.tool_choice;
	}

	const requestedReasoning = options?.reasoning;
	const reasoningDisabled = options?.disableReasoning;
	const thinkingEnabled = !!requestedReasoning && !reasoningDisabled;

	// Reasoning controls are never sent to github-copilot nor for models that
	// are not flagged as reasoning-capable.
	if (model.provider !== "github-copilot" && model.reasoning) {
		switch (compat.thinkingFormat) {
			case "zai":
				// Z.ai thinking is binary and defaults to enabled, so "disabled"
				// has to be sent explicitly.
				params.thinking = { type: thinkingEnabled ? "enabled" : "disabled" };
				break;
			case "qwen":
				// Qwen takes a top-level boolean.
				params.enable_thinking = thinkingEnabled;
				break;
			case "qwen-chat-template":
				params.chat_template_kwargs = { enable_thinking: thinkingEnabled };
				break;
			case "openrouter": {
				// OpenRouter normalizes reasoning through a nested `reasoning`
				// object. Left unspecified it defaults reasoning models to
				// thinking, which can silently eat the whole output budget of a
				// small `max_tokens` request (e.g. title generation), so
				// `disableReasoning` is honoured with an explicit opt-out.
				const openRouterParams = params as typeof params & {
					reasoning?: { effort?: string } | { enabled: false };
				};
				if (reasoningDisabled) {
					openRouterParams.reasoning = { enabled: false };
				} else if (requestedReasoning) {
					openRouterParams.reasoning = {
						effort: mapReasoningEffort(requestedReasoning, compat.reasoningEffortMap),
					};
				}
				break;
			}
			default:
				if (!compat.supportsReasoningEffort) break;
				if (requestedReasoning && !reasoningDisabled) {
					// OpenAI-style reasoning_effort.
					params.reasoning_effort = mapReasoningEffort(requestedReasoning, compat.reasoningEffortMap) as Effort;
				} else if (reasoningDisabled && !requestedReasoning) {
					// Generic effort endpoints have no real off switch; the model's
					// lowest supported effort is the closest approximation.
					const lowestEffort = getSupportedEfforts(model)[0];
					if (lowestEffort === undefined) {
						throw new Error(`Model ${model.provider}/${model.id} has no supported reasoning efforts`);
					}
					params.reasoning_effort = mapReasoningEffort(lowestEffort, compat.reasoningEffortMap) as Effort;
				}
				break;
		}
	}

	// Two compat flags suppress reasoning for a single request while keeping the
	// tool-selection contract intact:
	//  - any tool_choice: DeepSeek reasoning models accept tools/tool_choice but
	//    reject that control field while thinking is enabled;
	//  - forced tool_choice: backends like Kimi answer 400 with
	//    `tool_choice 'specified' is incompatible with thinking enabled`.
	const suppressForAnyToolChoice = compat.disableReasoningOnToolChoice && params.tool_choice !== undefined;
	const suppressForForcedToolChoice =
		compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice);
	if (suppressForAnyToolChoice || suppressForForcedToolChoice) {
		delete params.reasoning_effort;
		delete params.reasoning;
	}
	if (suppressForForcedToolChoice && compat.thinkingFormat === "zai") {
		params.thinking = { type: "disabled" };
	}

	// OpenRouter provider routing preferences.
	if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
		params.provider = compat.openRouterRouting;
	}

	// Vercel AI Gateway provider routing preferences.
	if (model.baseUrl.includes("ai-gateway.vercel.sh") && model.compat?.vercelGatewayRouting) {
		const gatewayRouting = model.compat.vercelGatewayRouting;
		if (gatewayRouting.only || gatewayRouting.order) {
			const gateway: Record<string, string[]> = {};
			if (gatewayRouting.only) gateway.only = gatewayRouting.only;
			if (gatewayRouting.order) gateway.order = gatewayRouting.order;
			params.providerOptions = { gateway };
		}
	}

	// Merged last on purpose: extraBody overrides any field set above.
	if (compat.extraBody) {
		Object.assign(params, compat.extraBody);
	}

	return { params, toolStrictMode };
}
|
|
1238
|
+
|
|
1239
|
+
/**
 * Reads `key` from `value` and returns it only when it is a number.
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property when `typeof` reports `"number"`, otherwise `undefined`.
 */
function getOptionalNumberProperty(value: object, key: string): number | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (typeof candidate !== "number") {
		return undefined;
	}
	return candidate;
}
|
|
1243
|
+
|
|
1244
|
+
/**
 * Reads `key` from `value` and returns it only when it is a non-null object.
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property when `typeof` reports `"object"` and it is not `null`
 *   (arrays therefore qualify), otherwise `undefined`.
 */
function getOptionalObjectProperty(value: object, key: string): object | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (typeof candidate !== "object" || candidate === null) {
		return undefined;
	}
	return candidate;
}
|
|
1248
|
+
|
|
1249
|
+
/**
 * Looks for a `usage` object attached directly to a streamed choice.
 *
 * @param choice - A choice taken from a chat-completion chunk.
 * @returns The choice's `usage` property when it is a non-null object, otherwise `undefined`.
 */
function getChoiceUsage(choice: ChatCompletionChunk.Choice): object | undefined {
	const choiceUsage = getOptionalObjectProperty(choice, "usage");
	return choiceUsage;
}
|
|
1252
|
+
|
|
1253
|
+
/**
 * Converts a raw usage payload from a chat-completion chunk into the
 * `AssistantMessage["usage"]` shape and fills in its cost via `calculateCost`.
 *
 * Missing or non-numeric counters are treated as 0.
 *
 * @param rawUsage - Usage object as received from the provider.
 * @param model - Model passed to `calculateCost`.
 * @param premiumRequests - Included in the result only when not `undefined`.
 * @returns The normalized usage record.
 */
export function parseChunkUsage(
	rawUsage: object,
	model: Model<"openai-completions">,
	premiumRequests: number | undefined,
): AssistantMessage["usage"] {
	// Numeric read that tolerates an absent container object.
	const numberFrom = (source: object | undefined, key: string): number | undefined =>
		source === undefined ? undefined : getOptionalNumberProperty(source, key);

	const promptDetails = getOptionalObjectProperty(rawUsage, "prompt_tokens_details");
	const completionDetails = getOptionalObjectProperty(rawUsage, "completion_tokens_details");

	// A top-level `cached_tokens` wins over the one nested in the prompt details.
	const cacheRead = numberFrom(rawUsage, "cached_tokens") ?? numberFrom(promptDetails, "cached_tokens") ?? 0;

	// OpenRouter reports cache writes as `prompt_tokens_details.cache_write_tokens`
	// and counts them inside `prompt_tokens`. They must be subtracted or they
	// leak into `input` (e.g. GLM/Anthropic via OpenRouter on a fresh cache).
	// Ref: https://openrouter.ai/docs/guides/best-practices/prompt-caching
	const cacheWrite = numberFrom(promptDetails, "cache_write_tokens") ?? 0;

	const reasoningTokens = numberFrom(completionDetails, "reasoning_tokens") ?? 0;
	const promptTokens = numberFrom(rawUsage, "prompt_tokens") ?? 0;

	// Per OpenAI's CompletionUsage spec `reasoning_tokens` is already part of
	// `completion_tokens` (the total billed output); adding it would double-count.
	const output = numberFrom(rawUsage, "completion_tokens") ?? 0;

	// Clamp at zero in case the cache counters exceed the reported prompt total.
	const input = Math.max(0, promptTokens - cacheRead - cacheWrite);

	const usage: AssistantMessage["usage"] = {
		input,
		output,
		cacheRead,
		cacheWrite,
		totalTokens: input + output + cacheRead + cacheWrite,
		...(reasoningTokens > 0 ? { reasoningTokens } : {}),
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
		...(premiumRequests !== undefined ? { premiumRequests } : {}),
	};
	calculateCost(model, usage);
	return usage;
}
|
|
1292
|
+
|
|
1293
|
+
/**
 * Translates a reasoning effort through a provider-specific override map.
 *
 * @param effort - The effort level requested by the caller.
 * @param reasoningEffortMap - Optional per-effort overrides.
 * @returns The mapped value, or `effort` itself when the map has no (non-nullish) entry for it.
 */
function mapReasoningEffort(
	effort: NonNullable<OpenAICompletionsOptions["reasoning"]>,
	reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoning"]>, string>>,
): string {
	const override = reasoningEffortMap[effort];
	return override == null ? effort : override;
}
|
|
1299
|
+
|
|
1300
|
+
/**
 * Marks one text part with an ephemeral `cache_control` breakpoint when the
 * request targets an Anthropic model through OpenRouter.
 *
 * Nothing happens unless `model.provider` is `"openrouter"` and `model.id`
 * starts with `"anthropic/"`. Otherwise the messages are scanned from the end
 * and the first user/assistant/developer message that carries non-blank text
 * receives the breakpoint:
 *  - string content is rewritten in place into a single text part carrying
 *    `cache_control`;
 *  - array content gets `cache_control` on its last non-blank text part.
 *
 * Blank (empty or whitespace-only) text is skipped rather than tagged: a
 * breakpoint on an empty text block is rejected by Anthropic, so the scan
 * keeps walking backwards until it reaches real text.
 *
 * @param model - Model descriptor used to decide whether the breakpoint applies.
 * @param messages - Wire messages; mutated in place.
 */
function maybeAddOpenRouterAnthropicCacheControl(
	model: Model<"openai-completions">,
	messages: ChatCompletionMessageParam[],
): void {
	if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;

	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg.role !== "user" && msg.role !== "assistant" && msg.role !== "developer") continue;

		const content = msg.content;
		if (typeof content === "string") {
			// An empty string is not cacheable text; look at earlier messages.
			if (content.trim().length === 0) continue;
			msg.content = [
				Object.assign({ type: "text" as const, text: content }, { cache_control: { type: "ephemeral" } }),
			];
			return;
		}

		// null/undefined content (e.g. a tool-call-only assistant turn) has no text.
		if (!Array.isArray(content)) continue;

		// Tag the last text part that actually contains text.
		for (let j = content.length - 1; j >= 0; j--) {
			const part = content[j];
			if (part?.type === "text" && typeof part.text === "string" && part.text.trim().length > 0) {
				Object.assign(part, { cache_control: { type: "ephemeral" } });
				return;
			}
		}
	}
}
|
|
1332
|
+
|
|
1333
|
+
export function convertMessages(
|
|
1334
|
+
model: Model<"openai-completions">,
|
|
1335
|
+
context: Context,
|
|
1336
|
+
compat: ResolvedOpenAICompat,
|
|
1337
|
+
): ChatCompletionMessageParam[] {
|
|
1338
|
+
const params: ChatCompletionMessageParam[] = [];
|
|
1339
|
+
|
|
1340
|
+
const normalizeToolCallId = (id: string): string => {
|
|
1341
|
+
if (compat.requiresMistralToolIds) return normalizeMistralToolId(id, true);
|
|
1342
|
+
|
|
1343
|
+
// Handle pipe-separated IDs from OpenAI Responses API
|
|
1344
|
+
// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)
|
|
1345
|
+
// These come from providers like github-copilot, OpenAI code provider, opencode
|
|
1346
|
+
// Extract just the call_id part and normalize it
|
|
1347
|
+
if (id.includes("|")) {
|
|
1348
|
+
const [callId] = id.split("|");
|
|
1349
|
+
// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
|
|
1350
|
+
return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
|
|
1351
|
+
}
|
|
1352
|
+
|
|
1353
|
+
if (model.provider === "openai") return id.length > 40 ? id.slice(0, 40) : id;
|
|
1354
|
+
return id;
|
|
1355
|
+
};
|
|
1356
|
+
const transformedMessages = transformMessages(context.messages, model, id => normalizeToolCallId(id));
|
|
1357
|
+
|
|
1358
|
+
const remappedToolCallIds = new Map<string, string[]>();
|
|
1359
|
+
let generatedToolCallIdCounter = 0;
|
|
1360
|
+
|
|
1361
|
+
const generateFallbackToolCallId = (seed: string): string => {
|
|
1362
|
+
generatedToolCallIdCounter += 1;
|
|
1363
|
+
const hash = Bun.hash(`${model.provider}:${model.id}:${seed}:${generatedToolCallIdCounter}`).toString(36);
|
|
1364
|
+
return `call_${hash}`;
|
|
1365
|
+
};
|
|
1366
|
+
|
|
1367
|
+
const rememberToolCallId = (originalId: string, normalizedId: string): void => {
|
|
1368
|
+
const queue = remappedToolCallIds.get(originalId);
|
|
1369
|
+
if (queue) {
|
|
1370
|
+
queue.push(normalizedId);
|
|
1371
|
+
return;
|
|
1372
|
+
}
|
|
1373
|
+
remappedToolCallIds.set(originalId, [normalizedId]);
|
|
1374
|
+
};
|
|
1375
|
+
|
|
1376
|
+
const consumeToolCallId = (originalId: string): string | null => {
|
|
1377
|
+
const queue = remappedToolCallIds.get(originalId);
|
|
1378
|
+
if (!queue || queue.length === 0) return null;
|
|
1379
|
+
const nextId = queue.shift() ?? null;
|
|
1380
|
+
if (queue.length === 0) remappedToolCallIds.delete(originalId);
|
|
1381
|
+
return nextId;
|
|
1382
|
+
};
|
|
1383
|
+
|
|
1384
|
+
const ensureToolCallId = (rawId: string, seed: string): string => {
|
|
1385
|
+
const normalized = normalizeToolCallId(rawId);
|
|
1386
|
+
if (normalized.trim().length > 0) return normalized;
|
|
1387
|
+
return generateFallbackToolCallId(seed);
|
|
1388
|
+
};
|
|
1389
|
+
|
|
1390
|
+
const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
|
|
1391
|
+
if (systemPrompts.length > 0) {
|
|
1392
|
+
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
|
1393
|
+
const role = useDeveloperRole ? "developer" : "system";
|
|
1394
|
+
// Default to one block per ordered system prompt so the leading prefix
|
|
1395
|
+
// stays byte-identical between turns and the provider's KV cache can
|
|
1396
|
+
// reuse it. Hosts whose chat templates reject follow-up system messages
|
|
1397
|
+
// (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
|
|
1398
|
+
// via `compat.supportsMultipleSystemMessages = false`; in that mode we
|
|
1399
|
+
// coalesce into a single message joined by `\n\n`.
|
|
1400
|
+
if (compat.supportsMultipleSystemMessages) {
|
|
1401
|
+
for (const systemPrompt of systemPrompts) {
|
|
1402
|
+
params.push({ role, content: systemPrompt });
|
|
1403
|
+
}
|
|
1404
|
+
} else {
|
|
1405
|
+
params.push({ role, content: systemPrompts.join("\n\n") });
|
|
1406
|
+
}
|
|
1407
|
+
}
|
|
1408
|
+
|
|
1409
|
+
let lastRole: string | null = null;
|
|
1410
|
+
|
|
1411
|
+
for (let i = 0; i < transformedMessages.length; i++) {
|
|
1412
|
+
const msg = transformedMessages[i];
|
|
1413
|
+
// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
|
|
1414
|
+
// Insert a synthetic assistant message to bridge the gap
|
|
1415
|
+
if (
|
|
1416
|
+
compat.requiresAssistantAfterToolResult &&
|
|
1417
|
+
lastRole === "toolResult" &&
|
|
1418
|
+
(msg.role === "user" || msg.role === "developer")
|
|
1419
|
+
) {
|
|
1420
|
+
params.push({
|
|
1421
|
+
role: "assistant",
|
|
1422
|
+
content: "I have processed the tool results.",
|
|
1423
|
+
});
|
|
1424
|
+
}
|
|
1425
|
+
|
|
1426
|
+
const devAsUser = !compat.supportsDeveloperRole;
|
|
1427
|
+
if (msg.role === "user" || msg.role === "developer") {
|
|
1428
|
+
const role = !devAsUser && msg.role === "developer" ? "developer" : "user";
|
|
1429
|
+
if (typeof msg.content === "string") {
|
|
1430
|
+
const text = msg.content.toWellFormed();
|
|
1431
|
+
if (text.trim().length === 0) continue;
|
|
1432
|
+
params.push({
|
|
1433
|
+
role: role,
|
|
1434
|
+
content: text,
|
|
1435
|
+
});
|
|
1436
|
+
} else {
|
|
1437
|
+
const supportsImages = model.input.includes("image");
|
|
1438
|
+
const content: ChatCompletionContentPart[] = [];
|
|
1439
|
+
let omittedImages = false;
|
|
1440
|
+
for (const item of msg.content) {
|
|
1441
|
+
if (item.type === "text") {
|
|
1442
|
+
const text = item.text.toWellFormed();
|
|
1443
|
+
if (text.trim().length === 0) continue;
|
|
1444
|
+
content.push({
|
|
1445
|
+
type: "text",
|
|
1446
|
+
text,
|
|
1447
|
+
} satisfies ChatCompletionContentPartText);
|
|
1448
|
+
} else if (supportsImages) {
|
|
1449
|
+
content.push({
|
|
1450
|
+
type: "image_url",
|
|
1451
|
+
image_url: {
|
|
1452
|
+
url: `data:${item.mimeType};base64,${item.data}`,
|
|
1453
|
+
},
|
|
1454
|
+
} satisfies ChatCompletionContentPartImage);
|
|
1455
|
+
} else {
|
|
1456
|
+
omittedImages = true;
|
|
1457
|
+
}
|
|
1458
|
+
}
|
|
1459
|
+
if (omittedImages) {
|
|
1460
|
+
content.push({
|
|
1461
|
+
type: "text",
|
|
1462
|
+
text: NON_VISION_IMAGE_PLACEHOLDER,
|
|
1463
|
+
} satisfies ChatCompletionContentPartText);
|
|
1464
|
+
}
|
|
1465
|
+
if (content.length === 0) continue;
|
|
1466
|
+
params.push({
|
|
1467
|
+
role: "user",
|
|
1468
|
+
content,
|
|
1469
|
+
});
|
|
1470
|
+
}
|
|
1471
|
+
} else if (msg.role === "assistant") {
|
|
1472
|
+
// Some providers (e.g. Mistral) don't accept null content, use empty string instead
|
|
1473
|
+
const assistantMsg: ChatCompletionAssistantMessageParam = {
|
|
1474
|
+
role: "assistant",
|
|
1475
|
+
content: compat.requiresAssistantAfterToolResult ? "" : null,
|
|
1476
|
+
};
|
|
1477
|
+
|
|
1478
|
+
const textBlocks = msg.content.filter(b => b.type === "text") as TextContent[];
|
|
1479
|
+
// Filter out empty text blocks to avoid API validation errors
|
|
1480
|
+
const nonEmptyTextBlocks = textBlocks.filter(b => b.text && b.text.trim().length > 0);
|
|
1481
|
+
if (nonEmptyTextBlocks.length > 0) {
|
|
1482
|
+
// Always send assistant content as a plain string. Some OpenAI-compatible
|
|
1483
|
+
// backends mirror array-of-text-block payloads back to the model literally,
|
|
1484
|
+
// causing recursive nested content in subsequent turns.
|
|
1485
|
+
assistantMsg.content = nonEmptyTextBlocks.map(b => b.text.toWellFormed()).join("");
|
|
1486
|
+
}
|
|
1487
|
+
|
|
1488
|
+
// Handle thinking blocks
|
|
1489
|
+
const thinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
|
|
1490
|
+
// Filter out empty thinking blocks to avoid API validation errors
|
|
1491
|
+
const nonEmptyThinkingBlocks = thinkingBlocks.filter(b => b.thinking && b.thinking.trim().length > 0);
|
|
1492
|
+
if (nonEmptyThinkingBlocks.length > 0) {
|
|
1493
|
+
if (compat.requiresThinkingAsText) {
|
|
1494
|
+
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
|
1495
|
+
const thinkingText = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n\n");
|
|
1496
|
+
const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
|
|
1497
|
+
if (textContent) {
|
|
1498
|
+
textContent.unshift({ type: "text", text: thinkingText });
|
|
1499
|
+
} else {
|
|
1500
|
+
assistantMsg.content = [{ type: "text", text: thinkingText }];
|
|
1501
|
+
}
|
|
1502
|
+
} else if (compat.requiresReasoningContentForToolCalls) {
|
|
1503
|
+
// Use the signature from the first thinking block if available, but only for
|
|
1504
|
+
// recognized OpenAI-compat reasoning field names. Opaque signatures from other
|
|
1505
|
+
// providers (Anthropic encrypted, OpenAI Responses JSON) are not valid property names.
|
|
1506
|
+
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
|
|
1507
|
+
const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
1508
|
+
if (signature && recognizedFields.includes(signature)) {
|
|
1509
|
+
(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
|
|
1510
|
+
}
|
|
1511
|
+
}
|
|
1512
|
+
}
|
|
1513
|
+
|
|
1514
|
+
if (compat.thinkingFormat === "openai" && compat.requiresReasoningContentForToolCalls) {
|
|
1515
|
+
const streamedReasoningField = nonEmptyThinkingBlocks[0]?.thinkingSignature;
|
|
1516
|
+
const reasoningField =
|
|
1517
|
+
streamedReasoningField === "reasoning_content" ||
|
|
1518
|
+
streamedReasoningField === "reasoning" ||
|
|
1519
|
+
streamedReasoningField === "reasoning_text"
|
|
1520
|
+
? streamedReasoningField
|
|
1521
|
+
: (compat.reasoningContentField ?? "reasoning_content");
|
|
1522
|
+
const reasoningContent = (assistantMsg as any)[reasoningField];
|
|
1523
|
+
if (!reasoningContent) {
|
|
1524
|
+
const reasoning = (assistantMsg as any).reasoning;
|
|
1525
|
+
const reasoningText = (assistantMsg as any).reasoning_text;
|
|
1526
|
+
if (reasoning && reasoningField !== "reasoning") {
|
|
1527
|
+
(assistantMsg as any)[reasoningField] = reasoning;
|
|
1528
|
+
} else if (reasoningText && reasoningField !== "reasoning_text") {
|
|
1529
|
+
(assistantMsg as any)[reasoningField] = reasoningText;
|
|
1530
|
+
} else if (nonEmptyThinkingBlocks.length > 0) {
|
|
1531
|
+
(assistantMsg as any)[reasoningField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1536
|
+
const toolCalls = msg.content.filter(b => b.type === "toolCall") as ToolCall[];
|
|
1537
|
+
// Replay reasoning_content on assistant turns for backends that validate
|
|
1538
|
+
// thinking-mode history. DeepSeek V4 requires reasoning_content on EVERY
|
|
1539
|
+
// assistant turn once any prior turn included it — not just tool-call turns.
|
|
1540
|
+
// The replay logic has three tiers:
|
|
1541
|
+
// 1. Recover from thinking blocks with valid signatures (covers same-model replay
|
|
1542
|
+
// where nonEmptyThinkingBlocks may have filtered out empty-text blocks)
|
|
1543
|
+
// 2. For providers that require the field but returned no reasoning at all
|
|
1544
|
+
// (e.g. proxy-stripped reasoning_content), emit an empty string
|
|
1545
|
+
// 3. For providers that accept synthetic placeholders (Kimi, OpenRouter), emit "."
|
|
1546
|
+
// DeepSeek V4 rejects synthetic "." placeholders — it validates the exact value —
|
|
1547
|
+
// so the allowsSyntheticReasoningContentForToolCalls flag controls tier 3.
|
|
1548
|
+
const canUseSyntheticReasoningContent =
|
|
1549
|
+
compat.requiresReasoningContentForToolCalls &&
|
|
1550
|
+
compat.allowsSyntheticReasoningContentForToolCalls &&
|
|
1551
|
+
(compat.thinkingFormat === "openai" ||
|
|
1552
|
+
compat.thinkingFormat === "openrouter" ||
|
|
1553
|
+
compat.thinkingFormat === "zai");
|
|
1554
|
+
// DeepSeek reasoning models require reasoning_content on ALL assistant turns,
|
|
1555
|
+
// not just tool-call turns. Other providers (Kimi, OpenRouter) only require it
|
|
1556
|
+
// on tool-call turns.
|
|
1557
|
+
const needsReasoningOnAllTurns =
|
|
1558
|
+
compat.requiresReasoningContentForToolCalls && !compat.allowsSyntheticReasoningContentForToolCalls;
|
|
1559
|
+
const needsReasoningField = needsReasoningOnAllTurns || toolCalls.length > 0;
|
|
1560
|
+
let hasReasoningField =
|
|
1561
|
+
(assistantMsg as any).reasoning_content !== undefined ||
|
|
1562
|
+
(assistantMsg as any).reasoning !== undefined ||
|
|
1563
|
+
(assistantMsg as any).reasoning_text !== undefined;
|
|
1564
|
+
// Tier 1: Recover reasoning_content from ALL thinking blocks (including empty-text
|
|
1565
|
+
// ones) when the provider requires exact replay and rejects synthetic placeholders.
|
|
1566
|
+
// This covers the case where thinking blocks have valid signatures but were excluded
|
|
1567
|
+
// by the nonEmptyThinkingBlocks filter above, or where thinking text is empty but
|
|
1568
|
+
// the signature identifies the correct field name for replay.
|
|
1569
|
+
// Only recognized OpenAI-compat reasoning field names qualify — opaque signatures
|
|
1570
|
+
// from other providers (Anthropic encrypted, OpenAI Responses JSON, etc.) are not
|
|
1571
|
+
// valid property names for the wire message.
|
|
1572
|
+
if (
|
|
1573
|
+
needsReasoningField &&
|
|
1574
|
+
!hasReasoningField &&
|
|
1575
|
+
compat.requiresReasoningContentForToolCalls &&
|
|
1576
|
+
!compat.allowsSyntheticReasoningContentForToolCalls
|
|
1577
|
+
) {
|
|
1578
|
+
const allThinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
|
|
1579
|
+
if (allThinkingBlocks.length > 0) {
|
|
1580
|
+
const signature = allThinkingBlocks[0].thinkingSignature;
|
|
1581
|
+
const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
1582
|
+
if (signature && recognizedFields.includes(signature)) {
|
|
1583
|
+
(assistantMsg as any)[signature] = allThinkingBlocks.map(b => b.thinking).join("\n");
|
|
1584
|
+
hasReasoningField = true;
|
|
1585
|
+
}
|
|
1586
|
+
}
|
|
1587
|
+
}
|
|
1588
|
+
// Tier 2: When the provider requires reasoning_content but there are genuinely no
|
|
1589
|
+
// thinking blocks at all (e.g. proxy stripped reasoning_content from the response),
|
|
1590
|
+
// emit an empty string. The field must be present; an empty string is the most honest
|
|
1591
|
+
// representation of "no reasoning was captured."
|
|
1592
|
+
if (
|
|
1593
|
+
needsReasoningField &&
|
|
1594
|
+
!hasReasoningField &&
|
|
1595
|
+
compat.requiresReasoningContentForToolCalls &&
|
|
1596
|
+
!compat.allowsSyntheticReasoningContentForToolCalls
|
|
1597
|
+
) {
|
|
1598
|
+
const reasoningField = compat.reasoningContentField ?? "reasoning_content";
|
|
1599
|
+
(assistantMsg as any)[reasoningField] = "";
|
|
1600
|
+
hasReasoningField = true;
|
|
1601
|
+
}
|
|
1602
|
+
// Tier 3: For providers that accept synthetic placeholders (Kimi, OpenRouter).
|
|
1603
|
+
if (toolCalls.length > 0 && canUseSyntheticReasoningContent && !hasReasoningField) {
|
|
1604
|
+
const reasoningField = compat.reasoningContentField ?? "reasoning_content";
|
|
1605
|
+
(assistantMsg as any)[reasoningField] = ".";
|
|
1606
|
+
hasReasoningField = true;
|
|
1607
|
+
}
|
|
1608
|
+
if (toolCalls.length > 0) {
|
|
1609
|
+
assistantMsg.tool_calls = toolCalls.map((tc, toolCallIndex) => {
|
|
1610
|
+
const toolCallId = ensureToolCallId(tc.id, `${i}:${toolCallIndex}:${tc.name}`);
|
|
1611
|
+
rememberToolCallId(tc.id, toolCallId);
|
|
1612
|
+
return {
|
|
1613
|
+
id: normalizeMistralToolId(toolCallId, compat.requiresMistralToolIds),
|
|
1614
|
+
type: "function" as const,
|
|
1615
|
+
function: {
|
|
1616
|
+
name: tc.name,
|
|
1617
|
+
arguments: serializeToolArguments(tc.arguments),
|
|
1618
|
+
},
|
|
1619
|
+
};
|
|
1620
|
+
});
|
|
1621
|
+
const reasoningDetails = toolCalls
|
|
1622
|
+
.filter(tc => tc.thoughtSignature)
|
|
1623
|
+
.map(tc => {
|
|
1624
|
+
try {
|
|
1625
|
+
return JSON.parse(tc.thoughtSignature!);
|
|
1626
|
+
} catch {
|
|
1627
|
+
return null;
|
|
1628
|
+
}
|
|
1629
|
+
})
|
|
1630
|
+
.filter(Boolean);
|
|
1631
|
+
if (reasoningDetails.length > 0) {
|
|
1632
|
+
(assistantMsg as any).reasoning_details = reasoningDetails;
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1635
|
+
// DeepSeek requires non-null content when reasoning_content is present
|
|
1636
|
+
if (assistantMsg.content === null && hasReasoningField) {
|
|
1637
|
+
assistantMsg.content = "";
|
|
1638
|
+
}
|
|
1639
|
+
// Skip assistant messages that have no content, no tool calls, and no reasoning payload.
|
|
1640
|
+
// Some OpenAI-compatible backends require replaying reasoning-only assistant turns
|
|
1641
|
+
// so follow-up requests preserve the provider-specific reasoning field name.
|
|
1642
|
+
const content = assistantMsg.content;
|
|
1643
|
+
const hasContent =
|
|
1644
|
+
content !== null &&
|
|
1645
|
+
content !== undefined &&
|
|
1646
|
+
(typeof content === "string" ? content.length > 0 : content.length > 0);
|
|
1647
|
+
if (!hasContent && assistantMsg.tool_calls && compat.requiresAssistantContentForToolCalls) {
|
|
1648
|
+
assistantMsg.content = ".";
|
|
1649
|
+
}
|
|
1650
|
+
if (!hasContent && !assistantMsg.tool_calls && !hasReasoningField) {
|
|
1651
|
+
continue;
|
|
1652
|
+
}
|
|
1653
|
+
params.push(assistantMsg);
|
|
1654
|
+
} else if (msg.role === "toolResult") {
|
|
1655
|
+
// Batch consecutive tool results and collect all images
|
|
1656
|
+
const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
|
|
1657
|
+
let j = i;
|
|
1658
|
+
|
|
1659
|
+
for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
|
|
1660
|
+
const toolMsg = transformedMessages[j] as ToolResultMessage;
|
|
1661
|
+
|
|
1662
|
+
// Extract text and image content
|
|
1663
|
+
const textResult = toolMsg.content
|
|
1664
|
+
.filter(c => c.type === "text")
|
|
1665
|
+
.map(c => (c as TextContent).text)
|
|
1666
|
+
.join("\n");
|
|
1667
|
+
const supportsImages = model.input.includes("image");
|
|
1668
|
+
const hasImages = toolMsg.content.some(c => c.type === "image");
|
|
1669
|
+
const omittedImages = hasImages && !supportsImages;
|
|
1670
|
+
|
|
1671
|
+
// Always send tool result with text (or placeholder if only images)
|
|
1672
|
+
const hasText = textResult.length > 0;
|
|
1673
|
+
const remappedToolCallId = consumeToolCallId(toolMsg.toolCallId);
|
|
1674
|
+
const resolvedToolCallId =
|
|
1675
|
+
remappedToolCallId ?? ensureToolCallId(toolMsg.toolCallId, `${j}:${toolMsg.toolName ?? "tool"}`);
|
|
1676
|
+
const toolResultContent = omittedImages
|
|
1677
|
+
? joinTextWithImagePlaceholder(textResult, true)
|
|
1678
|
+
: hasText
|
|
1679
|
+
? textResult
|
|
1680
|
+
: hasImages
|
|
1681
|
+
? "(see attached image)"
|
|
1682
|
+
: "";
|
|
1683
|
+
const toolResultMsg: ChatCompletionToolMessageParam = {
|
|
1684
|
+
role: "tool",
|
|
1685
|
+
content: toolResultContent.toWellFormed(),
|
|
1686
|
+
tool_call_id: normalizeMistralToolId(resolvedToolCallId, compat.requiresMistralToolIds),
|
|
1687
|
+
};
|
|
1688
|
+
if (compat.requiresToolResultName && toolMsg.toolName) {
|
|
1689
|
+
(toolResultMsg as any).name = toolMsg.toolName;
|
|
1690
|
+
}
|
|
1691
|
+
params.push(toolResultMsg);
|
|
1692
|
+
|
|
1693
|
+
if (hasImages && supportsImages) {
|
|
1694
|
+
for (const block of toolMsg.content) {
|
|
1695
|
+
if (block.type === "image") {
|
|
1696
|
+
imageBlocks.push({
|
|
1697
|
+
type: "image_url",
|
|
1698
|
+
image_url: {
|
|
1699
|
+
url: `data:${block.mimeType};base64,${block.data}`,
|
|
1700
|
+
},
|
|
1701
|
+
});
|
|
1702
|
+
}
|
|
1703
|
+
}
|
|
1704
|
+
}
|
|
1705
|
+
}
|
|
1706
|
+
|
|
1707
|
+
i = j - 1;
|
|
1708
|
+
|
|
1709
|
+
// After all consecutive tool results, add a single user message with all images
|
|
1710
|
+
if (imageBlocks.length > 0) {
|
|
1711
|
+
if (compat.requiresAssistantAfterToolResult) {
|
|
1712
|
+
params.push({
|
|
1713
|
+
role: "assistant",
|
|
1714
|
+
content: "I have processed the tool results.",
|
|
1715
|
+
});
|
|
1716
|
+
}
|
|
1717
|
+
|
|
1718
|
+
params.push({
|
|
1719
|
+
role: "user",
|
|
1720
|
+
content: [
|
|
1721
|
+
{
|
|
1722
|
+
type: "text",
|
|
1723
|
+
text: "Attached image(s) from tool result:",
|
|
1724
|
+
},
|
|
1725
|
+
...imageBlocks,
|
|
1726
|
+
],
|
|
1727
|
+
});
|
|
1728
|
+
lastRole = "user";
|
|
1729
|
+
} else {
|
|
1730
|
+
lastRole = "toolResult";
|
|
1731
|
+
}
|
|
1732
|
+
continue;
|
|
1733
|
+
}
|
|
1734
|
+
|
|
1735
|
+
lastRole =
|
|
1736
|
+
msg.role === "developer"
|
|
1737
|
+
? model.reasoning && compat.supportsDeveloperRole
|
|
1738
|
+
? "developer"
|
|
1739
|
+
: "system"
|
|
1740
|
+
: msg.role;
|
|
1741
|
+
}
|
|
1742
|
+
|
|
1743
|
+
return params;
|
|
1744
|
+
}
|
|
1745
|
+
|
|
1746
|
+
/**
 * Build the `tools` array for a chat-completions request and report which
 * strict mode was actually applied.
 *
 * Each tool is adapted once via `adaptSchemaForStrict`; both the original wire
 * schema and the adapted schema are kept so the final payload can pick either
 * one depending on the applied mode.
 *
 * @param tools - Tool definitions to expose to the model.
 * @param compat - Resolved provider compatibility flags.
 * @param toolStrictModeOverride - Optional mode that takes precedence over `compat.toolStrictMode`.
 * @returns The wire-format tools together with the applied strict mode.
 */
function convertTools(
	tools: Tool[],
	compat: ResolvedOpenAICompat,
	toolStrictModeOverride?: ToolStrictModeOverride,
): BuiltOpenAICompletionTools {
	const prepared = tools.map(tool => {
		// Strict is attempted unless it is disabled globally, by the provider, or by the tool itself.
		const wantsStrict = !(NO_STRICT || compat.supportsStrictMode === false || tool.strict === false);
		const wireSchema = toolWireSchema(tool);
		const adaptation = adaptSchemaForStrict(wireSchema, wantsStrict);
		return {
			tool,
			baseParameters: wireSchema,
			parameters: adaptation.schema,
			strict: adaptation.strict,
		};
	});

	const requested = toolStrictModeOverride ?? compat.toolStrictMode;
	let toolStrictMode: "none" | "all_strict" | "mixed";
	if (requested === "none") {
		toolStrictMode = "none";
	} else if (requested === "all_strict") {
		// "all_strict" is all-or-nothing: one non-strict tool downgrades the whole set.
		toolStrictMode = prepared.every(entry => entry.strict) ? "all_strict" : "none";
	} else {
		toolStrictMode = "mixed";
	}

	return {
		tools: prepared.map(entry => {
			const emitStrict = toolStrictMode === "all_strict" || (toolStrictMode === "mixed" && entry.strict);
			return {
				type: "function",
				function: {
					name: entry.tool.name,
					description: entry.tool.description || "",
					parameters: emitStrict ? entry.parameters : entry.baseParameters,
					// The `strict` key is omitted entirely when not applied; some providers reject unknown fields.
					...(emitStrict && { strict: true }),
				},
			};
		}),
		toolStrictMode,
	};
}
|
|
1790
|
+
|
|
1791
|
+
/**
 * Decide whether a failed request should be retried with strict tool schemas
 * turned off.
 *
 * A retry is suggested only when tools were sent in "all_strict" mode, the
 * failure carries HTTP status 400 or 422, and the error message or captured
 * response body matches a pattern that mentions strict/tool problems.
 *
 * @param error - The thrown value from the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if one is available.
 * @param toolStrictMode - Strict mode that was applied to the failed request.
 * @param tools - Tools that were sent with the request.
 * @returns `true` when a retry without strict tools is warranted.
 */
function shouldRetryWithoutStrictTools(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
	toolStrictMode: AppliedToolStrictMode,
	tools: Tool[] | undefined,
): boolean {
	if (!tools?.length || toolStrictMode !== "all_strict") {
		return false;
	}

	const status = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
	const isValidationStatus = status === 400 || status === 422;
	if (!isValidationStatus) {
		return false;
	}

	// Gather every non-blank piece of error text that is available.
	const fragments: string[] = [];
	for (const candidate of [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText]) {
		if (typeof candidate === "string" && candidate.trim().length > 0) {
			fragments.push(candidate);
		}
	}

	const strictComplaint = /wrong_api_format|mixed values for 'strict'|tool[s]?\b.*strict|\bstrict\b.*tool/i;
	return strictComplaint.test(fragments.join("\n"));
}
|
|
1809
|
+
|
|
1810
|
+
/**
 * Translate a provider `finish_reason` into the internal stop reason.
 *
 * - `null`, "stop" and "end" map to "stop".
 * - "length" maps to "length".
 * - "function_call" and "tool_calls" map to "toolUse".
 * - Anything else (including "content_filter" and "network_error") maps to
 *   "error" with the message `Provider finish_reason: <reason>`.
 *
 * @param reason - The finish reason reported by the provider.
 * @returns The mapped stop reason, plus an error message for error outcomes.
 */
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
	stopReason: StopReason;
	errorMessage?: string;
} {
	if (reason === null || reason === "stop" || reason === "end") {
		return { stopReason: "stop" };
	}
	if (reason === "length") {
		return { stopReason: "length" };
	}
	if (reason === "function_call" || reason === "tool_calls") {
		return { stopReason: "toolUse" };
	}
	// Every remaining value is reported as an error that names the raw reason.
	return {
		stopReason: "error",
		errorMessage: `Provider finish_reason: ${reason}`,
	};
}
|
|
1835
|
+
|
|
1836
|
+
/**
 * Detect compatibility settings for a known provider from the model's
 * provider and baseUrl. An explicitly configured provider takes precedence
 * over URL-based detection.
 *
 * Delegates to `detectOpenAICompat`.
 *
 * @param model - The model configuration to inspect.
 * @returns A fully resolved OpenAICompat object with all fields set.
 */
export function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
	const detected = detectOpenAICompat(model);
	return detected;
}
|
|
1844
|
+
|
|
1845
|
+
/**
 * Resolve the compatibility settings for a model: explicit `model.compat` is
 * used if provided, otherwise settings are auto-detected from provider/URL.
 *
 * Delegates to `resolveOpenAICompat`.
 *
 * @param model - The model configuration
 * @param resolvedBaseUrl - Optional resolved base URL (e.g., after GitHub Copilot proxy-ep resolution).
 * @returns The resolved compatibility settings.
 */
function getCompat(model: Model<"openai-completions">, resolvedBaseUrl?: string): ResolvedOpenAICompat {
	const resolved = resolveOpenAICompat(model, resolvedBaseUrl);
	return resolved;
}
|