@oh-my-pi/pi-catalog 15.10.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/dist/types/build.d.ts +3 -0
- package/dist/types/compat/anthropic.d.ts +11 -0
- package/dist/types/compat/apply.d.ts +7 -0
- package/dist/types/compat/openai.d.ts +21 -0
- package/dist/types/discovery/antigravity.d.ts +61 -0
- package/dist/types/discovery/codex.d.ts +38 -0
- package/dist/types/discovery/cursor-gen/agent_pb.d.ts +13022 -0
- package/dist/types/discovery/cursor.d.ts +23 -0
- package/dist/types/discovery/gemini.d.ts +25 -0
- package/dist/types/discovery/index.d.ts +4 -0
- package/dist/types/discovery/openai-compatible.d.ts +72 -0
- package/dist/types/effort.d.ts +9 -0
- package/dist/types/fireworks-model-id.d.ts +10 -0
- package/dist/types/hosts.d.ts +128 -0
- package/dist/types/identity/bundled.d.ts +6 -0
- package/dist/types/identity/classify.d.ts +45 -0
- package/dist/types/identity/equivalence.d.ts +46 -0
- package/dist/types/identity/family.d.ts +45 -0
- package/dist/types/identity/id.d.ts +12 -0
- package/dist/types/identity/index.d.ts +9 -0
- package/dist/types/identity/markers.d.ts +4 -0
- package/dist/types/identity/priority.d.ts +1 -0
- package/dist/types/identity/reference.d.ts +22 -0
- package/dist/types/identity/selection.d.ts +20 -0
- package/dist/types/index.d.ts +15 -0
- package/dist/types/model-cache.d.ts +17 -0
- package/dist/types/model-manager.d.ts +64 -0
- package/dist/types/model-thinking.d.ts +67 -0
- package/dist/types/models.d.ts +12 -0
- package/dist/types/provider-models/bundled-references.d.ts +11 -0
- package/dist/types/provider-models/descriptor-types.d.ts +74 -0
- package/dist/types/provider-models/descriptors.d.ts +384 -0
- package/dist/types/provider-models/discovery-constants.d.ts +11 -0
- package/dist/types/provider-models/google.d.ts +27 -0
- package/dist/types/provider-models/index.d.ts +6 -0
- package/dist/types/provider-models/ollama.d.ts +9 -0
- package/dist/types/provider-models/openai-compat.d.ts +385 -0
- package/dist/types/provider-models/special.d.ts +16 -0
- package/dist/types/types.d.ts +405 -0
- package/dist/types/utils.d.ts +5 -0
- package/dist/types/wire/codex.d.ts +26 -0
- package/dist/types/wire/gemini-headers.d.ts +18 -0
- package/dist/types/wire/github-copilot.d.ts +18 -0
- package/package.json +100 -0
- package/src/build.ts +40 -0
- package/src/compat/anthropic.ts +67 -0
- package/src/compat/apply.ts +15 -0
- package/src/compat/openai.ts +365 -0
- package/src/discovery/antigravity.ts +261 -0
- package/src/discovery/codex.ts +371 -0
- package/src/discovery/cursor-gen/agent_pb.ts +15274 -0
- package/src/discovery/cursor.ts +307 -0
- package/src/discovery/gemini.ts +249 -0
- package/src/discovery/index.ts +4 -0
- package/src/discovery/openai-compatible.ts +224 -0
- package/src/effort.ts +16 -0
- package/src/fireworks-model-id.ts +30 -0
- package/src/hosts.ts +114 -0
- package/src/identity/bundled.ts +38 -0
- package/src/identity/classify.ts +141 -0
- package/src/identity/equivalence.ts +870 -0
- package/src/identity/family.ts +88 -0
- package/src/identity/id.ts +81 -0
- package/src/identity/index.ts +9 -0
- package/src/identity/markers.ts +49 -0
- package/src/identity/priority.ts +56 -0
- package/src/identity/reference.ts +134 -0
- package/src/identity/selection.ts +65 -0
- package/src/index.ts +15 -0
- package/src/model-cache.ts +132 -0
- package/src/model-manager.ts +472 -0
- package/src/model-thinking.ts +407 -0
- package/src/models.json +75308 -0
- package/src/models.json.d.ts +9 -0
- package/src/models.ts +64 -0
- package/src/provider-models/bundled-references.ts +54 -0
- package/src/provider-models/descriptor-types.ts +79 -0
- package/src/provider-models/descriptors.ts +456 -0
- package/src/provider-models/discovery-constants.ts +11 -0
- package/src/provider-models/google.ts +105 -0
- package/src/provider-models/index.ts +6 -0
- package/src/provider-models/ollama.ts +154 -0
- package/src/provider-models/openai-compat.ts +3106 -0
- package/src/provider-models/special.ts +67 -0
- package/src/types.ts +470 -0
- package/src/utils.ts +27 -0
- package/src/wire/codex.ts +43 -0
- package/src/wire/gemini-headers.ts +41 -0
- package/src/wire/github-copilot.ts +72 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic-messages compat builder — the anthropic-side analogue of
|
|
3
|
+
* `./openai`. Runs exactly once per model (from `buildModel`); detect-time
|
|
4
|
+
* defaults come from provider ids, strict host checks, and model-id
|
|
5
|
+
* classification, with explicit spec overrides assigned on top.
|
|
6
|
+
*/
|
|
7
|
+
import { modelMatchesHost } from "../hosts";
|
|
8
|
+
import {
|
|
9
|
+
hasOpus47ApiRestrictions,
|
|
10
|
+
isAnthropicFableOrMythosModel,
|
|
11
|
+
supportsMidConversationSystemMessages,
|
|
12
|
+
} from "../identity/family";
|
|
13
|
+
import type { ModelSpec, ResolvedAnthropicCompat } from "../types";
|
|
14
|
+
import { applyCompatOverrides } from "./apply";
|
|
15
|
+
|
|
16
|
+
const OFFICIAL_ANTHROPIC_URL = "https://api.anthropic.com";

/**
 * Decide whether `baseUrl` targets Anthropic's first-party API.
 *
 * An absent (or empty) base URL counts as official on purpose: request
 * dispatch falls back to `https://api.anthropic.com` in that case. OAuth
 * credentials are attached based on this answer, so matching is deliberately
 * strict — the case-folded URL must either equal the official origin or
 * continue it with a path boundary (`/`). A bare prefix test would let
 * lookalike hosts such as `https://api.anthropic.com.evil.com` through.
 *
 * @param baseUrl - Configured base URL, if any.
 * @returns `true` for the official origin (or no URL at all), `false` otherwise.
 */
export function isOfficialAnthropicApiUrl(baseUrl?: string): boolean {
	if (baseUrl) {
		const normalized = baseUrl.toLowerCase();
		if (normalized === OFFICIAL_ANTHROPIC_URL) return true;
		return normalized.startsWith(`${OFFICIAL_ANTHROPIC_URL}/`);
	}
	// Missing/empty base URL: treated as the official endpoint.
	return true;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Build the resolved anthropic-messages compat record for a model spec.
 *
 * Defaults are detected from the base URL (official endpoint or not), the
 * host (Z.AI, DeepSeek family) and the model id; explicit `spec.compat`
 * overrides are then assigned on top of the detected values.
 *
 * @param spec - Model spec for the `anthropic-messages` API flavor.
 * @returns The fully populated compat record for the model.
 */
export function buildAnthropicCompat(spec: ModelSpec<"anthropic-messages">): ResolvedAnthropicCompat {
	const official = isOfficialAnthropicApiUrl(spec.baseUrl);
	// Z.AI's Anthropic-compatible proxy lives at `api.z.ai/api/anthropic`.
	const isZai = modelMatchesHost(spec, "zai");
	const compat: ResolvedAnthropicCompat = {
		officialEndpoint: official,
		disableStrictTools: false,
		disableAdaptiveThinking: false,
		supportsEagerToolInputStreaming: true,
		// Long cache retention is only sent to the official API by default;
		// proxies opt in explicitly via `compat.supportsLongCacheRetention: true`.
		supportsLongCacheRetention: official,
		// First-party Claude API only. Bedrock/Vertex/Foundry and other
		// Anthropic-compatible gateways reject mid-conversation system roles, so
		// detection requires the canonical api.anthropic.com host plus a
		// supported model id.
		supportsMidConversationSystem: official && supportsMidConversationSystemMessages(spec.id),
		supportsForcedToolChoice: !isAnthropicFableOrMythosModel(spec.id),
		// Opus 4.7+ and Fable/Mythos reject temperature/top_p/top_k with a 400.
		supportsSamplingParams: !hasOpus47ApiRestrictions(spec.id),
		// Z.AI workaround (issue #814): its proxy deserializes tool_result blocks
		// into a class that reads `.id`.
		requiresToolResultId: isZai,
		// Official Anthropic enforces signature-based thinking-chain integrity, so
		// unsigned thinking blocks must stay text there. Anthropic-compatible
		// reasoning endpoints commonly emit unsigned thinking blocks while still
		// expecting them back as `type: "thinking"` on continuation; demoting them
		// loses the reasoning chain and can destabilize the next tool-call
		// arguments (#2005). Known non-signing hosts (Z.AI, DeepSeek) are also
		// preserved for compatibility.
		//
		// `spec.reasoning` is coerced so the flag is always a strict boolean (a
		// missing value would otherwise leak through `&&` as `undefined`),
		// matching how the OpenAI compat builder treats the same field.
		replayUnsignedThinking:
			isZai || modelMatchesHost(spec, "deepseekFamily") || (Boolean(spec.reasoning) && !official),
	};
	applyCompatOverrides(compat, spec.compat);
	return compat;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Assign defined override values onto a freshly-built resolved compat record,
 * in place.
 *
 * Only keys the record itself declares (own properties) are assignable;
 * everything else is ignored because loosely-typed config may carry junk.
 * The own-property check matters: a prototype-chain check (`key in compat`)
 * would also accept inherited names such as `toString`, `constructor` or a
 * JSON-parsed `__proto__` key, letting config shadow built-ins or swap the
 * record's prototype. Override values that are `undefined` never clear a
 * detected value.
 *
 * `buildModel` is the only intended caller — the record being mutated is the
 * single per-model allocation; nothing here runs per request.
 *
 * @param compat - Resolved compat record to mutate.
 * @param overrides - Sparse override object; `undefined` is a no-op.
 */
export function applyCompatOverrides(compat: object, overrides: object | undefined): void {
	if (!overrides) return;
	const target = compat as Record<string, unknown>;
	const source = overrides as Record<string, unknown>;
	for (const key in source) {
		const value = source[key];
		if (value === undefined) continue;
		// Skip anything the record does not declare as its own field.
		if (!Object.prototype.hasOwnProperty.call(target, key)) continue;
		target[key] = value;
	}
}
|
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI-API compat builders — chat-completions and Responses flavors.
|
|
3
|
+
*
|
|
4
|
+
* `buildOpenAICompat`/`buildOpenAIResponsesCompat` run exactly once per model
|
|
5
|
+
* (from `buildModel`): detection writes a fresh record, sparse spec overrides
|
|
6
|
+
* are assigned onto it in place, and conditional policies are materialized as
|
|
7
|
+
* complete alternate views. Request handlers read `model.compat` fields and
|
|
8
|
+
* never detect, resolve, or allocate.
|
|
9
|
+
*/
|
|
10
|
+
import { hostMatchesUrl, modelMatchesHost } from "../hosts";
|
|
11
|
+
import { bareModelId, isFableOrMythos, parseAnthropicModel, semverGte } from "../identity/classify";
|
|
12
|
+
import {
|
|
13
|
+
isAnthropicNamespacedModelId,
|
|
14
|
+
isClaudeModelId,
|
|
15
|
+
isDeepseekModelIdOrName,
|
|
16
|
+
isKimiK26ModelId,
|
|
17
|
+
isKimiModelId,
|
|
18
|
+
isMimoModelIdOrName,
|
|
19
|
+
isQwenModelId,
|
|
20
|
+
} from "../identity/family";
|
|
21
|
+
import { ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER, ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER } from "../model-thinking";
|
|
22
|
+
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
|
|
23
|
+
import { applyCompatOverrides } from "./apply";
|
|
24
|
+
|
|
25
|
+
/** Effort levels used as keys of the reasoning-effort maps built in this module. */
type OpenAIReasoningEffort =
	| "minimal"
	| "low"
	| "medium"
	| "high"
	| "xhigh";

/**
 * Matches GLM coding-plan SKU ids: `glm-5` on its own or followed by `.`/`-`,
 * case-insensitively. These SKUs idle for minutes mid-reasoning; see
 * `streamIdleTimeoutMs`.
 */
const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;
/** Stream idle timeout applied to GLM coding-plan SKUs: ten minutes, in milliseconds. */
const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 10 * 60 * 1_000;
/**
 * Stream idle timeout for direct DeepSeek reasoning models, which stall
 * between thinking and answer phases: five minutes, in milliseconds.
 */
const DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS = 5 * 60 * 1_000;
|
|
32
|
+
|
|
33
|
+
/**
 * `whenThinking` policy applied to models fronted by OpenCode's gateways
 * (https://opencode.ai/zen|go).
 *
 * Those gateways gate `reasoning_content` on the request's thinking state for
 * every model they front (Kimi K2.x, DeepSeek V4, GLM-5.x, Qwen3.x, MiMo,
 * MiniMax, …):
 * - thinking off + field supplied → 400 `Extra inputs are not permitted`
 *   (#1071);
 * - thinking on + field absent → 400 `thinking is enabled but
 *   reasoning_content is missing in assistant tool call message at index N`
 *   (#1484).
 *
 * The base compat therefore leaves the replay off, and this policy turns it
 * back on for thinking-engaged requests only.
 *
 * `allowsSyntheticReasoningContentForToolCalls` is forced to `false` on the
 * same path: the gateway specifically requires `reasoning_content`, and the
 * synthetic-friendly default would echo whichever field the upstream streamed
 * (e.g. `reasoning` for many opencode turns), landing the replay in the wrong
 * key and re-triggering the 400.
 */
const OPENCODE_WHEN_THINKING: NonNullable<OpenAICompat["whenThinking"]> = {
	reasoningContentField: "reasoning_content",
	requiresReasoningContentForToolCalls: true,
	allowsSyntheticReasoningContentForToolCalls: false,
};
|
|
53
|
+
|
|
54
|
+
/**
 * Detect whether strict mode is enabled by default for a provider/base URL.
 *
 * A listed provider id answers on its own; otherwise the base URL is matched
 * against the known strict-capable hosts.
 *
 * @param provider - Configured provider id.
 * @param baseUrl - Configured base URL, matched via `hostMatchesUrl`.
 * @returns `true` when either the provider id or the URL host qualifies.
 */
function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
	switch (provider) {
		case "openai":
		case "openrouter":
		case "cerebras":
		case "together":
		case "github-copilot":
		case "zenmux":
			return true;
		default:
			break;
	}

	// Provider id did not qualify: fall back to URL-based host detection.
	if (hostMatchesUrl(baseUrl, "openai")) return true;
	if (hostMatchesUrl(baseUrl, "azureOpenAI")) return true;
	if (hostMatchesUrl(baseUrl, "cerebras")) return true;
	if (hostMatchesUrl(baseUrl, "together")) return true;
	if (hostMatchesUrl(baseUrl, "openrouter")) return true;
	return hostMatchesUrl(baseUrl, "deepseekFamily");
}
|
|
74
|
+
|
|
75
|
+
/**
 * Pick the adaptive reasoning-effort map for an Anthropic model served
 * through OpenRouter's completions front.
 *
 * Only Fable/Mythos and Opus 4.6+ use adaptive efforts there; Sonnet stays on
 * the plain effort vocabulary. Fable/Mythos and versions 4.7+ get the 5-tier
 * map, the remaining adaptive models the 4-tier map.
 *
 * @param modelId - Model id; reduced with `bareModelId` before parsing.
 * @returns The effort map to use, or `undefined` when the id does not parse
 *   as an Anthropic model or the model is not adaptive.
 */
function getOpenRouterAnthropicReasoningEffortMap(
	modelId: string,
): Partial<Record<OpenAIReasoningEffort, string>> | undefined {
	const parsed = parseAnthropicModel(bareModelId(modelId));
	if (!parsed) return undefined;

	const fableOrMythos = isFableOrMythos(parsed.kind);
	if (!fableOrMythos) {
		const adaptiveOpus = parsed.kind === "opus" && semverGte(parsed.version, "4.6");
		if (!adaptiveOpus) return undefined;
	}

	// A real `xhigh` tier exists on Fable/Mythos and from version 4.7 onwards.
	const fiveTier = fableOrMythos || semverGte(parsed.version, "4.7");
	const effortMap = fiveTier ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
	return effortMap as Partial<Record<OpenAIReasoningEffort, string>>;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Resolve the chat-completions compat record for a single model spec.
 *
 * Defaults are detected from the provider id, the base URL and the model
 * id/name; an explicitly configured provider takes precedence over URL-based
 * detection. Sparse `spec.compat` overrides are then assigned on top, the
 * reasoning-effort map is merged level by level, and — when a `whenThinking`
 * policy applies — a complete alternate record is stored under
 * `compat.whenThinking`.
 *
 * @param spec - Model spec for the `openai-completions` API flavor.
 * @returns The fully populated compat record for the model.
 */
export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat {
	const { provider, baseUrl } = spec;
	const hostModel = { provider, baseUrl };
	const idLower = spec.id.toLowerCase();
	const nameLower = (spec.name ?? "").toLowerCase();
	const reasoningCapable = Boolean(spec.reasoning);

	// ── Hosts matched by provider id or base URL ────────────────────────────
	const onCerebras = modelMatchesHost(hostModel, "cerebras");
	const onZai = modelMatchesHost(hostModel, "zai");
	const onZhipu = modelMatchesHost(hostModel, "zhipu");
	const onKilo = modelMatchesHost(hostModel, "kilo");
	const onAlibaba = modelMatchesHost(hostModel, "alibabaDashscope");
	const onXiaomi = modelMatchesHost(hostModel, "xiaomi");
	const onDeepseekDirect = modelMatchesHost(hostModel, "deepseekDirect");
	const onXai = modelMatchesHost(hostModel, "xai");
	const onMistral = modelMatchesHost(hostModel, "mistral");
	const onOpenCodeHost = modelMatchesHost(hostModel, "opencode");
	const onOpenAI = modelMatchesHost(hostModel, "openai");
	const onAzure = modelMatchesHost(hostModel, "azureOpenAI");
	const onOpenRouter = modelMatchesHost(hostModel, "openrouter");
	const onVercelGateway = modelMatchesHost(hostModel, "vercelAIGateway");
	const onTogether = modelMatchesHost(hostModel, "together");
	const onGroq = modelMatchesHost(hostModel, "groq");
	const onMiniMax = modelMatchesHost(hostModel, "minimax");
	const onQwenPortal = modelMatchesHost(hostModel, "qwenPortal");

	// ── Hosts matched by base URL only ──────────────────────────────────────
	const urlIsChutes = hostMatchesUrl(baseUrl, "chutes");
	const urlIsFireworks = hostMatchesUrl(baseUrl, "fireworks");
	const urlIsDeepseekFamily = hostMatchesUrl(baseUrl, "deepseekFamily");

	// ── Providers matched by id only ────────────────────────────────────────
	const viaCopilot = provider === "github-copilot";
	const viaZenmux = provider === "zenmux";
	const viaOpenCode = provider === "opencode-go" || provider === "opencode-zen";

	// ── Model-family classification ─────────────────────────────────────────
	const kimiModel = isKimiModelId(spec.id);
	const moonshotKimi = kimiModel && modelMatchesHost(hostModel, "moonshotNative");
	const moonshotKimiPreservedThinking = moonshotKimi && isKimiK26ModelId(spec.id);
	const anthropicModel =
		modelMatchesHost(hostModel, "anthropic") || isClaudeModelId(spec.id) || isAnthropicNamespacedModelId(spec.id);
	const qwenModel = isQwenModelId(spec.id);
	const xiaomiMimo = onXiaomi && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
	// OpenCode Zen's `big-pickle` is a DeepSeek reasoning alias; the upstream
	// 400s come from DeepSeek and require exact reasoning_content replay.
	const openCodeDeepseekAlias =
		provider === "opencode-zen" && (idLower === "big-pickle" || nameLower === "big pickle");
	// DeepSeek V4 (and other reasoning-capable DeepSeek models) reject follow-up
	// requests in thinking mode unless prior assistant tool-call turns include
	// `reasoning_content`. The upstream model is reachable through many
	// OpenAI-compat hosts (api.deepseek.com, Deepinfra, Kilo, NVIDIA NIM,
	// Zenmux, OpenRouter, …), so the family is matched by model id/name as well
	// as by provider/baseUrl. Dependent flags are gated on `spec.reasoning`
	// because the invariant only applies when thinking mode is engaged.
	const deepseekFamily =
		modelMatchesHost(hostModel, "deepseekFamily") ||
		isDeepseekModelIdOrName(spec.id) ||
		isDeepseekModelIdOrName(spec.name ?? "") ||
		openCodeDeepseekAlias;
	const deepseekReasoning = deepseekFamily && reasoningCapable;
	const directDeepseekReasoning = onDeepseekDirect && deepseekReasoning;

	// Any of these signals marks the endpoint as non-standard (no `store`).
	const nonStandard = [
		onCerebras,
		onXai,
		onMistral,
		urlIsChutes,
		urlIsDeepseekFamily,
		urlIsFireworks,
		onAlibaba,
		onZai,
		onZhipu,
		onKilo,
		qwenModel,
		onXiaomi,
		onOpenCodeHost,
	].some(Boolean);

	const prefersMaxTokens = onMistral || urlIsChutes || urlIsFireworks || onDeepseekDirect;

	// Endpoints that MUST receive a single system block. MiniMax's OpenAI
	// endpoint returns error 2013 on multiple system messages; Alibaba's
	// Dashscope and Qwen Portal serve Qwen models whose chat template raises
	// "System message must be at the beginning" if any system message appears
	// past index 0.
	const singleSystemOnly = onMiniMax || onAlibaba || onQwenPortal;
	// Hosts whose chat-completions endpoints are known to accept multiple
	// leading `system`/`developer` messages (preferred for KV-cache reuse).
	// Anything outside this allowlist defaults to coalescing because strict
	// chat templates (Qwen 3.5+ via vLLM, MiniMax, etc.) reject follow-up
	// system messages with a 400.
	const multiSystemAllowlisted =
		onOpenAI ||
		onAzure ||
		onOpenRouter ||
		onCerebras ||
		onTogether ||
		urlIsFireworks ||
		onGroq ||
		deepseekFamily ||
		onMistral ||
		onXai ||
		onZai ||
		onZhipu ||
		viaCopilot ||
		viaZenmux;
	const multipleSystemMessagesDefault = !singleSystemOnly && multiSystemAllowlisted;

	// ── Reasoning-effort map detection (first matching rule wins) ───────────
	const openRouterAnthropicMap = onOpenRouter ? getOpenRouterAnthropicReasoningEffortMap(idLower) : undefined;
	let detectedEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]>;
	if (provider === "groq" && spec.id === "qwen/qwen3-32b") {
		detectedEffortMap = {
			minimal: "default",
			low: "default",
			medium: "default",
			high: "default",
			xhigh: "default",
		};
	} else if (deepseekReasoning) {
		detectedEffortMap = {
			minimal: "high",
			low: "high",
			medium: "high",
			high: "high",
			xhigh: "max",
		};
	} else if (openRouterAnthropicMap) {
		detectedEffortMap = openRouterAnthropicMap;
	} else if (urlIsFireworks) {
		// Fireworks' OpenAI-compatible endpoint rejects OpenAI's `minimal`
		// literal but accepts `none` for the lowest setting.
		detectedEffortMap = { minimal: "none" };
	} else {
		detectedEffortMap = {};
	}

	// Stream-watchdog floor: GLM coding-plan SKUs and direct DeepSeek reasoning
	// models idle for minutes mid-reasoning; widen the idle timeout so warm-ups
	// stop aborting and retrying.
	let streamIdleTimeoutMs: number | undefined = undefined;
	if (GLM_CODING_PLAN_MODEL_PATTERN.test(spec.id) && (onZai || onZhipu)) {
		streamIdleTimeoutMs = GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS;
	} else if (reasoningCapable && onDeepseekDirect) {
		streamIdleTimeoutMs = DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS;
	}

	// Only Kimi's native hosts (Moonshot / Kimi-code, matched by `moonshotKimi`)
	// speak the z.ai binary `thinking: { type }` field. Kimi reached through
	// OpenAI-compatible proxies — Fireworks' Fire Pass router, OpenCode's
	// gateway, etc. — drives reasoning via OpenAI-style `reasoning_effort`
	// (low|medium|high|xhigh|max|none), so those stay on the "openai" path.
	let thinkingFormat: ResolvedOpenAICompat["thinkingFormat"] = "openai";
	if (onZai || onZhipu || moonshotKimi || xiaomiMimo) {
		thinkingFormat = "zai";
	} else if (onOpenRouter) {
		thinkingFormat = "openrouter";
	} else if (onAlibaba || qwenModel) {
		thinkingFormat = "qwen";
	}

	const compat: ResolvedOpenAICompat = {
		supportsStore: !nonStandard,
		// `developer` is an OpenAI-Responses-era extension to the
		// chat-completions schema. Almost every OpenAI-compatible host other
		// than OpenAI itself (and Azure OpenAI, which mirrors the schema
		// exactly) treats it as an unknown role: Moonshot returns a 400
		// "tokenization failed", Groq/Cerebras/etc. error or silently misroute.
		// Default to `system` and require callers to opt in via
		// `compat.supportsDeveloperRole: true` for hosts known to mirror
		// OpenAI's reasoning-API surface.
		supportsDeveloperRole: onOpenAI || onAzure,
		supportsMultipleSystemMessages: multipleSystemMessagesDefault,
		supportsReasoningEffort: !(onXai || onZai || onZhipu || xiaomiMimo),
		// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
		supportsReasoningParams: !viaCopilot,
		reasoningEffortMap: detectedEffortMap,
		supportsUsageInStreaming: !onCerebras,
		// Kimi (including via OpenRouter and Fireworks router-form IDs such as
		// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based
		// on max_tokens, not actual output. The official Kimi K2 model guidance
		// (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens`
		// for every call since the family can otherwise emit very long
		// reasoning traces before the final answer.
		alwaysSendMaxTokens: kimiModel,
		disableReasoningOnForcedToolChoice: kimiModel || anthropicModel,
		disableReasoningOnToolChoice: deepseekReasoning && !onOpenRouter,
		supportsToolChoice: !directDeepseekReasoning,
		maxTokensField: prefersMaxTokens ? "max_tokens" : "max_completion_tokens",
		requiresToolResultName: onMistral,
		requiresAssistantAfterToolResult: false,
		requiresThinkingAsText: onMistral,
		requiresMistralToolIds: onMistral,
		thinkingFormat,
		thinkingKeep: moonshotKimiPreservedThinking ? "all" : undefined,
		reasoningContentField: "reasoning_content",
		// Backends that 400 follow-up requests when prior assistant tool-call
		// turns lack `reasoning_content`:
		// - Kimi: documented invariant on its native API.
		// - DeepSeek-family reasoning models, including aliased OpenCode Zen
		//   models like `big-pickle`, validate exact thinking-mode replay.
		// - Xiaomi MiMo models require exact `reasoning_content` replay on
		//   thinking-mode tool-call continuations across standard and Token
		//   Plan hosts.
		// - Any reasoning-capable model reached through OpenRouter can enforce
		//   this server-side whenever the request is in thinking mode. We can't
		//   translate Anthropic's redacted/encrypted reasoning into
		//   provider-native plaintext, so cross-provider continuations rely on
		//   a placeholder.
		// OpenCode Kimi aliases handle reasoning content internally and reject
		// client-sent `reasoning_content`, so only that Kimi-on-OpenCode path is
		// excluded (the `whenThinking` policy below re-enables the replay for
		// thinking turns).
		requiresReasoningContentForToolCalls:
			(kimiModel && !viaOpenCode) || deepseekReasoning || xiaomiMimo || (onOpenRouter && reasoningCapable),
		// DeepSeek V4 and Xiaomi MiMo reject synthetic reasoning_content
		// placeholders (".") on tool-call turns. Kimi and OpenRouter accept
		// them when actual reasoning is unavailable.
		allowsSyntheticReasoningContentForToolCalls: !deepseekReasoning && !xiaomiMimo,
		requiresAssistantContentForToolCalls: kimiModel || directDeepseekReasoning,
		cacheControlFormat: onOpenRouter && spec.id.startsWith("anthropic/") ? "anthropic" : undefined,
		openRouterRouting: undefined,
		vercelGatewayRouting: undefined,
		isOpenRouterHost: onOpenRouter,
		isVercelGatewayHost: onVercelGateway,
		supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
		extraBody: directDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
		toolStrictMode: onCerebras ? "all_strict" : "mixed",
		streamIdleTimeoutMs,
	};

	applyCompatOverrides(compat, spec.compat);
	const effortMapOverride = spec.compat?.reasoningEffortMap;
	if (effortMapOverride) {
		// Effort maps merge per level instead of replacing wholesale.
		compat.reasoningEffortMap = { ...detectedEffortMap, ...effortMapOverride };
	}

	// An explicit `whenThinking` override wins; otherwise reasoning-capable
	// models on the OpenCode providers get the built-in OpenCode policy.
	let thinkingPolicy = spec.compat?.whenThinking;
	if (thinkingPolicy == null && viaOpenCode && reasoningCapable) {
		thinkingPolicy = OPENCODE_WHEN_THINKING;
	}
	if (thinkingPolicy) {
		// Materialize the policy as a complete alternate view of the record.
		const thinkingVariant: ResolvedOpenAICompat = { ...compat };
		applyCompatOverrides(thinkingVariant, thinkingPolicy);
		compat.whenThinking = thinkingVariant;
	}

	return compat;
}
|
|
328
|
+
|
|
329
|
+
/** Minimal spec shape read by `buildOpenAIResponsesCompat`. */
interface OpenAIResponsesSpecLike {
	/** Configured provider id. */
	provider: string;
	/** Configured base URL used for host detection. */
	baseUrl: string;
	/** Sparse overrides assigned on top of the detected record. */
	compat?: OpenAICompat;
}

/**
 * Resolve the Responses-API compat record for a model spec.
 *
 * The Responses flavor deliberately differs from chat-completions: GitHub
 * Copilot's responses endpoint accepts the `developer` role, while strict
 * tool mode is scoped to first-party OpenAI/Azure/Copilot providers.
 * Developer-role and prompt-cache detection are URL-only on purpose — the
 * historical call sites never consulted the provider id for them.
 *
 * @param spec - Provider id, base URL and optional overrides for the model.
 * @returns The detected record with `spec.compat` overrides applied.
 */
export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat {
	const { provider } = spec;
	const baseUrl = spec.baseUrl ?? "";

	const urlIsOpenAI = hostMatchesUrl(baseUrl, "openai");
	const urlIsAzure = hostMatchesUrl(baseUrl, "azureOpenAI");
	const urlIsCopilot = hostMatchesUrl(baseUrl, "githubCopilot");
	const viaCopilot = provider === "github-copilot";

	const compat: ResolvedOpenAIResponsesCompat = {
		supportsDeveloperRole: urlIsOpenAI || urlIsAzure || urlIsCopilot,
		supportsStrictMode: provider === "openai" || provider === "azure" || viaCopilot || urlIsOpenAI || urlIsAzure,
		supportsReasoningEffort: true,
		supportsLongPromptCacheRetention: urlIsOpenAI,
		// Azure OpenAI and GitHub Copilot Responses paths require tool results
		// to strictly match prior tool calls when building Responses inputs.
		strictResponsesPairing: urlIsAzure || viaCopilot,
		reasoningEffortMap: {},
	};

	applyCompatOverrides(compat, spec.compat);
	return compat;
}
|