@oh-my-pi/pi-catalog 16.0.5 → 16.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/types/compat/openai.d.ts +3 -1
- package/dist/types/identity/family.d.ts +6 -0
- package/dist/types/provider-models/descriptors.d.ts +1 -1
- package/dist/types/provider-models/openai-compat.d.ts +5 -6
- package/dist/types/types.d.ts +94 -17
- package/package.json +4 -3
- package/src/build.ts +3 -1
- package/src/compat/openai.ts +206 -19
- package/src/discovery/antigravity.ts +76 -92
- package/src/discovery/codex.ts +33 -40
- package/src/discovery/cursor.ts +31 -24
- package/src/discovery/gemini.ts +39 -30
- package/src/discovery/openai-compatible.ts +22 -32
- package/src/identity/family.ts +13 -0
- package/src/model-thinking.ts +24 -6
- package/src/models.json +504 -360
- package/src/provider-models/ollama.ts +11 -2
- package/src/provider-models/openai-compat.ts +40 -42
- package/src/types.ts +175 -48
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,26 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.0.6] - 2026-06-18
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added a dedicated `openrouter` API type and `ResolvedOpenRouterCompat` configuration to support unified chat-completions and Responses-API compatibility for OpenRouter models
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Migrated bundled OpenRouter models in the catalog from `openai-completions` to the new `openrouter` API type
|
|
14
|
+
- Consolidated the resolved OpenAI compat shape: extracted a shared `ResolvedOpenAISharedCompat` core that both `ResolvedOpenAICompat` and `ResolvedOpenAIResponsesCompat` extend (each builder still computes its own per-surface value, preserving chat↔Responses divergence), added internal resolved wire-quirk fields (`wireModelIdMode`, `stripDeepseekSpecialTokens`, `reasoningDeltasMayBeCumulative`, `emptyLengthFinishIsContextError`, `usesOpenAIToolCallIdLimit`, `dropThinkingWhenReasoningEffort`, `supportsObfuscationOptOut`), and replaced `buildOpenRouterCompat`'s cast-and-copy with an exhaustive `pickResponsesOnly` composition that fails to compile if a new Responses-only field is added without handling. The public `OpenAICompat` config vocabulary is unchanged.
|
|
15
|
+
- Expanded `OpenAICompat`/`ResolvedOpenAISharedCompat` with shared reasoning/history/stream/request flags (`reasoningDisableMode`, `omitReasoningEffort`, `includeEncryptedReasoning`, `filterReasoningHistory`, `requiresReasoningContentForAllAssistantTurns`, `streamMarkupHealingPattern`, `promptCacheSessionHeader`, etc.) so model/provider/gateway constraints are declared once in catalog compat and then consumed uniformly by Chat Completions and Responses endpoints.
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
- Changed the default compatibility builder for `openai-completions` to set `requiresAssistantAfterToolResult` to `isMistral`, enabling the synthetic assistant bridge for built-in Mistral and Devstral models.
|
|
20
|
+
- Fixed local Ollama (`provider: "ollama"`) reasoning turns still failing with HTTP 400 `invalid reasoning value: "minimal"` when the model was selected from a stale `~/.omp/models.db` cache row or a hand-written config: the `minimal → low` / `xhigh → max` remap was only stamped during fresh discovery, so cached and custom specs reached the wire unmapped. The remap now lives in the OpenAI chat-completions and Responses compat builders, so every `buildModel` (including cache loads, custom specs, and the `whenThinking` variant) backfills it — no `omp models refresh` required. Custom OpenAI-compatible providers registered under a non-`ollama` provider id still need their own `compat.reasoningEffortMap`.
|
|
21
|
+
- Advertised Ollama Cloud GLM-5.2 reasoning efforts as high/xhigh-only and mapped `xhigh` to native max effort ([#2911](https://github.com/can1357/oh-my-pi/pull/2911) by [@serverinspector](https://github.com/serverinspector))
|
|
22
|
+
- Fixed OpenRouter pseudo-API model construction so bundled OpenRouter models resolve shared OpenAI compatibility metadata instead of an undefined compat record.
|
|
23
|
+
- Fixed custom/direct `xai-oauth` Responses model specs (e.g. `grok-build`) emitting `reasoning.effort` and hitting xAI's HTTP 400: `buildOpenAIResponsesCompat` now defaults `supportsReasoningEffort` to `false` for `xai-oauth` Grok models that are off the effort-capable allowlist (`grok-3-mini`/`grok-4.20-multi-agent`/`grok-4.3`), matching the curated discovery path; explicit `compat.supportsReasoningEffort` still overrides. The allowlist moved to a shared `isGrokReasoningEffortCapable` identity helper consumed by both the compat builder and provider-model curation so the two cannot drift.
|
|
24
|
+
|
|
5
25
|
## [16.0.5] - 2026-06-17
|
|
6
26
|
|
|
7
27
|
### Added
|
package/dist/types/compat/openai.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
|
|
1
|
+
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat, ResolvedOpenRouterCompat } from "../types";
|
|
2
2
|
/**
|
|
3
3
|
* Build the resolved chat-completions compat record for a model spec.
|
|
4
4
|
* Provider takes precedence over URL-based detection since it's explicitly configured.
|
|
@@ -9,6 +9,7 @@ interface OpenAIResponsesSpecLike {
|
|
|
9
9
|
provider: string;
|
|
10
10
|
name: string;
|
|
11
11
|
baseUrl: string;
|
|
12
|
+
reasoning?: boolean;
|
|
12
13
|
compat?: OpenAICompat;
|
|
13
14
|
}
|
|
14
15
|
/**
|
|
@@ -23,4 +24,5 @@ interface OpenAIResponsesSpecLike {
|
|
|
23
24
|
* request-time check.
|
|
24
25
|
*/
|
|
25
26
|
export declare function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat;
|
|
27
|
+
export declare function buildOpenRouterCompat(spec: ModelSpec<"openrouter">): ResolvedOpenRouterCompat;
|
|
26
28
|
export {};
|
package/dist/types/identity/family.d.ts
CHANGED
|
@@ -22,6 +22,12 @@ export declare function isGemmaModelId(modelId: string): boolean;
|
|
|
22
22
|
export declare function isDeepseekModelIdOrName(value: string): boolean;
|
|
23
23
|
/** Xiaomi MiMo family by id or display name. */
|
|
24
24
|
export declare function isMimoModelIdOrName(value: string): boolean;
|
|
25
|
+
/**
|
|
26
|
+
* Grok SKUs that expose the wire `reasoning.effort` dial. Other Grok reasoners
|
|
27
|
+
* (e.g. `grok-build`, `grok-4.20-0309-reasoning`) think natively but reject the
|
|
28
|
+
* param, so callers must omit reasoning effort for them.
|
|
29
|
+
*/
|
|
30
|
+
export declare function isGrokReasoningEffortCapable(modelId: string): boolean;
|
|
25
31
|
/**
|
|
26
32
|
* MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
|
|
27
33
|
* `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
|
package/dist/types/provider-models/descriptors.d.ts
CHANGED
|
@@ -233,7 +233,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
233
233
|
readonly id: "openrouter";
|
|
234
234
|
readonly defaultModel: "openai/gpt-5.5";
|
|
235
235
|
readonly envVars: readonly ["OPENROUTER_API_KEY"];
|
|
236
|
-
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-completions", unknown>;
|
|
236
|
+
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openrouter", unknown>;
|
|
237
237
|
readonly catalogDiscovery: {
|
|
238
238
|
readonly label: "OpenRouter";
|
|
239
239
|
readonly allowUnauthenticated: true;
|
package/dist/types/provider-models/openai-compat.d.ts
CHANGED
|
@@ -95,11 +95,10 @@ interface XAICuratedModel {
|
|
|
95
95
|
reasoning?: boolean;
|
|
96
96
|
/**
|
|
97
97
|
* Whether xAI accepts the `reasoning.effort` wire param for this model.
|
|
98
|
-
* Default true. When false: picker hides the effort dial (via
|
|
99
|
-
* getSupportedEfforts in model-thinking.ts) AND wire
|
|
100
|
-
*
|
|
101
|
-
*
|
|
102
|
-
* author convention until a follow-up Op: compress unifies them.
|
|
98
|
+
* Default true. When false: the picker hides the effort dial (via
|
|
99
|
+
* getSupportedEfforts in model-thinking.ts) AND the wire omits the param —
|
|
100
|
+
* both derive from `isGrokReasoningEffortCapable` (identity/family.ts), the
|
|
101
|
+
* single allowlist shared by this curated layer and the compat builder.
|
|
103
102
|
*/
|
|
104
103
|
supportsReasoningEffort?: boolean;
|
|
105
104
|
/**
|
|
@@ -235,7 +234,7 @@ export interface OpenRouterModelManagerConfig {
|
|
|
235
234
|
baseUrl?: string;
|
|
236
235
|
fetch?: FetchImpl;
|
|
237
236
|
}
|
|
238
|
-
export declare function openrouterModelManagerOptions(config?: OpenRouterModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
237
|
+
export declare function openrouterModelManagerOptions(config?: OpenRouterModelManagerConfig): ModelManagerOptions<"openrouter">;
|
|
239
238
|
export interface ZenMuxModelManagerConfig {
|
|
240
239
|
apiKey?: string;
|
|
241
240
|
baseUrl?: string;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Effort } from "./effort";
|
|
2
2
|
export type { KnownProvider } from "./provider-models/descriptors";
|
|
3
|
-
export type KnownApi = "openai-completions" | "openai-responses" | "openai-codex-responses" | "azure-openai-responses" | "anthropic-messages" | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" | "google-vertex" | "ollama-chat" | "cursor-agent";
|
|
3
|
+
export type KnownApi = "openai-completions" | "openai-responses" | "openrouter" | "openai-codex-responses" | "azure-openai-responses" | "anthropic-messages" | "bedrock-converse-stream" | "google-generative-ai" | "google-gemini-cli" | "google-vertex" | "ollama-chat" | "cursor-agent";
|
|
4
4
|
export type Api = KnownApi | (string & {});
|
|
5
5
|
/** Canonical thinking transport used by a model. */
|
|
6
6
|
export type ThinkingControlMode = "effort" | "budget" | "google-level" | "anthropic-adaptive" | "anthropic-budget-effort";
|
|
@@ -117,6 +117,9 @@ export interface Usage {
|
|
|
117
117
|
total: number;
|
|
118
118
|
};
|
|
119
119
|
}
|
|
120
|
+
export type OpenAIReasoningFormat = "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
|
|
121
|
+
export type OpenAIReasoningDisableMode = "omit" | "lowest-effort" | "openrouter-enabled-false" | "zai-thinking-disabled" | "qwen-enable-thinking-false" | "qwen-template-false" | "juice-zero-developer-message";
|
|
122
|
+
export type OpenAIStreamMarkupHealingPattern = "kimi" | "dsml" | "thinking";
|
|
120
123
|
/**
|
|
121
124
|
* Compatibility settings for openai-completions API.
|
|
122
125
|
* Use this to override URL-based auto-detection for custom providers.
|
|
@@ -163,13 +166,23 @@ export interface OpenAICompat {
|
|
|
163
166
|
/** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
|
|
164
167
|
requiresMistralToolIds?: boolean;
|
|
165
168
|
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" | "disabled" } (also used by Moonshot Kimi), "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
|
|
166
|
-
thinkingFormat?: "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
|
|
169
|
+
thinkingFormat?: OpenAIReasoningFormat;
|
|
170
|
+
/** Request-time disable encoding for the selected reasoning/thinking format. Default: derived from `thinkingFormat`. */
|
|
171
|
+
reasoningDisableMode?: OpenAIReasoningDisableMode;
|
|
172
|
+
/** Whether the provider rejects `reasoning.effort`/`reasoning_effort` even when the model reasons natively. Default: false unless reasoning effort is unsupported. */
|
|
173
|
+
omitReasoningEffort?: boolean;
|
|
174
|
+
/** Whether Responses requests should ask for encrypted reasoning replay items. Default: true. */
|
|
175
|
+
includeEncryptedReasoning?: boolean;
|
|
176
|
+
/** Whether replayed Responses history should strip native `type: "reasoning"` items before request encoding. Default: false. */
|
|
177
|
+
filterReasoningHistory?: boolean;
|
|
167
178
|
/** Optional `thinking.keep` value for Z.ai/Moonshot-style thinking params. Set false to suppress auto-detected keep. Default: auto-detected. */
|
|
168
179
|
thinkingKeep?: "all" | false;
|
|
169
180
|
/** Which reasoning content field to emit on assistant messages. Default: auto-detected. */
|
|
170
181
|
reasoningContentField?: "reasoning_content" | "reasoning" | "reasoning_text";
|
|
171
182
|
/** Whether assistant tool-call messages must include reasoning content. Default: false. */
|
|
172
183
|
requiresReasoningContentForToolCalls?: boolean;
|
|
184
|
+
/** Whether all assistant messages must include reasoning content. Default: false. */
|
|
185
|
+
requiresReasoningContentForAllAssistantTurns?: boolean;
|
|
173
186
|
/** Whether the provider accepts a synthetic placeholder (e.g. ".") for missing reasoning_content on tool-call turns. Default: true. Set to false for providers like DeepSeek that validate the exact reasoning_content value. */
|
|
174
187
|
allowsSyntheticReasoningContentForToolCalls?: boolean;
|
|
175
188
|
/** Whether assistant tool-call messages must include non-empty content. Default: false. */
|
|
@@ -203,10 +216,22 @@ export interface OpenAICompat {
|
|
|
203
216
|
vercelGatewayRouting?: VercelGatewayRouting;
|
|
204
217
|
/** Extra fields to include in request body (e.g. gateway routing hints for OpenClaw-style proxies). */
|
|
205
218
|
extraBody?: Record<string, unknown>;
|
|
219
|
+
/** Request-session header that should mirror the normalized prompt-cache key. Default: unset. */
|
|
220
|
+
promptCacheSessionHeader?: "x-grok-conv-id";
|
|
206
221
|
/** Whether chat-completions payloads should include provider-specific prompt-cache markers. */
|
|
207
222
|
cacheControlFormat?: "anthropic" | undefined;
|
|
208
223
|
/** Whether the provider supports the `strict` field in tool definitions. Default: auto-detected per provider/baseUrl (conservative for unknown providers). */
|
|
209
224
|
supportsStrictMode?: boolean;
|
|
225
|
+
/**
|
|
226
|
+
* Tool-schema dialect the endpoint validates `tools.function.parameters`
|
|
227
|
+
* against. `"moonshot-mfjs"` triggers Moonshot Flavored JSON Schema
|
|
228
|
+
* normalization (collapse `const`→`enum`, infer `type` on bare enums, strip
|
|
229
|
+
* unsupported validators/`prefixItems`) because Moonshot/Kimi native hosts
|
|
230
|
+
* reject standard JSON Schema constructs with HTTP 400. Default:
|
|
231
|
+
* auto-detected (`"moonshot-mfjs"` on api.moonshot.ai / api.kimi.com). Set
|
|
232
|
+
* `"none"` to opt a custom Moonshot-compatible host out.
|
|
233
|
+
*/
|
|
234
|
+
toolSchemaFlavor?: "moonshot-mfjs" | "none";
|
|
210
235
|
/**
|
|
211
236
|
* Stream-watchdog idle-timeout floor in ms for slow reasoning hosts.
|
|
212
237
|
* Default: auto-detected (GLM coding-plan hosts, direct DeepSeek reasoning).
|
|
@@ -230,6 +255,16 @@ export interface OpenAICompat {
|
|
|
230
255
|
* Default: auto-detected (GPT-5-family model names).
|
|
231
256
|
*/
|
|
232
257
|
requiresJuiceZeroHack?: boolean;
|
|
258
|
+
/** Whether streamed reasoning deltas for the same field may repeat the full cumulative text snapshot. Default: false. */
|
|
259
|
+
reasoningDeltasMayBeCumulative?: boolean;
|
|
260
|
+
/** Strip leaked DeepSeek chat-template special tokens from visible content deltas. Default: auto-detected. */
|
|
261
|
+
stripDeepseekSpecialTokens?: boolean;
|
|
262
|
+
/** Heal leaked chat-template/tool-call/thinking markup from visible content deltas. Default: auto-detected. */
|
|
263
|
+
streamMarkupHealingPattern?: OpenAIStreamMarkupHealingPattern;
|
|
264
|
+
/** Treat an empty length-finished stream as a context-window error. Default: auto-detected. */
|
|
265
|
+
emptyLengthFinishIsContextError?: boolean;
|
|
266
|
+
/** Normalize tool call ids to OpenAI's 40-character limit. Default: auto-detected. */
|
|
267
|
+
usesOpenAIToolCallIdLimit?: boolean;
|
|
233
268
|
/**
|
|
234
269
|
* Compat deltas applied when a request actually engages thinking mode
|
|
235
270
|
* (reasoning requested and not disabled, model reasoning-capable, and not
|
|
@@ -331,41 +366,83 @@ export interface VercelGatewayRouting {
|
|
|
331
366
|
order?: string[];
|
|
332
367
|
}
|
|
333
368
|
type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mixed";
|
|
369
|
+
/**
|
|
370
|
+
* Fields whose meaning is identical across chat-completions and Responses surfaces.
|
|
371
|
+
* Each builder still computes its own per-surface value when defaults diverge.
|
|
372
|
+
*/
|
|
373
|
+
export interface ResolvedOpenAISharedCompat {
|
|
374
|
+
supportsDeveloperRole: boolean;
|
|
375
|
+
supportsStrictMode: boolean;
|
|
376
|
+
supportsReasoningEffort: boolean;
|
|
377
|
+
reasoningEffortMap: Partial<Record<Effort, string>>;
|
|
378
|
+
supportsReasoningParams: boolean;
|
|
379
|
+
thinkingFormat: OpenAIReasoningFormat;
|
|
380
|
+
reasoningDisableMode: OpenAIReasoningDisableMode;
|
|
381
|
+
omitReasoningEffort: boolean;
|
|
382
|
+
includeEncryptedReasoning: boolean;
|
|
383
|
+
filterReasoningHistory: boolean;
|
|
384
|
+
disableReasoningOnForcedToolChoice: boolean;
|
|
385
|
+
disableReasoningOnToolChoice: boolean;
|
|
386
|
+
supportsToolChoice: boolean;
|
|
387
|
+
supportsForcedToolChoice: boolean;
|
|
388
|
+
reasoningContentField?: OpenAICompat["reasoningContentField"];
|
|
389
|
+
requiresReasoningContentForToolCalls: boolean;
|
|
390
|
+
requiresReasoningContentForAllAssistantTurns: boolean;
|
|
391
|
+
allowsSyntheticReasoningContentForToolCalls: boolean;
|
|
392
|
+
requiresThinkingAsText: boolean;
|
|
393
|
+
requiresMistralToolIds: boolean;
|
|
394
|
+
requiresToolResultName: boolean;
|
|
395
|
+
requiresAssistantAfterToolResult: boolean;
|
|
396
|
+
requiresAssistantContentForToolCalls: boolean;
|
|
397
|
+
stripDeepseekSpecialTokens: boolean;
|
|
398
|
+
streamMarkupHealingPattern?: OpenAIStreamMarkupHealingPattern;
|
|
399
|
+
reasoningDeltasMayBeCumulative: boolean;
|
|
400
|
+
emptyLengthFinishIsContextError: boolean;
|
|
401
|
+
usesOpenAIToolCallIdLimit: boolean;
|
|
402
|
+
promptCacheSessionHeader?: OpenAICompat["promptCacheSessionHeader"];
|
|
403
|
+
/** The model sits behind OpenRouter (routing prefs and max-token omission apply). */
|
|
404
|
+
isOpenRouterHost: boolean;
|
|
405
|
+
/** Whether this endpoint needs a max-token field even when caller did not set one. */
|
|
406
|
+
alwaysSendMaxTokens: boolean;
|
|
407
|
+
/** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. Set by the builder from the family classifier. */
|
|
408
|
+
enableGeminiThinkingLoopGuard?: boolean;
|
|
409
|
+
openRouterRouting?: OpenAICompat["openRouterRouting"];
|
|
410
|
+
/** Provider-specific wire model-id transform applied to the base id. */
|
|
411
|
+
wireModelIdMode: "raw" | "firepass" | "fireworks" | "openrouter";
|
|
412
|
+
}
|
|
334
413
|
/**
|
|
335
414
|
* Fully-resolved chat-completions compat view: every detected default
|
|
336
415
|
* materialized and user overrides applied. Built once per model by
|
|
337
416
|
* `buildModel`; request handlers read fields and never detect, resolve, or
|
|
338
417
|
* allocate.
|
|
339
418
|
*/
|
|
340
|
-
export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "
|
|
341
|
-
openRouterRouting?: OpenAICompat["openRouterRouting"];
|
|
419
|
+
export type ResolvedOpenAICompat = ResolvedOpenAISharedCompat & Required<Omit<OpenAICompat, "supportsDeveloperRole" | "supportsReasoningEffort" | "reasoningEffortMap" | "supportsReasoningParams" | "thinkingFormat" | "reasoningDisableMode" | "omitReasoningEffort" | "includeEncryptedReasoning" | "filterReasoningHistory" | "disableReasoningOnForcedToolChoice" | "disableReasoningOnToolChoice" | "supportsToolChoice" | "supportsForcedToolChoice" | "reasoningContentField" | "requiresReasoningContentForToolCalls" | "requiresReasoningContentForAllAssistantTurns" | "allowsSyntheticReasoningContentForToolCalls" | "requiresThinkingAsText" | "requiresMistralToolIds" | "requiresToolResultName" | "requiresAssistantAfterToolResult" | "requiresAssistantContentForToolCalls" | "stripDeepseekSpecialTokens" | "streamMarkupHealingPattern" | "reasoningDeltasMayBeCumulative" | "emptyLengthFinishIsContextError" | "usesOpenAIToolCallIdLimit" | "promptCacheSessionHeader" | "openRouterRouting" | "isOpenRouterHost" | "supportsStrictMode" | "supportsLongPromptCacheRetention" | "alwaysSendMaxTokens" | "wireModelIdMode" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "toolSchemaFlavor" | "streamIdleTimeoutMs" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "enableGeminiThinkingLoopGuard" | "whenThinking">> & {
|
|
342
420
|
vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
|
|
343
421
|
extraBody?: OpenAICompat["extraBody"];
|
|
344
422
|
cacheControlFormat?: OpenAICompat["cacheControlFormat"];
|
|
345
423
|
thinkingKeep?: OpenAICompat["thinkingKeep"];
|
|
346
424
|
streamIdleTimeoutMs?: number;
|
|
347
425
|
toolStrictMode: ResolvedToolStrictMode;
|
|
348
|
-
|
|
349
|
-
isOpenRouterHost: boolean;
|
|
426
|
+
toolSchemaFlavor?: OpenAICompat["toolSchemaFlavor"];
|
|
350
427
|
/** The model sits behind Vercel AI Gateway. */
|
|
351
428
|
isVercelGatewayHost: boolean;
|
|
352
|
-
|
|
353
|
-
enableGeminiThinkingLoopGuard?: boolean;
|
|
429
|
+
dropThinkingWhenReasoningEffort: boolean;
|
|
354
430
|
/** Complete alternate view for thinking-engaged requests; swap pointers, never spread. */
|
|
355
431
|
whenThinking?: ResolvedOpenAICompat;
|
|
356
432
|
};
|
|
357
433
|
/** Fully-resolved Responses-API compat view (same contract as `ResolvedOpenAICompat`). */
|
|
358
|
-
export interface ResolvedOpenAIResponsesCompat {
|
|
359
|
-
supportsDeveloperRole: boolean;
|
|
360
|
-
supportsStrictMode: boolean;
|
|
361
|
-
supportsReasoningEffort: boolean;
|
|
434
|
+
export interface ResolvedOpenAIResponsesCompat extends ResolvedOpenAISharedCompat {
|
|
362
435
|
supportsLongPromptCacheRetention: boolean;
|
|
363
436
|
strictResponsesPairing: boolean;
|
|
364
437
|
requiresJuiceZeroHack: boolean;
|
|
365
|
-
|
|
366
|
-
/** See {@link OpenAICompat.enableGeminiThinkingLoopGuard}. */
|
|
367
|
-
enableGeminiThinkingLoopGuard?: boolean;
|
|
438
|
+
supportsObfuscationOptOut: boolean;
|
|
368
439
|
}
|
|
440
|
+
/**
|
|
441
|
+
* OpenRouter is a pseudo API: runtime dispatch can use either Responses
|
|
442
|
+
* (default) or Chat Completions (`PI_OPENROUTER_RESPONSES=0`) with the same
|
|
443
|
+
* model object, so its resolved compat must satisfy both handlers.
|
|
444
|
+
*/
|
|
445
|
+
export type ResolvedOpenRouterCompat = ResolvedOpenAICompat & ResolvedOpenAIResponsesCompat;
|
|
369
446
|
/** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
|
|
370
447
|
export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {
|
|
371
448
|
/**
|
|
@@ -377,9 +454,9 @@ export type ResolvedAnthropicCompat = Required<AnthropicCompat> & {
|
|
|
377
454
|
officialEndpoint: boolean;
|
|
378
455
|
};
|
|
379
456
|
/** Sparse, user-authored compat overrides for a given API (models.json / config vocabulary). */
|
|
380
|
-
export type CompatConfigOf<TApi extends Api> = TApi extends "openai-completions" | "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? OpenAICompat : TApi extends "anthropic-messages" ? AnthropicCompat : undefined;
|
|
457
|
+
export type CompatConfigOf<TApi extends Api> = TApi extends "openai-completions" | "openrouter" | "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? OpenAICompat : TApi extends "anthropic-messages" ? AnthropicCompat : undefined;
|
|
381
458
|
/** Resolved compat for a given API: complete record, materialized once by `buildModel`. */
|
|
382
|
-
export type CompatOf<TApi extends Api> = TApi extends "openai-completions" ? ResolvedOpenAICompat : TApi extends "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? ResolvedOpenAIResponsesCompat : TApi extends "anthropic-messages" ? ResolvedAnthropicCompat : undefined;
|
|
459
|
+
export type CompatOf<TApi extends Api> = TApi extends "openrouter" ? ResolvedOpenRouterCompat : TApi extends "openai-completions" ? ResolvedOpenAICompat : TApi extends "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? ResolvedOpenAIResponsesCompat : TApi extends "anthropic-messages" ? ResolvedAnthropicCompat : undefined;
|
|
383
460
|
export interface Model<TApi extends Api = Api> {
|
|
384
461
|
id: string;
|
|
385
462
|
/**
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "16.0.5",
|
|
4
|
+
"version": "16.0.6",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,11 +34,12 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "16.0.5",
|
|
37
|
+
"@oh-my-pi/pi-utils": "16.0.6",
|
|
38
|
+
"arktype": "^2.2.0",
|
|
38
39
|
"zod": "^4"
|
|
39
40
|
},
|
|
40
41
|
"devDependencies": {
|
|
41
|
-
"@oh-my-pi/pi-ai": "16.0.5",
|
|
42
|
+
"@oh-my-pi/pi-ai": "16.0.6",
|
|
42
43
|
"@types/bun": "^1.3.14"
|
|
43
44
|
},
|
|
44
45
|
"engines": {
|
package/src/build.ts
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* compat per request.
|
|
11
11
|
*/
|
|
12
12
|
import { buildAnthropicCompat } from "./compat/anthropic";
|
|
13
|
-
import { buildOpenAICompat, buildOpenAIResponsesCompat } from "./compat/openai";
|
|
13
|
+
import { buildOpenAICompat, buildOpenAIResponsesCompat, buildOpenRouterCompat } from "./compat/openai";
|
|
14
14
|
import { resolveModelThinking } from "./model-thinking";
|
|
15
15
|
import type { Api, CompatOf, Model, ModelSpec } from "./types";
|
|
16
16
|
import { cleanModelName } from "./utils";
|
|
@@ -28,6 +28,8 @@ export function buildModel<TApi extends Api>(spec: ModelSpec<TApi>): Model<TApi>
|
|
|
28
28
|
|
|
29
29
|
export function buildCompat(spec: ModelSpec<Api>): CompatOf<Api> {
|
|
30
30
|
switch (spec.api) {
|
|
31
|
+
case "openrouter":
|
|
32
|
+
return buildOpenRouterCompat(spec as ModelSpec<"openrouter">);
|
|
31
33
|
case "openai-completions":
|
|
32
34
|
return buildOpenAICompat(spec as ModelSpec<"openai-completions">);
|
|
33
35
|
case "openai-responses":
|
package/src/compat/openai.ts
CHANGED
|
@@ -13,13 +13,22 @@ import {
|
|
|
13
13
|
isClaudeModelId,
|
|
14
14
|
isDeepseekModelIdOrName,
|
|
15
15
|
isGlm52ReasoningEffortModelId,
|
|
16
|
+
isGrokReasoningEffortCapable,
|
|
16
17
|
isKimiK26ModelId,
|
|
17
18
|
isKimiModelId,
|
|
18
19
|
isMimoModelIdOrName,
|
|
19
20
|
isQwenModelId,
|
|
20
21
|
modelFamilyToken,
|
|
21
22
|
} from "../identity/family";
|
|
22
|
-
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
|
|
23
|
+
import type {
|
|
24
|
+
ModelSpec,
|
|
25
|
+
OpenAICompat,
|
|
26
|
+
OpenAIStreamMarkupHealingPattern,
|
|
27
|
+
ResolvedOpenAICompat,
|
|
28
|
+
ResolvedOpenAIResponsesCompat,
|
|
29
|
+
ResolvedOpenAISharedCompat,
|
|
30
|
+
ResolvedOpenRouterCompat,
|
|
31
|
+
} from "../types";
|
|
23
32
|
import { applyCompatOverrides } from "./apply";
|
|
24
33
|
|
|
25
34
|
/** GLM coding-plan SKUs idle for minutes mid-reasoning; see `streamIdleTimeoutMs`. */
|
|
@@ -29,6 +38,76 @@ const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
|
|
|
29
38
|
const DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS = 300_000;
|
|
30
39
|
/** Kimi K2.6 can spend several minutes reasoning before the first visible token. */
|
|
31
40
|
const KIMI_K26_REASONING_STREAM_IDLE_TIMEOUT_MS = 300_000;
|
|
41
|
+
const MINIMAX_PROVIDER_OR_ID_PATTERN = /minimax/i;
|
|
42
|
+
const DSML_HEALING_PROVIDERS = new Set([
|
|
43
|
+
"ollama",
|
|
44
|
+
"ollama-cloud",
|
|
45
|
+
"nvidia",
|
|
46
|
+
"deepseek",
|
|
47
|
+
"fireworks",
|
|
48
|
+
"nanogpt",
|
|
49
|
+
"opencode-go",
|
|
50
|
+
"openrouter",
|
|
51
|
+
]);
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Ollama's OpenAI-compatible `reasoning.effort` only accepts
|
|
55
|
+
* `high|medium|low|max|none`; OMP's `minimal`/`xhigh` levels make the server
|
|
56
|
+
* reject the turn with HTTP 400 `invalid reasoning value`. Map the two
|
|
57
|
+
* unsupported levels onto the closest accepted ones. Stamped in the compat
|
|
58
|
+
* builder (not only at discovery) so stale-cached and custom `ollama`-provider
|
|
59
|
+
* specs are backfilled on every `buildModel`, not just on a fresh
|
|
60
|
+
* `omp models refresh`. Custom OpenAI-compatible providers pointed at a local
|
|
61
|
+
* Ollama port under a different provider id are not covered — they must set
|
|
62
|
+
* `compat.reasoningEffortMap` themselves.
|
|
63
|
+
*/
|
|
64
|
+
const OLLAMA_REASONING_EFFORT_MAP: ResolvedOpenAISharedCompat["reasoningEffortMap"] = { minimal: "low", xhigh: "max" };
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Merge the Ollama default effort map under any explicit overrides (overrides
|
|
68
|
+
* win). No-op off the local `ollama` provider or for non-reasoning models.
|
|
69
|
+
*/
|
|
70
|
+
function mergeOllamaReasoningEffortMap(
|
|
71
|
+
compat: ResolvedOpenAISharedCompat,
|
|
72
|
+
provider: string,
|
|
73
|
+
reasoning: boolean | undefined,
|
|
74
|
+
): void {
|
|
75
|
+
if (provider !== "ollama" || !reasoning) return;
|
|
76
|
+
compat.reasoningEffortMap = { ...OLLAMA_REASONING_EFFORT_MAP, ...compat.reasoningEffortMap };
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
function resolveReasoningDisableMode(
|
|
80
|
+
thinkingFormat: ResolvedOpenAISharedCompat["thinkingFormat"],
|
|
81
|
+
): ResolvedOpenAISharedCompat["reasoningDisableMode"] {
|
|
82
|
+
switch (thinkingFormat) {
|
|
83
|
+
case "openrouter":
|
|
84
|
+
return "openrouter-enabled-false";
|
|
85
|
+
case "zai":
|
|
86
|
+
return "zai-thinking-disabled";
|
|
87
|
+
case "qwen":
|
|
88
|
+
return "qwen-enable-thinking-false";
|
|
89
|
+
case "qwen-chat-template":
|
|
90
|
+
return "qwen-template-false";
|
|
91
|
+
default:
|
|
92
|
+
return "lowest-effort";
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
function detectStreamMarkupHealingPattern(
|
|
97
|
+
provider: string,
|
|
98
|
+
modelId: string,
|
|
99
|
+
): OpenAIStreamMarkupHealingPattern | undefined {
|
|
100
|
+
if (MINIMAX_PROVIDER_OR_ID_PATTERN.test(provider) || MINIMAX_PROVIDER_OR_ID_PATTERN.test(modelId)) {
|
|
101
|
+
return "thinking";
|
|
102
|
+
}
|
|
103
|
+
if (provider === "kimi-code" || provider === "moonshot" || /kimi[-/_.]?k2/i.test(modelId)) {
|
|
104
|
+
return "kimi";
|
|
105
|
+
}
|
|
106
|
+
if (isDeepseekModelIdOrName(modelId) && DSML_HEALING_PROVIDERS.has(provider)) {
|
|
107
|
+
return "dsml";
|
|
108
|
+
}
|
|
109
|
+
return undefined;
|
|
110
|
+
}
|
|
32
111
|
|
|
33
112
|
/**
|
|
34
113
|
* OpenCode's gateways (https://opencode.ai/zen|go) gate `reasoning_content`
|
|
@@ -197,6 +276,25 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
197
276
|
? DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS
|
|
198
277
|
: undefined;
|
|
199
278
|
|
|
279
|
+
const wireModelIdMode: ResolvedOpenAISharedCompat["wireModelIdMode"] =
|
|
280
|
+
provider === "firepass"
|
|
281
|
+
? "firepass"
|
|
282
|
+
: provider === "fireworks"
|
|
283
|
+
? "fireworks"
|
|
284
|
+
: isOpenRouter
|
|
285
|
+
? "openrouter"
|
|
286
|
+
: "raw";
|
|
287
|
+
const thinkingFormat: ResolvedOpenAISharedCompat["thinkingFormat"] =
|
|
288
|
+
isZai || isZhipu || isMoonshotKimi || isXiaomiMimo
|
|
289
|
+
? "zai"
|
|
290
|
+
: isOpenRouter
|
|
291
|
+
? "openrouter"
|
|
292
|
+
: isQwen && isNvidiaNim
|
|
293
|
+
? "qwen-chat-template"
|
|
294
|
+
: isAlibaba || isQwen
|
|
295
|
+
? "qwen"
|
|
296
|
+
: "openai";
|
|
297
|
+
|
|
200
298
|
const compat: ResolvedOpenAICompat = {
|
|
201
299
|
supportsStore: !isNonStandard,
|
|
202
300
|
// `developer` is an OpenAI-Responses-era extension to the chat-completions schema. Almost
|
|
@@ -229,7 +327,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
229
327
|
supportsForcedToolChoice: true,
|
|
230
328
|
maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
|
|
231
329
|
requiresToolResultName: isMistral,
|
|
232
|
-
requiresAssistantAfterToolResult:
|
|
330
|
+
requiresAssistantAfterToolResult: isMistral,
|
|
233
331
|
requiresThinkingAsText: isMistral,
|
|
234
332
|
requiresMistralToolIds: isMistral,
|
|
235
333
|
// Only Kimi's native hosts (Moonshot / Kimi-code, matched by `isMoonshotKimi`)
|
|
@@ -241,16 +339,11 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
241
339
|
// (`chat_template_kwargs.enable_thinking`); top-level `enable_thinking`
|
|
242
340
|
// is rejected by NIM's `additionalProperties: false` request schema
|
|
243
341
|
// (issue #2299).
|
|
244
|
-
thinkingFormat
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
: isQwen && isNvidiaNim
|
|
250
|
-
? "qwen-chat-template"
|
|
251
|
-
: isAlibaba || isQwen
|
|
252
|
-
? "qwen"
|
|
253
|
-
: "openai",
|
|
342
|
+
thinkingFormat,
|
|
343
|
+
reasoningDisableMode: resolveReasoningDisableMode(thinkingFormat),
|
|
344
|
+
omitReasoningEffort: false,
|
|
345
|
+
includeEncryptedReasoning: true,
|
|
346
|
+
filterReasoningHistory: false,
|
|
254
347
|
thinkingKeep: usesMoonshotKimiPreservedThinking ? "all" : undefined,
|
|
255
348
|
reasoningContentField: "reasoning_content",
|
|
256
349
|
// Backends that 400 follow-up requests when prior assistant tool-call turns lack `reasoning_content`:
|
|
@@ -271,6 +364,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
271
364
|
(isDeepseekFamily && Boolean(spec.reasoning)) ||
|
|
272
365
|
isXiaomiMimo ||
|
|
273
366
|
(isOpenRouter && Boolean(spec.reasoning)),
|
|
367
|
+
requiresReasoningContentForAllAssistantTurns:
|
|
368
|
+
((isDeepseekFamily && Boolean(spec.reasoning)) || isXiaomiMimo) && !isOpenRouter,
|
|
274
369
|
// DeepSeek V4 and Xiaomi MiMo reject synthetic reasoning_content placeholders (".") on tool-call turns.
|
|
275
370
|
// Kimi and OpenRouter accept them when actual reasoning is unavailable.
|
|
276
371
|
allowsSyntheticReasoningContentForToolCalls: (!isDeepseekFamily || !spec.reasoning) && !isXiaomiMimo,
|
|
@@ -279,20 +374,45 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
279
374
|
openRouterRouting: undefined,
|
|
280
375
|
vercelGatewayRouting: undefined,
|
|
281
376
|
isOpenRouterHost: isOpenRouter,
|
|
377
|
+
wireModelIdMode,
|
|
282
378
|
isVercelGatewayHost: isVercelGateway,
|
|
283
379
|
supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
|
|
284
380
|
extraBody: isDirectDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
|
|
285
381
|
toolStrictMode: isCerebras ? "all_strict" : "mixed",
|
|
382
|
+
toolSchemaFlavor: isMoonshotNative ? "moonshot-mfjs" : undefined,
|
|
286
383
|
streamIdleTimeoutMs,
|
|
384
|
+
stripDeepseekSpecialTokens:
|
|
385
|
+
isDeepseekModelIdOrName(spec.id) && (provider === "nvidia" || provider === "deepseek"),
|
|
386
|
+
streamMarkupHealingPattern: detectStreamMarkupHealingPattern(provider, spec.id),
|
|
387
|
+
reasoningDeltasMayBeCumulative:
|
|
388
|
+
MINIMAX_PROVIDER_OR_ID_PATTERN.test(provider) || MINIMAX_PROVIDER_OR_ID_PATTERN.test(spec.id),
|
|
389
|
+
emptyLengthFinishIsContextError: provider === "ollama",
|
|
390
|
+
usesOpenAIToolCallIdLimit: provider === "openai",
|
|
391
|
+
promptCacheSessionHeader: undefined,
|
|
392
|
+
dropThinkingWhenReasoningEffort: provider === "fireworks",
|
|
287
393
|
};
|
|
288
394
|
|
|
289
395
|
applyCompatOverrides(compat, spec.compat);
|
|
396
|
+
if (spec.compat?.reasoningDisableMode === undefined) {
|
|
397
|
+
compat.reasoningDisableMode = resolveReasoningDisableMode(compat.thinkingFormat);
|
|
398
|
+
}
|
|
399
|
+
if (spec.compat?.omitReasoningEffort === undefined && !compat.supportsReasoningEffort) {
|
|
400
|
+
compat.omitReasoningEffort = true;
|
|
401
|
+
}
|
|
402
|
+
mergeOllamaReasoningEffortMap(compat, provider, spec.reasoning);
|
|
290
403
|
|
|
291
404
|
const whenThinkingPolicy =
|
|
292
405
|
spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
|
|
293
406
|
if (whenThinkingPolicy) {
|
|
294
407
|
const variant: ResolvedOpenAICompat = { ...compat };
|
|
295
408
|
applyCompatOverrides(variant, whenThinkingPolicy);
|
|
409
|
+
if (whenThinkingPolicy.reasoningDisableMode === undefined) {
|
|
410
|
+
variant.reasoningDisableMode = resolveReasoningDisableMode(variant.thinkingFormat);
|
|
411
|
+
}
|
|
412
|
+
if (whenThinkingPolicy.omitReasoningEffort === undefined && !variant.supportsReasoningEffort) {
|
|
413
|
+
variant.omitReasoningEffort = true;
|
|
414
|
+
}
|
|
415
|
+
mergeOllamaReasoningEffortMap(variant, provider, spec.reasoning);
|
|
296
416
|
compat.whenThinking = variant;
|
|
297
417
|
}
|
|
298
418
|
|
|
@@ -304,6 +424,7 @@ interface OpenAIResponsesSpecLike {
|
|
|
304
424
|
provider: string;
|
|
305
425
|
name: string;
|
|
306
426
|
baseUrl: string;
|
|
427
|
+
reasoning?: boolean;
|
|
307
428
|
compat?: OpenAICompat;
|
|
308
429
|
}
|
|
309
430
|
|
|
@@ -321,22 +442,88 @@ interface OpenAIResponsesSpecLike {
|
|
|
321
442
|
export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat {
	// Classification flags derived from provider, base URL, model id and display name.
	const baseUrl = spec.baseUrl ?? "";
	const isAzure = modelMatchesHost({ provider: spec.provider, baseUrl }, "azureOpenAI");
	const isOpenRouter = modelMatchesHost({ provider: spec.provider, baseUrl }, "openrouter");
	const isOpenAIUrl = hostMatchesUrl(baseUrl, "openai");
	// An absent id collapses to "" so every id-based detection below evaluates to false/undefined.
	const id = spec.id ?? "";
	const thinkingFormat: ResolvedOpenAISharedCompat["thinkingFormat"] = isOpenRouter ? "openrouter" : "openai";
	const isKimiModel = id ? isKimiModelId(id) : false;
	// DeepSeek detection consults both the id and the display name, but only when an id is present.
	const isDeepseekFamily = id ? isDeepseekModelIdOrName(id) || isDeepseekModelIdOrName(spec.name) : false;
	const reasoningCapable = Boolean(spec.reasoning);

	// Detected defaults; explicit `spec.compat` values are layered on top by `applyCompatOverrides` below.
	const compat: ResolvedOpenAIResponsesCompat = {
		supportsDeveloperRole: isAzure || isOpenAIUrl || hostMatchesUrl(baseUrl, "githubCopilot"),
		supportsStrictMode:
			spec.provider === "openai" || isAzure || spec.provider === "github-copilot" || isOpenRouter || isOpenAIUrl,
		// Only the `xai-oauth` provider is gated on the Grok-specific capability check.
		supportsReasoningEffort: spec.provider !== "xai-oauth" || isGrokReasoningEffortCapable(id),
		supportsLongPromptCacheRetention: isOpenAIUrl,
		// Azure OpenAI and GitHub Copilot Responses paths require tool results
		// to strictly match prior tool calls when building Responses inputs.
		strictResponsesPairing: isAzure || spec.provider === "github-copilot",
		requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
		reasoningEffortMap: {},
		supportsReasoningParams: true,
		thinkingFormat,
		reasoningDisableMode: resolveReasoningDisableMode(thinkingFormat),
		omitReasoningEffort: false,
		includeEncryptedReasoning: spec.provider !== "xai-oauth",
		filterReasoningHistory: spec.provider === "xai-oauth",
		disableReasoningOnForcedToolChoice: isKimiModel,
		disableReasoningOnToolChoice: isDeepseekFamily && reasoningCapable && !isOpenRouter,
		supportsToolChoice: true,
		supportsForcedToolChoice: true,
		reasoningContentField: "reasoning_content",
		// NOTE(review): the trailing `&& reasoningCapable` effectively only adds a gate to the Kimi
		// branch; the DeepSeek and OpenRouter branches already include it. Confirm this is intended.
		requiresReasoningContentForToolCalls:
			(isKimiModel || (isDeepseekFamily && reasoningCapable) || (isOpenRouter && reasoningCapable)) &&
			reasoningCapable,
		requiresReasoningContentForAllAssistantTurns: isDeepseekFamily && reasoningCapable && !isOpenRouter,
		allowsSyntheticReasoningContentForToolCalls: !isDeepseekFamily || !reasoningCapable,
		requiresThinkingAsText: false,
		requiresMistralToolIds: false,
		requiresToolResultName: false,
		requiresAssistantAfterToolResult: false,
		requiresAssistantContentForToolCalls: isKimiModel,
		openRouterRouting: undefined,
		isOpenRouterHost: isOpenRouter,
		wireModelIdMode: isOpenRouter ? "openrouter" : "raw",
		// NOTE(review): evaluates the same predicate as `isKimiModel` above.
		alwaysSendMaxTokens: spec.id ? isKimiModelId(spec.id) : false,
		enableGeminiThinkingLoopGuard: modelFamilyToken(spec.id ?? "") === "gemini",
		supportsObfuscationOptOut: isOpenAIUrl || spec.provider === "openai",
		stripDeepseekSpecialTokens:
			Boolean(id) && isDeepseekModelIdOrName(id) && (spec.provider === "nvidia" || spec.provider === "deepseek"),
		streamMarkupHealingPattern: id ? detectStreamMarkupHealingPattern(spec.provider, id) : undefined,
		reasoningDeltasMayBeCumulative:
			MINIMAX_PROVIDER_OR_ID_PATTERN.test(spec.provider) || (id ? MINIMAX_PROVIDER_OR_ID_PATTERN.test(id) : false),
		emptyLengthFinishIsContextError: spec.provider === "ollama",
		usesOpenAIToolCallIdLimit: spec.provider === "openai",
		promptCacheSessionHeader: spec.provider === "xai-oauth" ? "x-grok-conv-id" : undefined,
	};
	applyCompatOverrides(compat, spec.compat);
	// Unless the spec pins `reasoningDisableMode`, re-derive it from the post-override
	// `thinkingFormat` so an overridden format does not keep the pre-override disable mode.
	if (spec.compat?.reasoningDisableMode === undefined) {
		compat.reasoningDisableMode = resolveReasoningDisableMode(compat.thinkingFormat);
	}
	// When reasoning effort ends up unsupported and the spec did not set
	// `omitReasoningEffort` itself, default to omitting it.
	if (spec.compat?.omitReasoningEffort === undefined && !compat.supportsReasoningEffort) {
		compat.omitReasoningEffort = true;
	}
	mergeOllamaReasoningEffortMap(compat, spec.provider, spec.reasoning);
	return compat;
}
|
|
510
|
+
|
|
511
|
+
/** Fields of `ResolvedOpenAIResponsesCompat` whose keys are not part of `ResolvedOpenAISharedCompat`. */
type ResponsesOnlyCompat = Omit<ResolvedOpenAIResponsesCompat, keyof ResolvedOpenAISharedCompat>;
|
|
512
|
+
|
|
513
|
+
/**
 * Copies the Responses-only fields out of a resolved Responses compat object.
 *
 * The result is built field by field against `ResponsesOnlyCompat`, so a key
 * added to that type without being handled here is reported by the compiler.
 *
 * @param compat - Fully resolved Responses compat to read from.
 * @returns A new object holding only the Responses-specific fields.
 */
function pickResponsesOnly(compat: ResolvedOpenAIResponsesCompat): ResponsesOnlyCompat {
	const {
		supportsLongPromptCacheRetention,
		strictResponsesPairing,
		requiresJuiceZeroHack,
		supportsObfuscationOptOut,
	} = compat;
	const responsesOnly: ResponsesOnlyCompat = {
		supportsLongPromptCacheRetention,
		strictResponsesPairing,
		requiresJuiceZeroHack,
		supportsObfuscationOptOut,
	};
	return responsesOnly;
}
|
|
521
|
+
|
|
522
|
+
/**
 * Resolves the compat configuration for an `openrouter` model spec.
 *
 * The chat-completions compat is computed from the spec re-tagged as
 * `openai-completions`; the Responses compat is computed from the spec as
 * given. The result is the chat compat with the Responses-only fields
 * layered on top.
 *
 * @param spec - Model spec using the `openrouter` API type.
 * @returns Chat compat fields plus the Responses-only fields.
 */
export function buildOpenRouterCompat(spec: ModelSpec<"openrouter">): ResolvedOpenRouterCompat {
	// The chat builder only accepts `openai-completions` specs, so re-tag a copy for it.
	const chatSpec = { ...spec, api: "openai-completions" } as ModelSpec<"openai-completions">;
	const chatCompat = buildOpenAICompat(chatSpec);
	const responsesOnly = pickResponsesOnly(buildOpenAIResponsesCompat(spec));
	return { ...chatCompat, ...responsesOnly } as ResolvedOpenRouterCompat;
}
|