@oh-my-pi/pi-catalog 15.11.2 → 15.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -1
- package/dist/types/compat/openai.d.ts +3 -1
- package/dist/types/identity/family.d.ts +17 -0
- package/dist/types/model-thinking.d.ts +1 -1
- package/dist/types/provider-models/openai-compat.d.ts +2 -0
- package/dist/types/types.d.ts +22 -4
- package/dist/types/wire/github-copilot.d.ts +15 -0
- package/package.json +3 -3
- package/src/compat/openai.ts +5 -56
- package/src/discovery/openai-compatible.ts +3 -1
- package/src/identity/family.ts +27 -0
- package/src/model-thinking.ts +168 -16
- package/src/models.json +1148 -579
- package/src/provider-models/openai-compat.ts +212 -51
- package/src/types.ts +22 -3
- package/src/wire/github-copilot.ts +17 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,33 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.11.4] - 2026-06-12
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Fixed MiniMax M2-family and OpenAI gpt-oss model metadata so OpenAI-compatible catalog entries declare only `low|medium|high` thinking efforts. Their upstreams reject `minimal`, `xhigh`, and Fireworks' `minimal → none` wire mapping, so `fireworks/minimax-m2.7` as the smol auto-thinking classifier model 400ed on every turn. OpenAI-compatible provider effort maps (`Groq qwen/qwen3-32b`, DeepSeek-family, OpenRouter Anthropic adaptive, Fireworks `minimal → none`) now bake into `thinking.effortMap` in catalog metadata instead of `buildOpenAICompat`, and request builders read that field directly. Regenerated `models.json` now makes `disableReasoning` choose `low` for those families while leaving GLM-5.x and other Fireworks models on the existing `minimal → none` path ([#2315](https://github.com/can1357/oh-my-pi/issues/2315)).
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- Added `requiresJuiceZeroHack` Responses-API compat flag, resolved by `buildOpenAIResponsesCompat` from GPT-5-family model names and overridable via sparse model `compat` config. Replaces the request-time `model.name.startsWith("gpt-5")` sniff that gated the trailing `# Juice: 0 !important` no-reasoning developer item.
|
|
13
|
+
|
|
14
|
+
## [15.11.3] - 2026-06-11
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- Added `requestModelId` on `Model` to represent the upstream model id used when a catalog entry is a local variant
|
|
18
|
+
- Added synthetic GitHub Copilot long-context model variants with `-1m` suffixes when tiered token pricing is advertised
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
|
|
22
|
+
- Changed GitHub Copilot discovery to request `X-GitHub-Api-Version: 2026-06-01` from `api.githubcopilot.com`
|
|
23
|
+
- Changed GitHub Copilot discovery to cap base model `contextWindow` to the default token tier and keep long-context access as the separate `-1m` model entry
|
|
24
|
+
- Changed Copilot model mapping to omit non-chat `/models` entries and enable image input for models whose capabilities indicate vision support
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- Fixed long-context variant pricing to use `billing.token_prices.long_context` rates instead of default model pricing
|
|
29
|
+
- Fixed `mapModel` handling in OpenAI-compatible discovery so returning `null` now skips a model entry rather than falling back to defaults
|
|
30
|
+
- Fixed model ID precedence so a real upstream Copilot model id is kept when it conflicts with a synthesized `-1m` variant
|
|
31
|
+
|
|
5
32
|
## [15.11.1] - 2026-06-11
|
|
6
33
|
|
|
7
34
|
### Fixed
|
|
@@ -62,4 +89,4 @@
|
|
|
62
89
|
|
|
63
90
|
### Removed
|
|
64
91
|
|
|
65
|
-
- Removed the runtime enrichment layer: `enrichModelThinking` (and its non-enumerable memo-slot cache), `refreshModelThinking`, `modelOmitsReasoningEffort`, and the `model-thinking` re-exports of generator-only policies. Thinking metadata is resolved exactly once inside `buildModel`; runtime helpers (`getSupportedEfforts`, `clampThinkingLevelForModel`, `requireSupportedEffort`, the effort mappers) are pure field reads.
|
|
92
|
+
- Removed the runtime enrichment layer: `enrichModelThinking` (and its non-enumerable memo-slot cache), `refreshModelThinking`, `modelOmitsReasoningEffort`, and the `model-thinking` re-exports of generator-only policies. Thinking metadata is resolved exactly once inside `buildModel`; runtime helpers (`getSupportedEfforts`, `clampThinkingLevelForModel`, `requireSupportedEffort`, the effort mappers) are pure field reads.
|
|
@@ -6,6 +6,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
|
|
|
6
6
|
export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
|
|
7
7
|
interface OpenAIResponsesSpecLike {
|
|
8
8
|
provider: string;
|
|
9
|
+
name: string;
|
|
9
10
|
baseUrl: string;
|
|
10
11
|
compat?: OpenAICompat;
|
|
11
12
|
}
|
|
@@ -15,7 +16,8 @@ interface OpenAIResponsesSpecLike {
|
|
|
15
16
|
* endpoint accepts the `developer` role, while strict tool mode is scoped to
|
|
16
17
|
* first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
|
|
17
18
|
* detection are URL-only on purpose — the historical call sites never
|
|
18
|
-
* consulted the provider id for them.
|
|
19
|
+
* consulted the provider id for them. The GPT-5 juice-zero hack keys on the
|
|
20
|
+
* model name, matching the historical request-time check.
|
|
19
21
|
*/
|
|
20
22
|
export declare function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat;
|
|
21
23
|
export {};
|
|
@@ -20,6 +20,23 @@ export declare function isQwenModelId(modelId: string): boolean;
|
|
|
20
20
|
export declare function isDeepseekModelIdOrName(value: string): boolean;
|
|
21
21
|
/** Xiaomi MiMo family by id or display name. */
|
|
22
22
|
export declare function isMimoModelIdOrName(value: string): boolean;
|
|
23
|
+
/**
|
|
24
|
+
* MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
|
|
25
|
+
* `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
|
|
26
|
+
* and short `minimax/m2-…` ids on aggregator hosts). Underlying model accepts
|
|
27
|
+
* only `low|medium|high` for `reasoning_effort` and 400s on `minimal`,
|
|
28
|
+
* `xhigh`, or `none` — so hosts whose default effort map otherwise lowers
|
|
29
|
+
* `minimal` to `none` (Fireworks) or expects the full 5-tier scale must
|
|
30
|
+
* clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
|
|
31
|
+
*/
|
|
32
|
+
export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
|
|
33
|
+
/**
|
|
34
|
+
* OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
|
|
35
|
+
* `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
|
|
36
|
+
* `low|medium|high` for `reasoning_effort` and rejects `minimal`, `xhigh`,
|
|
37
|
+
* and `none`.
|
|
38
|
+
*/
|
|
39
|
+
export declare function isOpenAIGptOssModelId(modelId: string): boolean;
|
|
23
40
|
/**
|
|
24
41
|
* Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
|
|
25
42
|
* the Claude Fable/Mythos 5 generation. Older adaptive-thinking models
|
|
@@ -38,7 +38,7 @@ export declare const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Reco
|
|
|
38
38
|
* - Explicit spec thinking (generator-baked or user-authored) owns the
|
|
39
39
|
* capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
|
|
40
40
|
* (`effortMap`, `supportsDisplay`) are backfilled from identity when not
|
|
41
|
-
* explicitly set, so configs never need to know
|
|
41
|
+
* explicitly set, so configs never need to know provider wire tier tables.
|
|
42
42
|
* - Sparse specs go through full inference.
|
|
43
43
|
*/
|
|
44
44
|
export declare function resolveModelThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): ThinkingConfig | undefined;
|
|
@@ -334,6 +334,8 @@ export interface GithubCopilotModelManagerConfig {
|
|
|
334
334
|
baseUrl?: string;
|
|
335
335
|
fetch?: FetchImpl;
|
|
336
336
|
}
|
|
337
|
+
/** Local id/name suffixes for synthesized Copilot long-context variants. */
|
|
338
|
+
export declare const COPILOT_LONG_CONTEXT_ID_SUFFIX = "-1m";
|
|
337
339
|
export declare function githubCopilotModelManagerOptions(config?: GithubCopilotModelManagerConfig): ModelManagerOptions<Api>;
|
|
338
340
|
export interface AnthropicModelManagerConfig {
|
|
339
341
|
apiKey?: string;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -17,9 +17,9 @@ export interface ThinkingConfig {
|
|
|
17
17
|
/** Optional default effort applied when this model is selected. Falls back to global default if absent. */
|
|
18
18
|
defaultLevel?: Effort;
|
|
19
19
|
/**
|
|
20
|
-
* Effort → wire-value remap
|
|
21
|
-
*
|
|
22
|
-
*
|
|
20
|
+
* Effort → provider wire-value remap, baked at build time. Identity for
|
|
21
|
+
* efforts the map omits. Used by Anthropic adaptive thinking, OpenAI-
|
|
22
|
+
* compatible `reasoning_effort`, and Responses-style reasoning params.
|
|
23
23
|
*/
|
|
24
24
|
effortMap?: Partial<Record<Effort, string>>;
|
|
25
25
|
/**
|
|
@@ -179,6 +179,14 @@ export interface OpenAICompat {
|
|
|
179
179
|
alwaysSendMaxTokens?: boolean;
|
|
180
180
|
/** Whether Responses-API tool-call/result history must be strictly paired. Default: auto-detected (Azure OpenAI, GitHub Copilot). */
|
|
181
181
|
strictResponsesPairing?: boolean;
|
|
182
|
+
/**
|
|
183
|
+
* Append a trailing `# Juice: 0 !important` developer item when the caller
|
|
184
|
+
* did not request reasoning, suppressing default reasoning on models that
|
|
185
|
+
* cannot disable it via request params (Responses APIs only; see
|
|
186
|
+
* https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7).
|
|
187
|
+
* Default: auto-detected (GPT-5-family model names).
|
|
188
|
+
*/
|
|
189
|
+
requiresJuiceZeroHack?: boolean;
|
|
182
190
|
/**
|
|
183
191
|
* Compat deltas applied when a request actually engages thinking mode
|
|
184
192
|
* (reasoning requested and not disabled, model reasoning-capable, and not
|
|
@@ -279,7 +287,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
|
|
|
279
287
|
* `buildModel`; request handlers read fields and never detect, resolve, or
|
|
280
288
|
* allocate.
|
|
281
289
|
*/
|
|
282
|
-
export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "whenThinking">> & {
|
|
290
|
+
export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
|
|
283
291
|
openRouterRouting?: OpenAICompat["openRouterRouting"];
|
|
284
292
|
vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
|
|
285
293
|
extraBody?: OpenAICompat["extraBody"];
|
|
@@ -301,6 +309,7 @@ export interface ResolvedOpenAIResponsesCompat {
|
|
|
301
309
|
supportsReasoningEffort: boolean;
|
|
302
310
|
supportsLongPromptCacheRetention: boolean;
|
|
303
311
|
strictResponsesPairing: boolean;
|
|
312
|
+
requiresJuiceZeroHack: boolean;
|
|
304
313
|
reasoningEffortMap: Partial<Record<Effort, string>>;
|
|
305
314
|
}
|
|
306
315
|
/** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
|
|
@@ -319,6 +328,15 @@ export type CompatConfigOf<TApi extends Api> = TApi extends "openai-completions"
|
|
|
319
328
|
export type CompatOf<TApi extends Api> = TApi extends "openai-completions" ? ResolvedOpenAICompat : TApi extends "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? ResolvedOpenAIResponsesCompat : TApi extends "anthropic-messages" ? ResolvedAnthropicCompat : undefined;
|
|
320
329
|
export interface Model<TApi extends Api = Api> {
|
|
321
330
|
id: string;
|
|
331
|
+
/**
|
|
332
|
+
* Model id to send on the wire when it differs from `id`. Used by catalog
|
|
333
|
+
* variants that present one upstream model under several local entries —
|
|
334
|
+
* e.g. GitHub Copilot long-context variants (`claude-opus-4.7-1m` requests
|
|
335
|
+
* upstream `claude-opus-4.7`; the tier is a client-side context budget, not
|
|
336
|
+
* a served model id). Providers MUST serialize `requestModelId ?? id`;
|
|
337
|
+
* everything local (selection, caching, usage attribution) keys on `id`.
|
|
338
|
+
*/
|
|
339
|
+
requestModelId?: string;
|
|
322
340
|
name: string;
|
|
323
341
|
api: TApi;
|
|
324
342
|
provider: Provider;
|
|
@@ -7,6 +7,21 @@ export declare const COPILOT_USER_AGENT: "opencode/1.3.15";
|
|
|
7
7
|
export declare const OPENCODE_HEADERS: {
|
|
8
8
|
readonly "User-Agent": "opencode/1.3.15";
|
|
9
9
|
};
|
|
10
|
+
/**
|
|
11
|
+
* Copilot API version sent on `api.githubcopilot.com` requests (`/models`,
|
|
12
|
+
* chat endpoints). Newer versions unlock tiered context metadata: `/models`
|
|
13
|
+
* reports the full long-context window in `capabilities.limits` plus per-tier
|
|
14
|
+
* boundaries/prices under `billing.token_prices.{default,long_context}`.
|
|
15
|
+
* Without it the endpoint serves default-tier limits only (e.g. 264k instead
|
|
16
|
+
* of 1M for Claude Opus). Never send this to `api.github.com` REST endpoints —
|
|
17
|
+
* they validate `X-GitHub-Api-Version` against the REST version vocabulary.
|
|
18
|
+
*/
|
|
19
|
+
export declare const COPILOT_API_VERSION: "2026-06-01";
|
|
20
|
+
/** Headers for `api.githubcopilot.com` (capi) requests: discovery, chat, policy. */
|
|
21
|
+
export declare const COPILOT_API_HEADERS: {
|
|
22
|
+
readonly "User-Agent": "opencode/1.3.15";
|
|
23
|
+
readonly "X-GitHub-Api-Version": "2026-06-01";
|
|
24
|
+
};
|
|
10
25
|
export type ParsedGitHubCopilotApiKey = {
|
|
11
26
|
accessToken: string;
|
|
12
27
|
enterpriseUrl?: string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "15.11.2",
|
|
4
|
+
"version": "15.11.4",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,11 +34,11 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "15.11.2",
|
|
37
|
+
"@oh-my-pi/pi-utils": "15.11.4",
|
|
38
38
|
"zod": "4.4.3"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
|
-
"@oh-my-pi/pi-ai": "15.11.2",
|
|
41
|
+
"@oh-my-pi/pi-ai": "15.11.4",
|
|
42
42
|
"@types/bun": "^1.3.14"
|
|
43
43
|
},
|
|
44
44
|
"engines": {
|
package/src/compat/openai.ts
CHANGED
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
* never detect, resolve, or allocate.
|
|
9
9
|
*/
|
|
10
10
|
import { hostMatchesUrl, modelMatchesHost } from "../hosts";
|
|
11
|
-
import { bareModelId, isFableOrMythos, parseAnthropicModel, semverGte } from "../identity/classify";
|
|
12
11
|
import {
|
|
13
12
|
isAnthropicNamespacedModelId,
|
|
14
13
|
isClaudeModelId,
|
|
@@ -18,12 +17,9 @@ import {
|
|
|
18
17
|
isMimoModelIdOrName,
|
|
19
18
|
isQwenModelId,
|
|
20
19
|
} from "../identity/family";
|
|
21
|
-
import { ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER, ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER } from "../model-thinking";
|
|
22
20
|
import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
|
|
23
21
|
import { applyCompatOverrides } from "./apply";
|
|
24
22
|
|
|
25
|
-
type OpenAIReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
26
|
-
|
|
27
23
|
/** GLM coding-plan SKUs idle for minutes mid-reasoning; see `streamIdleTimeoutMs`. */
|
|
28
24
|
const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;
|
|
29
25
|
const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
|
|
@@ -72,22 +68,6 @@ function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
|
|
|
72
68
|
);
|
|
73
69
|
}
|
|
74
70
|
|
|
75
|
-
function getOpenRouterAnthropicReasoningEffortMap(
|
|
76
|
-
modelId: string,
|
|
77
|
-
): Partial<Record<OpenAIReasoningEffort, string>> | undefined {
|
|
78
|
-
const parsed = parseAnthropicModel(bareModelId(modelId));
|
|
79
|
-
if (!parsed) return undefined;
|
|
80
|
-
// Adaptive efforts on OpenRouter's completions front: Fable/Mythos and
|
|
81
|
-
// Opus 4.6+ only — Sonnet stays on the plain effort vocabulary there.
|
|
82
|
-
const isOpusAdaptive = parsed.kind === "opus" && semverGte(parsed.version, "4.6");
|
|
83
|
-
if (!isFableOrMythos(parsed.kind) && !isOpusAdaptive) return undefined;
|
|
84
|
-
|
|
85
|
-
const hasRealXHigh = isFableOrMythos(parsed.kind) || semverGte(parsed.version, "4.7");
|
|
86
|
-
return (hasRealXHigh ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER) as Partial<
|
|
87
|
-
Record<OpenAIReasoningEffort, string>
|
|
88
|
-
>;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
71
|
/**
|
|
92
72
|
* Build the resolved chat-completions compat record for a model spec.
|
|
93
73
|
* Provider takes precedence over URL-based detection since it's explicitly configured.
|
|
@@ -198,36 +178,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
198
178
|
isCopilotHost ||
|
|
199
179
|
isZenmuxHost);
|
|
200
180
|
|
|
201
|
-
const openRouterAnthropicReasoningEffortMap = isOpenRouter
|
|
202
|
-
? getOpenRouterAnthropicReasoningEffortMap(lowerId)
|
|
203
|
-
: undefined;
|
|
204
|
-
const detectedReasoningEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]> =
|
|
205
|
-
provider === "groq" && spec.id === "qwen/qwen3-32b"
|
|
206
|
-
? ({
|
|
207
|
-
minimal: "default",
|
|
208
|
-
low: "default",
|
|
209
|
-
medium: "default",
|
|
210
|
-
high: "default",
|
|
211
|
-
xhigh: "default",
|
|
212
|
-
} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
|
|
213
|
-
: isDeepseekFamily && spec.reasoning
|
|
214
|
-
? ({
|
|
215
|
-
minimal: "high",
|
|
216
|
-
low: "high",
|
|
217
|
-
medium: "high",
|
|
218
|
-
high: "high",
|
|
219
|
-
xhigh: "max",
|
|
220
|
-
} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
|
|
221
|
-
: openRouterAnthropicReasoningEffortMap
|
|
222
|
-
? openRouterAnthropicReasoningEffortMap
|
|
223
|
-
: isFireworks
|
|
224
|
-
? ({
|
|
225
|
-
// Fireworks' OpenAI-compatible endpoint rejects OpenAI's
|
|
226
|
-
// `minimal` literal but accepts `none` for the lowest setting.
|
|
227
|
-
minimal: "none",
|
|
228
|
-
} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
|
|
229
|
-
: {};
|
|
230
|
-
|
|
231
181
|
// Stream-watchdog floor: GLM coding-plan SKUs and direct DeepSeek reasoning
|
|
232
182
|
// models idle for minutes mid-reasoning; widen the idle timeout so warm-ups
|
|
233
183
|
// stop aborting and retrying.
|
|
@@ -251,7 +201,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
251
201
|
supportsReasoningEffort: !isGrok && !isZai && !isZhipu && !isXiaomiMimo,
|
|
252
202
|
// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
|
|
253
203
|
supportsReasoningParams: provider !== "github-copilot",
|
|
254
|
-
reasoningEffortMap: detectedReasoningEffortMap,
|
|
204
|
+
reasoningEffortMap: {},
|
|
255
205
|
supportsUsageInStreaming: !isCerebras,
|
|
256
206
|
// Kimi (including via OpenRouter and Fireworks router-form IDs such as
|
|
257
207
|
// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
|
|
@@ -323,10 +273,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
323
273
|
};
|
|
324
274
|
|
|
325
275
|
applyCompatOverrides(compat, spec.compat);
|
|
326
|
-
if (spec.compat?.reasoningEffortMap) {
|
|
327
|
-
// Effort maps merge per level instead of replacing wholesale.
|
|
328
|
-
compat.reasoningEffortMap = { ...detectedReasoningEffortMap, ...spec.compat.reasoningEffortMap };
|
|
329
|
-
}
|
|
330
276
|
|
|
331
277
|
const whenThinkingPolicy =
|
|
332
278
|
spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
|
|
@@ -341,6 +287,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
341
287
|
|
|
342
288
|
interface OpenAIResponsesSpecLike {
|
|
343
289
|
provider: string;
|
|
290
|
+
name: string;
|
|
344
291
|
baseUrl: string;
|
|
345
292
|
compat?: OpenAICompat;
|
|
346
293
|
}
|
|
@@ -351,7 +298,8 @@ interface OpenAIResponsesSpecLike {
|
|
|
351
298
|
* endpoint accepts the `developer` role, while strict tool mode is scoped to
|
|
352
299
|
* first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
|
|
353
300
|
* detection are URL-only on purpose — the historical call sites never
|
|
354
|
-
* consulted the provider id for them.
|
|
301
|
+
* consulted the provider id for them. The GPT-5 juice-zero hack keys on the
|
|
302
|
+
* model name, matching the historical request-time check.
|
|
355
303
|
*/
|
|
356
304
|
export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat {
|
|
357
305
|
const baseUrl = spec.baseUrl ?? "";
|
|
@@ -371,6 +319,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
|
|
|
371
319
|
// Azure OpenAI and GitHub Copilot Responses paths require tool results
|
|
372
320
|
// to strictly match prior tool calls when building Responses inputs.
|
|
373
321
|
strictResponsesPairing: hostMatchesUrl(baseUrl, "azureOpenAI") || spec.provider === "github-copilot",
|
|
322
|
+
requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
|
|
374
323
|
reasoningEffortMap: {},
|
|
375
324
|
};
|
|
376
325
|
applyCompatOverrides(compat, spec.compat);
|
|
@@ -169,7 +169,9 @@ export async function fetchOpenAICompatibleModels<TApi extends Api>(
|
|
|
169
169
|
maxTokens: UNK_MAX_TOKENS,
|
|
170
170
|
};
|
|
171
171
|
|
|
172
|
-
|
|
172
|
+
// `mapModel` returning null skips the entry (documented contract); only a
|
|
173
|
+
// missing mapper falls back to the defaults.
|
|
174
|
+
const mapped = options.mapModel ? options.mapModel(entry, defaults, context) : defaults;
|
|
173
175
|
if (!mapped || typeof mapped.id !== "string" || mapped.id.length === 0) {
|
|
174
176
|
continue;
|
|
175
177
|
}
|
package/src/identity/family.ts
CHANGED
|
@@ -44,6 +44,33 @@ export function isMimoModelIdOrName(value: string): boolean {
|
|
|
44
44
|
return value.toLowerCase().includes("mimo");
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
+
/**
|
|
48
|
+
* MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
|
|
49
|
+
* `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
|
|
50
|
+
* and short `minimax/m2-…` ids on aggregator hosts). Underlying model accepts
|
|
51
|
+
* only `low|medium|high` for `reasoning_effort` and 400s on `minimal`,
|
|
52
|
+
* `xhigh`, or `none` — so hosts whose default effort map otherwise lowers
|
|
53
|
+
* `minimal` to `none` (Fireworks) or expects the full 5-tier scale must
|
|
54
|
+
* clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
|
|
55
|
+
*/
|
|
56
|
+
export function isMinimaxM2FamilyModelId(modelId: string): boolean {
|
|
57
|
+
const lower = modelId.toLowerCase();
|
|
58
|
+
if (!lower.includes("minimax")) return false;
|
|
59
|
+
// Boundary-delimited `m2` token followed by zero or more digits (dotless
|
|
60
|
+
// variants like `m21`/`m25`/`m27`) and an optional dotted minor version.
|
|
61
|
+
return /(?:^|[/.-])m2\d*(?:[.-]\d+)?(?:[-.:_]|$)/i.test(lower);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
|
|
66
|
+
* `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
|
|
67
|
+
* `low|medium|high` for `reasoning_effort` and rejects `minimal`, `xhigh`,
|
|
68
|
+
* and `none`.
|
|
69
|
+
*/
|
|
70
|
+
export function isOpenAIGptOssModelId(modelId: string): boolean {
|
|
71
|
+
return /(^|\/)gpt-oss[-:]/i.test(modelId);
|
|
72
|
+
}
|
|
73
|
+
|
|
47
74
|
/**
|
|
48
75
|
* Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
|
|
49
76
|
* the Claude Fable/Mythos 5 generation. Older adaptive-thinking models
|
package/src/model-thinking.ts
CHANGED
|
@@ -10,15 +10,22 @@ import { Effort, THINKING_EFFORTS } from "./effort";
|
|
|
10
10
|
import { modelMatchesHost } from "./hosts";
|
|
11
11
|
import {
|
|
12
12
|
type AnthropicModel,
|
|
13
|
+
bareModelId,
|
|
13
14
|
type GeminiModel,
|
|
14
15
|
isFableOrMythos,
|
|
15
16
|
type OpenAIModel,
|
|
16
17
|
type ParsedModel,
|
|
18
|
+
parseAnthropicModel,
|
|
17
19
|
parseKnownModel,
|
|
18
20
|
semverEqual,
|
|
19
21
|
semverGte,
|
|
20
22
|
} from "./identity/classify";
|
|
21
|
-
import {
|
|
23
|
+
import {
|
|
24
|
+
isDeepseekModelIdOrName,
|
|
25
|
+
isMinimaxM2FamilyModelId,
|
|
26
|
+
isOpenAIGptOssModelId,
|
|
27
|
+
supportsAdaptiveThinkingDisplay,
|
|
28
|
+
} from "./identity/family";
|
|
22
29
|
import type {
|
|
23
30
|
Api,
|
|
24
31
|
CompatOf,
|
|
@@ -47,6 +54,27 @@ const GEMINI_3_PRO_EFFORTS: readonly Effort[] = [Effort.Low, Effort.High];
|
|
|
47
54
|
const GEMINI_3_FLASH_EFFORTS: readonly Effort[] = [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High];
|
|
48
55
|
const GPT_5_2_PLUS_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High, Effort.XHigh];
|
|
49
56
|
const GPT_5_1_CODEX_MINI_EFFORTS: readonly Effort[] = [Effort.Medium, Effort.High];
|
|
57
|
+
const LOW_MEDIUM_HIGH_REASONING_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High];
|
|
58
|
+
|
|
59
|
+
type EffortMap = Partial<Record<Effort, string>>;
|
|
60
|
+
|
|
61
|
+
const GROQ_QWEN3_32B_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
62
|
+
[Effort.Minimal]: "default",
|
|
63
|
+
[Effort.Low]: "default",
|
|
64
|
+
[Effort.Medium]: "default",
|
|
65
|
+
[Effort.High]: "default",
|
|
66
|
+
[Effort.XHigh]: "default",
|
|
67
|
+
};
|
|
68
|
+
const DEEPSEEK_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
69
|
+
[Effort.Minimal]: "high",
|
|
70
|
+
[Effort.Low]: "high",
|
|
71
|
+
[Effort.Medium]: "high",
|
|
72
|
+
[Effort.High]: "high",
|
|
73
|
+
[Effort.XHigh]: "max",
|
|
74
|
+
};
|
|
75
|
+
const FIREWORKS_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
76
|
+
[Effort.Minimal]: "none",
|
|
77
|
+
};
|
|
50
78
|
|
|
51
79
|
/**
|
|
52
80
|
* Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
|
|
@@ -88,7 +116,7 @@ export const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Record<Effor
|
|
|
88
116
|
* - Explicit spec thinking (generator-baked or user-authored) owns the
|
|
89
117
|
* capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
|
|
90
118
|
* (`effortMap`, `supportsDisplay`) are backfilled from identity when not
|
|
91
|
-
* explicitly set, so configs never need to know
|
|
119
|
+
* explicitly set, so configs never need to know provider wire tier tables.
|
|
92
120
|
* - Sparse specs go through full inference.
|
|
93
121
|
*/
|
|
94
122
|
export function resolveModelThinking<TApi extends Api>(
|
|
@@ -98,7 +126,7 @@ export function resolveModelThinking<TApi extends Api>(
|
|
|
98
126
|
if (!spec.reasoning) return undefined;
|
|
99
127
|
if (omitsWireReasoningEffort(spec.api, compat)) return undefined;
|
|
100
128
|
if (spec.thinking && Array.isArray(spec.thinking.efforts) && spec.thinking.efforts.length > 0) {
|
|
101
|
-
return fillThinkingWireDefaults(spec, spec.thinking);
|
|
129
|
+
return fillThinkingWireDefaults(spec, compat, spec.thinking);
|
|
102
130
|
}
|
|
103
131
|
// Empty/malformed explicit metadata is treated as absent — infer instead.
|
|
104
132
|
return deriveThinking(spec, compat);
|
|
@@ -106,23 +134,42 @@ export function resolveModelThinking<TApi extends Api>(
|
|
|
106
134
|
|
|
107
135
|
/**
|
|
108
136
|
* Backfill identity-derived wire facts onto explicit thinking metadata.
|
|
109
|
-
* Explicit `effortMap` / `supportsDisplay` (including `false`)
|
|
110
|
-
*
|
|
137
|
+
* Explicit `effortMap` / `supportsDisplay` (including `false`) win, except
|
|
138
|
+
* model-defined effort restrictions still normalize stale cached capability
|
|
139
|
+
* surfaces before request-time code can observe them.
|
|
111
140
|
*/
|
|
112
|
-
function fillThinkingWireDefaults<TApi extends Api>(
|
|
113
|
-
|
|
141
|
+
function fillThinkingWireDefaults<TApi extends Api>(
|
|
142
|
+
spec: ModelSpec<TApi>,
|
|
143
|
+
compat: CompatOf<TApi>,
|
|
144
|
+
thinking: ThinkingConfig,
|
|
145
|
+
): ThinkingConfig {
|
|
146
|
+
const parsed = parseKnownModel(spec.id);
|
|
147
|
+
const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
|
|
148
|
+
const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
|
|
149
|
+
const effortMap =
|
|
150
|
+
thinking.effortMap === undefined
|
|
151
|
+
? inferEffortMap(spec, compat, parsed, thinking.mode, normalizedEfforts)
|
|
152
|
+
: effortsChanged
|
|
153
|
+
? filterEffortMapToSupportedEfforts(thinking.effortMap, normalizedEfforts)
|
|
154
|
+
: undefined;
|
|
155
|
+
const shouldReplaceEffortMap = thinking.effortMap === undefined ? effortMap !== undefined : effortsChanged;
|
|
114
156
|
const needsDisplay =
|
|
115
157
|
thinking.supportsDisplay === undefined &&
|
|
116
158
|
(spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
|
|
117
159
|
supportsAdaptiveThinkingDisplay(spec.id);
|
|
118
|
-
if (!
|
|
160
|
+
if (!effortsChanged && !shouldReplaceEffortMap && !needsDisplay) {
|
|
119
161
|
return thinking;
|
|
120
162
|
}
|
|
121
163
|
const filled: ThinkingConfig = { ...thinking };
|
|
122
|
-
if (
|
|
123
|
-
filled.
|
|
124
|
-
|
|
125
|
-
|
|
164
|
+
if (effortsChanged) {
|
|
165
|
+
filled.efforts = normalizedEfforts;
|
|
166
|
+
}
|
|
167
|
+
if (shouldReplaceEffortMap) {
|
|
168
|
+
if (effortMap === undefined) {
|
|
169
|
+
delete filled.effortMap;
|
|
170
|
+
} else {
|
|
171
|
+
filled.effortMap = effortMap;
|
|
172
|
+
}
|
|
126
173
|
}
|
|
127
174
|
if (needsDisplay) {
|
|
128
175
|
filled.supportsDisplay = true;
|
|
@@ -141,10 +188,9 @@ export function deriveThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat:
|
|
|
141
188
|
mode: inferThinkingControlMode(spec, parsed),
|
|
142
189
|
efforts,
|
|
143
190
|
};
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
|
|
191
|
+
const effortMap = inferEffortMap(spec, compat, parsed, config.mode, config.efforts);
|
|
192
|
+
if (effortMap !== undefined) {
|
|
193
|
+
config.effortMap = effortMap;
|
|
148
194
|
}
|
|
149
195
|
if (
|
|
150
196
|
(spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
|
|
@@ -171,11 +217,117 @@ function omitsWireReasoningEffort(api: Api, compat: CompatOf<Api>): boolean {
|
|
|
171
217
|
return (compat as ResolvedOpenAIResponsesCompat | undefined)?.supportsReasoningEffort === false;
|
|
172
218
|
}
|
|
173
219
|
|
|
220
|
+
/**
 * Resolves the effort map for a model.
 *
 * Two sources are consulted: the map detected from the spec/parsed model/mode
 * and the map carried on `compat`. When both exist they are merged, with the
 * compat entries overriding detected ones on key collisions. The result is
 * then restricted to the supplied `efforts`.
 *
 * @returns The restricted map, or `undefined` when neither source yields a map
 * or no entry survives the restriction.
 */
function inferEffortMap<TApi extends Api>(
	spec: ModelSpec<TApi>,
	compat: CompatOf<TApi>,
	parsedModel: ParsedModel,
	mode: ThinkingConfig["mode"],
	efforts: readonly Effort[],
): EffortMap | undefined {
	const fromModel = inferDetectedEffortMap(spec, parsedModel, mode);
	const fromCompat = readCompatEffortMap(compat);

	if (fromModel === undefined) {
		// Only the compat map (if any) is available.
		if (fromCompat === undefined) {
			return undefined;
		}
		return filterEffortMapToSupportedEfforts(fromCompat, efforts);
	}

	// Spread order matters: compat entries are applied last so they win.
	const combined: EffortMap = fromCompat === undefined ? fromModel : { ...fromModel, ...fromCompat };
	return filterEffortMapToSupportedEfforts(combined, efforts);
}
|
|
233
|
+
|
|
234
|
+
/**
 * Copies the entries of `map` whose keys appear in `efforts` into a new map.
 *
 * Entries are inserted in the order of `efforts`; keys whose value in `map`
 * is `undefined` are skipped.
 *
 * @returns The new map, or `undefined` when no entry was copied.
 */
function filterEffortMapToSupportedEfforts(map: EffortMap, efforts: readonly Effort[]): EffortMap | undefined {
	const kept: EffortMap = {};
	let keptAny = false;
	for (const level of efforts) {
		const wireValue = map[level];
		if (wireValue !== undefined) {
			kept[level] = wireValue;
			keptAny = true;
		}
	}
	// An empty result is reported as "no map" rather than as `{}`.
	return keptAny ? kept : undefined;
}
|
|
244
|
+
|
|
245
|
+
/**
 * Reports whether two effort lists are identical: same length and the same
 * effort at every position (order-sensitive, strict equality).
 */
function sameEffortList(left: readonly Effort[], right: readonly Effort[]): boolean {
	if (left.length !== right.length) {
		return false;
	}
	for (const [position, effort] of left.entries()) {
		if (effort !== right[position]) {
			return false;
		}
	}
	return true;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Returns a fixed effort list for models whose identity dictates it.
 *
 * Only `openai-completions` specs are considered; among those, ids matching
 * the MiniMax M2 family or the OpenAI gpt-oss predicates get
 * `LOW_MEDIUM_HIGH_REASONING_EFFORTS`.
 *
 * @returns The fixed list, or `undefined` when the spec has no model-defined
 * override.
 */
function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
	if (spec.api !== "openai-completions") {
		return undefined;
	}
	if (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id)) {
		return LOW_MEDIUM_HIGH_REASONING_EFFORTS;
	}
	return undefined;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Extracts `reasoningEffortMap` from a compat object.
 *
 * @returns The map when `compat` is defined, has a `reasoningEffortMap`
 * property, and that map has at least one own enumerable key; otherwise
 * `undefined`.
 */
function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
	if (compat !== undefined && "reasoningEffortMap" in compat) {
		const configured = compat.reasoningEffortMap;
		// A missing or empty map is treated the same as no map at all.
		if (configured && Object.keys(configured).length > 0) {
			return configured;
		}
	}
	return undefined;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Picks the built-in effort map implied by a model's mode, API, provider and id.
 *
 * Checks run in this order and the first match wins:
 * 1. `anthropic-adaptive` mode → the 5-tier or 4-tier Anthropic adaptive map,
 *    depending on `anthropicModelHasRealXHighEffort`.
 * 2. Any API other than `openai-completions` → no map.
 * 3. Groq `qwen/qwen3-32b` → its dedicated map.
 * 4. DeepSeek reasoning models → the DeepSeek map.
 * 5. OpenRouter-hosted Anthropic models that have an adaptive map.
 * 6. Fireworks-hosted models → the Fireworks map.
 *
 * @returns The matching map, or `undefined` when none applies.
 */
function inferDetectedEffortMap<TApi extends Api>(
	spec: ModelSpec<TApi>,
	parsedModel: ParsedModel,
	mode: ThinkingConfig["mode"],
): EffortMap | undefined {
	if (mode === "anthropic-adaptive") {
		if (anthropicModelHasRealXHighEffort(spec, parsedModel)) {
			return ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER;
		}
		return ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
	}

	if (spec.api === "openai-completions") {
		const isGroqQwen3 = spec.provider === "groq" && spec.id === "qwen/qwen3-32b";
		if (isGroqQwen3) {
			return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
		}
		if (isDeepseekReasoningModel(spec)) {
			return DEEPSEEK_REASONING_EFFORT_MAP;
		}
		// OpenRouter only contributes a map for some Anthropic ids; otherwise fall through.
		const viaOpenRouter = modelMatchesHost(spec, "openrouter")
			? getOpenRouterAnthropicReasoningEffortMap(spec.id)
			: undefined;
		if (viaOpenRouter !== undefined) {
			return viaOpenRouter;
		}
		if (modelMatchesHost(spec, "fireworks")) {
			return FIREWORKS_REASONING_EFFORT_MAP;
		}
	}

	return undefined;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Reports whether a spec is a reasoning-enabled DeepSeek model.
 *
 * A spec without `reasoning` never qualifies. Otherwise it qualifies when any
 * of the following holds: it matches the `deepseekFamily` host, its id or
 * name passes `isDeepseekModelIdOrName`, or it is the `opencode-zen`
 * provider's "big-pickle" / "Big Pickle" entry (matched case-insensitively).
 */
function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
	if (!spec.reasoning) {
		return false;
	}
	const displayName = spec.name ?? "";

	if (modelMatchesHost(spec, "deepseekFamily")) return true;
	if (isDeepseekModelIdOrName(spec.id)) return true;
	if (isDeepseekModelIdOrName(displayName)) return true;

	// Last resort: the opencode-zen entry that is treated as a DeepSeek alias.
	if (spec.provider !== "opencode-zen") {
		return false;
	}
	return spec.id.toLowerCase() === "big-pickle" || displayName.toLowerCase() === "big pickle";
}
|
|
309
|
+
|
|
310
|
+
/**
 * Selects the Anthropic adaptive effort map for a model id on OpenRouter.
 *
 * The id is reduced with `bareModelId` and parsed with `parseAnthropicModel`.
 * Only Fable/Mythos kinds and Opus at version 4.6 or later get an adaptive
 * map; every other parsed kind (Sonnet included) gets none.
 *
 * @returns The 5-tier map for Fable/Mythos and for Opus 4.7+, the 4-tier map
 * for Opus 4.6 up to (not including) 4.7, and `undefined` otherwise.
 */
function getOpenRouterAnthropicReasoningEffortMap(modelId: string): EffortMap | undefined {
	const anthropic = parseAnthropicModel(bareModelId(modelId));
	if (!anthropic) {
		return undefined;
	}

	// Fable/Mythos are always adaptive and always have the real xhigh tier.
	if (isFableOrMythos(anthropic.kind)) {
		return ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER;
	}

	// Everything else must be Opus 4.6+ to use adaptive efforts.
	if (anthropic.kind !== "opus" || !semverGte(anthropic.version, "4.6")) {
		return undefined;
	}

	// The xhigh tier starts at version 4.7.
	return semverGte(anthropic.version, "4.7")
		? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
		: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
}
|
|
321
|
+
|
|
174
322
|
function inferSupportedEfforts<TApi extends Api>(
|
|
175
323
|
parsedModel: ParsedModel,
|
|
176
324
|
spec: ModelSpec<TApi>,
|
|
177
325
|
compat: CompatOf<TApi>,
|
|
178
326
|
): readonly Effort[] {
|
|
327
|
+
const modelDefinedEfforts = getModelDefinedEfforts(spec);
|
|
328
|
+
if (modelDefinedEfforts !== undefined) {
|
|
329
|
+
return modelDefinedEfforts;
|
|
330
|
+
}
|
|
179
331
|
switch (parsedModel.family) {
|
|
180
332
|
case "openai":
|
|
181
333
|
return inferOpenAISupportedEfforts(parsedModel);
|