@oh-my-pi/pi-catalog 15.11.3 → 15.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.11.4] - 2026-06-12
6
+
7
+ ### Fixed
8
+
9
+ - Fixed MiniMax M2-family and OpenAI gpt-oss model metadata so OpenAI-compatible catalog entries declare only `low|medium|high` thinking efforts. Their upstreams reject `minimal`, `xhigh`, and the `none` value produced by Fireworks' `minimal → none` wire mapping, so using `fireworks/minimax-m2.7` as the smol auto-thinking classifier model returned HTTP 400 on every turn. OpenAI-compatible provider effort maps (Groq `qwen/qwen3-32b`, DeepSeek-family, OpenRouter Anthropic adaptive, Fireworks `minimal → none`) are now baked into `thinking.effortMap` in catalog metadata instead of `buildOpenAICompat`, and request builders read that field directly. The regenerated `models.json` now makes `disableReasoning` choose `low` for those families while leaving GLM-5.x and other Fireworks models on the existing `minimal → none` path ([#2315](https://github.com/can1357/oh-my-pi/issues/2315)).
10
+ ### Added
11
+
12
+ - Added `requiresJuiceZeroHack` Responses-API compat flag, resolved by `buildOpenAIResponsesCompat` from GPT-5-family model names and overridable via sparse model `compat` config. Replaces the request-time `model.name.startsWith("gpt-5")` sniff that gated the trailing `# Juice: 0 !important` no-reasoning developer item.
13
+
5
14
  ## [15.11.3] - 2026-06-11
6
15
  ### Added
7
16
 
@@ -6,6 +6,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
6
6
  export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
7
7
  interface OpenAIResponsesSpecLike {
8
8
  provider: string;
9
+ name: string;
9
10
  baseUrl: string;
10
11
  compat?: OpenAICompat;
11
12
  }
@@ -15,7 +16,8 @@ interface OpenAIResponsesSpecLike {
15
16
  * endpoint accepts the `developer` role, while strict tool mode is scoped to
16
17
  * first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
17
18
  * detection are URL-only on purpose — the historical call sites never
18
- * consulted the provider id for them.
19
+ * consulted the provider id for them. The GPT-5 juice-zero hack keys on the
20
+ * model name like the historical request-time check, but compares the `gpt-5` prefix case-insensitively.
19
21
  */
20
22
  export declare function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat;
21
23
  export {};
@@ -20,6 +20,23 @@ export declare function isQwenModelId(modelId: string): boolean;
20
20
  export declare function isDeepseekModelIdOrName(value: string): boolean;
21
21
  /** Xiaomi MiMo family by id or display name. */
22
22
  export declare function isMimoModelIdOrName(value: string): boolean;
23
+ /**
24
+ * MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
25
+ * `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
26
+ * and short `minimax/m2-…` ids on aggregator hosts). Underlying model accepts
27
+ * only `low|medium|high` for `reasoning_effort` and 400s on `minimal`,
28
+ * `xhigh`, or `none` — so hosts whose default effort map otherwise lowers
29
+ * `minimal` to `none` (Fireworks) or expects the full 5-tier scale must
30
+ * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
31
+ */
32
+ export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
33
+ /**
34
+ * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
35
+ * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
36
+ * `low|medium|high` for `reasoning_effort` and rejects `minimal`, `xhigh`,
37
+ * and `none`.
38
+ */
39
+ export declare function isOpenAIGptOssModelId(modelId: string): boolean;
23
40
  /**
24
41
  * Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
25
42
  * the Claude Fable/Mythos 5 generation. Older adaptive-thinking models
@@ -38,7 +38,7 @@ export declare const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Reco
38
38
  * - Explicit spec thinking (generator-baked or user-authored) owns the
39
39
  * capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
40
40
  * (`effortMap`, `supportsDisplay`) are backfilled from identity when not
41
- * explicitly set, so configs never need to know Anthropic's tier tables.
41
+ * explicitly set, so configs never need to know provider wire tier tables.
42
42
  * - Sparse specs go through full inference.
43
43
  */
44
44
  export declare function resolveModelThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): ThinkingConfig | undefined;
@@ -17,9 +17,9 @@ export interface ThinkingConfig {
17
17
  /** Optional default effort applied when this model is selected. Falls back to global default if absent. */
18
18
  defaultLevel?: Effort;
19
19
  /**
20
- * Effort → wire-value remap for `anthropic-adaptive` transports, baked at
21
- * build time (4-tier legacy scale vs the 5-tier Opus 4.7+/Fable/Mythos
22
- * scale). Identity for efforts the map omits.
20
+ * Effort → provider wire-value remap, baked at build time. Identity for
21
+ * efforts the map omits. Used by Anthropic adaptive thinking, OpenAI-
22
+ * compatible `reasoning_effort`, and Responses-style reasoning params.
23
23
  */
24
24
  effortMap?: Partial<Record<Effort, string>>;
25
25
  /**
@@ -179,6 +179,14 @@ export interface OpenAICompat {
179
179
  alwaysSendMaxTokens?: boolean;
180
180
  /** Whether Responses-API tool-call/result history must be strictly paired. Default: auto-detected (Azure OpenAI, GitHub Copilot). */
181
181
  strictResponsesPairing?: boolean;
182
+ /**
183
+ * Append a trailing `# Juice: 0 !important` developer item when the caller
184
+ * did not request reasoning, suppressing default reasoning on models that
185
+ * cannot disable it via request params (Responses APIs only; see
186
+ * https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7).
187
+ * Default: auto-detected (GPT-5-family model names).
188
+ */
189
+ requiresJuiceZeroHack?: boolean;
182
190
  /**
183
191
  * Compat deltas applied when a request actually engages thinking mode
184
192
  * (reasoning requested and not disabled, model reasoning-capable, and not
@@ -279,7 +287,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
279
287
  * `buildModel`; request handlers read fields and never detect, resolve, or
280
288
  * allocate.
281
289
  */
282
- export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "whenThinking">> & {
290
+ export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
283
291
  openRouterRouting?: OpenAICompat["openRouterRouting"];
284
292
  vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
285
293
  extraBody?: OpenAICompat["extraBody"];
@@ -301,6 +309,7 @@ export interface ResolvedOpenAIResponsesCompat {
301
309
  supportsReasoningEffort: boolean;
302
310
  supportsLongPromptCacheRetention: boolean;
303
311
  strictResponsesPairing: boolean;
312
+ requiresJuiceZeroHack: boolean;
304
313
  reasoningEffortMap: Partial<Record<Effort, string>>;
305
314
  }
306
315
  /** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-catalog",
4
- "version": "15.11.3",
4
+ "version": "15.11.4",
5
5
  "description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -34,11 +34,11 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@bufbuild/protobuf": "^2.12.0",
37
- "@oh-my-pi/pi-utils": "15.11.3",
37
+ "@oh-my-pi/pi-utils": "15.11.4",
38
38
  "zod": "4.4.3"
39
39
  },
40
40
  "devDependencies": {
41
- "@oh-my-pi/pi-ai": "15.11.3",
41
+ "@oh-my-pi/pi-ai": "15.11.4",
42
42
  "@types/bun": "^1.3.14"
43
43
  },
44
44
  "engines": {
@@ -8,7 +8,6 @@
8
8
  * never detect, resolve, or allocate.
9
9
  */
10
10
  import { hostMatchesUrl, modelMatchesHost } from "../hosts";
11
- import { bareModelId, isFableOrMythos, parseAnthropicModel, semverGte } from "../identity/classify";
12
11
  import {
13
12
  isAnthropicNamespacedModelId,
14
13
  isClaudeModelId,
@@ -18,12 +17,9 @@ import {
18
17
  isMimoModelIdOrName,
19
18
  isQwenModelId,
20
19
  } from "../identity/family";
21
- import { ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER, ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER } from "../model-thinking";
22
20
  import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
23
21
  import { applyCompatOverrides } from "./apply";
24
22
 
25
- type OpenAIReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
26
-
27
23
  /** GLM coding-plan SKUs idle for minutes mid-reasoning; see `streamIdleTimeoutMs`. */
28
24
  const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;
29
25
  const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
@@ -72,22 +68,6 @@ function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
72
68
  );
73
69
  }
74
70
 
75
- function getOpenRouterAnthropicReasoningEffortMap(
76
- modelId: string,
77
- ): Partial<Record<OpenAIReasoningEffort, string>> | undefined {
78
- const parsed = parseAnthropicModel(bareModelId(modelId));
79
- if (!parsed) return undefined;
80
- // Adaptive efforts on OpenRouter's completions front: Fable/Mythos and
81
- // Opus 4.6+ only — Sonnet stays on the plain effort vocabulary there.
82
- const isOpusAdaptive = parsed.kind === "opus" && semverGte(parsed.version, "4.6");
83
- if (!isFableOrMythos(parsed.kind) && !isOpusAdaptive) return undefined;
84
-
85
- const hasRealXHigh = isFableOrMythos(parsed.kind) || semverGte(parsed.version, "4.7");
86
- return (hasRealXHigh ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER) as Partial<
87
- Record<OpenAIReasoningEffort, string>
88
- >;
89
- }
90
-
91
71
  /**
92
72
  * Build the resolved chat-completions compat record for a model spec.
93
73
  * Provider takes precedence over URL-based detection since it's explicitly configured.
@@ -198,36 +178,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
198
178
  isCopilotHost ||
199
179
  isZenmuxHost);
200
180
 
201
- const openRouterAnthropicReasoningEffortMap = isOpenRouter
202
- ? getOpenRouterAnthropicReasoningEffortMap(lowerId)
203
- : undefined;
204
- const detectedReasoningEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]> =
205
- provider === "groq" && spec.id === "qwen/qwen3-32b"
206
- ? ({
207
- minimal: "default",
208
- low: "default",
209
- medium: "default",
210
- high: "default",
211
- xhigh: "default",
212
- } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
213
- : isDeepseekFamily && spec.reasoning
214
- ? ({
215
- minimal: "high",
216
- low: "high",
217
- medium: "high",
218
- high: "high",
219
- xhigh: "max",
220
- } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
221
- : openRouterAnthropicReasoningEffortMap
222
- ? openRouterAnthropicReasoningEffortMap
223
- : isFireworks
224
- ? ({
225
- // Fireworks' OpenAI-compatible endpoint rejects OpenAI's
226
- // `minimal` literal but accepts `none` for the lowest setting.
227
- minimal: "none",
228
- } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
229
- : {};
230
-
231
181
  // Stream-watchdog floor: GLM coding-plan SKUs and direct DeepSeek reasoning
232
182
  // models idle for minutes mid-reasoning; widen the idle timeout so warm-ups
233
183
  // stop aborting and retrying.
@@ -251,7 +201,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
251
201
  supportsReasoningEffort: !isGrok && !isZai && !isZhipu && !isXiaomiMimo,
252
202
  // GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
253
203
  supportsReasoningParams: provider !== "github-copilot",
254
- reasoningEffortMap: detectedReasoningEffortMap,
204
+ reasoningEffortMap: {},
255
205
  supportsUsageInStreaming: !isCerebras,
256
206
  // Kimi (including via OpenRouter and Fireworks router-form IDs such as
257
207
  // `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
@@ -323,10 +273,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
323
273
  };
324
274
 
325
275
  applyCompatOverrides(compat, spec.compat);
326
- if (spec.compat?.reasoningEffortMap) {
327
- // Effort maps merge per level instead of replacing wholesale.
328
- compat.reasoningEffortMap = { ...detectedReasoningEffortMap, ...spec.compat.reasoningEffortMap };
329
- }
330
276
 
331
277
  const whenThinkingPolicy =
332
278
  spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
@@ -341,6 +287,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
341
287
 
342
288
  interface OpenAIResponsesSpecLike {
343
289
  provider: string;
290
+ name: string;
344
291
  baseUrl: string;
345
292
  compat?: OpenAICompat;
346
293
  }
@@ -351,7 +298,8 @@ interface OpenAIResponsesSpecLike {
351
298
  * endpoint accepts the `developer` role, while strict tool mode is scoped to
352
299
  * first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
353
300
  * detection are URL-only on purpose — the historical call sites never
354
- * consulted the provider id for them.
301
+ * consulted the provider id for them. The GPT-5 juice-zero hack keys on the
302
+ * model name like the historical request-time check, but compares the `gpt-5` prefix case-insensitively.
355
303
  */
356
304
  export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat {
357
305
  const baseUrl = spec.baseUrl ?? "";
@@ -371,6 +319,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
371
319
  // Azure OpenAI and GitHub Copilot Responses paths require tool results
372
320
  // to strictly match prior tool calls when building Responses inputs.
373
321
  strictResponsesPairing: hostMatchesUrl(baseUrl, "azureOpenAI") || spec.provider === "github-copilot",
322
+ requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
374
323
  reasoningEffortMap: {},
375
324
  };
376
325
  applyCompatOverrides(compat, spec.compat);
@@ -44,6 +44,33 @@ export function isMimoModelIdOrName(value: string): boolean {
44
44
  return value.toLowerCase().includes("mimo");
45
45
  }
46
46
 
47
/**
 * MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
 * `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
 * `p`-for-dot spellings like `minimax-m2p5`, and short `minimax/m2-…` ids on
 * aggregator hosts). Underlying model accepts only `low|medium|high` for
 * `reasoning_effort` and 400s on `minimal`, `xhigh`, or `none` — so hosts
 * whose default effort map otherwise lowers `minimal` to `none` (Fireworks)
 * or expects the full 5-tier scale must clamp instead. Excludes M1, M3,
 * MiniMax-Text-01, music, hailuo, voice ids.
 *
 * @param modelId - Model id, optionally vendor- or path-prefixed; matched case-insensitively.
 * @returns `true` when the id mentions `minimax` and carries a boundary-delimited M2 token.
 */
export function isMinimaxM2FamilyModelId(modelId: string): boolean {
	const lower = modelId.toLowerCase();
	if (!lower.includes("minimax")) return false;
	// Boundary-delimited `m2` token followed by zero or more digits (dotless
	// variants like `m21`/`m25`/`m27`) and an optional minor version written
	// with `.`, `-`, or `p` as the separator (`m2.5`, `m2-5`, `m2p5`). The
	// trailing boundary keeps unrelated tokens such as `m2pro` from matching.
	return /(?:^|[/.-])m2\d*(?:[.p-]\d+)?(?:[-.:_]|$)/.test(lower);
}
63
+
64
/**
 * Detects OpenAI gpt-oss ids such as `gpt-oss-20b`, `gpt-oss-120b`,
 * `gpt-oss:120b`, or a vendor-prefixed `vendor/gpt-oss-…`. The Harmony
 * reasoning format only accepts `low|medium|high` for `reasoning_effort`
 * and rejects `minimal`, `xhigh`, and `none`.
 *
 * @param modelId - Model id to classify; matched case-insensitively.
 * @returns `true` when `gpt-oss` starts the id or a `/`-separated segment and is followed by `-` or `:`.
 */
export function isOpenAIGptOssModelId(modelId: string): boolean {
	const gptOssIdPattern = /(^|\/)gpt-oss[-:]/i;
	return gptOssIdPattern.test(modelId);
}
73
+
47
74
  /**
48
75
  * Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
49
76
  * the Claude Fable/Mythos 5 generation. Older adaptive-thinking models
@@ -10,15 +10,22 @@ import { Effort, THINKING_EFFORTS } from "./effort";
10
10
  import { modelMatchesHost } from "./hosts";
11
11
  import {
12
12
  type AnthropicModel,
13
+ bareModelId,
13
14
  type GeminiModel,
14
15
  isFableOrMythos,
15
16
  type OpenAIModel,
16
17
  type ParsedModel,
18
+ parseAnthropicModel,
17
19
  parseKnownModel,
18
20
  semverEqual,
19
21
  semverGte,
20
22
  } from "./identity/classify";
21
- import { supportsAdaptiveThinkingDisplay } from "./identity/family";
23
+ import {
24
+ isDeepseekModelIdOrName,
25
+ isMinimaxM2FamilyModelId,
26
+ isOpenAIGptOssModelId,
27
+ supportsAdaptiveThinkingDisplay,
28
+ } from "./identity/family";
22
29
  import type {
23
30
  Api,
24
31
  CompatOf,
@@ -47,6 +54,27 @@ const GEMINI_3_PRO_EFFORTS: readonly Effort[] = [Effort.Low, Effort.High];
47
54
  const GEMINI_3_FLASH_EFFORTS: readonly Effort[] = [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High];
48
55
  const GPT_5_2_PLUS_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High, Effort.XHigh];
49
56
  const GPT_5_1_CODEX_MINI_EFFORTS: readonly Effort[] = [Effort.Medium, Effort.High];
57
/** Effort surface for model families that only accept `low|medium|high` (see `getModelDefinedEfforts`). */
const LOW_MEDIUM_HIGH_REASONING_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High];

/** Effort → provider wire-value lookup; an effort without an entry has no remap. */
type EffortMap = Partial<Record<Effort, string>>;

/** Groq `qwen/qwen3-32b`: every effort level is sent as the wire value `default`. */
const GROQ_QWEN3_32B_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
	[Effort.Minimal]: "default",
	[Effort.Low]: "default",
	[Effort.Medium]: "default",
	[Effort.High]: "default",
	[Effort.XHigh]: "default",
};
/** DeepSeek-family reasoning models: `minimal` through `high` are sent as `high`, `xhigh` as `max`. */
const DEEPSEEK_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
	[Effort.Minimal]: "high",
	[Effort.Low]: "high",
	[Effort.Medium]: "high",
	[Effort.High]: "high",
	[Effort.XHigh]: "max",
};
/**
 * Fireworks' OpenAI-compatible endpoint rejects OpenAI's `minimal` literal
 * but accepts `none` for the lowest setting.
 */
const FIREWORKS_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
	[Effort.Minimal]: "none",
};
50
78
 
51
79
  /**
52
80
  * Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
@@ -88,7 +116,7 @@ export const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Record<Effor
88
116
  * - Explicit spec thinking (generator-baked or user-authored) owns the
89
117
  * capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
90
118
  * (`effortMap`, `supportsDisplay`) are backfilled from identity when not
91
- * explicitly set, so configs never need to know Anthropic's tier tables.
119
+ * explicitly set, so configs never need to know provider wire tier tables.
92
120
  * - Sparse specs go through full inference.
93
121
  */
94
122
  export function resolveModelThinking<TApi extends Api>(
@@ -98,7 +126,7 @@ export function resolveModelThinking<TApi extends Api>(
98
126
  if (!spec.reasoning) return undefined;
99
127
  if (omitsWireReasoningEffort(spec.api, compat)) return undefined;
100
128
  if (spec.thinking && Array.isArray(spec.thinking.efforts) && spec.thinking.efforts.length > 0) {
101
- return fillThinkingWireDefaults(spec, spec.thinking);
129
+ return fillThinkingWireDefaults(spec, compat, spec.thinking);
102
130
  }
103
131
  // Empty/malformed explicit metadata is treated as absent — infer instead.
104
132
  return deriveThinking(spec, compat);
@@ -106,23 +134,42 @@ export function resolveModelThinking<TApi extends Api>(
106
134
 
107
135
  /**
108
136
  * Backfill identity-derived wire facts onto explicit thinking metadata.
109
- * Explicit `effortMap` / `supportsDisplay` (including `false`) always win;
110
- * untouched configs are returned as-is with zero allocation.
137
+ * Explicit `effortMap` / `supportsDisplay` (including `false`) win, except
138
+ * model-defined effort restrictions still normalize stale cached capability
139
+ * surfaces before request-time code can observe them.
111
140
  */
112
- function fillThinkingWireDefaults<TApi extends Api>(spec: ModelSpec<TApi>, thinking: ThinkingConfig): ThinkingConfig {
113
- const needsEffortMap = thinking.mode === "anthropic-adaptive" && thinking.effortMap === undefined;
141
+ function fillThinkingWireDefaults<TApi extends Api>(
142
+ spec: ModelSpec<TApi>,
143
+ compat: CompatOf<TApi>,
144
+ thinking: ThinkingConfig,
145
+ ): ThinkingConfig {
146
+ const parsed = parseKnownModel(spec.id);
147
+ const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
148
+ const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
149
+ const effortMap =
150
+ thinking.effortMap === undefined
151
+ ? inferEffortMap(spec, compat, parsed, thinking.mode, normalizedEfforts)
152
+ : effortsChanged
153
+ ? filterEffortMapToSupportedEfforts(thinking.effortMap, normalizedEfforts)
154
+ : undefined;
155
+ const shouldReplaceEffortMap = thinking.effortMap === undefined ? effortMap !== undefined : effortsChanged;
114
156
  const needsDisplay =
115
157
  thinking.supportsDisplay === undefined &&
116
158
  (spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
117
159
  supportsAdaptiveThinkingDisplay(spec.id);
118
- if (!needsEffortMap && !needsDisplay) {
160
+ if (!effortsChanged && !shouldReplaceEffortMap && !needsDisplay) {
119
161
  return thinking;
120
162
  }
121
163
  const filled: ThinkingConfig = { ...thinking };
122
- if (needsEffortMap) {
123
- filled.effortMap = anthropicModelHasRealXHighEffort(spec, parseKnownModel(spec.id))
124
- ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
125
- : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
164
+ if (effortsChanged) {
165
+ filled.efforts = normalizedEfforts;
166
+ }
167
+ if (shouldReplaceEffortMap) {
168
+ if (effortMap === undefined) {
169
+ delete filled.effortMap;
170
+ } else {
171
+ filled.effortMap = effortMap;
172
+ }
126
173
  }
127
174
  if (needsDisplay) {
128
175
  filled.supportsDisplay = true;
@@ -141,10 +188,9 @@ export function deriveThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat:
141
188
  mode: inferThinkingControlMode(spec, parsed),
142
189
  efforts,
143
190
  };
144
- if (config.mode === "anthropic-adaptive") {
145
- config.effortMap = anthropicModelHasRealXHighEffort(spec, parsed)
146
- ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
147
- : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
191
+ const effortMap = inferEffortMap(spec, compat, parsed, config.mode, config.efforts);
192
+ if (effortMap !== undefined) {
193
+ config.effortMap = effortMap;
148
194
  }
149
195
  if (
150
196
  (spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
@@ -171,11 +217,117 @@ function omitsWireReasoningEffort(api: Api, compat: CompatOf<Api>): boolean {
171
217
  return (compat as ResolvedOpenAIResponsesCompat | undefined)?.supportsReasoningEffort === false;
172
218
  }
173
219
 
220
/**
 * Resolve the effort → wire-value map for a model: start from the
 * identity-detected map, overlay any map configured on the resolved compat
 * record (configured entries win per effort level), then keep only entries
 * for efforts the model actually supports.
 *
 * @returns The filtered map, or `undefined` when neither source provides one
 * or no entry survives the filter.
 */
function inferEffortMap<TApi extends Api>(
	spec: ModelSpec<TApi>,
	compat: CompatOf<TApi>,
	parsedModel: ParsedModel,
	mode: ThinkingConfig["mode"],
	efforts: readonly Effort[],
): EffortMap | undefined {
	const detected = inferDetectedEffortMap(spec, parsedModel, mode);
	const configured = readCompatEffortMap(compat);

	let combined: EffortMap | undefined;
	if (detected === undefined) {
		combined = configured;
	} else if (configured === undefined) {
		combined = detected;
	} else {
		combined = { ...detected, ...configured };
	}

	if (combined === undefined) return undefined;
	return filterEffortMapToSupportedEfforts(combined, efforts);
}
233
+
234
/**
 * Project `map` onto the given effort list, keeping only entries whose
 * effort appears in `efforts` and has a defined wire value. Keys are
 * inserted in the order of `efforts`.
 *
 * @returns The narrowed map, or `undefined` when nothing survives.
 */
function filterEffortMapToSupportedEfforts(map: EffortMap, efforts: readonly Effort[]): EffortMap | undefined {
	const kept: EffortMap = {};
	let keptAny = false;
	for (const level of efforts) {
		const wireValue = map[level];
		if (wireValue !== undefined) {
			kept[level] = wireValue;
			keptAny = true;
		}
	}
	return keptAny ? kept : undefined;
}
244
+
245
/**
 * Order-sensitive equality for two effort lists: same length and the same
 * effort at every position.
 */
function sameEffortList(left: readonly Effort[], right: readonly Effort[]): boolean {
	if (left.length !== right.length) return false;
	for (const [position, effort] of left.entries()) {
		if (effort !== right[position]) return false;
	}
	return true;
}
252
+
253
/**
 * Effort list dictated by the model family itself rather than by the host.
 * Only `openai-completions` specs whose id is MiniMax M2-family or OpenAI
 * gpt-oss are restricted, to `low|medium|high`.
 *
 * @returns The restricted effort list, or `undefined` when the model imposes none.
 */
function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
	if (spec.api !== "openai-completions") return undefined;
	const isRestrictedFamily = isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id);
	return isRestrictedFamily ? LOW_MEDIUM_HIGH_REASONING_EFFORTS : undefined;
}
258
+
259
/**
 * Read `reasoningEffortMap` off a resolved compat record.
 *
 * @returns The map when the record carries the field and it has at least one
 * key; `undefined` for a missing record, a missing field, or an empty map.
 */
function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
	if (compat !== undefined && "reasoningEffortMap" in compat) {
		const configured = compat.reasoningEffortMap;
		if (configured && Object.keys(configured).length > 0) {
			return configured;
		}
	}
	return undefined;
}
266
+
267
/**
 * Pick the built-in effort → wire-value map from model identity alone.
 *
 * Precedence: `anthropic-adaptive` mode (5-tier vs 4-tier scale) first; then,
 * for `openai-completions` specs only, Groq `qwen/qwen3-32b`, DeepSeek
 * reasoning models, adaptive Anthropic models behind OpenRouter, and finally
 * any Fireworks-hosted model.
 *
 * @returns The matching map, or `undefined` when no rule applies.
 */
function inferDetectedEffortMap<TApi extends Api>(
	spec: ModelSpec<TApi>,
	parsedModel: ParsedModel,
	mode: ThinkingConfig["mode"],
): EffortMap | undefined {
	if (mode === "anthropic-adaptive") {
		const hasFiveTierScale = anthropicModelHasRealXHighEffort(spec, parsedModel);
		return hasFiveTierScale ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
	}
	// Every remaining rule is specific to chat-completions transports.
	if (spec.api !== "openai-completions") return undefined;

	if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
	if (isDeepseekReasoningModel(spec)) return DEEPSEEK_REASONING_EFFORT_MAP;

	const openRouterAnthropicMap = modelMatchesHost(spec, "openrouter")
		? getOpenRouterAnthropicReasoningEffortMap(spec.id)
		: undefined;
	if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap;

	return modelMatchesHost(spec, "fireworks") ? FIREWORKS_REASONING_EFFORT_MAP : undefined;
}
295
+
296
/**
 * Whether a reasoning-capable spec belongs to the DeepSeek family: matched by
 * host, by id, by name, or by the `opencode-zen` provider's `big-pickle` /
 * `Big Pickle` alias. Non-reasoning specs never qualify.
 */
function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
	if (!spec.reasoning) return false;
	if (modelMatchesHost(spec, "deepseekFamily")) return true;

	const modelName = spec.name ?? "";
	if (isDeepseekModelIdOrName(spec.id) || isDeepseekModelIdOrName(modelName)) return true;

	// Only the opencode-zen alias remains as a possible match.
	if (spec.provider !== "opencode-zen") return false;
	return spec.id.toLowerCase() === "big-pickle" || modelName.toLowerCase() === "big pickle";
}
309
+
310
/**
 * Effort map for Anthropic models served through OpenRouter's completions
 * front. Only Fable/Mythos and Opus 4.6+ use adaptive efforts there; Sonnet
 * stays on the plain effort vocabulary. Fable/Mythos and versions 4.7+ get
 * the 5-tier scale, the rest the 4-tier scale.
 *
 * @returns The adaptive map, or `undefined` when the id is not an adaptive Anthropic model.
 */
function getOpenRouterAnthropicReasoningEffortMap(modelId: string): EffortMap | undefined {
	const anthropic = parseAnthropicModel(bareModelId(modelId));
	if (!anthropic) return undefined;

	const adaptiveOpus = anthropic.kind === "opus" && semverGte(anthropic.version, "4.6");
	const fableOrMythos = isFableOrMythos(anthropic.kind);
	if (!fableOrMythos && !adaptiveOpus) return undefined;

	const usesFiveTierScale = fableOrMythos || semverGte(anthropic.version, "4.7");
	if (usesFiveTierScale) return ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER;
	return ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
}
321
+
174
322
  function inferSupportedEfforts<TApi extends Api>(
175
323
  parsedModel: ParsedModel,
176
324
  spec: ModelSpec<TApi>,
177
325
  compat: CompatOf<TApi>,
178
326
  ): readonly Effort[] {
327
+ const modelDefinedEfforts = getModelDefinedEfforts(spec);
328
+ if (modelDefinedEfforts !== undefined) {
329
+ return modelDefinedEfforts;
330
+ }
179
331
  switch (parsedModel.family) {
180
332
  case "openai":
181
333
  return inferOpenAISupportedEfforts(parsedModel);