npm - @oh-my-pi/pi-catalog - Versions diffs - 15.11.3 → 15.11.4 - Mend

@oh-my-pi/pi-catalog 15.11.3 → 15.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md +9 -0
package/dist/types/compat/openai.d.ts +3 -1
package/dist/types/identity/family.d.ts +17 -0
package/dist/types/model-thinking.d.ts +1 -1
package/dist/types/types.d.ts +13 -4
package/package.json +3 -3
package/src/compat/openai.ts +5 -56
package/src/identity/family.ts +27 -0
package/src/model-thinking.ts +168 -16
package/src/models.json +1148 -579
package/src/provider-models/openai-compat.ts +4 -6
package/src/types.ts +13 -3

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,15 @@
 ## [Unreleased]
+## [15.11.4] - 2026-06-12
+### Fixed
+- Fixed MiniMax M2-family and OpenAI gpt-oss model metadata so OpenAI-compatible catalog entries declare only `low|medium|high` thinking efforts. Their upstreams reject `minimal`, `xhigh`, and Fireworks' `minimal → none` wire mapping, so `fireworks/minimax-m2.7` as the smol auto-thinking classifier model 400ed on every turn. OpenAI-compatible provider effort maps (`Groq qwen/qwen3-32b`, DeepSeek-family, OpenRouter Anthropic adaptive, Fireworks `minimal → none`) now bake into `thinking.effortMap` in catalog metadata instead of `buildOpenAICompat`, and request builders read that field directly. Regenerated `models.json` now makes `disableReasoning` choose `low` for those families while leaving GLM-5.x and other Fireworks models on the existing `minimal → none` path ([#2315](https://github.com/can1357/oh-my-pi/issues/2315)).
+### Added
+- Added `requiresJuiceZeroHack` Responses-API compat flag, resolved by `buildOpenAIResponsesCompat` from GPT-5-family model names and overridable via sparse model `compat` config. Replaces the request-time `model.name.startsWith("gpt-5")` sniff that gated the trailing `# Juice: 0 !important` no-reasoning developer item.
 ## [15.11.3] - 2026-06-11
 ### Added

package/dist/types/compat/openai.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
 export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
 interface OpenAIResponsesSpecLike {
     provider: string;
+    name: string;
     baseUrl: string;
     compat?: OpenAICompat;
 }
@@ -15,7 +16,8 @@ interface OpenAIResponsesSpecLike {
  * endpoint accepts the `developer` role, while strict tool mode is scoped to
  * first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
  * detection are URL-only on purpose — the historical call sites never
- * consulted the provider id for them.
+ * consulted the provider id for them. The GPT-5 juice-zero hack keys on the
+ * model name, matching the historical request-time check.
  */
 export declare function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat;
 export {};

package/dist/types/identity/family.d.ts CHANGED Viewed

@@ -20,6 +20,23 @@ export declare function isQwenModelId(modelId: string): boolean;
 export declare function isDeepseekModelIdOrName(value: string): boolean;
 /** Xiaomi MiMo family by id or display name. */
 export declare function isMimoModelIdOrName(value: string): boolean;
+/**
+ * MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
+ * `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
+ * and short `minimax/m2-…` ids on aggregator hosts). Underlying model accepts
+ * only `low|medium|high` for `reasoning_effort` and 400s on `minimal`,
+ * `xhigh`, or `none` — so hosts whose default effort map otherwise lowers
+ * `minimal` to `none` (Fireworks) or expects the full 5-tier scale must
+ * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
+ */
+export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
+/**
+ * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
+ * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
+ * `low|medium|high` for `reasoning_effort` and rejects `minimal`, `xhigh`,
+ * and `none`.
+ */
+export declare function isOpenAIGptOssModelId(modelId: string): boolean;
 /**
  * Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
  * the Claude Fable/Mythos 5 generation. Older adaptive-thinking models

package/dist/types/model-thinking.d.ts CHANGED Viewed

@@ -38,7 +38,7 @@ export declare const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Reco
  * - Explicit spec thinking (generator-baked or user-authored) owns the
  *   capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
  *   (`effortMap`, `supportsDisplay`) are backfilled from identity when not
- *   explicitly set, so configs never need to know Anthropic's tier tables.
+ *   explicitly set, so configs never need to know provider wire tier tables.
  * - Sparse specs go through full inference.
  */
 export declare function resolveModelThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): ThinkingConfig | undefined;

package/dist/types/types.d.ts CHANGED Viewed

@@ -17,9 +17,9 @@ export interface ThinkingConfig {
     /** Optional default effort applied when this model is selected. Falls back to global default if absent. */
     defaultLevel?: Effort;
     /**
-     * Effort → wire-value remap for `anthropic-adaptive` transports, baked at
-     * build time (4-tier legacy scale vs the 5-tier Opus 4.7+/Fable/Mythos
-     * scale). Identity for efforts the map omits.
+     * Effort → provider wire-value remap, baked at build time. Identity for
+     * efforts the map omits. Used by Anthropic adaptive thinking, OpenAI-
+     * compatible `reasoning_effort`, and Responses-style reasoning params.
      */
     effortMap?: Partial<Record<Effort, string>>;
     /**
@@ -179,6 +179,14 @@ export interface OpenAICompat {
     alwaysSendMaxTokens?: boolean;
     /** Whether Responses-API tool-call/result history must be strictly paired. Default: auto-detected (Azure OpenAI, GitHub Copilot). */
     strictResponsesPairing?: boolean;
+    /**
+     * Append a trailing `# Juice: 0 !important` developer item when the caller
+     * did not request reasoning, suppressing default reasoning on models that
+     * cannot disable it via request params (Responses APIs only; see
+     * https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7).
+     * Default: auto-detected (GPT-5-family model names).
+     */
+    requiresJuiceZeroHack?: boolean;
     /**
      * Compat deltas applied when a request actually engages thinking mode
      * (reasoning requested and not disabled, model reasoning-capable, and not
@@ -279,7 +287,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
  * `buildModel`; request handlers read fields and never detect, resolve, or
  * allocate.
  */
-export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "whenThinking">> & {
+export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
     openRouterRouting?: OpenAICompat["openRouterRouting"];
     vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
     extraBody?: OpenAICompat["extraBody"];
@@ -301,6 +309,7 @@ export interface ResolvedOpenAIResponsesCompat {
     supportsReasoningEffort: boolean;
     supportsLongPromptCacheRetention: boolean;
     strictResponsesPairing: boolean;
+    requiresJuiceZeroHack: boolean;
     reasoningEffortMap: Partial<Record<Effort, string>>;
 }
 /** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-catalog",
-	"version": "15.11.3",
+	"version": "15.11.4",
 	"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -34,11 +34,11 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "15.11.3",
+		"@oh-my-pi/pi-utils": "15.11.4",
 		"zod": "4.4.3"
 	},
 	"devDependencies": {
-		"@oh-my-pi/pi-ai": "15.11.3",
+		"@oh-my-pi/pi-ai": "15.11.4",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/compat/openai.ts CHANGED Viewed

@@ -8,7 +8,6 @@
  * never detect, resolve, or allocate.
  */
 import { hostMatchesUrl, modelMatchesHost } from "../hosts";
-import { bareModelId, isFableOrMythos, parseAnthropicModel, semverGte } from "../identity/classify";
 import {
 	isAnthropicNamespacedModelId,
 	isClaudeModelId,
@@ -18,12 +17,9 @@ import {
 	isMimoModelIdOrName,
 	isQwenModelId,
 } from "../identity/family";
-import { ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER, ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER } from "../model-thinking";
 import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
 import { applyCompatOverrides } from "./apply";
-type OpenAIReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
 /** GLM coding-plan SKUs idle for minutes mid-reasoning; see `streamIdleTimeoutMs`. */
 const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;
 const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
@@ -72,22 +68,6 @@ function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
 	);
 }
-function getOpenRouterAnthropicReasoningEffortMap(
-	modelId: string,
-): Partial<Record<OpenAIReasoningEffort, string>> | undefined {
-	const parsed = parseAnthropicModel(bareModelId(modelId));
-	if (!parsed) return undefined;
-	// Adaptive efforts on OpenRouter's completions front: Fable/Mythos and
-	// Opus 4.6+ only — Sonnet stays on the plain effort vocabulary there.
-	const isOpusAdaptive = parsed.kind === "opus" && semverGte(parsed.version, "4.6");
-	if (!isFableOrMythos(parsed.kind) && !isOpusAdaptive) return undefined;
-	const hasRealXHigh = isFableOrMythos(parsed.kind) || semverGte(parsed.version, "4.7");
-	return (hasRealXHigh ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER) as Partial<
-		Record<OpenAIReasoningEffort, string>
-	>;
-}
 /**
  * Build the resolved chat-completions compat record for a model spec.
  * Provider takes precedence over URL-based detection since it's explicitly configured.
@@ -198,36 +178,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 			isCopilotHost ||
 			isZenmuxHost);
-	const openRouterAnthropicReasoningEffortMap = isOpenRouter
-		? getOpenRouterAnthropicReasoningEffortMap(lowerId)
-		: undefined;
-	const detectedReasoningEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]> =
-		provider === "groq" && spec.id === "qwen/qwen3-32b"
-			? ({
-					minimal: "default",
-					low: "default",
-					medium: "default",
-					high: "default",
-					xhigh: "default",
-				} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
-			: isDeepseekFamily && spec.reasoning
-				? ({
-						minimal: "high",
-						low: "high",
-						medium: "high",
-						high: "high",
-						xhigh: "max",
-					} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
-				: openRouterAnthropicReasoningEffortMap
-					? openRouterAnthropicReasoningEffortMap
-					: isFireworks
-						? ({
-								// Fireworks' OpenAI-compatible endpoint rejects OpenAI's
-								// `minimal` literal but accepts `none` for the lowest setting.
-								minimal: "none",
-							} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
-						: {};
 	// Stream-watchdog floor: GLM coding-plan SKUs and direct DeepSeek reasoning
 	// models idle for minutes mid-reasoning; widen the idle timeout so warm-ups
 	// stop aborting and retrying.
@@ -251,7 +201,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 		supportsReasoningEffort: !isGrok && !isZai && !isZhipu && !isXiaomiMimo,
 		// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
 		supportsReasoningParams: provider !== "github-copilot",
-		reasoningEffortMap: detectedReasoningEffortMap,
+		reasoningEffortMap: {},
 		supportsUsageInStreaming: !isCerebras,
 		// Kimi (including via OpenRouter and Fireworks router-form IDs such as
 		// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
@@ -323,10 +273,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 	};
 	applyCompatOverrides(compat, spec.compat);
-	if (spec.compat?.reasoningEffortMap) {
-		// Effort maps merge per level instead of replacing wholesale.
-		compat.reasoningEffortMap = { ...detectedReasoningEffortMap, ...spec.compat.reasoningEffortMap };
-	}
 	const whenThinkingPolicy =
 		spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
@@ -341,6 +287,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 interface OpenAIResponsesSpecLike { // Minimal spec shape accepted by buildOpenAIResponsesCompat.
 	provider: string;
+	name: string; // lower-cased and prefix-tested against "gpt-5" to resolve `requiresJuiceZeroHack`
 	baseUrl: string;
 	compat?: OpenAICompat; // sparse overrides handed to applyCompatOverrides after detection
 }
@@ -351,7 +298,8 @@ interface OpenAIResponsesSpecLike {
  * endpoint accepts the `developer` role, while strict tool mode is scoped to
  * first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
  * detection are URL-only on purpose — the historical call sites never
- * consulted the provider id for them.
+ * consulted the provider id for them. The GPT-5 juice-zero hack keys on the
+ * model name (case-insensitive `gpt-5` prefix), mirroring the historical
+ * request-time `startsWith("gpt-5")` check.
  */
 export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat {
 	const baseUrl = spec.baseUrl ?? "";
@@ -371,6 +319,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
 		// Azure OpenAI and GitHub Copilot Responses paths require tool results
 		// to strictly match prior tool calls when building Responses inputs.
 		strictResponsesPairing: hostMatchesUrl(baseUrl, "azureOpenAI") || spec.provider === "github-copilot",
+		requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
 		reasoningEffortMap: {},
 	};
 	applyCompatOverrides(compat, spec.compat);

package/src/identity/family.ts CHANGED Viewed

@@ -44,6 +44,33 @@ export function isMimoModelIdOrName(value: string): boolean {
 	return value.toLowerCase().includes("mimo");
 }
+/**
+ * MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
+ * `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
+ * and short `minimax/m2-…` ids on aggregator hosts). Underlying model accepts
+ * only `low|medium|high` for `reasoning_effort` and 400s on `minimal`,
+ * `xhigh`, or `none` — so hosts whose default effort map otherwise lowers
+ * `minimal` to `none` (Fireworks) or expects the full 5-tier scale must
+ * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
+ *
+ * Matching is case-insensitive (the id is lower-cased first). The id must
+ * contain `minimax` anywhere and, separately, an `m2` token that is preceded
+ * by start-of-string, `/`, `.`, or `-` and terminated by `-`, `.`, `:`, `_`,
+ * or end-of-string. Any run of digits directly after `m2` is accepted, not
+ * only the known `m21`/`m25`/`m27` aliases.
+ *
+ * @param modelId Model id to classify.
+ * @returns `true` when both conditions above hold, otherwise `false`.
+ */
+export function isMinimaxM2FamilyModelId(modelId: string): boolean {
+	const lower = modelId.toLowerCase();
+	if (!lower.includes("minimax")) return false;
+	// Boundary-delimited `m2` token followed by zero or more digits (dotless
+	// variants like `m21`/`m25`/`m27`) and an optional `.`- or `-`-separated numeric minor version.
+	return /(?:^|[/.-])m2\d*(?:[.-]\d+)?(?:[-.:_]|$)/i.test(lower);
+}
+/**
+ * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
+ * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
+ * `low|medium|high` for `reasoning_effort` and rejects `minimal`, `xhigh`,
+ * and `none`.
+ *
+ * Matching is case-insensitive. `gpt-oss` must sit at the start of the id or
+ * directly after a `/`, and must be immediately followed by `-` or `:`; a
+ * bare `gpt-oss` id, or one where the token follows any other separator,
+ * does not match.
+ *
+ * @param modelId Model id to classify.
+ * @returns `true` when the id matches the gpt-oss pattern, otherwise `false`.
+ */
+export function isOpenAIGptOssModelId(modelId: string): boolean {
+	return /(^|\/)gpt-oss[-:]/i.test(modelId);
+}
 /**
  * Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
  * the Claude Fable/Mythos 5 generation. Older adaptive-thinking models

package/src/model-thinking.ts CHANGED Viewed

@@ -10,15 +10,22 @@ import { Effort, THINKING_EFFORTS } from "./effort";
 import { modelMatchesHost } from "./hosts";
 import {
 	type AnthropicModel,
+	bareModelId,
 	type GeminiModel,
 	isFableOrMythos,
 	type OpenAIModel,
 	type ParsedModel,
+	parseAnthropicModel,
 	parseKnownModel,
 	semverEqual,
 	semverGte,
 } from "./identity/classify";
-import { supportsAdaptiveThinkingDisplay } from "./identity/family";
+import {
+	isDeepseekModelIdOrName,
+	isMinimaxM2FamilyModelId,
+	isOpenAIGptOssModelId,
+	supportsAdaptiveThinkingDisplay,
+} from "./identity/family";
 import type {
 	Api,
 	CompatOf,
@@ -47,6 +54,27 @@ const GEMINI_3_PRO_EFFORTS: readonly Effort[] = [Effort.Low, Effort.High];
 const GEMINI_3_FLASH_EFFORTS: readonly Effort[] = [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High];
 const GPT_5_2_PLUS_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High, Effort.XHigh];
 const GPT_5_1_CODEX_MINI_EFFORTS: readonly Effort[] = [Effort.Medium, Effort.High];
+const LOW_MEDIUM_HIGH_REASONING_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High]; // effort surface returned by getModelDefinedEfforts
+type EffortMap = Partial<Record<Effort, string>>; // effort → wire value; efforts without an entry are simply absent
+const GROQ_QWEN3_32B_REASONING_EFFORT_MAP: Readonly<EffortMap> = { // every effort is sent as the single wire value "default"
+	[Effort.Minimal]: "default",
+	[Effort.Low]: "default",
+	[Effort.Medium]: "default",
+	[Effort.High]: "default",
+	[Effort.XHigh]: "default",
+};
+const DEEPSEEK_REASONING_EFFORT_MAP: Readonly<EffortMap> = { // two wire values only: "high" for minimal..high, "max" for xhigh
+	[Effort.Minimal]: "high",
+	[Effort.Low]: "high",
+	[Effort.Medium]: "high",
+	[Effort.High]: "high",
+	[Effort.XHigh]: "max",
+};
+const FIREWORKS_REASONING_EFFORT_MAP: Readonly<EffortMap> = { // only `minimal` is remapped; other efforts have no entry here
+	[Effort.Minimal]: "none",
+};
 /**
  * Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
@@ -88,7 +116,7 @@ export const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Record<Effor
  * - Explicit spec thinking (generator-baked or user-authored) owns the
  *   capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
  *   (`effortMap`, `supportsDisplay`) are backfilled from identity when not
- *   explicitly set, so configs never need to know Anthropic's tier tables.
+ *   explicitly set, so configs never need to know provider wire tier tables.
  * - Sparse specs go through full inference.
  */
 export function resolveModelThinking<TApi extends Api>(
@@ -98,7 +126,7 @@ export function resolveModelThinking<TApi extends Api>(
 	if (!spec.reasoning) return undefined;
 	if (omitsWireReasoningEffort(spec.api, compat)) return undefined;
 	if (spec.thinking && Array.isArray(spec.thinking.efforts) && spec.thinking.efforts.length > 0) {
-		return fillThinkingWireDefaults(spec, spec.thinking);
+		return fillThinkingWireDefaults(spec, compat, spec.thinking);
 	}
 	// Empty/malformed explicit metadata is treated as absent — infer instead.
 	return deriveThinking(spec, compat);
@@ -106,23 +134,42 @@ export function resolveModelThinking<TApi extends Api>(
 /**
  * Backfill identity-derived wire facts onto explicit thinking metadata.
- * Explicit `effortMap` / `supportsDisplay` (including `false`) always win;
- * untouched configs are returned as-is with zero allocation.
+ * Explicit `effortMap` / `supportsDisplay` (including `false`) win, except
+ * model-defined effort restrictions still normalize stale cached capability
+ * surfaces before request-time code can observe them.
  */
-function fillThinkingWireDefaults<TApi extends Api>(spec: ModelSpec<TApi>, thinking: ThinkingConfig): ThinkingConfig {
-	const needsEffortMap = thinking.mode === "anthropic-adaptive" && thinking.effortMap === undefined;
+function fillThinkingWireDefaults<TApi extends Api>(
+	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
+	thinking: ThinkingConfig,
+): ThinkingConfig {
+	const parsed = parseKnownModel(spec.id);
+	const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
+	const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
+	const effortMap =
+		thinking.effortMap === undefined
+			? inferEffortMap(spec, compat, parsed, thinking.mode, normalizedEfforts)
+			: effortsChanged
+				? filterEffortMapToSupportedEfforts(thinking.effortMap, normalizedEfforts)
+				: undefined;
+	const shouldReplaceEffortMap = thinking.effortMap === undefined ? effortMap !== undefined : effortsChanged;
 	const needsDisplay =
 		thinking.supportsDisplay === undefined &&
 		(spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
 		supportsAdaptiveThinkingDisplay(spec.id);
-	if (!needsEffortMap && !needsDisplay) {
+	if (!effortsChanged && !shouldReplaceEffortMap && !needsDisplay) {
 		return thinking;
 	}
 	const filled: ThinkingConfig = { ...thinking };
-	if (needsEffortMap) {
-		filled.effortMap = anthropicModelHasRealXHighEffort(spec, parseKnownModel(spec.id))
-			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
-			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+	if (effortsChanged) {
+		filled.efforts = normalizedEfforts;
+	}
+	if (shouldReplaceEffortMap) {
+		if (effortMap === undefined) {
+			delete filled.effortMap;
+		} else {
+			filled.effortMap = effortMap;
+		}
 	}
 	if (needsDisplay) {
 		filled.supportsDisplay = true;
@@ -141,10 +188,9 @@ export function deriveThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat:
 		mode: inferThinkingControlMode(spec, parsed),
 		efforts,
 	};
-	if (config.mode === "anthropic-adaptive") {
-		config.effortMap = anthropicModelHasRealXHighEffort(spec, parsed)
-			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
-			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+	const effortMap = inferEffortMap(spec, compat, parsed, config.mode, config.efforts);
+	if (effortMap !== undefined) {
+		config.effortMap = effortMap;
 	}
 	if (
 		(spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
@@ -171,11 +217,117 @@ function omitsWireReasoningEffort(api: Api, compat: CompatOf<Api>): boolean {
 	return (compat as ResolvedOpenAIResponsesCompat | undefined)?.supportsReasoningEffort === false;
 }
+function inferEffortMap<TApi extends Api>( // Builds the effort → wire-value map: detected defaults overlaid by the compat map, then limited to `efforts`.
+	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
+	parsedModel: ParsedModel,
+	mode: ThinkingConfig["mode"],
+	efforts: readonly Effort[],
+): EffortMap | undefined {
+	const detected = inferDetectedEffortMap(spec, parsedModel, mode);
+	const configured = readCompatEffortMap(compat);
+	const merged = // per-effort merge where configured entries override detected ones; undefined only when both are absent
+		detected === undefined ? configured : configured === undefined ? detected : { ...detected, ...configured };
+	return merged === undefined ? undefined : filterEffortMapToSupportedEfforts(merged, efforts); // still undefined when no listed effort has an entry
+}
+function filterEffortMapToSupportedEfforts(map: EffortMap, efforts: readonly Effort[]): EffortMap | undefined { // Copies only the entries of `map` keyed by an effort in `efforts`; returns undefined when none survive.
+	let filtered: EffortMap | undefined; // allocated lazily on the first surviving entry
+	for (const effort of efforts) {
+		const mapped = map[effort];
+		if (mapped === undefined) continue; // effort has no entry in `map`
+		if (filtered === undefined) filtered = {};
+		filtered[effort] = mapped;
+	}
+	return filtered;
+}
+function sameEffortList(left: readonly Effort[], right: readonly Effort[]): boolean { // Order-sensitive, element-wise equality of two effort lists.
+	if (left.length !== right.length) return false; // different lengths can never be equal
+	for (let index = 0; index < left.length; index++) {
+		if (left[index] !== right[index]) return false; // same efforts in a different order count as different
+	}
+	return true;
+}
+function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined { // Fixed low/medium/high list for MiniMax M2-family and gpt-oss ids; undefined for every other spec.
+	return spec.api === "openai-completions" && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id)) // restriction applies to `openai-completions` specs only
+		? LOW_MEDIUM_HIGH_REASONING_EFFORTS
+		: undefined;
+}
+function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined { // Returns compat.reasoningEffortMap when it exists and has at least one key, otherwise undefined.
+	if (compat === undefined || !("reasoningEffortMap" in compat)) { // no compat record, or one without a `reasoningEffortMap` property
+		return undefined;
+	}
+	const map = compat.reasoningEffortMap;
+	return map && Object.keys(map).length > 0 ? map : undefined; // an empty map is treated the same as an absent one
+}
+function inferDetectedEffortMap<TApi extends Api>( // Picks the built-in effort map implied by thinking mode, API, provider and model id; the first matching rule wins.
+	spec: ModelSpec<TApi>,
+	parsedModel: ParsedModel,
+	mode: ThinkingConfig["mode"],
+): EffortMap | undefined {
+	if (mode === "anthropic-adaptive") { // checked before the API test, so it applies on any API
+		return anthropicModelHasRealXHighEffort(spec, parsedModel)
+			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
+			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+	}
+	if (spec.api !== "openai-completions") { // every rule below is for `openai-completions` specs only
+		return undefined;
+	}
+	if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") { // exact provider + id match
+		return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
+	}
+	if (isDeepseekReasoningModel(spec)) {
+		return DEEPSEEK_REASONING_EFFORT_MAP;
+	}
+	if (modelMatchesHost(spec, "openrouter")) {
+		const openRouterAnthropicMap = getOpenRouterAnthropicReasoningEffortMap(spec.id);
+		if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap; // otherwise fall through to the Fireworks check
+	}
+	if (modelMatchesHost(spec, "fireworks")) {
+		return FIREWORKS_REASONING_EFFORT_MAP;
+	}
+	return undefined; // no built-in map applies
+}
+function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean { // True for reasoning-enabled specs identified as DeepSeek by host, id, display name, or the opencode-zen alias.
+	if (!spec.reasoning) return false; // non-reasoning specs never qualify
+	const lowerId = spec.id.toLowerCase();
+	const lowerName = (spec.name ?? "").toLowerCase(); // a missing name is treated as the empty string
+	const isOpenCodeDeepseekAlias = // "big-pickle" id or "big pickle" name, on the opencode-zen provider only
+		spec.provider === "opencode-zen" && (lowerId === "big-pickle" || lowerName === "big pickle");
+	return (
+		modelMatchesHost(spec, "deepseekFamily") ||
+		isDeepseekModelIdOrName(spec.id) ||
+		isDeepseekModelIdOrName(spec.name ?? "") ||
+		isOpenCodeDeepseekAlias
+	);
+}
+function getOpenRouterAnthropicReasoningEffortMap(modelId: string): EffortMap | undefined { // Adaptive effort map for an Anthropic model id; undefined when the id does not parse or the model is not adaptive here.
+	const parsed = parseAnthropicModel(bareModelId(modelId));
+	if (!parsed) return undefined; // parseAnthropicModel returned nothing for this id
+	// Adaptive efforts on OpenRouter's completions front: Fable/Mythos and
+	// Opus 4.6+ only — Sonnet stays on the plain effort vocabulary there.
+	const isOpusAdaptive = parsed.kind === "opus" && semverGte(parsed.version, "4.6");
+	if (!isFableOrMythos(parsed.kind) && !isOpusAdaptive) return undefined;
+	const hasRealXHigh = isFableOrMythos(parsed.kind) || semverGte(parsed.version, "4.7"); // an Opus below 4.7 that passed the check above gets the 4-tier map
+	return hasRealXHigh ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+}
 function inferSupportedEfforts<TApi extends Api>(
 	parsedModel: ParsedModel,
 	spec: ModelSpec<TApi>,
 	compat: CompatOf<TApi>,
 ): readonly Effort[] {
+	const modelDefinedEfforts = getModelDefinedEfforts(spec);
+	if (modelDefinedEfforts !== undefined) {
+		return modelDefinedEfforts;
+	}
 	switch (parsedModel.family) {
 		case "openai":
 			return inferOpenAISupportedEfforts(parsedModel);