npm - @oh-my-pi/pi-catalog - Versions diffs - 15.11.2 → 15.11.4 - Mend

@oh-my-pi/pi-catalog 15.11.2 → 15.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/CHANGELOG.md +28 -1
package/dist/types/compat/openai.d.ts +3 -1
package/dist/types/identity/family.d.ts +17 -0
package/dist/types/model-thinking.d.ts +1 -1
package/dist/types/provider-models/openai-compat.d.ts +2 -0
package/dist/types/types.d.ts +22 -4
package/dist/types/wire/github-copilot.d.ts +15 -0
package/package.json +3 -3
package/src/compat/openai.ts +5 -56
package/src/discovery/openai-compatible.ts +3 -1
package/src/identity/family.ts +27 -0
package/src/model-thinking.ts +168 -16
package/src/models.json +1148 -579
package/src/provider-models/openai-compat.ts +212 -51
package/src/types.ts +22 -3
package/src/wire/github-copilot.ts +17 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,33 @@
 ## [Unreleased]
+## [15.11.4] - 2026-06-12
+### Fixed
+- Fixed MiniMax M2-family and OpenAI gpt-oss model metadata so OpenAI-compatible catalog entries declare only `low|medium|high` thinking efforts. Their upstreams reject `minimal`, `xhigh`, and Fireworks' `minimal → none` wire mapping, so using `fireworks/minimax-m2.7` as the smol auto-thinking classifier model failed with an HTTP 400 on every turn. OpenAI-compatible provider effort maps (`Groq qwen/qwen3-32b`, DeepSeek-family, OpenRouter Anthropic adaptive, Fireworks `minimal → none`) are now baked into `thinking.effortMap` in catalog metadata instead of `buildOpenAICompat`, and request builders read that field directly. The regenerated `models.json` now makes `disableReasoning` choose `low` for those families while leaving GLM-5.x and other Fireworks models on the existing `minimal → none` path ([#2315](https://github.com/can1357/oh-my-pi/issues/2315)).
+### Added
+- Added `requiresJuiceZeroHack` Responses-API compat flag, resolved by `buildOpenAIResponsesCompat` from GPT-5-family model names and overridable via sparse model `compat` config. Replaces the request-time `model.name.startsWith("gpt-5")` sniff that gated the trailing `# Juice: 0 !important` no-reasoning developer item.
+## [15.11.3] - 2026-06-11
+### Added
+- Added `requestModelId` on `Model` to represent the upstream model id used when a catalog entry is a local variant
+- Added synthetic GitHub Copilot long-context model variants with `-1m` suffixes when tiered token pricing is advertised
+### Changed
+- Changed GitHub Copilot discovery to request `X-GitHub-Api-Version: 2026-06-01` from `api.githubcopilot.com`
+- Changed GitHub Copilot discovery to cap base model `contextWindow` to the default token tier and keep long-context access as the separate `-1m` model entry
+- Changed Copilot model mapping to omit non-chat `/models` entries and enable image input for models whose capabilities indicate vision support
+### Fixed
+- Fixed long-context variant pricing to use `billing.token_prices.long_context` rates instead of default model pricing
+- Fixed `mapModel` handling in OpenAI-compatible discovery so returning `null` now skips a model entry rather than falling back to defaults
+- Fixed model ID precedence so a real upstream Copilot model id is kept when it conflicts with a synthesized `-1m` variant
 ## [15.11.1] - 2026-06-11
 ### Fixed
@@ -62,4 +89,4 @@
 ### Removed
-- Removed the runtime enrichment layer: `enrichModelThinking` (and its non-enumerable memo-slot cache), `refreshModelThinking`, `modelOmitsReasoningEffort`, and the `model-thinking` re-exports of generator-only policies. Thinking metadata is resolved exactly once inside `buildModel`; runtime helpers (`getSupportedEfforts`, `clampThinkingLevelForModel`, `requireSupportedEffort`, the effort mappers) are pure field reads.
+- Removed the runtime enrichment layer: `enrichModelThinking` (and its non-enumerable memo-slot cache), `refreshModelThinking`, `modelOmitsReasoningEffort`, and the `model-thinking` re-exports of generator-only policies. Thinking metadata is resolved exactly once inside `buildModel`; runtime helpers (`getSupportedEfforts`, `clampThinkingLevelForModel`, `requireSupportedEffort`, the effort mappers) are pure field reads.

package/dist/types/compat/openai.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIRespo
 export declare function buildOpenAICompat(spec: ModelSpec<"openai-completions">): ResolvedOpenAICompat;
 interface OpenAIResponsesSpecLike {
     provider: string;
+    name: string;
     baseUrl: string;
     compat?: OpenAICompat;
 }
@@ -15,7 +16,8 @@ interface OpenAIResponsesSpecLike {
  * endpoint accepts the `developer` role, while strict tool mode is scoped to
  * first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
  * detection are URL-only on purpose — the historical call sites never
- * consulted the provider id for them.
+ * consulted the provider id for them. The GPT-5 juice-zero hack keys on the
+ * model name, matching the historical request-time check.
  */
 export declare function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat;
 export {};

package/dist/types/identity/family.d.ts CHANGED Viewed

@@ -20,6 +20,23 @@ export declare function isQwenModelId(modelId: string): boolean;
 export declare function isDeepseekModelIdOrName(value: string): boolean;
 /** Xiaomi MiMo family by id or display name. */
 export declare function isMimoModelIdOrName(value: string): boolean;
+/**
+ * MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
+ * `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
+ * and short `minimax/m2-…` ids on aggregator hosts). Underlying model accepts
+ * only `low|medium|high` for `reasoning_effort` and 400s on `minimal`,
+ * `xhigh`, or `none` — so hosts whose default effort map otherwise lowers
+ * `minimal` to `none` (Fireworks) or expects the full 5-tier scale must
+ * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
+ */
+export declare function isMinimaxM2FamilyModelId(modelId: string): boolean;
+/**
+ * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
+ * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
+ * `low|medium|high` for `reasoning_effort` and rejects `minimal`, `xhigh`,
+ * and `none`.
+ */
+export declare function isOpenAIGptOssModelId(modelId: string): boolean;
 /**
  * Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
  * the Claude Fable/Mythos 5 generation. Older adaptive-thinking models

package/dist/types/model-thinking.d.ts CHANGED Viewed

@@ -38,7 +38,7 @@ export declare const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Reco
  * - Explicit spec thinking (generator-baked or user-authored) owns the
  *   capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
  *   (`effortMap`, `supportsDisplay`) are backfilled from identity when not
- *   explicitly set, so configs never need to know Anthropic's tier tables.
+ *   explicitly set, so configs never need to know provider wire tier tables.
  * - Sparse specs go through full inference.
  */
 export declare function resolveModelThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): ThinkingConfig | undefined;

package/dist/types/provider-models/openai-compat.d.ts CHANGED Viewed

@@ -334,6 +334,8 @@ export interface GithubCopilotModelManagerConfig {
     baseUrl?: string;
     fetch?: FetchImpl;
 }
+/** Local id/name suffixes for synthesized Copilot long-context variants. */
+export declare const COPILOT_LONG_CONTEXT_ID_SUFFIX = "-1m";
 export declare function githubCopilotModelManagerOptions(config?: GithubCopilotModelManagerConfig): ModelManagerOptions<Api>;
 export interface AnthropicModelManagerConfig {
     apiKey?: string;

package/dist/types/types.d.ts CHANGED Viewed

@@ -17,9 +17,9 @@ export interface ThinkingConfig {
     /** Optional default effort applied when this model is selected. Falls back to global default if absent. */
     defaultLevel?: Effort;
     /**
-     * Effort → wire-value remap for `anthropic-adaptive` transports, baked at
-     * build time (4-tier legacy scale vs the 5-tier Opus 4.7+/Fable/Mythos
-     * scale). Identity for efforts the map omits.
+     * Effort → provider wire-value remap, baked at build time. Identity for
+     * efforts the map omits. Used by Anthropic adaptive thinking, OpenAI-
+     * compatible `reasoning_effort`, and Responses-style reasoning params.
      */
     effortMap?: Partial<Record<Effort, string>>;
     /**
@@ -179,6 +179,14 @@ export interface OpenAICompat {
     alwaysSendMaxTokens?: boolean;
     /** Whether Responses-API tool-call/result history must be strictly paired. Default: auto-detected (Azure OpenAI, GitHub Copilot). */
     strictResponsesPairing?: boolean;
+    /**
+     * Append a trailing `# Juice: 0 !important` developer item when the caller
+     * did not request reasoning, suppressing default reasoning on models that
+     * cannot disable it via request params (Responses APIs only; see
+     * https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7).
+     * Default: auto-detected (GPT-5-family model names).
+     */
+    requiresJuiceZeroHack?: boolean;
     /**
      * Compat deltas applied when a request actually engages thinking mode
      * (reasoning requested and not disabled, model reasoning-capable, and not
@@ -279,7 +287,7 @@ type ResolvedToolStrictMode = NonNullable<OpenAICompat["toolStrictMode"]> | "mix
  * `buildModel`; request handlers read fields and never detect, resolve, or
  * allocate.
  */
-export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "whenThinking">> & {
+export type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting" | "vercelGatewayRouting" | "extraBody" | "toolStrictMode" | "streamIdleTimeoutMs" | "supportsLongPromptCacheRetention" | "cacheControlFormat" | "thinkingKeep" | "strictResponsesPairing" | "requiresJuiceZeroHack" | "whenThinking">> & {
     openRouterRouting?: OpenAICompat["openRouterRouting"];
     vercelGatewayRouting?: OpenAICompat["vercelGatewayRouting"];
     extraBody?: OpenAICompat["extraBody"];
@@ -301,6 +309,7 @@ export interface ResolvedOpenAIResponsesCompat {
     supportsReasoningEffort: boolean;
     supportsLongPromptCacheRetention: boolean;
     strictResponsesPairing: boolean;
+    requiresJuiceZeroHack: boolean;
     reasoningEffortMap: Partial<Record<Effort, string>>;
 }
 /** Fully-resolved anthropic-messages compat view (same contract as `ResolvedOpenAICompat`). */
@@ -319,6 +328,15 @@ export type CompatConfigOf<TApi extends Api> = TApi extends "openai-completions"
 export type CompatOf<TApi extends Api> = TApi extends "openai-completions" ? ResolvedOpenAICompat : TApi extends "openai-responses" | "azure-openai-responses" | "openai-codex-responses" ? ResolvedOpenAIResponsesCompat : TApi extends "anthropic-messages" ? ResolvedAnthropicCompat : undefined;
 export interface Model<TApi extends Api = Api> {
     id: string;
+    /**
+     * Model id to send on the wire when it differs from `id`. Used by catalog
+     * variants that present one upstream model under several local entries —
+     * e.g. GitHub Copilot long-context variants (`claude-opus-4.7-1m` requests
+     * upstream `claude-opus-4.7`; the tier is a client-side context budget, not
+     * a served model id). Providers MUST serialize `requestModelId ?? id`;
+     * everything local (selection, caching, usage attribution) keys on `id`.
+     */
+    requestModelId?: string;
     name: string;
     api: TApi;
     provider: Provider;

package/dist/types/wire/github-copilot.d.ts CHANGED Viewed

@@ -7,6 +7,21 @@ export declare const COPILOT_USER_AGENT: "opencode/1.3.15";
 export declare const OPENCODE_HEADERS: {
     readonly "User-Agent": "opencode/1.3.15";
 };
+/**
+ * Copilot API version sent on `api.githubcopilot.com` requests (`/models`,
+ * chat endpoints). Newer versions unlock tiered context metadata: `/models`
+ * reports the full long-context window in `capabilities.limits` plus per-tier
+ * boundaries/prices under `billing.token_prices.{default,long_context}`.
+ * Without it the endpoint serves default-tier limits only (e.g. 264k instead
+ * of 1M for Claude Opus). Never send this to `api.github.com` REST endpoints —
+ * they validate `X-GitHub-Api-Version` against the REST version vocabulary.
+ */
+export declare const COPILOT_API_VERSION: "2026-06-01";
+/** Headers for `api.githubcopilot.com` (capi) requests: discovery, chat, policy. */
+export declare const COPILOT_API_HEADERS: {
+    readonly "User-Agent": "opencode/1.3.15";
+    readonly "X-GitHub-Api-Version": "2026-06-01";
+};
 export type ParsedGitHubCopilotApiKey = {
     accessToken: string;
     enterpriseUrl?: string;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-catalog",
-	"version": "15.11.2",
+	"version": "15.11.4",
 	"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -34,11 +34,11 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "15.11.2",
+		"@oh-my-pi/pi-utils": "15.11.4",
 		"zod": "4.4.3"
 	},
 	"devDependencies": {
-		"@oh-my-pi/pi-ai": "15.11.2",
+		"@oh-my-pi/pi-ai": "15.11.4",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/compat/openai.ts CHANGED Viewed

@@ -8,7 +8,6 @@
  * never detect, resolve, or allocate.
  */
 import { hostMatchesUrl, modelMatchesHost } from "../hosts";
-import { bareModelId, isFableOrMythos, parseAnthropicModel, semverGte } from "../identity/classify";
 import {
 	isAnthropicNamespacedModelId,
 	isClaudeModelId,
@@ -18,12 +17,9 @@ import {
 	isMimoModelIdOrName,
 	isQwenModelId,
 } from "../identity/family";
-import { ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER, ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER } from "../model-thinking";
 import type { ModelSpec, OpenAICompat, ResolvedOpenAICompat, ResolvedOpenAIResponsesCompat } from "../types";
 import { applyCompatOverrides } from "./apply";
-type OpenAIReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
 /** GLM coding-plan SKUs idle for minutes mid-reasoning; see `streamIdleTimeoutMs`. */
 const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;
 const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
@@ -72,22 +68,6 @@ function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
 	);
 }
-function getOpenRouterAnthropicReasoningEffortMap(
-	modelId: string,
-): Partial<Record<OpenAIReasoningEffort, string>> | undefined {
-	const parsed = parseAnthropicModel(bareModelId(modelId));
-	if (!parsed) return undefined;
-	// Adaptive efforts on OpenRouter's completions front: Fable/Mythos and
-	// Opus 4.6+ only — Sonnet stays on the plain effort vocabulary there.
-	const isOpusAdaptive = parsed.kind === "opus" && semverGte(parsed.version, "4.6");
-	if (!isFableOrMythos(parsed.kind) && !isOpusAdaptive) return undefined;
-	const hasRealXHigh = isFableOrMythos(parsed.kind) || semverGte(parsed.version, "4.7");
-	return (hasRealXHigh ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER) as Partial<
-		Record<OpenAIReasoningEffort, string>
-	>;
-}
 /**
  * Build the resolved chat-completions compat record for a model spec.
  * Provider takes precedence over URL-based detection since it's explicitly configured.
@@ -198,36 +178,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 			isCopilotHost ||
 			isZenmuxHost);
-	const openRouterAnthropicReasoningEffortMap = isOpenRouter
-		? getOpenRouterAnthropicReasoningEffortMap(lowerId)
-		: undefined;
-	const detectedReasoningEffortMap: NonNullable<OpenAICompat["reasoningEffortMap"]> =
-		provider === "groq" && spec.id === "qwen/qwen3-32b"
-			? ({
-					minimal: "default",
-					low: "default",
-					medium: "default",
-					high: "default",
-					xhigh: "default",
-				} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
-			: isDeepseekFamily && spec.reasoning
-				? ({
-						minimal: "high",
-						low: "high",
-						medium: "high",
-						high: "high",
-						xhigh: "max",
-					} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
-				: openRouterAnthropicReasoningEffortMap
-					? openRouterAnthropicReasoningEffortMap
-					: isFireworks
-						? ({
-								// Fireworks' OpenAI-compatible endpoint rejects OpenAI's
-								// `minimal` literal but accepts `none` for the lowest setting.
-								minimal: "none",
-							} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
-						: {};
 	// Stream-watchdog floor: GLM coding-plan SKUs and direct DeepSeek reasoning
 	// models idle for minutes mid-reasoning; widen the idle timeout so warm-ups
 	// stop aborting and retrying.
@@ -251,7 +201,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 		supportsReasoningEffort: !isGrok && !isZai && !isZhipu && !isXiaomiMimo,
 		// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
 		supportsReasoningParams: provider !== "github-copilot",
-		reasoningEffortMap: detectedReasoningEffortMap,
+		reasoningEffortMap: {},
 		supportsUsageInStreaming: !isCerebras,
 		// Kimi (including via OpenRouter and Fireworks router-form IDs such as
 		// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
@@ -323,10 +273,6 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 	};
 	applyCompatOverrides(compat, spec.compat);
-	if (spec.compat?.reasoningEffortMap) {
-		// Effort maps merge per level instead of replacing wholesale.
-		compat.reasoningEffortMap = { ...detectedReasoningEffortMap, ...spec.compat.reasoningEffortMap };
-	}
 	const whenThinkingPolicy =
 		spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
@@ -341,6 +287,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 interface OpenAIResponsesSpecLike {
 	provider: string;
+	name: string;
 	baseUrl: string;
 	compat?: OpenAICompat;
 }
@@ -351,7 +298,8 @@ interface OpenAIResponsesSpecLike {
  * endpoint accepts the `developer` role, while strict tool mode is scoped to
  * first-party OpenAI/Azure/Copilot providers. Developer-role and prompt-cache
  * detection are URL-only on purpose — the historical call sites never
- * consulted the provider id for them.
+ * consulted the provider id for them. The GPT-5 juice-zero hack keys on the
+ * model name, matching the historical request-time check.
  */
 export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): ResolvedOpenAIResponsesCompat {
 	const baseUrl = spec.baseUrl ?? "";
@@ -371,6 +319,7 @@ export function buildOpenAIResponsesCompat(spec: OpenAIResponsesSpecLike): Resol
 		// Azure OpenAI and GitHub Copilot Responses paths require tool results
 		// to strictly match prior tool calls when building Responses inputs.
 		strictResponsesPairing: hostMatchesUrl(baseUrl, "azureOpenAI") || spec.provider === "github-copilot",
+		requiresJuiceZeroHack: spec.name.toLowerCase().startsWith("gpt-5"),
 		reasoningEffortMap: {},
 	};
 	applyCompatOverrides(compat, spec.compat);

package/src/discovery/openai-compatible.ts CHANGED Viewed

@@ -169,7 +169,9 @@ export async function fetchOpenAICompatibleModels<TApi extends Api>(
 			maxTokens: UNK_MAX_TOKENS,
 		};
-		const mapped = options.mapModel?.(entry, defaults, context) ?? defaults;
+		// `mapModel` returning null skips the entry (documented contract); only a
+		// missing mapper falls back to the defaults.
+		const mapped = options.mapModel ? options.mapModel(entry, defaults, context) : defaults;
 		if (!mapped || typeof mapped.id !== "string" || mapped.id.length === 0) {
 			continue;
 		}

package/src/identity/family.ts CHANGED Viewed

@@ -44,6 +44,33 @@ export function isMimoModelIdOrName(value: string): boolean {
 	return value.toLowerCase().includes("mimo");
 }
+/**
+ * MiniMax M2-generation family (M2, M2.1, M2.5, M2.7, including `-highspeed`/
+ * `-lightning`/`-her`/`-turbo` variants, dotless aliases like `minimax-m21`,
+ * and short `minimax/m2-…` ids on aggregator hosts). Underlying model accepts
+ * only `low|medium|high` for `reasoning_effort` and 400s on `minimal`,
+ * `xhigh`, or `none` — so hosts whose default effort map otherwise lowers
+ * `minimal` to `none` (Fireworks) or expects the full 5-tier scale must
+ * clamp instead. Excludes M1, M3, MiniMax-Text-01, music, hailuo, voice ids.
+ */
+export function isMinimaxM2FamilyModelId(modelId: string): boolean {
+	const lower = modelId.toLowerCase();
+	if (!lower.includes("minimax")) return false;
+	// Boundary-delimited `m2` token followed by zero or more digits (dotless
+	// variants like `m21`/`m25`/`m27`) and an optional `.`- or `-`-separated
+	// minor version (`m2.7`, `m2-1`).
+	return /(?:^|[/.-])m2\d*(?:[.-]\d+)?(?:[-.:_]|$)/i.test(lower);
+}
+/**
+ * OpenAI gpt-oss family (`gpt-oss-20b`, `gpt-oss-120b`, `gpt-oss:120b`,
+ * `vendor/gpt-oss-…`). The Harmony reasoning format only accepts
+ * `low|medium|high` for `reasoning_effort` and rejects `minimal`, `xhigh`,
+ * and `none`.
+ */
+export function isOpenAIGptOssModelId(modelId: string): boolean {
+	return /(^|\/)gpt-oss[-:]/i.test(modelId);
+}
 /**
  * Adaptive thinking `display` is supported starting with Claude Opus 4.7 and
  * the Claude Fable/Mythos 5 generation. Older adaptive-thinking models

package/src/model-thinking.ts CHANGED Viewed

@@ -10,15 +10,22 @@ import { Effort, THINKING_EFFORTS } from "./effort";
 import { modelMatchesHost } from "./hosts";
 import {
 	type AnthropicModel,
+	bareModelId,
 	type GeminiModel,
 	isFableOrMythos,
 	type OpenAIModel,
 	type ParsedModel,
+	parseAnthropicModel,
 	parseKnownModel,
 	semverEqual,
 	semverGte,
 } from "./identity/classify";
-import { supportsAdaptiveThinkingDisplay } from "./identity/family";
+import {
+	isDeepseekModelIdOrName,
+	isMinimaxM2FamilyModelId,
+	isOpenAIGptOssModelId,
+	supportsAdaptiveThinkingDisplay,
+} from "./identity/family";
 import type {
 	Api,
 	CompatOf,
@@ -47,6 +54,27 @@ const GEMINI_3_PRO_EFFORTS: readonly Effort[] = [Effort.Low, Effort.High];
 const GEMINI_3_FLASH_EFFORTS: readonly Effort[] = [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High];
 const GPT_5_2_PLUS_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High, Effort.XHigh];
 const GPT_5_1_CODEX_MINI_EFFORTS: readonly Effort[] = [Effort.Medium, Effort.High];
+const LOW_MEDIUM_HIGH_REASONING_EFFORTS: readonly Effort[] = [Effort.Low, Effort.Medium, Effort.High];
+type EffortMap = Partial<Record<Effort, string>>;
+const GROQ_QWEN3_32B_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
+	[Effort.Minimal]: "default",
+	[Effort.Low]: "default",
+	[Effort.Medium]: "default",
+	[Effort.High]: "default",
+	[Effort.XHigh]: "default",
+};
+const DEEPSEEK_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
+	[Effort.Minimal]: "high",
+	[Effort.Low]: "high",
+	[Effort.Medium]: "high",
+	[Effort.High]: "high",
+	[Effort.XHigh]: "max",
+};
+const FIREWORKS_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
+	[Effort.Minimal]: "none",
+};
 /**
  * Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
@@ -88,7 +116,7 @@ export const ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER: Readonly<Partial<Record<Effor
  * - Explicit spec thinking (generator-baked or user-authored) owns the
  *   capability surface (`mode`, `efforts`, `defaultLevel`); the wire facts
  *   (`effortMap`, `supportsDisplay`) are backfilled from identity when not
- *   explicitly set, so configs never need to know Anthropic's tier tables.
+ *   explicitly set, so configs never need to know provider wire tier tables.
  * - Sparse specs go through full inference.
  */
 export function resolveModelThinking<TApi extends Api>(
@@ -98,7 +126,7 @@ export function resolveModelThinking<TApi extends Api>(
 	if (!spec.reasoning) return undefined;
 	if (omitsWireReasoningEffort(spec.api, compat)) return undefined;
 	if (spec.thinking && Array.isArray(spec.thinking.efforts) && spec.thinking.efforts.length > 0) {
-		return fillThinkingWireDefaults(spec, spec.thinking);
+		return fillThinkingWireDefaults(spec, compat, spec.thinking);
 	}
 	// Empty/malformed explicit metadata is treated as absent — infer instead.
 	return deriveThinking(spec, compat);
@@ -106,23 +134,42 @@ export function resolveModelThinking<TApi extends Api>(
 /**
  * Backfill identity-derived wire facts onto explicit thinking metadata.
- * Explicit `effortMap` / `supportsDisplay` (including `false`) always win;
- * untouched configs are returned as-is with zero allocation.
+ * Explicit `effortMap` / `supportsDisplay` (including `false`) win, except
+ * model-defined effort restrictions still normalize stale cached capability
+ * surfaces before request-time code can observe them.
  */
-function fillThinkingWireDefaults<TApi extends Api>(spec: ModelSpec<TApi>, thinking: ThinkingConfig): ThinkingConfig {
-	const needsEffortMap = thinking.mode === "anthropic-adaptive" && thinking.effortMap === undefined;
+function fillThinkingWireDefaults<TApi extends Api>(
+	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
+	thinking: ThinkingConfig,
+): ThinkingConfig {
+	const parsed = parseKnownModel(spec.id);
+	const normalizedEfforts = getModelDefinedEfforts(spec) ?? thinking.efforts;
+	const effortsChanged = !sameEffortList(normalizedEfforts, thinking.efforts);
+	const effortMap =
+		thinking.effortMap === undefined
+			? inferEffortMap(spec, compat, parsed, thinking.mode, normalizedEfforts)
+			: effortsChanged
+				? filterEffortMapToSupportedEfforts(thinking.effortMap, normalizedEfforts)
+				: undefined;
+	const shouldReplaceEffortMap = thinking.effortMap === undefined ? effortMap !== undefined : effortsChanged;
 	const needsDisplay =
 		thinking.supportsDisplay === undefined &&
 		(spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
 		supportsAdaptiveThinkingDisplay(spec.id);
-	if (!needsEffortMap && !needsDisplay) {
+	if (!effortsChanged && !shouldReplaceEffortMap && !needsDisplay) {
 		return thinking;
 	}
 	const filled: ThinkingConfig = { ...thinking };
-	if (needsEffortMap) {
-		filled.effortMap = anthropicModelHasRealXHighEffort(spec, parseKnownModel(spec.id))
-			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
-			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+	if (effortsChanged) {
+		filled.efforts = normalizedEfforts;
+	}
+	if (shouldReplaceEffortMap) {
+		if (effortMap === undefined) {
+			delete filled.effortMap;
+		} else {
+			filled.effortMap = effortMap;
+		}
 	}
 	if (needsDisplay) {
 		filled.supportsDisplay = true;
@@ -141,10 +188,9 @@ export function deriveThinking<TApi extends Api>(spec: ModelSpec<TApi>, compat:
 		mode: inferThinkingControlMode(spec, parsed),
 		efforts,
 	};
-	if (config.mode === "anthropic-adaptive") {
-		config.effortMap = anthropicModelHasRealXHighEffort(spec, parsed)
-			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
-			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+	const effortMap = inferEffortMap(spec, compat, parsed, config.mode, config.efforts);
+	if (effortMap !== undefined) {
+		config.effortMap = effortMap;
 	}
 	if (
 		(spec.api === "anthropic-messages" || spec.api === "bedrock-converse-stream") &&
@@ -171,11 +217,117 @@ function omitsWireReasoningEffort(api: Api, compat: CompatOf<Api>): boolean {
 	return (compat as ResolvedOpenAIResponsesCompat | undefined)?.supportsReasoningEffort === false;
 }
+function inferEffortMap<TApi extends Api>(
+	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
+	parsedModel: ParsedModel,
+	mode: ThinkingConfig["mode"],
+	efforts: readonly Effort[],
+): EffortMap | undefined {
+	const detected = inferDetectedEffortMap(spec, parsedModel, mode);
+	const configured = readCompatEffortMap(compat);
+	const merged =
+		detected === undefined ? configured : configured === undefined ? detected : { ...detected, ...configured };
+	return merged === undefined ? undefined : filterEffortMapToSupportedEfforts(merged, efforts);
+}
+function filterEffortMapToSupportedEfforts(map: EffortMap, efforts: readonly Effort[]): EffortMap | undefined {
+	let filtered: EffortMap | undefined;
+	for (const effort of efforts) {
+		const mapped = map[effort];
+		if (mapped === undefined) continue;
+		if (filtered === undefined) filtered = {};
+		filtered[effort] = mapped;
+	}
+	return filtered;
+}
+function sameEffortList(left: readonly Effort[], right: readonly Effort[]): boolean {
+	if (left.length !== right.length) return false;
+	for (let index = 0; index < left.length; index++) {
+		if (left[index] !== right[index]) return false;
+	}
+	return true;
+}
+function getModelDefinedEfforts<TApi extends Api>(spec: ModelSpec<TApi>): readonly Effort[] | undefined {
+	return spec.api === "openai-completions" && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
+		? LOW_MEDIUM_HIGH_REASONING_EFFORTS
+		: undefined;
+}
+function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
+	if (compat === undefined || !("reasoningEffortMap" in compat)) {
+		return undefined;
+	}
+	const map = compat.reasoningEffortMap;
+	return map && Object.keys(map).length > 0 ? map : undefined;
+}
+function inferDetectedEffortMap<TApi extends Api>(
+	spec: ModelSpec<TApi>,
+	parsedModel: ParsedModel,
+	mode: ThinkingConfig["mode"],
+): EffortMap | undefined {
+	if (mode === "anthropic-adaptive") {
+		return anthropicModelHasRealXHighEffort(spec, parsedModel)
+			? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER
+			: ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+	}
+	if (spec.api !== "openai-completions") {
+		return undefined;
+	}
+	if (spec.provider === "groq" && spec.id === "qwen/qwen3-32b") {
+		return GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
+	}
+	if (isDeepseekReasoningModel(spec)) {
+		return DEEPSEEK_REASONING_EFFORT_MAP;
+	}
+	if (modelMatchesHost(spec, "openrouter")) {
+		const openRouterAnthropicMap = getOpenRouterAnthropicReasoningEffortMap(spec.id);
+		if (openRouterAnthropicMap !== undefined) return openRouterAnthropicMap;
+	}
+	if (modelMatchesHost(spec, "fireworks")) {
+		return FIREWORKS_REASONING_EFFORT_MAP;
+	}
+	return undefined;
+}
+function isDeepseekReasoningModel<TApi extends Api>(spec: ModelSpec<TApi>): boolean {
+	if (!spec.reasoning) return false;
+	const lowerId = spec.id.toLowerCase();
+	const lowerName = (spec.name ?? "").toLowerCase();
+	const isOpenCodeDeepseekAlias =
+		spec.provider === "opencode-zen" && (lowerId === "big-pickle" || lowerName === "big pickle");
+	return (
+		modelMatchesHost(spec, "deepseekFamily") ||
+		isDeepseekModelIdOrName(spec.id) ||
+		isDeepseekModelIdOrName(spec.name ?? "") ||
+		isOpenCodeDeepseekAlias
+	);
+}
+function getOpenRouterAnthropicReasoningEffortMap(modelId: string): EffortMap | undefined {
+	const parsed = parseAnthropicModel(bareModelId(modelId));
+	if (!parsed) return undefined;
+	// Adaptive efforts on OpenRouter's completions front: Fable/Mythos and
+	// Opus 4.6+ only — Sonnet stays on the plain effort vocabulary there.
+	const isOpusAdaptive = parsed.kind === "opus" && semverGte(parsed.version, "4.6");
+	if (!isFableOrMythos(parsed.kind) && !isOpusAdaptive) return undefined;
+	const hasRealXHigh = isFableOrMythos(parsed.kind) || semverGte(parsed.version, "4.7");
+	return hasRealXHigh ? ANTHROPIC_ADAPTIVE_EFFORT_MAP_5_TIER : ANTHROPIC_ADAPTIVE_EFFORT_MAP_4_TIER;
+}
 function inferSupportedEfforts<TApi extends Api>(
 	parsedModel: ParsedModel,
 	spec: ModelSpec<TApi>,
 	compat: CompatOf<TApi>,
 ): readonly Effort[] {
+	const modelDefinedEfforts = getModelDefinedEfforts(spec);
+	if (modelDefinedEfforts !== undefined) {
+		return modelDefinedEfforts;
+	}
 	switch (parsedModel.family) {
 		case "openai":
 			return inferOpenAISupportedEfforts(parsedModel);