npm - @oh-my-pi/pi-catalog - Versions diffs - 16.1.7 → 16.1.8 - Mend

@oh-my-pi/pi-catalog 16.1.7 → 16.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/CHANGELOG.md +7 -0
package/dist/types/fireworks-model-id.d.ts +13 -0
package/dist/types/provider-models/openai-compat.d.ts +7 -0
package/package.json +3 -3
package/src/compat/openai.ts +35 -10
package/src/fireworks-model-id.ts +20 -0
package/src/model-thinking.ts +26 -1
package/src/models.json +228 -96
package/src/provider-models/openai-compat.ts +46 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,13 @@
 ## [Unreleased]
+## [16.1.8] - 2026-06-20
+### Fixed
+- Fixed Fireworks-hosted Qwen turns (e.g. `fireworks/qwen3.7-plus`) failing with `400 Extra inputs are not permitted, field: 'enable_thinking'`. Fireworks serves Qwen3 with controllable thinking via OpenAI-style `reasoning_effort` and rejects the top-level `enable_thinking` boolean that Alibaba DashScope speaks; `buildOpenAICompat` was selecting `thinkingFormat: "qwen"` from the `qwen` id pattern regardless of host. Fireworks-hosted Qwen models now resolve to `thinkingFormat: "openai"`.
+- Fixed MiMo models on OpenAI-compatible gateways to expose only accepted `low`, `medium`, and `high` reasoning tiers and map unsupported raw `minimal`/`xhigh` requests to safe wire values. ([#2864](https://github.com/can1357/oh-my-pi/issues/2864))
 ## [16.1.7] - 2026-06-20
 ### Fixed

package/dist/types/fireworks-model-id.d.ts CHANGED Viewed

@@ -8,3 +8,16 @@ export declare function toFireworksWireModelId(modelId: string): string;
  */
 export declare function toFirepassPublicModelId(modelId: string): string;
 export declare function toFirepassWireModelId(modelId: string): string;
+/**
+ * Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
+ * higher-throughput route (100+ tok/s) exposed under a dedicated router id
+ * (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
+ * higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
+ * translate it to the router wire form at request time (compat
+ * `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
+ */
+export declare const FIREWORKS_FAST_SUFFIX = "-fast";
+/** True for a Fireworks public model id that selects the Fast serving path. */
+export declare function isFireworksFastModelId(modelId: string): boolean;
+/** Strip the Fast suffix to recover the base (Standard-tier) model id. */
+export declare function toFireworksBaseModelId(modelId: string): string;

package/dist/types/provider-models/openai-compat.d.ts CHANGED Viewed

@@ -179,6 +179,13 @@ export declare const KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS = 32768;
 export declare function isKimiK27CodeModelId(modelId: string): boolean;
 export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number): number;
 export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | null): number | null;
+/**
+ * Build the Fireworks Fast seed by projecting each base bundled spec into a
+ * `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
+ * appear in the serverless control-plane list, so discovery cannot surface
+ * them) and deduped behind any identical previous-snapshot entry.
+ */
+export declare function buildFireworksFastSeed(): ModelSpec<"openai-completions">[];
 /**
  * Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
  * DeepSeek-native binary `thinking` toggle when both are present.

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-catalog",
-	"version": "16.1.7",
+	"version": "16.1.8",
 	"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -34,12 +34,12 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "16.1.7",
+		"@oh-my-pi/pi-utils": "16.1.8",
 		"arktype": "^2.2.0",
 		"zod": "^4"
 	},
 	"devDependencies": {
-		"@oh-my-pi/pi-ai": "16.1.7",
+		"@oh-my-pi/pi-ai": "16.1.8",
 		"@types/bun": "^1.3.14"
 	},
 	"engines": {

package/src/compat/openai.ts CHANGED Viewed

@@ -7,6 +7,7 @@
  * complete alternate views. Request handlers read `model.compat` fields and
  * never detect, resolve, or allocate.
  */
+import { isFireworksFastModelId } from "../fireworks-model-id";
 import { hostMatchesUrl, modelMatchesHost } from "../hosts";
 import {
 	isAnthropicNamespacedModelId,
@@ -130,6 +131,16 @@ const OPENCODE_WHEN_THINKING: NonNullable<OpenAICompat["whenThinking"]> = {
 	reasoningContentField: "reasoning_content",
 };
+const MIMO_REASONING_EFFORT_MAP: NonNullable<OpenAICompat["reasoningEffortMap"]> = { // Wire-value overrides applied to MiMo models on non-Xiaomi hosts (see isMimoReasoningEffortModel in buildOpenAICompat).
+	minimal: "low", // "minimal" is outside the low/medium/high tiers exposed for MiMo, so it is sent as "low"
+	xhigh: "high", // "xhigh" is outside those tiers as well, so it is sent as "high"
+};
+function mergeMimoReasoningEffortMap(compat: ResolvedOpenAISharedCompat, enabled: boolean): void { // Layers the MiMo effort overrides onto `compat.reasoningEffortMap` in place.
+	if (!enabled) return; // callers pass isMimoReasoningEffortModel; when false the compat object is left untouched
+	compat.reasoningEffortMap = { ...MIMO_REASONING_EFFORT_MAP, ...compat.reasoningEffortMap }; // MiMo defaults are spread first, so entries already present on compat win; a fresh object is assigned rather than mutating the shared constant
+}
 function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
 	if (
 		provider === "openai" ||
@@ -184,6 +195,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 	const lowerName = (spec.name ?? "").toLowerCase();
 	const isXiaomiHost = modelMatchesHost(hostModel, "xiaomi");
 	const isXiaomiMimo = isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
+	const isMimoReasoningEffortModel =
+		!isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
 	// OpenCode Zen's `big-pickle` is a DeepSeek reasoning alias; the upstream
 	// 400s come from DeepSeek and require exact reasoning_content replay.
 	const isOpenCodeDeepseekAlias =
@@ -238,17 +251,21 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 	const isGroqHost = modelMatchesHost(hostModel, "groq");
 	const isCopilotHost = provider === "github-copilot";
 	const isZenmuxHost = provider === "zenmux";
-	// Endpoints that MUST receive a single system block. MiniMax's OpenAI
-	// endpoint returns error 2013 on multiple system messages; Alibaba's
-	// Dashscope and Qwen Portal serve Qwen models whose chat template
-	// raises "System message must be at the beginning" if any system
-	// message appears past index 0.
+	// Endpoints/models that MUST receive a single system block. MiniMax's OpenAI
+	// endpoint returns error 2013 on multiple system messages; the Qwen 3.5+ chat
+	// template raises "System message must be at the beginning" / 500s with an
+	// internal_server_error when any system block appears past index 0. That
+	// template ships with the weights, so every Qwen-serving vLLM/SGLang host
+	// hits it — confirmed on Alibaba Dashscope, Qwen Portal, and Fireworks
+	// (`fireworks/qwen3.7-plus` 500'd on two leading system blocks). Gate on the
+	// Qwen family itself, not per-host: coalescing only trades away KV-cache reuse.
 	const isMiniMaxHost = modelMatchesHost(hostModel, "minimax");
 	const isQwenPortal = modelMatchesHost(hostModel, "qwenPortal");
 	const supportsMultipleSystemMessagesDefault =
 		!isMiniMaxHost &&
 		!isAlibaba &&
 		!isQwenPortal &&
+		!isQwen &&
 		(isOpenAIHost ||
 			isAzureHost ||
 			isOpenRouter ||
@@ -276,8 +293,12 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 					? DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS
 					: undefined;
+	// Fireworks "Fast" variants (`<id>-fast`) are served from the router
+	// namespace (`accounts/fireworks/routers/<id>-fast`), like Fire Pass, rather
+	// than the `models/` namespace the rest of the `fireworks` provider uses.
+	const isFireworksFastRouter = provider === "fireworks" && isFireworksFastModelId(spec.id);
 	const wireModelIdMode: ResolvedOpenAISharedCompat["wireModelIdMode"] =
-		provider === "firepass"
+		provider === "firepass" || isFireworksFastRouter
 			? "firepass"
 			: provider === "fireworks"
 				? "fireworks"
@@ -291,9 +312,11 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 				? "openrouter"
 				: isQwen && isNvidiaNim
 					? "qwen-chat-template"
-					: isAlibaba || isQwen
-						? "qwen"
-						: "openai";
+					: isQwen && isFireworks
+						? "openai"
+						: isAlibaba || isQwen
+							? "qwen"
+							: "openai";
 	const compat: ResolvedOpenAICompat = {
 		supportsStore: !isNonStandard,
@@ -308,7 +331,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 		supportsReasoningEffort: !isGrok && !isXiaomiMimo && (!(isZai || isZhipu) || supportsZaiReasoningEffort),
 		// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
 		supportsReasoningParams: provider !== "github-copilot",
-		reasoningEffortMap: {},
+		reasoningEffortMap: isMimoReasoningEffortModel ? MIMO_REASONING_EFFORT_MAP : {},
 		supportsUsageInStreaming: !isCerebras,
 		// pi-ai's thinking-loop guard is gemini-only; default the flag from the
 		// family classifier so OpenAI-compat proxies serving Gemini are covered.
@@ -400,6 +423,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 		compat.omitReasoningEffort = true;
 	}
 	mergeOllamaReasoningEffortMap(compat, provider, spec.reasoning);
+	mergeMimoReasoningEffortMap(compat, isMimoReasoningEffortModel);
 	const whenThinkingPolicy =
 		spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
@@ -413,6 +437,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
 			variant.omitReasoningEffort = true;
 		}
 		mergeOllamaReasoningEffortMap(variant, provider, spec.reasoning);
+		mergeMimoReasoningEffortMap(variant, isMimoReasoningEffortModel);
 		compat.whenThinking = variant;
 	}

package/src/fireworks-model-id.ts CHANGED Viewed

@@ -28,3 +28,23 @@ export function toFirepassWireModelId(modelId: string): string {
 	const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
 	return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
 }
+/**
+ * Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
+ * higher-throughput route (100+ tok/s) exposed under a dedicated router id
+ * (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
+ * higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
+ * translate it to the router wire form at request time (compat
+ * `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
+ *
+ * Consumed by `isFireworksFastModelId` / `toFireworksBaseModelId` in this module
+ * and appended to base ids when the Fast seed is built in
+ * `provider-models/openai-compat.ts`.
+ */
+export const FIREWORKS_FAST_SUFFIX = "-fast";
+/**
+ * True for a Fireworks public model id that selects the Fast serving path.
+ *
+ * This is a plain, case-sensitive `endsWith` test against
+ * `FIREWORKS_FAST_SUFFIX`. It does not check that a non-empty base id precedes
+ * the suffix, nor that the id belongs to Fireworks; the caller in
+ * `buildOpenAICompat` pairs it with `provider === "fireworks"`.
+ *
+ * @param modelId Public model id to test.
+ * @returns `true` when `modelId` ends with `-fast`, otherwise `false`.
+ */
+export function isFireworksFastModelId(modelId: string): boolean {
+	return modelId.endsWith(FIREWORKS_FAST_SUFFIX);
+}
+/**
+ * Strip the Fast suffix to recover the base (Standard-tier) model id.
+ *
+ * Removes at most one trailing `-fast`; an id that does not end with the
+ * suffix is returned unchanged.
+ *
+ * @param modelId Public model id, with or without the Fast suffix.
+ * @returns `modelId` without its trailing `-fast`, or `modelId` itself.
+ */
+export function toFireworksBaseModelId(modelId: string): string {
+	return modelId.endsWith(FIREWORKS_FAST_SUFFIX) ? modelId.slice(0, -FIREWORKS_FAST_SUFFIX.length) : modelId;
+}

package/src/model-thinking.ts CHANGED Viewed

@@ -24,6 +24,7 @@ import {
 	findThinkingVariantToken,
 	isDeepseekModelIdOrName,
 	isGlm52ReasoningEffortModelId,
+	isMimoModelIdOrName,
 	isMinimaxM2FamilyModelId,
 	isMinimaxM3FamilyModelId,
 	isOpenAIGptOssModelId,
@@ -89,6 +90,10 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
 const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
 	[Effort.XHigh]: "max",
 };
+const MIMO_REASONING_EFFORT_MAP: Readonly<EffortMap> = { // Effort → wire-value overrides selected by inferDetectedEffortMap for MiMo on OpenAI-compatible APIs.
+	[Effort.Minimal]: "low", // NOTE(review): appears to mirror the MiMo map in compat/openai.ts — confirm Effort.Minimal is the "minimal" key used there
+	[Effort.XHigh]: "high", // the two maps are declared separately, so keep them in sync when editing either
+};
 /**
  * Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
@@ -296,7 +301,10 @@ function getModelDefinedEfforts<TApi extends Api>(
 			return GLM_52_HIGH_MAX_REASONING_EFFORTS;
 		}
 	}
-	return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
+	return isOpenAICompatReasoningApi(spec.api) &&
+		(isMinimaxM2FamilyModelId(spec.id) ||
+			isOpenAIGptOssModelId(spec.id) ||
+			isOpenAICompatMimoReasoningEffortModel(spec, compat))
 		? LOW_MEDIUM_HIGH_REASONING_EFFORTS
 		: undefined;
 }
@@ -309,6 +317,19 @@ function isMinimaxReasoningModelOnAnthropicEndpoint<TApi extends Api>(spec: Mode
 	return spec.api === "anthropic-messages" && (isMinimaxM2FamilyModelId(spec.id) || isMinimaxM3FamilyModelId(spec.id));
 }
+function isOpenAICompatMimoReasoningEffortModel<TApi extends Api>( // True when a MiMo model on an OpenAI-compatible reasoning API should be limited to the low/medium/high effort tiers.
+	spec: ModelSpec<TApi>,
+	compat: CompatOf<TApi>,
+): boolean {
+	if (!isOpenAICompatReasoningApi(spec.api)) return false; // API gate comes first
+	if (!isMimoModelIdOrName(spec.id) && !isMimoModelIdOrName(spec.name ?? "")) return false; // MiMo is recognised by id or by display name; a missing name is treated as ""
+	const resolved = compat as ResolvedOpenAICompat | undefined; // type assertion only, no runtime validation of the compat shape
+	return (
+		(resolved?.thinkingFormat === "openai" || resolved?.thinkingFormat === "openrouter") && // an undefined compat fails here, so the property access below is never reached without an object
+		resolved.supportsReasoningEffort // buildOpenAICompat sets this to false for Xiaomi-hosted MiMo, which excludes that host
+	);
+}
 function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
 	if (compat === undefined || !("reasoningEffortMap" in compat)) {
 		return undefined;
@@ -364,6 +385,8 @@ function inferDetectedEffortMap<TApi extends Api>(
 		map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
 	} else if (isDeepseekReasoningModel(spec)) {
 		map = DEEPSEEK_REASONING_EFFORT_MAP;
+	} else if (isOpenAICompatMimoReasoningEffortModel(spec, compat)) {
+		map = MIMO_REASONING_EFFORT_MAP;
 	} else if (modelMatchesHost(spec, "openrouter")) {
 		map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
 	} else if (modelMatchesHost(spec, "fireworks")) {
@@ -485,6 +508,8 @@ function inferAnthropicSupportedEfforts<TApi extends Api>(
 }
 function inferFallbackEfforts<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): readonly Effort[] {
+	const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
+	if (modelDefinedEfforts !== undefined) return modelDefinedEfforts;
 	if (isMinimaxReasoningModelOnAnthropicEndpoint(spec)) {
 		return LOW_MEDIUM_HIGH_REASONING_EFFORTS;
 	}

package/src/models.json CHANGED Viewed

@@ -7208,11 +7208,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -7238,11 +7236,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -7267,11 +7263,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		}
@@ -14801,6 +14795,38 @@
 				}
 			}
 		},
+		"glm-5.1-fast": {
+			"id": "glm-5.1-fast",
+			"name": "GLM-5.1 Fast",
+			"api": "openai-completions",
+			"provider": "fireworks",
+			"baseUrl": "https://api.fireworks.ai/inference/v1",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 2.8,
+				"output": 8.8,
+				"cacheRead": 0.52,
+				"cacheWrite": 0
+			},
+			"contextWindow": 202752,
+			"maxTokens": 131072,
+			"thinking": {
+				"mode": "effort",
+				"efforts": [
+					"minimal",
+					"low",
+					"medium",
+					"high",
+					"xhigh"
+				],
+				"effortMap": {
+					"minimal": "none"
+				}
+			}
+		},
 		"glm-5.2": {
 			"id": "glm-5.2",
 			"name": "GLM-5.2",
@@ -14947,6 +14973,39 @@
 				}
 			}
 		},
+		"kimi-k2.6-fast": {
+			"id": "kimi-k2.6-fast",
+			"name": "Kimi K2.6 Fast",
+			"api": "openai-completions",
+			"provider": "fireworks",
+			"baseUrl": "https://api.fireworks.ai/inference/v1",
+			"reasoning": true,
+			"input": [
+				"text",
+				"image"
+			],
+			"cost": {
+				"input": 2,
+				"output": 8,
+				"cacheRead": 0.3,
+				"cacheWrite": 0
+			},
+			"contextWindow": 262144,
+			"maxTokens": 32768,
+			"thinking": {
+				"mode": "effort",
+				"efforts": [
+					"minimal",
+					"low",
+					"medium",
+					"high",
+					"xhigh"
+				],
+				"effortMap": {
+					"minimal": "none"
+				}
+			}
+		},
 		"kimi-k2.7-code": {
 			"id": "kimi-k2.7-code",
 			"name": "Kimi K2.7 Code",
@@ -14980,6 +15039,39 @@
 				}
 			}
 		},
+		"kimi-k2.7-code-fast": {
+			"id": "kimi-k2.7-code-fast",
+			"name": "Kimi K2.7 Code Fast",
+			"api": "openai-completions",
+			"provider": "fireworks",
+			"baseUrl": "https://api.fireworks.ai/inference/v1",
+			"reasoning": true,
+			"input": [
+				"text",
+				"image"
+			],
+			"cost": {
+				"input": 1.9,
+				"output": 8,
+				"cacheRead": 0.38,
+				"cacheWrite": 0
+			},
+			"contextWindow": 262144,
+			"maxTokens": 32768,
+			"thinking": {
+				"mode": "effort",
+				"efforts": [
+					"minimal",
+					"low",
+					"medium",
+					"high",
+					"xhigh"
+				],
+				"effortMap": {
+					"minimal": "none"
+				}
+			}
+		},
 		"minimax-m2.5": {
 			"id": "minimax-m2.5",
 			"name": "MiniMax M2.5",
@@ -20276,11 +20368,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -30625,11 +30715,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -30655,11 +30743,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -30703,11 +30789,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -30752,11 +30836,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -30781,11 +30863,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -31172,7 +31252,7 @@
 	"kimi-code": {
 		"kimi-for-coding": {
 			"id": "kimi-for-coding",
-			"name": "Kimi For Coding",
+			"name": "K2.7 Code",
 			"api": "openai-completions",
 			"provider": "kimi-code",
 			"baseUrl": "https://api.kimi.com/coding/v1",
@@ -49119,11 +49199,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				],
 				"effortRouting": {
 					"off": "xiaomi/mimo-v2-flash",
@@ -49183,11 +49261,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				],
 				"effortRouting": {
 					"off": "xiaomi/mimo-v2-flash-original",
@@ -49248,11 +49324,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -49277,11 +49351,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -49307,11 +49379,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -49336,11 +49406,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -56842,11 +56910,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			},
 			"compat": {
@@ -56874,11 +56940,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			},
 			"compat": {
@@ -56910,11 +56974,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -56942,11 +57004,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -58575,11 +58635,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -58605,11 +58663,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -58634,11 +58690,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -58664,11 +58718,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -67414,7 +67466,6 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
 					"high"
@@ -67443,7 +67494,6 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
 					"high"
@@ -67471,7 +67521,6 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
 					"high"
@@ -67500,7 +67549,6 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
 					"high"
@@ -67528,7 +67576,6 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
 					"high"
@@ -72124,11 +72171,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -84061,11 +84106,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -84090,11 +84133,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -84120,11 +84161,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -84149,11 +84188,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -84179,11 +84216,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -84208,11 +84243,9 @@
 			"thinking": {
 				"mode": "effort",
 				"efforts": [
-					"minimal",
 					"low",
 					"medium",
-					"high",
-					"xhigh"
+					"high"
 				]
 			}
 		},
@@ -84637,6 +84670,39 @@
 		}
 	},
 	"zhipu-coding-plan": {
+		"glm-4.5": {
+			"id": "glm-4.5",
+			"name": "glm-4.5",
+			"api": "openai-completions",
+			"provider": "zhipu-coding-plan",
+			"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 131072,
+			"maxTokens": 98304,
+			"thinking": {
+				"mode": "effort",
+				"efforts": [
+					"minimal",
+					"low",
+					"medium",
+					"high"
+				]
+			},
+			"compat": {
+				"thinkingFormat": "zai",
+				"reasoningContentField": "reasoning_content",
+				"supportsDeveloperRole": false
+			}
+		},
 		"glm-4.5-air": {
 			"id": "glm-4.5-air",
 			"name": "GLM-4.5-Air",
@@ -84670,6 +84736,39 @@
 				]
 			}
 		},
+		"glm-4.6": {
+			"id": "glm-4.6",
+			"name": "glm-4.6",
+			"api": "openai-completions",
+			"provider": "zhipu-coding-plan",
+			"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 202752,
+			"maxTokens": 131072,
+			"thinking": {
+				"mode": "effort",
+				"efforts": [
+					"minimal",
+					"low",
+					"medium",
+					"high"
+				]
+			},
+			"compat": {
+				"thinkingFormat": "zai",
+				"reasoningContentField": "reasoning_content",
+				"supportsDeveloperRole": false
+			}
+		},
 		"glm-4.6v": {
 			"id": "glm-4.6v",
 			"name": "GLM-4.6V",
@@ -84737,6 +84836,39 @@
 				]
 			}
 		},
+		"glm-5": {
+			"id": "glm-5",
+			"name": "GLM-5",
+			"api": "openai-completions",
+			"provider": "zhipu-coding-plan",
+			"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
+			"reasoning": true,
+			"input": [
+				"text"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 204800,
+			"maxTokens": 131072,
+			"thinking": {
+				"mode": "effort",
+				"efforts": [
+					"minimal",
+					"low",
+					"medium",
+					"high"
+				]
+			},
+			"compat": {
+				"thinkingFormat": "zai",
+				"reasoningContentField": "reasoning_content",
+				"supportsDeveloperRole": false
+			}
+		},
 		"glm-5-turbo": {
 			"id": "glm-5-turbo",
 			"name": "GLM-5-Turbo",

package/src/provider-models/openai-compat.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import {
 	type OpenAICompatibleModelRecord,
 } from "../discovery/openai-compatible";
 import { Effort } from "../effort";
-import { toFireworksPublicModelId } from "../fireworks-model-id";
+import { FIREWORKS_FAST_SUFFIX, toFireworksPublicModelId } from "../fireworks-model-id";
 import { isGlmVisionModelId, isGrokReasoningEffortCapable, isReasoningGlmModelId } from "../identity/family";
 import type { ModelManagerOptions } from "../model-manager";
 import { getBundledModels } from "../models";
@@ -1258,6 +1258,51 @@ export function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | n
 	return isKimiK27CodeModelId(modelId) ? Math.min(candidate, KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS) : candidate;
 }
+/**
+ * Fireworks Fast variants we surface. Each inherits the base model's
+ * limits/modalities/thinking and overrides only the cost with the Standard-column
+ * Fast prices from the Serverless pricing table; `cacheWrite` stays 0 (Fireworks
+ * bills no cache-write). Derived from the bundled base entries so metadata stays
+ * in lockstep, and the runtime auto-falls back to the base id on a failed fast
+ * request. See https://docs.fireworks.ai/serverless/pricing.
+ *
+ * Fields per entry:
+ * - `base`: id of the bundled `fireworks` spec the variant is projected from.
+ * - `name`: display name given to the `<base>-fast` variant.
+ * - `cost`: `input` / `output` / `cacheRead` prices that replace the base cost.
+ *
+ * The values here match the corresponding `*-fast` entries in `models.json`;
+ * update both when a price changes.
+ */
+const FIREWORKS_FAST_VARIANT_SPECS: ReadonlyArray<{
+	base: string;
+	name: string;
+	cost: { input: number; output: number; cacheRead: number };
+}> = [
+	{ base: "kimi-k2.7-code", name: "Kimi K2.7 Code Fast", cost: { input: 1.9, output: 8, cacheRead: 0.38 } },
+	{ base: "kimi-k2.6", name: "Kimi K2.6 Fast", cost: { input: 2, output: 8, cacheRead: 0.3 } },
+	{ base: "glm-5.1", name: "GLM-5.1 Fast", cost: { input: 2.8, output: 8.8, cacheRead: 0.52 } },
+];
+/**
+ * Build the Fireworks Fast seed by projecting each base bundled spec into a
+ * `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
+ * appear in the serverless control-plane list, so discovery cannot surface
+ * them) and deduped behind any identical previous-snapshot entry.
+ *
+ * A variant whose base id is missing from the bundled `fireworks` map is
+ * skipped without an error, so the result may be shorter than
+ * `FIREWORKS_FAST_VARIANT_SPECS`.
+ *
+ * @returns One spec per variant whose base was found, in declaration order.
+ * Each is a shallow copy of its base with `id`, `name` and `cost` replaced.
+ */
+export function buildFireworksFastSeed(): ModelSpec<"openai-completions">[] {
+	const bundled = createBundledReferenceMap<"openai-completions">("fireworks");
+	const seeds: ModelSpec<"openai-completions">[] = [];
+	for (const variant of FIREWORKS_FAST_VARIANT_SPECS) {
+		const base = bundled.get(variant.base);
+		if (!base) continue; // base spec not bundled: drop this variant silently
+		seeds.push({
+			...base, // shallow spread: nested objects on the base spec are shared by reference with the seed
+			id: `${variant.base}${FIREWORKS_FAST_SUFFIX}`, // public `<base>-fast` id
+			name: variant.name,
+			cost: {
+				input: variant.cost.input,
+				output: variant.cost.output,
+				cacheRead: variant.cost.cacheRead,
+				cacheWrite: 0, // always 0 for Fast variants, regardless of the base spec's value
+			},
+		});
+	}
+	return seeds;
+}
 /**
  * Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
  * DeepSeek-native binary `thinking` toggle when both are present.