@oh-my-pi/pi-catalog 16.1.6 → 16.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.1.8] - 2026-06-20
6
+
7
+ ### Fixed
8
+
9
+ - Fixed Fireworks-hosted Qwen turns (e.g. `fireworks/qwen3.7-plus`) failing with `400 Extra inputs are not permitted, field: 'enable_thinking'`. Fireworks serves Qwen3 with controllable thinking via OpenAI-style `reasoning_effort` and rejects the top-level `enable_thinking` boolean that Alibaba DashScope expects; `buildOpenAICompat` was selecting `thinkingFormat: "qwen"` from the `qwen` id pattern regardless of host. Fireworks-hosted Qwen models now resolve to `thinkingFormat: "openai"`.
10
+ - Fixed MiMo models on OpenAI-compatible gateways to expose only the accepted `low`, `medium`, and `high` reasoning tiers and to map unsupported raw `minimal`/`xhigh` requests to the nearest accepted wire values (`minimal` → `low`, `xhigh` → `high`). ([#2864](https://github.com/can1357/oh-my-pi/issues/2864))
11
+
12
+ ## [16.1.7] - 2026-06-20
13
+
14
+ ### Fixed
15
+
16
+ - Fixed the MiniMax-M3 catalog context window for the MiniMax Coding/Token Plan providers `minimax-code` and `minimax-code-cn` to report the documented 1M long-context tier instead of the upstream 512K pricing boundary; the previous patch only covered `minimax`/`minimax-cn`, so the Coding Plan picker still showed 512K in the status bar ([#3097](https://github.com/can1357/oh-my-pi/issues/3097)).
17
+
5
18
  ## [16.1.4] - 2026-06-19
6
19
 
7
20
  ### Fixed
@@ -8,3 +8,16 @@ export declare function toFireworksWireModelId(modelId: string): string;
8
8
  */
9
9
  export declare function toFirepassPublicModelId(modelId: string): string;
10
10
  export declare function toFirepassWireModelId(modelId: string): string;
11
+ /**
12
+ * Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
13
+ * higher-throughput route (100+ tok/s) exposed under a dedicated router id
14
+ * (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
15
+ * higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
16
+ * translate it to the router wire form at request time (compat
17
+ * `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
18
+ */
19
+ export declare const FIREWORKS_FAST_SUFFIX = "-fast";
20
+ /** True for a Fireworks public model id that selects the Fast serving path. */
21
+ export declare function isFireworksFastModelId(modelId: string): boolean;
22
+ /** Strip the Fast suffix to recover the base (Standard-tier) model id. */
23
+ export declare function toFireworksBaseModelId(modelId: string): string;
@@ -179,6 +179,13 @@ export declare const KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS = 32768;
179
179
  export declare function isKimiK27CodeModelId(modelId: string): boolean;
180
180
  export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number): number;
181
181
  export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | null): number | null;
182
+ /**
183
+ * Build the Fireworks Fast seed by projecting each base bundled spec into a
184
+ * `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
185
+ * appear in the serverless control-plane list, so discovery cannot surface
186
+ * them) and deduped behind any identical previous-snapshot entry.
187
+ */
188
+ export declare function buildFireworksFastSeed(): ModelSpec<"openai-completions">[];
182
189
  /**
183
190
  * Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
184
191
  * DeepSeek-native binary `thinking` toggle when both are present.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-catalog",
4
- "version": "16.1.6",
4
+ "version": "16.1.8",
5
5
  "description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -34,12 +34,12 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@bufbuild/protobuf": "^2.12.0",
37
- "@oh-my-pi/pi-utils": "16.1.6",
37
+ "@oh-my-pi/pi-utils": "16.1.8",
38
38
  "arktype": "^2.2.0",
39
39
  "zod": "^4"
40
40
  },
41
41
  "devDependencies": {
42
- "@oh-my-pi/pi-ai": "16.1.6",
42
+ "@oh-my-pi/pi-ai": "16.1.8",
43
43
  "@types/bun": "^1.3.14"
44
44
  },
45
45
  "engines": {
@@ -7,6 +7,7 @@
7
7
  * complete alternate views. Request handlers read `model.compat` fields and
8
8
  * never detect, resolve, or allocate.
9
9
  */
10
+ import { isFireworksFastModelId } from "../fireworks-model-id";
10
11
  import { hostMatchesUrl, modelMatchesHost } from "../hosts";
11
12
  import {
12
13
  isAnthropicNamespacedModelId,
@@ -130,6 +131,16 @@ const OPENCODE_WHEN_THINKING: NonNullable<OpenAICompat["whenThinking"]> = {
130
131
  reasoningContentField: "reasoning_content",
131
132
  };
132
133
 
134
+ const MIMO_REASONING_EFFORT_MAP: NonNullable<OpenAICompat["reasoningEffortMap"]> = {
135
+ minimal: "low",
136
+ xhigh: "high",
137
+ };
138
+
139
+ function mergeMimoReasoningEffortMap(compat: ResolvedOpenAISharedCompat, enabled: boolean): void {
140
+ if (!enabled) return;
141
+ compat.reasoningEffortMap = { ...MIMO_REASONING_EFFORT_MAP, ...compat.reasoningEffortMap };
142
+ }
143
+
133
144
  function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
134
145
  if (
135
146
  provider === "openai" ||
@@ -184,6 +195,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
184
195
  const lowerName = (spec.name ?? "").toLowerCase();
185
196
  const isXiaomiHost = modelMatchesHost(hostModel, "xiaomi");
186
197
  const isXiaomiMimo = isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
198
+ const isMimoReasoningEffortModel =
199
+ !isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
187
200
  // OpenCode Zen's `big-pickle` is a DeepSeek reasoning alias; the upstream
188
201
  // 400s come from DeepSeek and require exact reasoning_content replay.
189
202
  const isOpenCodeDeepseekAlias =
@@ -238,17 +251,21 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
238
251
  const isGroqHost = modelMatchesHost(hostModel, "groq");
239
252
  const isCopilotHost = provider === "github-copilot";
240
253
  const isZenmuxHost = provider === "zenmux";
241
- // Endpoints that MUST receive a single system block. MiniMax's OpenAI
242
- // endpoint returns error 2013 on multiple system messages; Alibaba's
243
- // Dashscope and Qwen Portal serve Qwen models whose chat template
244
- // raises "System message must be at the beginning" if any system
245
- // message appears past index 0.
254
+ // Endpoints/models that MUST receive a single system block. MiniMax's OpenAI
255
+ // endpoint returns error 2013 on multiple system messages; the Qwen 3.5+ chat
256
+ // template raises "System message must be at the beginning" / 500s with an
257
+ // internal_server_error when any system block appears past index 0. That
258
+ // template ships with the weights, so every Qwen-serving vLLM/SGLang host
259
+ // hits it — confirmed on Alibaba Dashscope, Qwen Portal, and Fireworks
260
+ // (`fireworks/qwen3.7-plus` 500'd on two leading system blocks). Gate on the
261
+ // Qwen family itself, not per-host: coalescing only trades away KV-cache reuse.
246
262
  const isMiniMaxHost = modelMatchesHost(hostModel, "minimax");
247
263
  const isQwenPortal = modelMatchesHost(hostModel, "qwenPortal");
248
264
  const supportsMultipleSystemMessagesDefault =
249
265
  !isMiniMaxHost &&
250
266
  !isAlibaba &&
251
267
  !isQwenPortal &&
268
+ !isQwen &&
252
269
  (isOpenAIHost ||
253
270
  isAzureHost ||
254
271
  isOpenRouter ||
@@ -276,8 +293,12 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
276
293
  ? DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS
277
294
  : undefined;
278
295
 
296
+ // Fireworks "Fast" variants (`<id>-fast`) are served from the router
297
+ // namespace (`accounts/fireworks/routers/<id>-fast`), like Fire Pass, rather
298
+ // than the `models/` namespace the rest of the `fireworks` provider uses.
299
+ const isFireworksFastRouter = provider === "fireworks" && isFireworksFastModelId(spec.id);
279
300
  const wireModelIdMode: ResolvedOpenAISharedCompat["wireModelIdMode"] =
280
- provider === "firepass"
301
+ provider === "firepass" || isFireworksFastRouter
281
302
  ? "firepass"
282
303
  : provider === "fireworks"
283
304
  ? "fireworks"
@@ -291,9 +312,11 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
291
312
  ? "openrouter"
292
313
  : isQwen && isNvidiaNim
293
314
  ? "qwen-chat-template"
294
- : isAlibaba || isQwen
295
- ? "qwen"
296
- : "openai";
315
+ : isQwen && isFireworks
316
+ ? "openai"
317
+ : isAlibaba || isQwen
318
+ ? "qwen"
319
+ : "openai";
297
320
 
298
321
  const compat: ResolvedOpenAICompat = {
299
322
  supportsStore: !isNonStandard,
@@ -308,7 +331,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
308
331
  supportsReasoningEffort: !isGrok && !isXiaomiMimo && (!(isZai || isZhipu) || supportsZaiReasoningEffort),
309
332
  // GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
310
333
  supportsReasoningParams: provider !== "github-copilot",
311
- reasoningEffortMap: {},
334
+ reasoningEffortMap: isMimoReasoningEffortModel ? MIMO_REASONING_EFFORT_MAP : {},
312
335
  supportsUsageInStreaming: !isCerebras,
313
336
  // pi-ai's thinking-loop guard is gemini-only; default the flag from the
314
337
  // family classifier so OpenAI-compat proxies serving Gemini are covered.
@@ -400,6 +423,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
400
423
  compat.omitReasoningEffort = true;
401
424
  }
402
425
  mergeOllamaReasoningEffortMap(compat, provider, spec.reasoning);
426
+ mergeMimoReasoningEffortMap(compat, isMimoReasoningEffortModel);
403
427
 
404
428
  const whenThinkingPolicy =
405
429
  spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
@@ -413,6 +437,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
413
437
  variant.omitReasoningEffort = true;
414
438
  }
415
439
  mergeOllamaReasoningEffortMap(variant, provider, spec.reasoning);
440
+ mergeMimoReasoningEffortMap(variant, isMimoReasoningEffortModel);
416
441
  compat.whenThinking = variant;
417
442
  }
418
443
 
@@ -28,3 +28,23 @@ export function toFirepassWireModelId(modelId: string): string {
28
28
  const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
29
29
  return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
30
30
  }
31
+
32
+ /**
33
+ * Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
34
+ * higher-throughput route (100+ tok/s) exposed under a dedicated router id
35
+ * (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
36
+ * higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
37
+ * translate it to the router wire form at request time (compat
38
+ * `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
39
+ */
40
+ export const FIREWORKS_FAST_SUFFIX = "-fast";
41
+
42
+ /** True for a Fireworks public model id that selects the Fast serving path. */
43
+ export function isFireworksFastModelId(modelId: string): boolean {
44
+ return modelId.endsWith(FIREWORKS_FAST_SUFFIX);
45
+ }
46
+
47
+ /** Strip the Fast suffix to recover the base (Standard-tier) model id. */
48
+ export function toFireworksBaseModelId(modelId: string): string {
49
+ return modelId.endsWith(FIREWORKS_FAST_SUFFIX) ? modelId.slice(0, -FIREWORKS_FAST_SUFFIX.length) : modelId;
50
+ }
@@ -24,6 +24,7 @@ import {
24
24
  findThinkingVariantToken,
25
25
  isDeepseekModelIdOrName,
26
26
  isGlm52ReasoningEffortModelId,
27
+ isMimoModelIdOrName,
27
28
  isMinimaxM2FamilyModelId,
28
29
  isMinimaxM3FamilyModelId,
29
30
  isOpenAIGptOssModelId,
@@ -89,6 +90,10 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
89
90
  const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
90
91
  [Effort.XHigh]: "max",
91
92
  };
93
+ const MIMO_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
94
+ [Effort.Minimal]: "low",
95
+ [Effort.XHigh]: "high",
96
+ };
92
97
 
93
98
  /**
94
99
  * Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
@@ -296,7 +301,10 @@ function getModelDefinedEfforts<TApi extends Api>(
296
301
  return GLM_52_HIGH_MAX_REASONING_EFFORTS;
297
302
  }
298
303
  }
299
- return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
304
+ return isOpenAICompatReasoningApi(spec.api) &&
305
+ (isMinimaxM2FamilyModelId(spec.id) ||
306
+ isOpenAIGptOssModelId(spec.id) ||
307
+ isOpenAICompatMimoReasoningEffortModel(spec, compat))
300
308
  ? LOW_MEDIUM_HIGH_REASONING_EFFORTS
301
309
  : undefined;
302
310
  }
@@ -309,6 +317,19 @@ function isMinimaxReasoningModelOnAnthropicEndpoint<TApi extends Api>(spec: Mode
309
317
  return spec.api === "anthropic-messages" && (isMinimaxM2FamilyModelId(spec.id) || isMinimaxM3FamilyModelId(spec.id));
310
318
  }
311
319
 
320
+ function isOpenAICompatMimoReasoningEffortModel<TApi extends Api>(
321
+ spec: ModelSpec<TApi>,
322
+ compat: CompatOf<TApi>,
323
+ ): boolean {
324
+ if (!isOpenAICompatReasoningApi(spec.api)) return false;
325
+ if (!isMimoModelIdOrName(spec.id) && !isMimoModelIdOrName(spec.name ?? "")) return false;
326
+ const resolved = compat as ResolvedOpenAICompat | undefined;
327
+ return (
328
+ (resolved?.thinkingFormat === "openai" || resolved?.thinkingFormat === "openrouter") &&
329
+ resolved.supportsReasoningEffort
330
+ );
331
+ }
332
+
312
333
  function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
313
334
  if (compat === undefined || !("reasoningEffortMap" in compat)) {
314
335
  return undefined;
@@ -364,6 +385,8 @@ function inferDetectedEffortMap<TApi extends Api>(
364
385
  map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
365
386
  } else if (isDeepseekReasoningModel(spec)) {
366
387
  map = DEEPSEEK_REASONING_EFFORT_MAP;
388
+ } else if (isOpenAICompatMimoReasoningEffortModel(spec, compat)) {
389
+ map = MIMO_REASONING_EFFORT_MAP;
367
390
  } else if (modelMatchesHost(spec, "openrouter")) {
368
391
  map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
369
392
  } else if (modelMatchesHost(spec, "fireworks")) {
@@ -485,6 +508,8 @@ function inferAnthropicSupportedEfforts<TApi extends Api>(
485
508
  }
486
509
 
487
510
  function inferFallbackEfforts<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): readonly Effort[] {
511
+ const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
512
+ if (modelDefinedEfforts !== undefined) return modelDefinedEfforts;
488
513
  if (isMinimaxReasoningModelOnAnthropicEndpoint(spec)) {
489
514
  return LOW_MEDIUM_HIGH_REASONING_EFFORTS;
490
515
  }
package/src/models.json CHANGED
@@ -7208,11 +7208,9 @@
7208
7208
  "thinking": {
7209
7209
  "mode": "effort",
7210
7210
  "efforts": [
7211
- "minimal",
7212
7211
  "low",
7213
7212
  "medium",
7214
- "high",
7215
- "xhigh"
7213
+ "high"
7216
7214
  ]
7217
7215
  }
7218
7216
  },
@@ -7238,11 +7236,9 @@
7238
7236
  "thinking": {
7239
7237
  "mode": "effort",
7240
7238
  "efforts": [
7241
- "minimal",
7242
7239
  "low",
7243
7240
  "medium",
7244
- "high",
7245
- "xhigh"
7241
+ "high"
7246
7242
  ]
7247
7243
  }
7248
7244
  },
@@ -7267,11 +7263,9 @@
7267
7263
  "thinking": {
7268
7264
  "mode": "effort",
7269
7265
  "efforts": [
7270
- "minimal",
7271
7266
  "low",
7272
7267
  "medium",
7273
- "high",
7274
- "xhigh"
7268
+ "high"
7275
7269
  ]
7276
7270
  }
7277
7271
  }
@@ -14801,6 +14795,38 @@
14801
14795
  }
14802
14796
  }
14803
14797
  },
14798
+ "glm-5.1-fast": {
14799
+ "id": "glm-5.1-fast",
14800
+ "name": "GLM-5.1 Fast",
14801
+ "api": "openai-completions",
14802
+ "provider": "fireworks",
14803
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
14804
+ "reasoning": true,
14805
+ "input": [
14806
+ "text"
14807
+ ],
14808
+ "cost": {
14809
+ "input": 2.8,
14810
+ "output": 8.8,
14811
+ "cacheRead": 0.52,
14812
+ "cacheWrite": 0
14813
+ },
14814
+ "contextWindow": 202752,
14815
+ "maxTokens": 131072,
14816
+ "thinking": {
14817
+ "mode": "effort",
14818
+ "efforts": [
14819
+ "minimal",
14820
+ "low",
14821
+ "medium",
14822
+ "high",
14823
+ "xhigh"
14824
+ ],
14825
+ "effortMap": {
14826
+ "minimal": "none"
14827
+ }
14828
+ }
14829
+ },
14804
14830
  "glm-5.2": {
14805
14831
  "id": "glm-5.2",
14806
14832
  "name": "GLM-5.2",
@@ -14947,6 +14973,39 @@
14947
14973
  }
14948
14974
  }
14949
14975
  },
14976
+ "kimi-k2.6-fast": {
14977
+ "id": "kimi-k2.6-fast",
14978
+ "name": "Kimi K2.6 Fast",
14979
+ "api": "openai-completions",
14980
+ "provider": "fireworks",
14981
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
14982
+ "reasoning": true,
14983
+ "input": [
14984
+ "text",
14985
+ "image"
14986
+ ],
14987
+ "cost": {
14988
+ "input": 2,
14989
+ "output": 8,
14990
+ "cacheRead": 0.3,
14991
+ "cacheWrite": 0
14992
+ },
14993
+ "contextWindow": 262144,
14994
+ "maxTokens": 32768,
14995
+ "thinking": {
14996
+ "mode": "effort",
14997
+ "efforts": [
14998
+ "minimal",
14999
+ "low",
15000
+ "medium",
15001
+ "high",
15002
+ "xhigh"
15003
+ ],
15004
+ "effortMap": {
15005
+ "minimal": "none"
15006
+ }
15007
+ }
15008
+ },
14950
15009
  "kimi-k2.7-code": {
14951
15010
  "id": "kimi-k2.7-code",
14952
15011
  "name": "Kimi K2.7 Code",
@@ -14980,6 +15039,39 @@
14980
15039
  }
14981
15040
  }
14982
15041
  },
15042
+ "kimi-k2.7-code-fast": {
15043
+ "id": "kimi-k2.7-code-fast",
15044
+ "name": "Kimi K2.7 Code Fast",
15045
+ "api": "openai-completions",
15046
+ "provider": "fireworks",
15047
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
15048
+ "reasoning": true,
15049
+ "input": [
15050
+ "text",
15051
+ "image"
15052
+ ],
15053
+ "cost": {
15054
+ "input": 1.9,
15055
+ "output": 8,
15056
+ "cacheRead": 0.38,
15057
+ "cacheWrite": 0
15058
+ },
15059
+ "contextWindow": 262144,
15060
+ "maxTokens": 32768,
15061
+ "thinking": {
15062
+ "mode": "effort",
15063
+ "efforts": [
15064
+ "minimal",
15065
+ "low",
15066
+ "medium",
15067
+ "high",
15068
+ "xhigh"
15069
+ ],
15070
+ "effortMap": {
15071
+ "minimal": "none"
15072
+ }
15073
+ }
15074
+ },
14983
15075
  "minimax-m2.5": {
14984
15076
  "id": "minimax-m2.5",
14985
15077
  "name": "MiniMax M2.5",
@@ -17786,13 +17878,7 @@
17786
17878
  "low",
17787
17879
  "medium",
17788
17880
  "high"
17789
- ],
17790
- "effortRouting": {
17791
- "minimal": "claude-opus-4-6-thinking",
17792
- "low": "claude-opus-4-6-thinking",
17793
- "medium": "claude-opus-4-6-thinking",
17794
- "high": "claude-opus-4-6-thinking"
17795
- }
17881
+ ]
17796
17882
  }
17797
17883
  },
17798
17884
  "claude-sonnet-4-5": {
@@ -17851,7 +17937,6 @@
17851
17937
  },
17852
17938
  "contextWindow": 250000,
17853
17939
  "maxTokens": 64000,
17854
- "requestModelId": "claude-sonnet-4-6",
17855
17940
  "thinking": {
17856
17941
  "mode": "budget",
17857
17942
  "efforts": [
@@ -17859,14 +17944,7 @@
17859
17944
  "low",
17860
17945
  "medium",
17861
17946
  "high"
17862
- ],
17863
- "effortRouting": {
17864
- "off": "claude-sonnet-4-6",
17865
- "minimal": "claude-sonnet-4-6",
17866
- "low": "claude-sonnet-4-6",
17867
- "medium": "claude-sonnet-4-6",
17868
- "high": "claude-sonnet-4-6"
17869
- }
17947
+ ]
17870
17948
  }
17871
17949
  },
17872
17950
  "gemini-2.5-flash": {
@@ -20290,11 +20368,9 @@
20290
20368
  "thinking": {
20291
20369
  "mode": "effort",
20292
20370
  "efforts": [
20293
- "minimal",
20294
20371
  "low",
20295
20372
  "medium",
20296
- "high",
20297
- "xhigh"
20373
+ "high"
20298
20374
  ]
20299
20375
  }
20300
20376
  },
@@ -30639,11 +30715,9 @@
30639
30715
  "thinking": {
30640
30716
  "mode": "effort",
30641
30717
  "efforts": [
30642
- "minimal",
30643
30718
  "low",
30644
30719
  "medium",
30645
- "high",
30646
- "xhigh"
30720
+ "high"
30647
30721
  ]
30648
30722
  }
30649
30723
  },
@@ -30669,11 +30743,9 @@
30669
30743
  "thinking": {
30670
30744
  "mode": "effort",
30671
30745
  "efforts": [
30672
- "minimal",
30673
30746
  "low",
30674
30747
  "medium",
30675
- "high",
30676
- "xhigh"
30748
+ "high"
30677
30749
  ]
30678
30750
  }
30679
30751
  },
@@ -30717,11 +30789,9 @@
30717
30789
  "thinking": {
30718
30790
  "mode": "effort",
30719
30791
  "efforts": [
30720
- "minimal",
30721
30792
  "low",
30722
30793
  "medium",
30723
- "high",
30724
- "xhigh"
30794
+ "high"
30725
30795
  ]
30726
30796
  }
30727
30797
  },
@@ -30766,11 +30836,9 @@
30766
30836
  "thinking": {
30767
30837
  "mode": "effort",
30768
30838
  "efforts": [
30769
- "minimal",
30770
30839
  "low",
30771
30840
  "medium",
30772
- "high",
30773
- "xhigh"
30841
+ "high"
30774
30842
  ]
30775
30843
  }
30776
30844
  },
@@ -30795,11 +30863,9 @@
30795
30863
  "thinking": {
30796
30864
  "mode": "effort",
30797
30865
  "efforts": [
30798
- "minimal",
30799
30866
  "low",
30800
30867
  "medium",
30801
- "high",
30802
- "xhigh"
30868
+ "high"
30803
30869
  ]
30804
30870
  }
30805
30871
  },
@@ -31186,7 +31252,7 @@
31186
31252
  "kimi-code": {
31187
31253
  "kimi-for-coding": {
31188
31254
  "id": "kimi-for-coding",
31189
- "name": "Kimi For Coding",
31255
+ "name": "K2.7 Code",
31190
31256
  "api": "openai-completions",
31191
31257
  "provider": "kimi-code",
31192
31258
  "baseUrl": "https://api.kimi.com/coding/v1",
@@ -32139,7 +32205,7 @@
32139
32205
  "cacheRead": 0,
32140
32206
  "cacheWrite": 0
32141
32207
  },
32142
- "contextWindow": 512000,
32208
+ "contextWindow": 1000000,
32143
32209
  "maxTokens": 128000,
32144
32210
  "compat": {
32145
32211
  "supportsStore": false,
@@ -32448,7 +32514,7 @@
32448
32514
  "cacheRead": 0,
32449
32515
  "cacheWrite": 0
32450
32516
  },
32451
- "contextWindow": 512000,
32517
+ "contextWindow": 1000000,
32452
32518
  "maxTokens": 128000,
32453
32519
  "compat": {
32454
32520
  "supportsStore": false,
@@ -35908,6 +35974,44 @@
35908
35974
  "contextWindow": null,
35909
35975
  "maxTokens": null
35910
35976
  },
35977
+ "crofai/greg-2-super": {
35978
+ "id": "crofai/greg-2-super",
35979
+ "name": "crofai/greg-2-super",
35980
+ "api": "openai-completions",
35981
+ "provider": "nanogpt",
35982
+ "baseUrl": "https://nano-gpt.com/api/v1",
35983
+ "reasoning": false,
35984
+ "input": [
35985
+ "text"
35986
+ ],
35987
+ "cost": {
35988
+ "input": 0,
35989
+ "output": 0,
35990
+ "cacheRead": 0,
35991
+ "cacheWrite": 0
35992
+ },
35993
+ "contextWindow": null,
35994
+ "maxTokens": null
35995
+ },
35996
+ "crofai/greg-2-ultra": {
35997
+ "id": "crofai/greg-2-ultra",
35998
+ "name": "crofai/greg-2-ultra",
35999
+ "api": "openai-completions",
36000
+ "provider": "nanogpt",
36001
+ "baseUrl": "https://nano-gpt.com/api/v1",
36002
+ "reasoning": false,
36003
+ "input": [
36004
+ "text"
36005
+ ],
36006
+ "cost": {
36007
+ "input": 0,
36008
+ "output": 0,
36009
+ "cacheRead": 0,
36010
+ "cacheWrite": 0
36011
+ },
36012
+ "contextWindow": null,
36013
+ "maxTokens": null
36014
+ },
35911
36015
  "CrucibleLab/L3.3-70B-Loki-V2.0": {
35912
36016
  "id": "CrucibleLab/L3.3-70B-Loki-V2.0",
35913
36017
  "name": "CrucibleLab/L3.3-70B-Loki-V2.0",
@@ -49095,11 +49199,9 @@
49095
49199
  "thinking": {
49096
49200
  "mode": "effort",
49097
49201
  "efforts": [
49098
- "minimal",
49099
49202
  "low",
49100
49203
  "medium",
49101
- "high",
49102
- "xhigh"
49204
+ "high"
49103
49205
  ],
49104
49206
  "effortRouting": {
49105
49207
  "off": "xiaomi/mimo-v2-flash",
@@ -49159,11 +49261,9 @@
49159
49261
  "thinking": {
49160
49262
  "mode": "effort",
49161
49263
  "efforts": [
49162
- "minimal",
49163
49264
  "low",
49164
49265
  "medium",
49165
- "high",
49166
- "xhigh"
49266
+ "high"
49167
49267
  ],
49168
49268
  "effortRouting": {
49169
49269
  "off": "xiaomi/mimo-v2-flash-original",
@@ -49224,11 +49324,9 @@
49224
49324
  "thinking": {
49225
49325
  "mode": "effort",
49226
49326
  "efforts": [
49227
- "minimal",
49228
49327
  "low",
49229
49328
  "medium",
49230
- "high",
49231
- "xhigh"
49329
+ "high"
49232
49330
  ]
49233
49331
  }
49234
49332
  },
@@ -49253,11 +49351,9 @@
49253
49351
  "thinking": {
49254
49352
  "mode": "effort",
49255
49353
  "efforts": [
49256
- "minimal",
49257
49354
  "low",
49258
49355
  "medium",
49259
- "high",
49260
- "xhigh"
49356
+ "high"
49261
49357
  ]
49262
49358
  }
49263
49359
  },
@@ -49283,11 +49379,9 @@
49283
49379
  "thinking": {
49284
49380
  "mode": "effort",
49285
49381
  "efforts": [
49286
- "minimal",
49287
49382
  "low",
49288
49383
  "medium",
49289
- "high",
49290
- "xhigh"
49384
+ "high"
49291
49385
  ]
49292
49386
  }
49293
49387
  },
@@ -49312,11 +49406,9 @@
49312
49406
  "thinking": {
49313
49407
  "mode": "effort",
49314
49408
  "efforts": [
49315
- "minimal",
49316
49409
  "low",
49317
49410
  "medium",
49318
- "high",
49319
- "xhigh"
49411
+ "high"
49320
49412
  ]
49321
49413
  }
49322
49414
  },
@@ -56818,11 +56910,9 @@
56818
56910
  "thinking": {
56819
56911
  "mode": "effort",
56820
56912
  "efforts": [
56821
- "minimal",
56822
56913
  "low",
56823
56914
  "medium",
56824
- "high",
56825
- "xhigh"
56915
+ "high"
56826
56916
  ]
56827
56917
  },
56828
56918
  "compat": {
@@ -56850,11 +56940,9 @@
56850
56940
  "thinking": {
56851
56941
  "mode": "effort",
56852
56942
  "efforts": [
56853
- "minimal",
56854
56943
  "low",
56855
56944
  "medium",
56856
- "high",
56857
- "xhigh"
56945
+ "high"
56858
56946
  ]
56859
56947
  },
56860
56948
  "compat": {
@@ -56886,11 +56974,9 @@
56886
56974
  "thinking": {
56887
56975
  "mode": "effort",
56888
56976
  "efforts": [
56889
- "minimal",
56890
56977
  "low",
56891
56978
  "medium",
56892
- "high",
56893
- "xhigh"
56979
+ "high"
56894
56980
  ]
56895
56981
  }
56896
56982
  },
@@ -56918,11 +57004,9 @@
56918
57004
  "thinking": {
56919
57005
  "mode": "effort",
56920
57006
  "efforts": [
56921
- "minimal",
56922
57007
  "low",
56923
57008
  "medium",
56924
- "high",
56925
- "xhigh"
57009
+ "high"
56926
57010
  ]
56927
57011
  }
56928
57012
  },
@@ -58551,11 +58635,9 @@
58551
58635
  "thinking": {
58552
58636
  "mode": "effort",
58553
58637
  "efforts": [
58554
- "minimal",
58555
58638
  "low",
58556
58639
  "medium",
58557
- "high",
58558
- "xhigh"
58640
+ "high"
58559
58641
  ]
58560
58642
  }
58561
58643
  },
@@ -58581,11 +58663,9 @@
58581
58663
  "thinking": {
58582
58664
  "mode": "effort",
58583
58665
  "efforts": [
58584
- "minimal",
58585
58666
  "low",
58586
58667
  "medium",
58587
- "high",
58588
- "xhigh"
58668
+ "high"
58589
58669
  ]
58590
58670
  }
58591
58671
  },
@@ -58610,11 +58690,9 @@
58610
58690
  "thinking": {
58611
58691
  "mode": "effort",
58612
58692
  "efforts": [
58613
- "minimal",
58614
58693
  "low",
58615
58694
  "medium",
58616
- "high",
58617
- "xhigh"
58695
+ "high"
58618
58696
  ]
58619
58697
  }
58620
58698
  },
@@ -58640,11 +58718,9 @@
58640
58718
  "thinking": {
58641
58719
  "mode": "effort",
58642
58720
  "efforts": [
58643
- "minimal",
58644
58721
  "low",
58645
58722
  "medium",
58646
- "high",
58647
- "xhigh"
58723
+ "high"
58648
58724
  ]
58649
58725
  }
58650
58726
  },
@@ -62930,13 +63006,13 @@
62930
63006
  "image"
62931
63007
  ],
62932
63008
  "cost": {
62933
- "input": 0.74,
62934
- "output": 3.5,
62935
- "cacheRead": 0.15,
63009
+ "input": 0.612,
63010
+ "output": 3.0690000000000004,
63011
+ "cacheRead": 0.1296,
62936
63012
  "cacheWrite": 0
62937
63013
  },
62938
63014
  "contextWindow": 262144,
62939
- "maxTokens": 16384,
63015
+ "maxTokens": 262144,
62940
63016
  "thinking": {
62941
63017
  "mode": "effort",
62942
63018
  "efforts": [
@@ -67390,7 +67466,6 @@
67390
67466
  "thinking": {
67391
67467
  "mode": "effort",
67392
67468
  "efforts": [
67393
- "minimal",
67394
67469
  "low",
67395
67470
  "medium",
67396
67471
  "high"
@@ -67419,7 +67494,6 @@
67419
67494
  "thinking": {
67420
67495
  "mode": "effort",
67421
67496
  "efforts": [
67422
- "minimal",
67423
67497
  "low",
67424
67498
  "medium",
67425
67499
  "high"
@@ -67447,7 +67521,6 @@
67447
67521
  "thinking": {
67448
67522
  "mode": "effort",
67449
67523
  "efforts": [
67450
- "minimal",
67451
67524
  "low",
67452
67525
  "medium",
67453
67526
  "high"
@@ -67476,7 +67549,6 @@
67476
67549
  "thinking": {
67477
67550
  "mode": "effort",
67478
67551
  "efforts": [
67479
- "minimal",
67480
67552
  "low",
67481
67553
  "medium",
67482
67554
  "high"
@@ -67504,7 +67576,6 @@
67504
67576
  "thinking": {
67505
67577
  "mode": "effort",
67506
67578
  "efforts": [
67507
- "minimal",
67508
67579
  "low",
67509
67580
  "medium",
67510
67581
  "high"
@@ -72100,11 +72171,9 @@
72100
72171
  "thinking": {
72101
72172
  "mode": "effort",
72102
72173
  "efforts": [
72103
- "minimal",
72104
72174
  "low",
72105
72175
  "medium",
72106
- "high",
72107
- "xhigh"
72176
+ "high"
72108
72177
  ]
72109
72178
  }
72110
72179
  },
@@ -72323,13 +72392,13 @@
72323
72392
  "text"
72324
72393
  ],
72325
72394
  "cost": {
72326
- "input": 1.75,
72327
- "output": 5.5,
72328
- "cacheRead": 0.325,
72395
+ "input": 1.4,
72396
+ "output": 4.4,
72397
+ "cacheRead": 0.26,
72329
72398
  "cacheWrite": 0
72330
72399
  },
72331
72400
  "contextWindow": 1000000,
72332
- "maxTokens": 24000,
72401
+ "maxTokens": 131072,
72333
72402
  "thinking": {
72334
72403
  "mode": "effort",
72335
72404
  "efforts": [
@@ -76427,7 +76496,7 @@
76427
76496
  "cost": {
76428
76497
  "input": 0.09,
76429
76498
  "output": 0.3,
76430
- "cacheRead": 0,
76499
+ "cacheRead": 0.02,
76431
76500
  "cacheWrite": 0.02
76432
76501
  },
76433
76502
  "contextWindow": 262114,
@@ -84037,11 +84106,9 @@
84037
84106
  "thinking": {
84038
84107
  "mode": "effort",
84039
84108
  "efforts": [
84040
- "minimal",
84041
84109
  "low",
84042
84110
  "medium",
84043
- "high",
84044
- "xhigh"
84111
+ "high"
84045
84112
  ]
84046
84113
  }
84047
84114
  },
@@ -84066,11 +84133,9 @@
84066
84133
  "thinking": {
84067
84134
  "mode": "effort",
84068
84135
  "efforts": [
84069
- "minimal",
84070
84136
  "low",
84071
84137
  "medium",
84072
- "high",
84073
- "xhigh"
84138
+ "high"
84074
84139
  ]
84075
84140
  }
84076
84141
  },
@@ -84096,11 +84161,9 @@
84096
84161
  "thinking": {
84097
84162
  "mode": "effort",
84098
84163
  "efforts": [
84099
- "minimal",
84100
84164
  "low",
84101
84165
  "medium",
84102
- "high",
84103
- "xhigh"
84166
+ "high"
84104
84167
  ]
84105
84168
  }
84106
84169
  },
@@ -84125,11 +84188,9 @@
84125
84188
  "thinking": {
84126
84189
  "mode": "effort",
84127
84190
  "efforts": [
84128
- "minimal",
84129
84191
  "low",
84130
84192
  "medium",
84131
- "high",
84132
- "xhigh"
84193
+ "high"
84133
84194
  ]
84134
84195
  }
84135
84196
  },
@@ -84155,11 +84216,9 @@
84155
84216
  "thinking": {
84156
84217
  "mode": "effort",
84157
84218
  "efforts": [
84158
- "minimal",
84159
84219
  "low",
84160
84220
  "medium",
84161
- "high",
84162
- "xhigh"
84221
+ "high"
84163
84222
  ]
84164
84223
  }
84165
84224
  },
@@ -84184,11 +84243,9 @@
84184
84243
  "thinking": {
84185
84244
  "mode": "effort",
84186
84245
  "efforts": [
84187
- "minimal",
84188
84246
  "low",
84189
84247
  "medium",
84190
- "high",
84191
- "xhigh"
84248
+ "high"
84192
84249
  ]
84193
84250
  }
84194
84251
  },
@@ -84613,6 +84670,39 @@
84613
84670
  }
84614
84671
  },
84615
84672
  "zhipu-coding-plan": {
84673
+ "glm-4.5": {
84674
+ "id": "glm-4.5",
84675
+ "name": "glm-4.5",
84676
+ "api": "openai-completions",
84677
+ "provider": "zhipu-coding-plan",
84678
+ "baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
84679
+ "reasoning": true,
84680
+ "input": [
84681
+ "text"
84682
+ ],
84683
+ "cost": {
84684
+ "input": 0,
84685
+ "output": 0,
84686
+ "cacheRead": 0,
84687
+ "cacheWrite": 0
84688
+ },
84689
+ "contextWindow": 131072,
84690
+ "maxTokens": 98304,
84691
+ "thinking": {
84692
+ "mode": "effort",
84693
+ "efforts": [
84694
+ "minimal",
84695
+ "low",
84696
+ "medium",
84697
+ "high"
84698
+ ]
84699
+ },
84700
+ "compat": {
84701
+ "thinkingFormat": "zai",
84702
+ "reasoningContentField": "reasoning_content",
84703
+ "supportsDeveloperRole": false
84704
+ }
84705
+ },
84616
84706
  "glm-4.5-air": {
84617
84707
  "id": "glm-4.5-air",
84618
84708
  "name": "GLM-4.5-Air",
@@ -84646,6 +84736,39 @@
84646
84736
  ]
84647
84737
  }
84648
84738
  },
84739
+ "glm-4.6": {
84740
+ "id": "glm-4.6",
84741
+ "name": "glm-4.6",
84742
+ "api": "openai-completions",
84743
+ "provider": "zhipu-coding-plan",
84744
+ "baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
84745
+ "reasoning": true,
84746
+ "input": [
84747
+ "text"
84748
+ ],
84749
+ "cost": {
84750
+ "input": 0,
84751
+ "output": 0,
84752
+ "cacheRead": 0,
84753
+ "cacheWrite": 0
84754
+ },
84755
+ "contextWindow": 202752,
84756
+ "maxTokens": 131072,
84757
+ "thinking": {
84758
+ "mode": "effort",
84759
+ "efforts": [
84760
+ "minimal",
84761
+ "low",
84762
+ "medium",
84763
+ "high"
84764
+ ]
84765
+ },
84766
+ "compat": {
84767
+ "thinkingFormat": "zai",
84768
+ "reasoningContentField": "reasoning_content",
84769
+ "supportsDeveloperRole": false
84770
+ }
84771
+ },
84649
84772
  "glm-4.6v": {
84650
84773
  "id": "glm-4.6v",
84651
84774
  "name": "GLM-4.6V",
@@ -84713,6 +84836,39 @@
84713
84836
  ]
84714
84837
  }
84715
84838
  },
84839
+ "glm-5": {
84840
+ "id": "glm-5",
84841
+ "name": "GLM-5",
84842
+ "api": "openai-completions",
84843
+ "provider": "zhipu-coding-plan",
84844
+ "baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
84845
+ "reasoning": true,
84846
+ "input": [
84847
+ "text"
84848
+ ],
84849
+ "cost": {
84850
+ "input": 0,
84851
+ "output": 0,
84852
+ "cacheRead": 0,
84853
+ "cacheWrite": 0
84854
+ },
84855
+ "contextWindow": 204800,
84856
+ "maxTokens": 131072,
84857
+ "thinking": {
84858
+ "mode": "effort",
84859
+ "efforts": [
84860
+ "minimal",
84861
+ "low",
84862
+ "medium",
84863
+ "high"
84864
+ ]
84865
+ },
84866
+ "compat": {
84867
+ "thinkingFormat": "zai",
84868
+ "reasoningContentField": "reasoning_content",
84869
+ "supportsDeveloperRole": false
84870
+ }
84871
+ },
84716
84872
  "glm-5-turbo": {
84717
84873
  "id": "glm-5-turbo",
84718
84874
  "name": "GLM-5-Turbo",
@@ -84855,4 +85011,4 @@
84855
85011
  }
84856
85012
  }
84857
85013
  }
84858
- }
85014
+ }
@@ -4,7 +4,7 @@ import {
4
4
  type OpenAICompatibleModelRecord,
5
5
  } from "../discovery/openai-compatible";
6
6
  import { Effort } from "../effort";
7
- import { toFireworksPublicModelId } from "../fireworks-model-id";
7
+ import { FIREWORKS_FAST_SUFFIX, toFireworksPublicModelId } from "../fireworks-model-id";
8
8
  import { isGlmVisionModelId, isGrokReasoningEffortCapable, isReasoningGlmModelId } from "../identity/family";
9
9
  import type { ModelManagerOptions } from "../model-manager";
10
10
  import { getBundledModels } from "../models";
@@ -1258,6 +1258,51 @@ export function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | n
1258
1258
  return isKimiK27CodeModelId(modelId) ? Math.min(candidate, KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS) : candidate;
1259
1259
  }
1260
1260
 
1261
+ /**
1262
+ * Fireworks Fast variants we surface. Each inherits the base model's
1263
+ * limits/modalities/thinking and overrides only the cost with the Standard-column
1264
+ * Fast prices from the Serverless pricing table; `cacheWrite` stays 0 (Fireworks
1265
+ * bills no cache-write). Derived from the bundled base entries so metadata stays
1266
+ * in lockstep, and the runtime auto-falls back to the base id on a failed fast
1267
+ * request. See https://docs.fireworks.ai/serverless/pricing.
1268
+ */
1269
+ const FIREWORKS_FAST_VARIANT_SPECS: ReadonlyArray<{
1270
+ base: string;
1271
+ name: string;
1272
+ cost: { input: number; output: number; cacheRead: number };
1273
+ }> = [
1274
+ { base: "kimi-k2.7-code", name: "Kimi K2.7 Code Fast", cost: { input: 1.9, output: 8, cacheRead: 0.38 } },
1275
+ { base: "kimi-k2.6", name: "Kimi K2.6 Fast", cost: { input: 2, output: 8, cacheRead: 0.3 } },
1276
+ { base: "glm-5.1", name: "GLM-5.1 Fast", cost: { input: 2.8, output: 8.8, cacheRead: 0.52 } },
1277
+ ];
1278
+
1279
+ /**
1280
+ * Build the Fireworks Fast seed by projecting each base bundled spec into a
1281
+ * `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
1282
+ * appear in the serverless control-plane list, so discovery cannot surface
1283
+ * them) and deduped behind any identical previous-snapshot entry.
1284
+ */
1285
+ export function buildFireworksFastSeed(): ModelSpec<"openai-completions">[] {
1286
+ const bundled = createBundledReferenceMap<"openai-completions">("fireworks");
1287
+ const seeds: ModelSpec<"openai-completions">[] = [];
1288
+ for (const variant of FIREWORKS_FAST_VARIANT_SPECS) {
1289
+ const base = bundled.get(variant.base);
1290
+ if (!base) continue;
1291
+ seeds.push({
1292
+ ...base,
1293
+ id: `${variant.base}${FIREWORKS_FAST_SUFFIX}`,
1294
+ name: variant.name,
1295
+ cost: {
1296
+ input: variant.cost.input,
1297
+ output: variant.cost.output,
1298
+ cacheRead: variant.cost.cacheRead,
1299
+ cacheWrite: 0,
1300
+ },
1301
+ });
1302
+ }
1303
+ return seeds;
1304
+ }
1305
+
1261
1306
  /**
1262
1307
  * Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
1263
1308
  * DeepSeek-native binary `thinking` toggle when both are present.