@oh-my-pi/pi-catalog 16.1.7 → 16.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/dist/types/fireworks-model-id.d.ts +13 -0
- package/dist/types/provider-models/openai-compat.d.ts +7 -0
- package/package.json +3 -3
- package/src/compat/openai.ts +35 -10
- package/src/fireworks-model-id.ts +20 -0
- package/src/model-thinking.ts +26 -1
- package/src/models.json +228 -96
- package/src/provider-models/openai-compat.ts +46 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.1.8] - 2026-06-20
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Fixed Fireworks-hosted Qwen turns (e.g. `fireworks/qwen3.7-plus`) failing with `400 Extra inputs are not permitted, field: 'enable_thinking'`. Fireworks serves Qwen3 with controllable thinking via OpenAI-style `reasoning_effort` and rejects the top-level `enable_thinking` boolean that Alibaba DashScope speaks; `buildOpenAICompat` was selecting `thinkingFormat: "qwen"` from the `qwen` id pattern regardless of host. Fireworks-hosted Qwen models now resolve to `thinkingFormat: "openai"`.
|
|
10
|
+
- Fixed MiMo models on OpenAI-compatible gateways to expose only accepted `low`, `medium`, and `high` reasoning tiers and map unsupported raw `minimal`/`xhigh` requests to safe wire values. ([#2864](https://github.com/can1357/oh-my-pi/issues/2864))
|
|
11
|
+
|
|
5
12
|
## [16.1.7] - 2026-06-20
|
|
6
13
|
|
|
7
14
|
### Fixed
|
package/dist/types/fireworks-model-id.d.ts
CHANGED
|
@@ -8,3 +8,16 @@ export declare function toFireworksWireModelId(modelId: string): string;
|
|
|
8
8
|
*/
|
|
9
9
|
export declare function toFirepassPublicModelId(modelId: string): string;
|
|
10
10
|
export declare function toFirepassWireModelId(modelId: string): string;
|
|
11
|
+
/**
|
|
12
|
+
* Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
|
|
13
|
+
* higher-throughput route (100+ tok/s) exposed under a dedicated router id
|
|
14
|
+
* (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
|
|
15
|
+
* higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
|
|
16
|
+
* translate it to the router wire form at request time (compat
|
|
17
|
+
* `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
|
|
18
|
+
*/
|
|
19
|
+
export declare const FIREWORKS_FAST_SUFFIX = "-fast";
|
|
20
|
+
/** True for a Fireworks public model id that selects the Fast serving path. */
|
|
21
|
+
export declare function isFireworksFastModelId(modelId: string): boolean;
|
|
22
|
+
/** Strip the Fast suffix to recover the base (Standard-tier) model id. */
|
|
23
|
+
export declare function toFireworksBaseModelId(modelId: string): string;
|
package/dist/types/provider-models/openai-compat.d.ts
CHANGED
|
@@ -179,6 +179,13 @@ export declare const KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS = 32768;
|
|
|
179
179
|
export declare function isKimiK27CodeModelId(modelId: string): boolean;
|
|
180
180
|
export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number): number;
|
|
181
181
|
export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | null): number | null;
|
|
182
|
+
/**
|
|
183
|
+
* Build the Fireworks Fast seed by projecting each base bundled spec into a
|
|
184
|
+
* `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
|
|
185
|
+
* appear in the serverless control-plane list, so discovery cannot surface
|
|
186
|
+
* them) and deduped behind any identical previous-snapshot entry.
|
|
187
|
+
*/
|
|
188
|
+
export declare function buildFireworksFastSeed(): ModelSpec<"openai-completions">[];
|
|
182
189
|
/**
|
|
183
190
|
* Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
|
|
184
191
|
* DeepSeek-native binary `thinking` toggle when both are present.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "16.1.7",
|
|
4
|
+
"version": "16.1.8",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,12 +34,12 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "16.1.7",
|
|
37
|
+
"@oh-my-pi/pi-utils": "16.1.8",
|
|
38
38
|
"arktype": "^2.2.0",
|
|
39
39
|
"zod": "^4"
|
|
40
40
|
},
|
|
41
41
|
"devDependencies": {
|
|
42
|
-
"@oh-my-pi/pi-ai": "16.1.7",
|
|
42
|
+
"@oh-my-pi/pi-ai": "16.1.8",
|
|
43
43
|
"@types/bun": "^1.3.14"
|
|
44
44
|
},
|
|
45
45
|
"engines": {
|
package/src/compat/openai.ts
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
* complete alternate views. Request handlers read `model.compat` fields and
|
|
8
8
|
* never detect, resolve, or allocate.
|
|
9
9
|
*/
|
|
10
|
+
import { isFireworksFastModelId } from "../fireworks-model-id";
|
|
10
11
|
import { hostMatchesUrl, modelMatchesHost } from "../hosts";
|
|
11
12
|
import {
|
|
12
13
|
isAnthropicNamespacedModelId,
|
|
@@ -130,6 +131,16 @@ const OPENCODE_WHEN_THINKING: NonNullable<OpenAICompat["whenThinking"]> = {
|
|
|
130
131
|
reasoningContentField: "reasoning_content",
|
|
131
132
|
};
|
|
132
133
|
|
|
134
|
+
const MIMO_REASONING_EFFORT_MAP: NonNullable<OpenAICompat["reasoningEffortMap"]> = {
|
|
135
|
+
minimal: "low",
|
|
136
|
+
xhigh: "high",
|
|
137
|
+
};
|
|
138
|
+
|
|
139
|
+
function mergeMimoReasoningEffortMap(compat: ResolvedOpenAISharedCompat, enabled: boolean): void {
|
|
140
|
+
if (!enabled) return;
|
|
141
|
+
compat.reasoningEffortMap = { ...MIMO_REASONING_EFFORT_MAP, ...compat.reasoningEffortMap };
|
|
142
|
+
}
|
|
143
|
+
|
|
133
144
|
function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
|
|
134
145
|
if (
|
|
135
146
|
provider === "openai" ||
|
|
@@ -184,6 +195,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
184
195
|
const lowerName = (spec.name ?? "").toLowerCase();
|
|
185
196
|
const isXiaomiHost = modelMatchesHost(hostModel, "xiaomi");
|
|
186
197
|
const isXiaomiMimo = isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
|
|
198
|
+
const isMimoReasoningEffortModel =
|
|
199
|
+
!isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
|
|
187
200
|
// OpenCode Zen's `big-pickle` is a DeepSeek reasoning alias; the upstream
|
|
188
201
|
// 400s come from DeepSeek and require exact reasoning_content replay.
|
|
189
202
|
const isOpenCodeDeepseekAlias =
|
|
@@ -238,17 +251,21 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
238
251
|
const isGroqHost = modelMatchesHost(hostModel, "groq");
|
|
239
252
|
const isCopilotHost = provider === "github-copilot";
|
|
240
253
|
const isZenmuxHost = provider === "zenmux";
|
|
241
|
-
// Endpoints that MUST receive a single system block. MiniMax's OpenAI
|
|
242
|
-
// endpoint returns error 2013 on multiple system messages;
|
|
243
|
-
//
|
|
244
|
-
//
|
|
245
|
-
//
|
|
254
|
+
// Endpoints/models that MUST receive a single system block. MiniMax's OpenAI
|
|
255
|
+
// endpoint returns error 2013 on multiple system messages; the Qwen 3.5+ chat
|
|
256
|
+
// template raises "System message must be at the beginning" / 500s with an
|
|
257
|
+
// internal_server_error when any system block appears past index 0. That
|
|
258
|
+
// template ships with the weights, so every Qwen-serving vLLM/SGLang host
|
|
259
|
+
// hits it — confirmed on Alibaba Dashscope, Qwen Portal, and Fireworks
|
|
260
|
+
// (`fireworks/qwen3.7-plus` 500'd on two leading system blocks). Gate on the
|
|
261
|
+
// Qwen family itself, not per-host: coalescing only trades away KV-cache reuse.
|
|
246
262
|
const isMiniMaxHost = modelMatchesHost(hostModel, "minimax");
|
|
247
263
|
const isQwenPortal = modelMatchesHost(hostModel, "qwenPortal");
|
|
248
264
|
const supportsMultipleSystemMessagesDefault =
|
|
249
265
|
!isMiniMaxHost &&
|
|
250
266
|
!isAlibaba &&
|
|
251
267
|
!isQwenPortal &&
|
|
268
|
+
!isQwen &&
|
|
252
269
|
(isOpenAIHost ||
|
|
253
270
|
isAzureHost ||
|
|
254
271
|
isOpenRouter ||
|
|
@@ -276,8 +293,12 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
276
293
|
? DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS
|
|
277
294
|
: undefined;
|
|
278
295
|
|
|
296
|
+
// Fireworks "Fast" variants (`<id>-fast`) are served from the router
|
|
297
|
+
// namespace (`accounts/fireworks/routers/<id>-fast`), like Fire Pass, rather
|
|
298
|
+
// than the `models/` namespace the rest of the `fireworks` provider uses.
|
|
299
|
+
const isFireworksFastRouter = provider === "fireworks" && isFireworksFastModelId(spec.id);
|
|
279
300
|
const wireModelIdMode: ResolvedOpenAISharedCompat["wireModelIdMode"] =
|
|
280
|
-
provider === "firepass"
|
|
301
|
+
provider === "firepass" || isFireworksFastRouter
|
|
281
302
|
? "firepass"
|
|
282
303
|
: provider === "fireworks"
|
|
283
304
|
? "fireworks"
|
|
@@ -291,9 +312,11 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
291
312
|
? "openrouter"
|
|
292
313
|
: isQwen && isNvidiaNim
|
|
293
314
|
? "qwen-chat-template"
|
|
294
|
-
:
|
|
295
|
-
? "
|
|
296
|
-
:
|
|
315
|
+
: isQwen && isFireworks
|
|
316
|
+
? "openai"
|
|
317
|
+
: isAlibaba || isQwen
|
|
318
|
+
? "qwen"
|
|
319
|
+
: "openai";
|
|
297
320
|
|
|
298
321
|
const compat: ResolvedOpenAICompat = {
|
|
299
322
|
supportsStore: !isNonStandard,
|
|
@@ -308,7 +331,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
308
331
|
supportsReasoningEffort: !isGrok && !isXiaomiMimo && (!(isZai || isZhipu) || supportsZaiReasoningEffort),
|
|
309
332
|
// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
|
|
310
333
|
supportsReasoningParams: provider !== "github-copilot",
|
|
311
|
-
reasoningEffortMap: {},
|
|
334
|
+
reasoningEffortMap: isMimoReasoningEffortModel ? MIMO_REASONING_EFFORT_MAP : {},
|
|
312
335
|
supportsUsageInStreaming: !isCerebras,
|
|
313
336
|
// pi-ai's thinking-loop guard is gemini-only; default the flag from the
|
|
314
337
|
// family classifier so OpenAI-compat proxies serving Gemini are covered.
|
|
@@ -400,6 +423,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
400
423
|
compat.omitReasoningEffort = true;
|
|
401
424
|
}
|
|
402
425
|
mergeOllamaReasoningEffortMap(compat, provider, spec.reasoning);
|
|
426
|
+
mergeMimoReasoningEffortMap(compat, isMimoReasoningEffortModel);
|
|
403
427
|
|
|
404
428
|
const whenThinkingPolicy =
|
|
405
429
|
spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
|
|
@@ -413,6 +437,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
413
437
|
variant.omitReasoningEffort = true;
|
|
414
438
|
}
|
|
415
439
|
mergeOllamaReasoningEffortMap(variant, provider, spec.reasoning);
|
|
440
|
+
mergeMimoReasoningEffortMap(variant, isMimoReasoningEffortModel);
|
|
416
441
|
compat.whenThinking = variant;
|
|
417
442
|
}
|
|
418
443
|
|
package/src/fireworks-model-id.ts
CHANGED
|
@@ -28,3 +28,23 @@ export function toFirepassWireModelId(modelId: string): string {
|
|
|
28
28
|
const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
|
|
29
29
|
return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
|
|
30
30
|
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
|
|
34
|
+
* higher-throughput route (100+ tok/s) exposed under a dedicated router id
|
|
35
|
+
* (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
|
|
36
|
+
* higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
|
|
37
|
+
* translate it to the router wire form at request time (compat
|
|
38
|
+
* `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
|
|
39
|
+
*/
|
|
40
|
+
export const FIREWORKS_FAST_SUFFIX = "-fast";
|
|
41
|
+
|
|
42
|
+
/** True for a Fireworks public model id that selects the Fast serving path. */
|
|
43
|
+
export function isFireworksFastModelId(modelId: string): boolean {
|
|
44
|
+
return modelId.endsWith(FIREWORKS_FAST_SUFFIX);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/** Strip the Fast suffix to recover the base (Standard-tier) model id. */
|
|
48
|
+
export function toFireworksBaseModelId(modelId: string): string {
|
|
49
|
+
return modelId.endsWith(FIREWORKS_FAST_SUFFIX) ? modelId.slice(0, -FIREWORKS_FAST_SUFFIX.length) : modelId;
|
|
50
|
+
}
|
package/src/model-thinking.ts
CHANGED
|
@@ -24,6 +24,7 @@ import {
|
|
|
24
24
|
findThinkingVariantToken,
|
|
25
25
|
isDeepseekModelIdOrName,
|
|
26
26
|
isGlm52ReasoningEffortModelId,
|
|
27
|
+
isMimoModelIdOrName,
|
|
27
28
|
isMinimaxM2FamilyModelId,
|
|
28
29
|
isMinimaxM3FamilyModelId,
|
|
29
30
|
isOpenAIGptOssModelId,
|
|
@@ -89,6 +90,10 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
|
89
90
|
const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
|
|
90
91
|
[Effort.XHigh]: "max",
|
|
91
92
|
};
|
|
93
|
+
const MIMO_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
94
|
+
[Effort.Minimal]: "low",
|
|
95
|
+
[Effort.XHigh]: "high",
|
|
96
|
+
};
|
|
92
97
|
|
|
93
98
|
/**
|
|
94
99
|
* Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
|
|
@@ -296,7 +301,10 @@ function getModelDefinedEfforts<TApi extends Api>(
|
|
|
296
301
|
return GLM_52_HIGH_MAX_REASONING_EFFORTS;
|
|
297
302
|
}
|
|
298
303
|
}
|
|
299
|
-
return isOpenAICompatReasoningApi(spec.api) &&
|
|
304
|
+
return isOpenAICompatReasoningApi(spec.api) &&
|
|
305
|
+
(isMinimaxM2FamilyModelId(spec.id) ||
|
|
306
|
+
isOpenAIGptOssModelId(spec.id) ||
|
|
307
|
+
isOpenAICompatMimoReasoningEffortModel(spec, compat))
|
|
300
308
|
? LOW_MEDIUM_HIGH_REASONING_EFFORTS
|
|
301
309
|
: undefined;
|
|
302
310
|
}
|
|
@@ -309,6 +317,19 @@ function isMinimaxReasoningModelOnAnthropicEndpoint<TApi extends Api>(spec: Mode
|
|
|
309
317
|
return spec.api === "anthropic-messages" && (isMinimaxM2FamilyModelId(spec.id) || isMinimaxM3FamilyModelId(spec.id));
|
|
310
318
|
}
|
|
311
319
|
|
|
320
|
+
function isOpenAICompatMimoReasoningEffortModel<TApi extends Api>(
|
|
321
|
+
spec: ModelSpec<TApi>,
|
|
322
|
+
compat: CompatOf<TApi>,
|
|
323
|
+
): boolean {
|
|
324
|
+
if (!isOpenAICompatReasoningApi(spec.api)) return false;
|
|
325
|
+
if (!isMimoModelIdOrName(spec.id) && !isMimoModelIdOrName(spec.name ?? "")) return false;
|
|
326
|
+
const resolved = compat as ResolvedOpenAICompat | undefined;
|
|
327
|
+
return (
|
|
328
|
+
(resolved?.thinkingFormat === "openai" || resolved?.thinkingFormat === "openrouter") &&
|
|
329
|
+
resolved.supportsReasoningEffort
|
|
330
|
+
);
|
|
331
|
+
}
|
|
332
|
+
|
|
312
333
|
function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
|
|
313
334
|
if (compat === undefined || !("reasoningEffortMap" in compat)) {
|
|
314
335
|
return undefined;
|
|
@@ -364,6 +385,8 @@ function inferDetectedEffortMap<TApi extends Api>(
|
|
|
364
385
|
map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
|
|
365
386
|
} else if (isDeepseekReasoningModel(spec)) {
|
|
366
387
|
map = DEEPSEEK_REASONING_EFFORT_MAP;
|
|
388
|
+
} else if (isOpenAICompatMimoReasoningEffortModel(spec, compat)) {
|
|
389
|
+
map = MIMO_REASONING_EFFORT_MAP;
|
|
367
390
|
} else if (modelMatchesHost(spec, "openrouter")) {
|
|
368
391
|
map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
|
|
369
392
|
} else if (modelMatchesHost(spec, "fireworks")) {
|
|
@@ -485,6 +508,8 @@ function inferAnthropicSupportedEfforts<TApi extends Api>(
|
|
|
485
508
|
}
|
|
486
509
|
|
|
487
510
|
function inferFallbackEfforts<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): readonly Effort[] {
|
|
511
|
+
const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
|
|
512
|
+
if (modelDefinedEfforts !== undefined) return modelDefinedEfforts;
|
|
488
513
|
if (isMinimaxReasoningModelOnAnthropicEndpoint(spec)) {
|
|
489
514
|
return LOW_MEDIUM_HIGH_REASONING_EFFORTS;
|
|
490
515
|
}
|
package/src/models.json
CHANGED
|
@@ -7208,11 +7208,9 @@
|
|
|
7208
7208
|
"thinking": {
|
|
7209
7209
|
"mode": "effort",
|
|
7210
7210
|
"efforts": [
|
|
7211
|
-
"minimal",
|
|
7212
7211
|
"low",
|
|
7213
7212
|
"medium",
|
|
7214
|
-
"high"
|
|
7215
|
-
"xhigh"
|
|
7213
|
+
"high"
|
|
7216
7214
|
]
|
|
7217
7215
|
}
|
|
7218
7216
|
},
|
|
@@ -7238,11 +7236,9 @@
|
|
|
7238
7236
|
"thinking": {
|
|
7239
7237
|
"mode": "effort",
|
|
7240
7238
|
"efforts": [
|
|
7241
|
-
"minimal",
|
|
7242
7239
|
"low",
|
|
7243
7240
|
"medium",
|
|
7244
|
-
"high"
|
|
7245
|
-
"xhigh"
|
|
7241
|
+
"high"
|
|
7246
7242
|
]
|
|
7247
7243
|
}
|
|
7248
7244
|
},
|
|
@@ -7267,11 +7263,9 @@
|
|
|
7267
7263
|
"thinking": {
|
|
7268
7264
|
"mode": "effort",
|
|
7269
7265
|
"efforts": [
|
|
7270
|
-
"minimal",
|
|
7271
7266
|
"low",
|
|
7272
7267
|
"medium",
|
|
7273
|
-
"high"
|
|
7274
|
-
"xhigh"
|
|
7268
|
+
"high"
|
|
7275
7269
|
]
|
|
7276
7270
|
}
|
|
7277
7271
|
}
|
|
@@ -14801,6 +14795,38 @@
|
|
|
14801
14795
|
}
|
|
14802
14796
|
}
|
|
14803
14797
|
},
|
|
14798
|
+
"glm-5.1-fast": {
|
|
14799
|
+
"id": "glm-5.1-fast",
|
|
14800
|
+
"name": "GLM-5.1 Fast",
|
|
14801
|
+
"api": "openai-completions",
|
|
14802
|
+
"provider": "fireworks",
|
|
14803
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
14804
|
+
"reasoning": true,
|
|
14805
|
+
"input": [
|
|
14806
|
+
"text"
|
|
14807
|
+
],
|
|
14808
|
+
"cost": {
|
|
14809
|
+
"input": 2.8,
|
|
14810
|
+
"output": 8.8,
|
|
14811
|
+
"cacheRead": 0.52,
|
|
14812
|
+
"cacheWrite": 0
|
|
14813
|
+
},
|
|
14814
|
+
"contextWindow": 202752,
|
|
14815
|
+
"maxTokens": 131072,
|
|
14816
|
+
"thinking": {
|
|
14817
|
+
"mode": "effort",
|
|
14818
|
+
"efforts": [
|
|
14819
|
+
"minimal",
|
|
14820
|
+
"low",
|
|
14821
|
+
"medium",
|
|
14822
|
+
"high",
|
|
14823
|
+
"xhigh"
|
|
14824
|
+
],
|
|
14825
|
+
"effortMap": {
|
|
14826
|
+
"minimal": "none"
|
|
14827
|
+
}
|
|
14828
|
+
}
|
|
14829
|
+
},
|
|
14804
14830
|
"glm-5.2": {
|
|
14805
14831
|
"id": "glm-5.2",
|
|
14806
14832
|
"name": "GLM-5.2",
|
|
@@ -14947,6 +14973,39 @@
|
|
|
14947
14973
|
}
|
|
14948
14974
|
}
|
|
14949
14975
|
},
|
|
14976
|
+
"kimi-k2.6-fast": {
|
|
14977
|
+
"id": "kimi-k2.6-fast",
|
|
14978
|
+
"name": "Kimi K2.6 Fast",
|
|
14979
|
+
"api": "openai-completions",
|
|
14980
|
+
"provider": "fireworks",
|
|
14981
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
14982
|
+
"reasoning": true,
|
|
14983
|
+
"input": [
|
|
14984
|
+
"text",
|
|
14985
|
+
"image"
|
|
14986
|
+
],
|
|
14987
|
+
"cost": {
|
|
14988
|
+
"input": 2,
|
|
14989
|
+
"output": 8,
|
|
14990
|
+
"cacheRead": 0.3,
|
|
14991
|
+
"cacheWrite": 0
|
|
14992
|
+
},
|
|
14993
|
+
"contextWindow": 262144,
|
|
14994
|
+
"maxTokens": 32768,
|
|
14995
|
+
"thinking": {
|
|
14996
|
+
"mode": "effort",
|
|
14997
|
+
"efforts": [
|
|
14998
|
+
"minimal",
|
|
14999
|
+
"low",
|
|
15000
|
+
"medium",
|
|
15001
|
+
"high",
|
|
15002
|
+
"xhigh"
|
|
15003
|
+
],
|
|
15004
|
+
"effortMap": {
|
|
15005
|
+
"minimal": "none"
|
|
15006
|
+
}
|
|
15007
|
+
}
|
|
15008
|
+
},
|
|
14950
15009
|
"kimi-k2.7-code": {
|
|
14951
15010
|
"id": "kimi-k2.7-code",
|
|
14952
15011
|
"name": "Kimi K2.7 Code",
|
|
@@ -14980,6 +15039,39 @@
|
|
|
14980
15039
|
}
|
|
14981
15040
|
}
|
|
14982
15041
|
},
|
|
15042
|
+
"kimi-k2.7-code-fast": {
|
|
15043
|
+
"id": "kimi-k2.7-code-fast",
|
|
15044
|
+
"name": "Kimi K2.7 Code Fast",
|
|
15045
|
+
"api": "openai-completions",
|
|
15046
|
+
"provider": "fireworks",
|
|
15047
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
15048
|
+
"reasoning": true,
|
|
15049
|
+
"input": [
|
|
15050
|
+
"text",
|
|
15051
|
+
"image"
|
|
15052
|
+
],
|
|
15053
|
+
"cost": {
|
|
15054
|
+
"input": 1.9,
|
|
15055
|
+
"output": 8,
|
|
15056
|
+
"cacheRead": 0.38,
|
|
15057
|
+
"cacheWrite": 0
|
|
15058
|
+
},
|
|
15059
|
+
"contextWindow": 262144,
|
|
15060
|
+
"maxTokens": 32768,
|
|
15061
|
+
"thinking": {
|
|
15062
|
+
"mode": "effort",
|
|
15063
|
+
"efforts": [
|
|
15064
|
+
"minimal",
|
|
15065
|
+
"low",
|
|
15066
|
+
"medium",
|
|
15067
|
+
"high",
|
|
15068
|
+
"xhigh"
|
|
15069
|
+
],
|
|
15070
|
+
"effortMap": {
|
|
15071
|
+
"minimal": "none"
|
|
15072
|
+
}
|
|
15073
|
+
}
|
|
15074
|
+
},
|
|
14983
15075
|
"minimax-m2.5": {
|
|
14984
15076
|
"id": "minimax-m2.5",
|
|
14985
15077
|
"name": "MiniMax M2.5",
|
|
@@ -20276,11 +20368,9 @@
|
|
|
20276
20368
|
"thinking": {
|
|
20277
20369
|
"mode": "effort",
|
|
20278
20370
|
"efforts": [
|
|
20279
|
-
"minimal",
|
|
20280
20371
|
"low",
|
|
20281
20372
|
"medium",
|
|
20282
|
-
"high"
|
|
20283
|
-
"xhigh"
|
|
20373
|
+
"high"
|
|
20284
20374
|
]
|
|
20285
20375
|
}
|
|
20286
20376
|
},
|
|
@@ -30625,11 +30715,9 @@
|
|
|
30625
30715
|
"thinking": {
|
|
30626
30716
|
"mode": "effort",
|
|
30627
30717
|
"efforts": [
|
|
30628
|
-
"minimal",
|
|
30629
30718
|
"low",
|
|
30630
30719
|
"medium",
|
|
30631
|
-
"high"
|
|
30632
|
-
"xhigh"
|
|
30720
|
+
"high"
|
|
30633
30721
|
]
|
|
30634
30722
|
}
|
|
30635
30723
|
},
|
|
@@ -30655,11 +30743,9 @@
|
|
|
30655
30743
|
"thinking": {
|
|
30656
30744
|
"mode": "effort",
|
|
30657
30745
|
"efforts": [
|
|
30658
|
-
"minimal",
|
|
30659
30746
|
"low",
|
|
30660
30747
|
"medium",
|
|
30661
|
-
"high"
|
|
30662
|
-
"xhigh"
|
|
30748
|
+
"high"
|
|
30663
30749
|
]
|
|
30664
30750
|
}
|
|
30665
30751
|
},
|
|
@@ -30703,11 +30789,9 @@
|
|
|
30703
30789
|
"thinking": {
|
|
30704
30790
|
"mode": "effort",
|
|
30705
30791
|
"efforts": [
|
|
30706
|
-
"minimal",
|
|
30707
30792
|
"low",
|
|
30708
30793
|
"medium",
|
|
30709
|
-
"high"
|
|
30710
|
-
"xhigh"
|
|
30794
|
+
"high"
|
|
30711
30795
|
]
|
|
30712
30796
|
}
|
|
30713
30797
|
},
|
|
@@ -30752,11 +30836,9 @@
|
|
|
30752
30836
|
"thinking": {
|
|
30753
30837
|
"mode": "effort",
|
|
30754
30838
|
"efforts": [
|
|
30755
|
-
"minimal",
|
|
30756
30839
|
"low",
|
|
30757
30840
|
"medium",
|
|
30758
|
-
"high"
|
|
30759
|
-
"xhigh"
|
|
30841
|
+
"high"
|
|
30760
30842
|
]
|
|
30761
30843
|
}
|
|
30762
30844
|
},
|
|
@@ -30781,11 +30863,9 @@
|
|
|
30781
30863
|
"thinking": {
|
|
30782
30864
|
"mode": "effort",
|
|
30783
30865
|
"efforts": [
|
|
30784
|
-
"minimal",
|
|
30785
30866
|
"low",
|
|
30786
30867
|
"medium",
|
|
30787
|
-
"high"
|
|
30788
|
-
"xhigh"
|
|
30868
|
+
"high"
|
|
30789
30869
|
]
|
|
30790
30870
|
}
|
|
30791
30871
|
},
|
|
@@ -31172,7 +31252,7 @@
|
|
|
31172
31252
|
"kimi-code": {
|
|
31173
31253
|
"kimi-for-coding": {
|
|
31174
31254
|
"id": "kimi-for-coding",
|
|
31175
|
-
"name": "
|
|
31255
|
+
"name": "K2.7 Code",
|
|
31176
31256
|
"api": "openai-completions",
|
|
31177
31257
|
"provider": "kimi-code",
|
|
31178
31258
|
"baseUrl": "https://api.kimi.com/coding/v1",
|
|
@@ -49119,11 +49199,9 @@
|
|
|
49119
49199
|
"thinking": {
|
|
49120
49200
|
"mode": "effort",
|
|
49121
49201
|
"efforts": [
|
|
49122
|
-
"minimal",
|
|
49123
49202
|
"low",
|
|
49124
49203
|
"medium",
|
|
49125
|
-
"high"
|
|
49126
|
-
"xhigh"
|
|
49204
|
+
"high"
|
|
49127
49205
|
],
|
|
49128
49206
|
"effortRouting": {
|
|
49129
49207
|
"off": "xiaomi/mimo-v2-flash",
|
|
@@ -49183,11 +49261,9 @@
|
|
|
49183
49261
|
"thinking": {
|
|
49184
49262
|
"mode": "effort",
|
|
49185
49263
|
"efforts": [
|
|
49186
|
-
"minimal",
|
|
49187
49264
|
"low",
|
|
49188
49265
|
"medium",
|
|
49189
|
-
"high"
|
|
49190
|
-
"xhigh"
|
|
49266
|
+
"high"
|
|
49191
49267
|
],
|
|
49192
49268
|
"effortRouting": {
|
|
49193
49269
|
"off": "xiaomi/mimo-v2-flash-original",
|
|
@@ -49248,11 +49324,9 @@
|
|
|
49248
49324
|
"thinking": {
|
|
49249
49325
|
"mode": "effort",
|
|
49250
49326
|
"efforts": [
|
|
49251
|
-
"minimal",
|
|
49252
49327
|
"low",
|
|
49253
49328
|
"medium",
|
|
49254
|
-
"high"
|
|
49255
|
-
"xhigh"
|
|
49329
|
+
"high"
|
|
49256
49330
|
]
|
|
49257
49331
|
}
|
|
49258
49332
|
},
|
|
@@ -49277,11 +49351,9 @@
|
|
|
49277
49351
|
"thinking": {
|
|
49278
49352
|
"mode": "effort",
|
|
49279
49353
|
"efforts": [
|
|
49280
|
-
"minimal",
|
|
49281
49354
|
"low",
|
|
49282
49355
|
"medium",
|
|
49283
|
-
"high"
|
|
49284
|
-
"xhigh"
|
|
49356
|
+
"high"
|
|
49285
49357
|
]
|
|
49286
49358
|
}
|
|
49287
49359
|
},
|
|
@@ -49307,11 +49379,9 @@
|
|
|
49307
49379
|
"thinking": {
|
|
49308
49380
|
"mode": "effort",
|
|
49309
49381
|
"efforts": [
|
|
49310
|
-
"minimal",
|
|
49311
49382
|
"low",
|
|
49312
49383
|
"medium",
|
|
49313
|
-
"high"
|
|
49314
|
-
"xhigh"
|
|
49384
|
+
"high"
|
|
49315
49385
|
]
|
|
49316
49386
|
}
|
|
49317
49387
|
},
|
|
@@ -49336,11 +49406,9 @@
|
|
|
49336
49406
|
"thinking": {
|
|
49337
49407
|
"mode": "effort",
|
|
49338
49408
|
"efforts": [
|
|
49339
|
-
"minimal",
|
|
49340
49409
|
"low",
|
|
49341
49410
|
"medium",
|
|
49342
|
-
"high"
|
|
49343
|
-
"xhigh"
|
|
49411
|
+
"high"
|
|
49344
49412
|
]
|
|
49345
49413
|
}
|
|
49346
49414
|
},
|
|
@@ -56842,11 +56910,9 @@
|
|
|
56842
56910
|
"thinking": {
|
|
56843
56911
|
"mode": "effort",
|
|
56844
56912
|
"efforts": [
|
|
56845
|
-
"minimal",
|
|
56846
56913
|
"low",
|
|
56847
56914
|
"medium",
|
|
56848
|
-
"high"
|
|
56849
|
-
"xhigh"
|
|
56915
|
+
"high"
|
|
56850
56916
|
]
|
|
56851
56917
|
},
|
|
56852
56918
|
"compat": {
|
|
@@ -56874,11 +56940,9 @@
|
|
|
56874
56940
|
"thinking": {
|
|
56875
56941
|
"mode": "effort",
|
|
56876
56942
|
"efforts": [
|
|
56877
|
-
"minimal",
|
|
56878
56943
|
"low",
|
|
56879
56944
|
"medium",
|
|
56880
|
-
"high"
|
|
56881
|
-
"xhigh"
|
|
56945
|
+
"high"
|
|
56882
56946
|
]
|
|
56883
56947
|
},
|
|
56884
56948
|
"compat": {
|
|
@@ -56910,11 +56974,9 @@
|
|
|
56910
56974
|
"thinking": {
|
|
56911
56975
|
"mode": "effort",
|
|
56912
56976
|
"efforts": [
|
|
56913
|
-
"minimal",
|
|
56914
56977
|
"low",
|
|
56915
56978
|
"medium",
|
|
56916
|
-
"high"
|
|
56917
|
-
"xhigh"
|
|
56979
|
+
"high"
|
|
56918
56980
|
]
|
|
56919
56981
|
}
|
|
56920
56982
|
},
|
|
@@ -56942,11 +57004,9 @@
|
|
|
56942
57004
|
"thinking": {
|
|
56943
57005
|
"mode": "effort",
|
|
56944
57006
|
"efforts": [
|
|
56945
|
-
"minimal",
|
|
56946
57007
|
"low",
|
|
56947
57008
|
"medium",
|
|
56948
|
-
"high"
|
|
56949
|
-
"xhigh"
|
|
57009
|
+
"high"
|
|
56950
57010
|
]
|
|
56951
57011
|
}
|
|
56952
57012
|
},
|
|
@@ -58575,11 +58635,9 @@
|
|
|
58575
58635
|
"thinking": {
|
|
58576
58636
|
"mode": "effort",
|
|
58577
58637
|
"efforts": [
|
|
58578
|
-
"minimal",
|
|
58579
58638
|
"low",
|
|
58580
58639
|
"medium",
|
|
58581
|
-
"high"
|
|
58582
|
-
"xhigh"
|
|
58640
|
+
"high"
|
|
58583
58641
|
]
|
|
58584
58642
|
}
|
|
58585
58643
|
},
|
|
@@ -58605,11 +58663,9 @@
|
|
|
58605
58663
|
"thinking": {
|
|
58606
58664
|
"mode": "effort",
|
|
58607
58665
|
"efforts": [
|
|
58608
|
-
"minimal",
|
|
58609
58666
|
"low",
|
|
58610
58667
|
"medium",
|
|
58611
|
-
"high"
|
|
58612
|
-
"xhigh"
|
|
58668
|
+
"high"
|
|
58613
58669
|
]
|
|
58614
58670
|
}
|
|
58615
58671
|
},
|
|
@@ -58634,11 +58690,9 @@
|
|
|
58634
58690
|
"thinking": {
|
|
58635
58691
|
"mode": "effort",
|
|
58636
58692
|
"efforts": [
|
|
58637
|
-
"minimal",
|
|
58638
58693
|
"low",
|
|
58639
58694
|
"medium",
|
|
58640
|
-
"high"
|
|
58641
|
-
"xhigh"
|
|
58695
|
+
"high"
|
|
58642
58696
|
]
|
|
58643
58697
|
}
|
|
58644
58698
|
},
|
|
@@ -58664,11 +58718,9 @@
|
|
|
58664
58718
|
"thinking": {
|
|
58665
58719
|
"mode": "effort",
|
|
58666
58720
|
"efforts": [
|
|
58667
|
-
"minimal",
|
|
58668
58721
|
"low",
|
|
58669
58722
|
"medium",
|
|
58670
|
-
"high"
|
|
58671
|
-
"xhigh"
|
|
58723
|
+
"high"
|
|
58672
58724
|
]
|
|
58673
58725
|
}
|
|
58674
58726
|
},
|
|
@@ -67414,7 +67466,6 @@
|
|
|
67414
67466
|
"thinking": {
|
|
67415
67467
|
"mode": "effort",
|
|
67416
67468
|
"efforts": [
|
|
67417
|
-
"minimal",
|
|
67418
67469
|
"low",
|
|
67419
67470
|
"medium",
|
|
67420
67471
|
"high"
|
|
@@ -67443,7 +67494,6 @@
|
|
|
67443
67494
|
"thinking": {
|
|
67444
67495
|
"mode": "effort",
|
|
67445
67496
|
"efforts": [
|
|
67446
|
-
"minimal",
|
|
67447
67497
|
"low",
|
|
67448
67498
|
"medium",
|
|
67449
67499
|
"high"
|
|
@@ -67471,7 +67521,6 @@
|
|
|
67471
67521
|
"thinking": {
|
|
67472
67522
|
"mode": "effort",
|
|
67473
67523
|
"efforts": [
|
|
67474
|
-
"minimal",
|
|
67475
67524
|
"low",
|
|
67476
67525
|
"medium",
|
|
67477
67526
|
"high"
|
|
@@ -67500,7 +67549,6 @@
|
|
|
67500
67549
|
"thinking": {
|
|
67501
67550
|
"mode": "effort",
|
|
67502
67551
|
"efforts": [
|
|
67503
|
-
"minimal",
|
|
67504
67552
|
"low",
|
|
67505
67553
|
"medium",
|
|
67506
67554
|
"high"
|
|
@@ -67528,7 +67576,6 @@
|
|
|
67528
67576
|
"thinking": {
|
|
67529
67577
|
"mode": "effort",
|
|
67530
67578
|
"efforts": [
|
|
67531
|
-
"minimal",
|
|
67532
67579
|
"low",
|
|
67533
67580
|
"medium",
|
|
67534
67581
|
"high"
|
|
@@ -72124,11 +72171,9 @@
|
|
|
72124
72171
|
"thinking": {
|
|
72125
72172
|
"mode": "effort",
|
|
72126
72173
|
"efforts": [
|
|
72127
|
-
"minimal",
|
|
72128
72174
|
"low",
|
|
72129
72175
|
"medium",
|
|
72130
|
-
"high"
|
|
72131
|
-
"xhigh"
|
|
72176
|
+
"high"
|
|
72132
72177
|
]
|
|
72133
72178
|
}
|
|
72134
72179
|
},
|
|
@@ -84061,11 +84106,9 @@
|
|
|
84061
84106
|
"thinking": {
|
|
84062
84107
|
"mode": "effort",
|
|
84063
84108
|
"efforts": [
|
|
84064
|
-
"minimal",
|
|
84065
84109
|
"low",
|
|
84066
84110
|
"medium",
|
|
84067
|
-
"high"
|
|
84068
|
-
"xhigh"
|
|
84111
|
+
"high"
|
|
84069
84112
|
]
|
|
84070
84113
|
}
|
|
84071
84114
|
},
|
|
@@ -84090,11 +84133,9 @@
|
|
|
84090
84133
|
"thinking": {
|
|
84091
84134
|
"mode": "effort",
|
|
84092
84135
|
"efforts": [
|
|
84093
|
-
"minimal",
|
|
84094
84136
|
"low",
|
|
84095
84137
|
"medium",
|
|
84096
|
-
"high"
|
|
84097
|
-
"xhigh"
|
|
84138
|
+
"high"
|
|
84098
84139
|
]
|
|
84099
84140
|
}
|
|
84100
84141
|
},
|
|
@@ -84120,11 +84161,9 @@
|
|
|
84120
84161
|
"thinking": {
|
|
84121
84162
|
"mode": "effort",
|
|
84122
84163
|
"efforts": [
|
|
84123
|
-
"minimal",
|
|
84124
84164
|
"low",
|
|
84125
84165
|
"medium",
|
|
84126
|
-
"high"
|
|
84127
|
-
"xhigh"
|
|
84166
|
+
"high"
|
|
84128
84167
|
]
|
|
84129
84168
|
}
|
|
84130
84169
|
},
|
|
@@ -84149,11 +84188,9 @@
|
|
|
84149
84188
|
"thinking": {
|
|
84150
84189
|
"mode": "effort",
|
|
84151
84190
|
"efforts": [
|
|
84152
|
-
"minimal",
|
|
84153
84191
|
"low",
|
|
84154
84192
|
"medium",
|
|
84155
|
-
"high"
|
|
84156
|
-
"xhigh"
|
|
84193
|
+
"high"
|
|
84157
84194
|
]
|
|
84158
84195
|
}
|
|
84159
84196
|
},
|
|
@@ -84179,11 +84216,9 @@
|
|
|
84179
84216
|
"thinking": {
|
|
84180
84217
|
"mode": "effort",
|
|
84181
84218
|
"efforts": [
|
|
84182
|
-
"minimal",
|
|
84183
84219
|
"low",
|
|
84184
84220
|
"medium",
|
|
84185
|
-
"high"
|
|
84186
|
-
"xhigh"
|
|
84221
|
+
"high"
|
|
84187
84222
|
]
|
|
84188
84223
|
}
|
|
84189
84224
|
},
|
|
@@ -84208,11 +84243,9 @@
|
|
|
84208
84243
|
"thinking": {
|
|
84209
84244
|
"mode": "effort",
|
|
84210
84245
|
"efforts": [
|
|
84211
|
-
"minimal",
|
|
84212
84246
|
"low",
|
|
84213
84247
|
"medium",
|
|
84214
|
-
"high"
|
|
84215
|
-
"xhigh"
|
|
84248
|
+
"high"
|
|
84216
84249
|
]
|
|
84217
84250
|
}
|
|
84218
84251
|
},
|
|
@@ -84637,6 +84670,39 @@
|
|
|
84637
84670
|
}
|
|
84638
84671
|
},
|
|
84639
84672
|
"zhipu-coding-plan": {
|
|
84673
|
+
"glm-4.5": {
|
|
84674
|
+
"id": "glm-4.5",
|
|
84675
|
+
"name": "glm-4.5",
|
|
84676
|
+
"api": "openai-completions",
|
|
84677
|
+
"provider": "zhipu-coding-plan",
|
|
84678
|
+
"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
|
|
84679
|
+
"reasoning": true,
|
|
84680
|
+
"input": [
|
|
84681
|
+
"text"
|
|
84682
|
+
],
|
|
84683
|
+
"cost": {
|
|
84684
|
+
"input": 0,
|
|
84685
|
+
"output": 0,
|
|
84686
|
+
"cacheRead": 0,
|
|
84687
|
+
"cacheWrite": 0
|
|
84688
|
+
},
|
|
84689
|
+
"contextWindow": 131072,
|
|
84690
|
+
"maxTokens": 98304,
|
|
84691
|
+
"thinking": {
|
|
84692
|
+
"mode": "effort",
|
|
84693
|
+
"efforts": [
|
|
84694
|
+
"minimal",
|
|
84695
|
+
"low",
|
|
84696
|
+
"medium",
|
|
84697
|
+
"high"
|
|
84698
|
+
]
|
|
84699
|
+
},
|
|
84700
|
+
"compat": {
|
|
84701
|
+
"thinkingFormat": "zai",
|
|
84702
|
+
"reasoningContentField": "reasoning_content",
|
|
84703
|
+
"supportsDeveloperRole": false
|
|
84704
|
+
}
|
|
84705
|
+
},
|
|
84640
84706
|
"glm-4.5-air": {
|
|
84641
84707
|
"id": "glm-4.5-air",
|
|
84642
84708
|
"name": "GLM-4.5-Air",
|
|
@@ -84670,6 +84736,39 @@
|
|
|
84670
84736
|
]
|
|
84671
84737
|
}
|
|
84672
84738
|
},
|
|
84739
|
+
"glm-4.6": {
|
|
84740
|
+
"id": "glm-4.6",
|
|
84741
|
+
"name": "glm-4.6",
|
|
84742
|
+
"api": "openai-completions",
|
|
84743
|
+
"provider": "zhipu-coding-plan",
|
|
84744
|
+
"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
|
|
84745
|
+
"reasoning": true,
|
|
84746
|
+
"input": [
|
|
84747
|
+
"text"
|
|
84748
|
+
],
|
|
84749
|
+
"cost": {
|
|
84750
|
+
"input": 0,
|
|
84751
|
+
"output": 0,
|
|
84752
|
+
"cacheRead": 0,
|
|
84753
|
+
"cacheWrite": 0
|
|
84754
|
+
},
|
|
84755
|
+
"contextWindow": 202752,
|
|
84756
|
+
"maxTokens": 131072,
|
|
84757
|
+
"thinking": {
|
|
84758
|
+
"mode": "effort",
|
|
84759
|
+
"efforts": [
|
|
84760
|
+
"minimal",
|
|
84761
|
+
"low",
|
|
84762
|
+
"medium",
|
|
84763
|
+
"high"
|
|
84764
|
+
]
|
|
84765
|
+
},
|
|
84766
|
+
"compat": {
|
|
84767
|
+
"thinkingFormat": "zai",
|
|
84768
|
+
"reasoningContentField": "reasoning_content",
|
|
84769
|
+
"supportsDeveloperRole": false
|
|
84770
|
+
}
|
|
84771
|
+
},
|
|
84673
84772
|
"glm-4.6v": {
|
|
84674
84773
|
"id": "glm-4.6v",
|
|
84675
84774
|
"name": "GLM-4.6V",
|
|
@@ -84737,6 +84836,39 @@
|
|
|
84737
84836
|
]
|
|
84738
84837
|
}
|
|
84739
84838
|
},
|
|
84839
|
+
"glm-5": {
|
|
84840
|
+
"id": "glm-5",
|
|
84841
|
+
"name": "GLM-5",
|
|
84842
|
+
"api": "openai-completions",
|
|
84843
|
+
"provider": "zhipu-coding-plan",
|
|
84844
|
+
"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
|
|
84845
|
+
"reasoning": true,
|
|
84846
|
+
"input": [
|
|
84847
|
+
"text"
|
|
84848
|
+
],
|
|
84849
|
+
"cost": {
|
|
84850
|
+
"input": 0,
|
|
84851
|
+
"output": 0,
|
|
84852
|
+
"cacheRead": 0,
|
|
84853
|
+
"cacheWrite": 0
|
|
84854
|
+
},
|
|
84855
|
+
"contextWindow": 204800,
|
|
84856
|
+
"maxTokens": 131072,
|
|
84857
|
+
"thinking": {
|
|
84858
|
+
"mode": "effort",
|
|
84859
|
+
"efforts": [
|
|
84860
|
+
"minimal",
|
|
84861
|
+
"low",
|
|
84862
|
+
"medium",
|
|
84863
|
+
"high"
|
|
84864
|
+
]
|
|
84865
|
+
},
|
|
84866
|
+
"compat": {
|
|
84867
|
+
"thinkingFormat": "zai",
|
|
84868
|
+
"reasoningContentField": "reasoning_content",
|
|
84869
|
+
"supportsDeveloperRole": false
|
|
84870
|
+
}
|
|
84871
|
+
},
|
|
84740
84872
|
"glm-5-turbo": {
|
|
84741
84873
|
"id": "glm-5-turbo",
|
|
84742
84874
|
"name": "GLM-5-Turbo",
|
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
type OpenAICompatibleModelRecord,
|
|
5
5
|
} from "../discovery/openai-compatible";
|
|
6
6
|
import { Effort } from "../effort";
|
|
7
|
-
import { toFireworksPublicModelId } from "../fireworks-model-id";
|
|
7
|
+
import { FIREWORKS_FAST_SUFFIX, toFireworksPublicModelId } from "../fireworks-model-id";
|
|
8
8
|
import { isGlmVisionModelId, isGrokReasoningEffortCapable, isReasoningGlmModelId } from "../identity/family";
|
|
9
9
|
import type { ModelManagerOptions } from "../model-manager";
|
|
10
10
|
import { getBundledModels } from "../models";
|
|
@@ -1258,6 +1258,51 @@ export function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | n
|
|
|
1258
1258
|
return isKimiK27CodeModelId(modelId) ? Math.min(candidate, KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS) : candidate;
|
|
1259
1259
|
}
|
|
1260
1260
|
|
|
1261
|
+
/**
|
|
1262
|
+
* Fireworks Fast variants we surface. Each inherits the base model's
|
|
1263
|
+
* limits/modalities/thinking and overrides only the cost with the Standard-column
|
|
1264
|
+
* Fast prices from the Serverless pricing table; `cacheWrite` stays 0 (Fireworks
|
|
1265
|
+
* bills no cache-write). Derived from the bundled base entries so metadata stays
|
|
1266
|
+
* in lockstep, and the runtime auto-falls back to the base id on a failed fast
|
|
1267
|
+
* request. See https://docs.fireworks.ai/serverless/pricing.
|
|
1268
|
+
*/
|
|
1269
|
+
const FIREWORKS_FAST_VARIANT_SPECS: ReadonlyArray<{
|
|
1270
|
+
base: string;
|
|
1271
|
+
name: string;
|
|
1272
|
+
cost: { input: number; output: number; cacheRead: number };
|
|
1273
|
+
}> = [
|
|
1274
|
+
{ base: "kimi-k2.7-code", name: "Kimi K2.7 Code Fast", cost: { input: 1.9, output: 8, cacheRead: 0.38 } },
|
|
1275
|
+
{ base: "kimi-k2.6", name: "Kimi K2.6 Fast", cost: { input: 2, output: 8, cacheRead: 0.3 } },
|
|
1276
|
+
{ base: "glm-5.1", name: "GLM-5.1 Fast", cost: { input: 2.8, output: 8.8, cacheRead: 0.52 } },
|
|
1277
|
+
];
|
|
1278
|
+
|
|
1279
|
+
/**
|
|
1280
|
+
* Build the Fireworks Fast seed by projecting each base bundled spec into a
|
|
1281
|
+
* `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
|
|
1282
|
+
* appear in the serverless control-plane list, so discovery cannot surface
|
|
1283
|
+
* them) and deduped behind any identical previous-snapshot entry.
|
|
1284
|
+
*/
|
|
1285
|
+
export function buildFireworksFastSeed(): ModelSpec<"openai-completions">[] {
|
|
1286
|
+
const bundled = createBundledReferenceMap<"openai-completions">("fireworks");
|
|
1287
|
+
const seeds: ModelSpec<"openai-completions">[] = [];
|
|
1288
|
+
for (const variant of FIREWORKS_FAST_VARIANT_SPECS) {
|
|
1289
|
+
const base = bundled.get(variant.base);
|
|
1290
|
+
if (!base) continue;
|
|
1291
|
+
seeds.push({
|
|
1292
|
+
...base,
|
|
1293
|
+
id: `${variant.base}${FIREWORKS_FAST_SUFFIX}`,
|
|
1294
|
+
name: variant.name,
|
|
1295
|
+
cost: {
|
|
1296
|
+
input: variant.cost.input,
|
|
1297
|
+
output: variant.cost.output,
|
|
1298
|
+
cacheRead: variant.cost.cacheRead,
|
|
1299
|
+
cacheWrite: 0,
|
|
1300
|
+
},
|
|
1301
|
+
});
|
|
1302
|
+
}
|
|
1303
|
+
return seeds;
|
|
1304
|
+
}
|
|
1305
|
+
|
|
1261
1306
|
/**
|
|
1262
1307
|
* Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
|
|
1263
1308
|
* DeepSeek-native binary `thinking` toggle when both are present.
|