@oh-my-pi/pi-catalog 16.1.7 → 16.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/types/fireworks-model-id.d.ts +13 -0
- package/dist/types/provider-models/descriptors.d.ts +1 -10
- package/dist/types/provider-models/openai-compat.d.ts +7 -1
- package/package.json +3 -3
- package/src/compat/openai.ts +35 -10
- package/src/fireworks-model-id.ts +20 -0
- package/src/model-thinking.ts +26 -1
- package/src/models.json +228 -154
- package/src/provider-models/descriptors.ts +3 -9
- package/src/provider-models/openai-compat.ts +70 -50
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.1.9] - 2026-06-21
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Fixed the `moonshot` provider having no path to the Kimi China API: model discovery now honors a `MOONSHOT_BASE_URL` override (redirecting to `api.moonshot.cn`), and `KIMI_API_KEY` resolves as a fallback for `MOONSHOT_API_KEY`. ([#2883](https://github.com/can1357/oh-my-pi/issues/2883))
|
|
10
|
+
- Fixed LiteLLM model discovery preserving colliding models.dev transport metadata (for example `ollama-cloud` `deepseek-v4-flash`) instead of keeping the LiteLLM `openai-completions` provider transport. ([#3162](https://github.com/can1357/oh-my-pi/issues/3162))
|
|
11
|
+
|
|
12
|
+
### Removed
|
|
13
|
+
|
|
14
|
+
- Removed bundled Wafer Pass (`wafer-pass`) catalog entries and generation support; Wafer Serverless remains available as `wafer-serverless`.
|
|
15
|
+
|
|
16
|
+
## [16.1.8] - 2026-06-20
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
- Fixed Fireworks-hosted Qwen turns (e.g. `fireworks/qwen3.7-plus`) failing with `400 Extra inputs are not permitted, field: 'enable_thinking'`. Fireworks serves Qwen3 with controllable thinking via OpenAI-style `reasoning_effort` and rejects the top-level `enable_thinking` boolean that Alibaba DashScope speaks; `buildOpenAICompat` was selecting `thinkingFormat: "qwen"` from the `qwen` id pattern regardless of host. Fireworks-hosted Qwen models now resolve to `thinkingFormat: "openai"`.
|
|
21
|
+
- Fixed MiMo models on OpenAI-compatible gateways to expose only accepted `low`, `medium`, and `high` reasoning tiers and map unsupported raw `minimal`/`xhigh` requests to safe wire values. ([#2864](https://github.com/can1357/oh-my-pi/issues/2864))
|
|
22
|
+
|
|
5
23
|
## [16.1.7] - 2026-06-20
|
|
6
24
|
|
|
7
25
|
### Fixed
|
|
package/dist/types/fireworks-model-id.d.ts
CHANGED
|
@@ -8,3 +8,16 @@ export declare function toFireworksWireModelId(modelId: string): string;
|
|
|
8
8
|
*/
|
|
9
9
|
export declare function toFirepassPublicModelId(modelId: string): string;
|
|
10
10
|
export declare function toFirepassWireModelId(modelId: string): string;
|
|
11
|
+
/**
|
|
12
|
+
* Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
|
|
13
|
+
* higher-throughput route (100+ tok/s) exposed under a dedicated router id
|
|
14
|
+
* (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
|
|
15
|
+
* higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
|
|
16
|
+
* translate it to the router wire form at request time (compat
|
|
17
|
+
* `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
|
|
18
|
+
*/
|
|
19
|
+
export declare const FIREWORKS_FAST_SUFFIX = "-fast";
|
|
20
|
+
/** True for a Fireworks public model id that selects the Fast serving path. */
|
|
21
|
+
export declare function isFireworksFastModelId(modelId: string): boolean;
|
|
22
|
+
/** Strip the Fast suffix to recover the base (Standard-tier) model id. */
|
|
23
|
+
export declare function toFireworksBaseModelId(modelId: string): string;
|
|
package/dist/types/provider-models/descriptors.d.ts
CHANGED
|
@@ -173,7 +173,7 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
173
173
|
}, {
|
|
174
174
|
readonly id: "moonshot";
|
|
175
175
|
readonly defaultModel: "kimi-k2.7-code";
|
|
176
|
-
readonly envVars: readonly ["MOONSHOT_API_KEY"];
|
|
176
|
+
readonly envVars: readonly ["MOONSHOT_API_KEY", "KIMI_API_KEY"];
|
|
177
177
|
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-completions", unknown>;
|
|
178
178
|
readonly catalogDiscovery: {
|
|
179
179
|
readonly label: "Moonshot";
|
|
@@ -310,15 +310,6 @@ export declare const CATALOG_PROVIDERS: readonly [{
|
|
|
310
310
|
readonly label: "vLLM";
|
|
311
311
|
readonly allowUnauthenticated: true;
|
|
312
312
|
};
|
|
313
|
-
}, {
|
|
314
|
-
readonly id: "wafer-pass";
|
|
315
|
-
readonly defaultModel: "GLM-5.1";
|
|
316
|
-
readonly envVars: readonly ["WAFER_PASS_API_KEY"];
|
|
317
|
-
readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"openai-completions", unknown>;
|
|
318
|
-
readonly catalogDiscovery: {
|
|
319
|
-
readonly label: "Wafer Pass";
|
|
320
|
-
readonly oauthProvider: "wafer-pass";
|
|
321
|
-
};
|
|
322
313
|
}, {
|
|
323
314
|
readonly id: "wafer-serverless";
|
|
324
315
|
readonly defaultModel: "GLM-5.1";
|
|
package/dist/types/provider-models/openai-compat.d.ts
CHANGED
|
@@ -179,6 +179,13 @@ export declare const KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS = 32768;
|
|
|
179
179
|
export declare function isKimiK27CodeModelId(modelId: string): boolean;
|
|
180
180
|
export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number): number;
|
|
181
181
|
export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | null): number | null;
|
|
182
|
+
/**
|
|
183
|
+
* Build the Fireworks Fast seed by projecting each base bundled spec into a
|
|
184
|
+
* `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
|
|
185
|
+
* appear in the serverless control-plane list, so discovery cannot surface
|
|
186
|
+
* them) and deduped behind any identical previous-snapshot entry.
|
|
187
|
+
*/
|
|
188
|
+
export declare function buildFireworksFastSeed(): ModelSpec<"openai-completions">[];
|
|
182
189
|
/**
|
|
183
190
|
* Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
|
|
184
191
|
* DeepSeek-native binary `thinking` toggle when both are present.
|
|
@@ -208,7 +215,6 @@ export interface WaferModelManagerConfig {
|
|
|
208
215
|
baseUrl?: string;
|
|
209
216
|
fetch?: FetchImpl;
|
|
210
217
|
}
|
|
211
|
-
export declare function waferPassModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
212
218
|
export declare function waferServerlessModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
213
219
|
export interface MistralModelManagerConfig {
|
|
214
220
|
apiKey?: string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-catalog",
|
|
4
|
-
"version": "16.1.7",
|
|
4
|
+
"version": "16.1.9",
|
|
5
5
|
"description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -34,12 +34,12 @@
|
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@bufbuild/protobuf": "^2.12.0",
|
|
37
|
-
"@oh-my-pi/pi-utils": "16.1.7",
|
|
37
|
+
"@oh-my-pi/pi-utils": "16.1.9",
|
|
38
38
|
"arktype": "^2.2.0",
|
|
39
39
|
"zod": "^4"
|
|
40
40
|
},
|
|
41
41
|
"devDependencies": {
|
|
42
|
-
"@oh-my-pi/pi-ai": "16.1.7",
|
|
42
|
+
"@oh-my-pi/pi-ai": "16.1.9",
|
|
43
43
|
"@types/bun": "^1.3.14"
|
|
44
44
|
},
|
|
45
45
|
"engines": {
|
package/src/compat/openai.ts
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
* complete alternate views. Request handlers read `model.compat` fields and
|
|
8
8
|
* never detect, resolve, or allocate.
|
|
9
9
|
*/
|
|
10
|
+
import { isFireworksFastModelId } from "../fireworks-model-id";
|
|
10
11
|
import { hostMatchesUrl, modelMatchesHost } from "../hosts";
|
|
11
12
|
import {
|
|
12
13
|
isAnthropicNamespacedModelId,
|
|
@@ -130,6 +131,16 @@ const OPENCODE_WHEN_THINKING: NonNullable<OpenAICompat["whenThinking"]> = {
|
|
|
130
131
|
reasoningContentField: "reasoning_content",
|
|
131
132
|
};
|
|
132
133
|
|
|
134
|
+
const MIMO_REASONING_EFFORT_MAP: NonNullable<OpenAICompat["reasoningEffortMap"]> = {
|
|
135
|
+
minimal: "low",
|
|
136
|
+
xhigh: "high",
|
|
137
|
+
};
|
|
138
|
+
|
|
139
|
+
function mergeMimoReasoningEffortMap(compat: ResolvedOpenAISharedCompat, enabled: boolean): void {
|
|
140
|
+
if (!enabled) return;
|
|
141
|
+
compat.reasoningEffortMap = { ...MIMO_REASONING_EFFORT_MAP, ...compat.reasoningEffortMap };
|
|
142
|
+
}
|
|
143
|
+
|
|
133
144
|
function detectStrictModeSupport(provider: string, baseUrl: string): boolean {
|
|
134
145
|
if (
|
|
135
146
|
provider === "openai" ||
|
|
@@ -184,6 +195,8 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
184
195
|
const lowerName = (spec.name ?? "").toLowerCase();
|
|
185
196
|
const isXiaomiHost = modelMatchesHost(hostModel, "xiaomi");
|
|
186
197
|
const isXiaomiMimo = isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
|
|
198
|
+
const isMimoReasoningEffortModel =
|
|
199
|
+
!isXiaomiHost && (isMimoModelIdOrName(spec.id) || isMimoModelIdOrName(spec.name ?? ""));
|
|
187
200
|
// OpenCode Zen's `big-pickle` is a DeepSeek reasoning alias; the upstream
|
|
188
201
|
// 400s come from DeepSeek and require exact reasoning_content replay.
|
|
189
202
|
const isOpenCodeDeepseekAlias =
|
|
@@ -238,17 +251,21 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
238
251
|
const isGroqHost = modelMatchesHost(hostModel, "groq");
|
|
239
252
|
const isCopilotHost = provider === "github-copilot";
|
|
240
253
|
const isZenmuxHost = provider === "zenmux";
|
|
241
|
-
// Endpoints that MUST receive a single system block. MiniMax's OpenAI
|
|
242
|
-
// endpoint returns error 2013 on multiple system messages;
|
|
243
|
-
//
|
|
244
|
-
//
|
|
245
|
-
//
|
|
254
|
+
// Endpoints/models that MUST receive a single system block. MiniMax's OpenAI
|
|
255
|
+
// endpoint returns error 2013 on multiple system messages; the Qwen 3.5+ chat
|
|
256
|
+
// template raises "System message must be at the beginning" / 500s with an
|
|
257
|
+
// internal_server_error when any system block appears past index 0. That
|
|
258
|
+
// template ships with the weights, so every Qwen-serving vLLM/SGLang host
|
|
259
|
+
// hits it — confirmed on Alibaba Dashscope, Qwen Portal, and Fireworks
|
|
260
|
+
// (`fireworks/qwen3.7-plus` 500'd on two leading system blocks). Gate on the
|
|
261
|
+
// Qwen family itself, not per-host: coalescing only trades away KV-cache reuse.
|
|
246
262
|
const isMiniMaxHost = modelMatchesHost(hostModel, "minimax");
|
|
247
263
|
const isQwenPortal = modelMatchesHost(hostModel, "qwenPortal");
|
|
248
264
|
const supportsMultipleSystemMessagesDefault =
|
|
249
265
|
!isMiniMaxHost &&
|
|
250
266
|
!isAlibaba &&
|
|
251
267
|
!isQwenPortal &&
|
|
268
|
+
!isQwen &&
|
|
252
269
|
(isOpenAIHost ||
|
|
253
270
|
isAzureHost ||
|
|
254
271
|
isOpenRouter ||
|
|
@@ -276,8 +293,12 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
276
293
|
? DEEPSEEK_REASONING_STREAM_IDLE_TIMEOUT_MS
|
|
277
294
|
: undefined;
|
|
278
295
|
|
|
296
|
+
// Fireworks "Fast" variants (`<id>-fast`) are served from the router
|
|
297
|
+
// namespace (`accounts/fireworks/routers/<id>-fast`), like Fire Pass, rather
|
|
298
|
+
// than the `models/` namespace the rest of the `fireworks` provider uses.
|
|
299
|
+
const isFireworksFastRouter = provider === "fireworks" && isFireworksFastModelId(spec.id);
|
|
279
300
|
const wireModelIdMode: ResolvedOpenAISharedCompat["wireModelIdMode"] =
|
|
280
|
-
provider === "firepass"
|
|
301
|
+
provider === "firepass" || isFireworksFastRouter
|
|
281
302
|
? "firepass"
|
|
282
303
|
: provider === "fireworks"
|
|
283
304
|
? "fireworks"
|
|
@@ -291,9 +312,11 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
291
312
|
? "openrouter"
|
|
292
313
|
: isQwen && isNvidiaNim
|
|
293
314
|
? "qwen-chat-template"
|
|
294
|
-
: isAlibaba || isQwen
|
|
295
|
-
? "qwen"
|
|
296
|
-
: "openai";
|
|
315
|
+
: isQwen && isFireworks
|
|
316
|
+
? "openai"
|
|
317
|
+
: isAlibaba || isQwen
|
|
318
|
+
? "qwen"
|
|
319
|
+
: "openai";
|
|
297
320
|
|
|
298
321
|
const compat: ResolvedOpenAICompat = {
|
|
299
322
|
supportsStore: !isNonStandard,
|
|
@@ -308,7 +331,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
308
331
|
supportsReasoningEffort: !isGrok && !isXiaomiMimo && (!(isZai || isZhipu) || supportsZaiReasoningEffort),
|
|
309
332
|
// GitHub Copilot's chat-completions endpoint rejects reasoning params wholesale.
|
|
310
333
|
supportsReasoningParams: provider !== "github-copilot",
|
|
311
|
-
reasoningEffortMap: {},
|
|
334
|
+
reasoningEffortMap: isMimoReasoningEffortModel ? MIMO_REASONING_EFFORT_MAP : {},
|
|
312
335
|
supportsUsageInStreaming: !isCerebras,
|
|
313
336
|
// pi-ai's thinking-loop guard is gemini-only; default the flag from the
|
|
314
337
|
// family classifier so OpenAI-compat proxies serving Gemini are covered.
|
|
@@ -400,6 +423,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
400
423
|
compat.omitReasoningEffort = true;
|
|
401
424
|
}
|
|
402
425
|
mergeOllamaReasoningEffortMap(compat, provider, spec.reasoning);
|
|
426
|
+
mergeMimoReasoningEffortMap(compat, isMimoReasoningEffortModel);
|
|
403
427
|
|
|
404
428
|
const whenThinkingPolicy =
|
|
405
429
|
spec.compat?.whenThinking ?? (isOpenCodeProvider && spec.reasoning ? OPENCODE_WHEN_THINKING : undefined);
|
|
@@ -413,6 +437,7 @@ export function buildOpenAICompat(spec: ModelSpec<"openai-completions">): Resolv
|
|
|
413
437
|
variant.omitReasoningEffort = true;
|
|
414
438
|
}
|
|
415
439
|
mergeOllamaReasoningEffortMap(variant, provider, spec.reasoning);
|
|
440
|
+
mergeMimoReasoningEffortMap(variant, isMimoReasoningEffortModel);
|
|
416
441
|
compat.whenThinking = variant;
|
|
417
442
|
}
|
|
418
443
|
|
|
package/src/fireworks-model-id.ts
CHANGED
|
@@ -28,3 +28,23 @@ export function toFirepassWireModelId(modelId: string): string {
|
|
|
28
28
|
const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
|
|
29
29
|
return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
|
|
30
30
|
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Public-id suffix marking a Fireworks "Fast" serving-path variant. Fast is a
|
|
34
|
+
* higher-throughput route (100+ tok/s) exposed under a dedicated router id
|
|
35
|
+
* (`accounts/fireworks/routers/<id>-fast`), not a separate model — same weights,
|
|
36
|
+
* higher price, no Priority tier. We keep a friendly `<id>-fast` public id and
|
|
37
|
+
* translate it to the router wire form at request time (compat
|
|
38
|
+
* `wireModelIdMode: "firepass"`). See https://docs.fireworks.ai/serverless/serving-paths.
|
|
39
|
+
*/
|
|
40
|
+
export const FIREWORKS_FAST_SUFFIX = "-fast";
|
|
41
|
+
|
|
42
|
+
/** True for a Fireworks public model id that selects the Fast serving path. */
|
|
43
|
+
export function isFireworksFastModelId(modelId: string): boolean {
|
|
44
|
+
return modelId.endsWith(FIREWORKS_FAST_SUFFIX);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/** Strip the Fast suffix to recover the base (Standard-tier) model id. */
|
|
48
|
+
export function toFireworksBaseModelId(modelId: string): string {
|
|
49
|
+
return modelId.endsWith(FIREWORKS_FAST_SUFFIX) ? modelId.slice(0, -FIREWORKS_FAST_SUFFIX.length) : modelId;
|
|
50
|
+
}
|
package/src/model-thinking.ts
CHANGED
|
@@ -24,6 +24,7 @@ import {
|
|
|
24
24
|
findThinkingVariantToken,
|
|
25
25
|
isDeepseekModelIdOrName,
|
|
26
26
|
isGlm52ReasoningEffortModelId,
|
|
27
|
+
isMimoModelIdOrName,
|
|
27
28
|
isMinimaxM2FamilyModelId,
|
|
28
29
|
isMinimaxM3FamilyModelId,
|
|
29
30
|
isOpenAIGptOssModelId,
|
|
@@ -89,6 +90,10 @@ const ZAI_GLM_52_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
|
89
90
|
const GLM_52_XHIGH_MAX_EFFORT_MAP: Readonly<EffortMap> = {
|
|
90
91
|
[Effort.XHigh]: "max",
|
|
91
92
|
};
|
|
93
|
+
const MIMO_REASONING_EFFORT_MAP: Readonly<EffortMap> = {
|
|
94
|
+
[Effort.Minimal]: "low",
|
|
95
|
+
[Effort.XHigh]: "high",
|
|
96
|
+
};
|
|
92
97
|
|
|
93
98
|
/**
|
|
94
99
|
* Effort → wire-value map for the 5-tier adaptive scale (Opus 4.7+ and
|
|
@@ -296,7 +301,10 @@ function getModelDefinedEfforts<TApi extends Api>(
|
|
|
296
301
|
return GLM_52_HIGH_MAX_REASONING_EFFORTS;
|
|
297
302
|
}
|
|
298
303
|
}
|
|
299
|
-
return isOpenAICompatReasoningApi(spec.api) && (isMinimaxM2FamilyModelId(spec.id) || isOpenAIGptOssModelId(spec.id))
|
|
304
|
+
return isOpenAICompatReasoningApi(spec.api) &&
|
|
305
|
+
(isMinimaxM2FamilyModelId(spec.id) ||
|
|
306
|
+
isOpenAIGptOssModelId(spec.id) ||
|
|
307
|
+
isOpenAICompatMimoReasoningEffortModel(spec, compat))
|
|
300
308
|
? LOW_MEDIUM_HIGH_REASONING_EFFORTS
|
|
301
309
|
: undefined;
|
|
302
310
|
}
|
|
@@ -309,6 +317,19 @@ function isMinimaxReasoningModelOnAnthropicEndpoint<TApi extends Api>(spec: Mode
|
|
|
309
317
|
return spec.api === "anthropic-messages" && (isMinimaxM2FamilyModelId(spec.id) || isMinimaxM3FamilyModelId(spec.id));
|
|
310
318
|
}
|
|
311
319
|
|
|
320
|
+
function isOpenAICompatMimoReasoningEffortModel<TApi extends Api>(
|
|
321
|
+
spec: ModelSpec<TApi>,
|
|
322
|
+
compat: CompatOf<TApi>,
|
|
323
|
+
): boolean {
|
|
324
|
+
if (!isOpenAICompatReasoningApi(spec.api)) return false;
|
|
325
|
+
if (!isMimoModelIdOrName(spec.id) && !isMimoModelIdOrName(spec.name ?? "")) return false;
|
|
326
|
+
const resolved = compat as ResolvedOpenAICompat | undefined;
|
|
327
|
+
return (
|
|
328
|
+
(resolved?.thinkingFormat === "openai" || resolved?.thinkingFormat === "openrouter") &&
|
|
329
|
+
resolved.supportsReasoningEffort
|
|
330
|
+
);
|
|
331
|
+
}
|
|
332
|
+
|
|
312
333
|
function readCompatEffortMap(compat: CompatOf<Api>): EffortMap | undefined {
|
|
313
334
|
if (compat === undefined || !("reasoningEffortMap" in compat)) {
|
|
314
335
|
return undefined;
|
|
@@ -364,6 +385,8 @@ function inferDetectedEffortMap<TApi extends Api>(
|
|
|
364
385
|
map = GROQ_QWEN3_32B_REASONING_EFFORT_MAP;
|
|
365
386
|
} else if (isDeepseekReasoningModel(spec)) {
|
|
366
387
|
map = DEEPSEEK_REASONING_EFFORT_MAP;
|
|
388
|
+
} else if (isOpenAICompatMimoReasoningEffortModel(spec, compat)) {
|
|
389
|
+
map = MIMO_REASONING_EFFORT_MAP;
|
|
367
390
|
} else if (modelMatchesHost(spec, "openrouter")) {
|
|
368
391
|
map = getOpenRouterAnthropicReasoningEffortMap(spec.id);
|
|
369
392
|
} else if (modelMatchesHost(spec, "fireworks")) {
|
|
@@ -485,6 +508,8 @@ function inferAnthropicSupportedEfforts<TApi extends Api>(
|
|
|
485
508
|
}
|
|
486
509
|
|
|
487
510
|
function inferFallbackEfforts<TApi extends Api>(spec: ModelSpec<TApi>, compat: CompatOf<TApi>): readonly Effort[] {
|
|
511
|
+
const modelDefinedEfforts = getModelDefinedEfforts(spec, compat);
|
|
512
|
+
if (modelDefinedEfforts !== undefined) return modelDefinedEfforts;
|
|
488
513
|
if (isMinimaxReasoningModelOnAnthropicEndpoint(spec)) {
|
|
489
514
|
return LOW_MEDIUM_HIGH_REASONING_EFFORTS;
|
|
490
515
|
}
|
package/src/models.json
CHANGED
|
@@ -7208,11 +7208,9 @@
|
|
|
7208
7208
|
"thinking": {
|
|
7209
7209
|
"mode": "effort",
|
|
7210
7210
|
"efforts": [
|
|
7211
|
-
"minimal",
|
|
7212
7211
|
"low",
|
|
7213
7212
|
"medium",
|
|
7214
|
-
"high",
|
|
7215
|
-
"xhigh"
|
|
7213
|
+
"high"
|
|
7216
7214
|
]
|
|
7217
7215
|
}
|
|
7218
7216
|
},
|
|
@@ -7238,11 +7236,9 @@
|
|
|
7238
7236
|
"thinking": {
|
|
7239
7237
|
"mode": "effort",
|
|
7240
7238
|
"efforts": [
|
|
7241
|
-
"minimal",
|
|
7242
7239
|
"low",
|
|
7243
7240
|
"medium",
|
|
7244
|
-
"high",
|
|
7245
|
-
"xhigh"
|
|
7241
|
+
"high"
|
|
7246
7242
|
]
|
|
7247
7243
|
}
|
|
7248
7244
|
},
|
|
@@ -7267,11 +7263,9 @@
|
|
|
7267
7263
|
"thinking": {
|
|
7268
7264
|
"mode": "effort",
|
|
7269
7265
|
"efforts": [
|
|
7270
|
-
"minimal",
|
|
7271
7266
|
"low",
|
|
7272
7267
|
"medium",
|
|
7273
|
-
"high",
|
|
7274
|
-
"xhigh"
|
|
7268
|
+
"high"
|
|
7275
7269
|
]
|
|
7276
7270
|
}
|
|
7277
7271
|
}
|
|
@@ -14801,6 +14795,38 @@
|
|
|
14801
14795
|
}
|
|
14802
14796
|
}
|
|
14803
14797
|
},
|
|
14798
|
+
"glm-5.1-fast": {
|
|
14799
|
+
"id": "glm-5.1-fast",
|
|
14800
|
+
"name": "GLM-5.1 Fast",
|
|
14801
|
+
"api": "openai-completions",
|
|
14802
|
+
"provider": "fireworks",
|
|
14803
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
14804
|
+
"reasoning": true,
|
|
14805
|
+
"input": [
|
|
14806
|
+
"text"
|
|
14807
|
+
],
|
|
14808
|
+
"cost": {
|
|
14809
|
+
"input": 2.8,
|
|
14810
|
+
"output": 8.8,
|
|
14811
|
+
"cacheRead": 0.52,
|
|
14812
|
+
"cacheWrite": 0
|
|
14813
|
+
},
|
|
14814
|
+
"contextWindow": 202752,
|
|
14815
|
+
"maxTokens": 131072,
|
|
14816
|
+
"thinking": {
|
|
14817
|
+
"mode": "effort",
|
|
14818
|
+
"efforts": [
|
|
14819
|
+
"minimal",
|
|
14820
|
+
"low",
|
|
14821
|
+
"medium",
|
|
14822
|
+
"high",
|
|
14823
|
+
"xhigh"
|
|
14824
|
+
],
|
|
14825
|
+
"effortMap": {
|
|
14826
|
+
"minimal": "none"
|
|
14827
|
+
}
|
|
14828
|
+
}
|
|
14829
|
+
},
|
|
14804
14830
|
"glm-5.2": {
|
|
14805
14831
|
"id": "glm-5.2",
|
|
14806
14832
|
"name": "GLM-5.2",
|
|
@@ -14947,6 +14973,39 @@
|
|
|
14947
14973
|
}
|
|
14948
14974
|
}
|
|
14949
14975
|
},
|
|
14976
|
+
"kimi-k2.6-fast": {
|
|
14977
|
+
"id": "kimi-k2.6-fast",
|
|
14978
|
+
"name": "Kimi K2.6 Fast",
|
|
14979
|
+
"api": "openai-completions",
|
|
14980
|
+
"provider": "fireworks",
|
|
14981
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
14982
|
+
"reasoning": true,
|
|
14983
|
+
"input": [
|
|
14984
|
+
"text",
|
|
14985
|
+
"image"
|
|
14986
|
+
],
|
|
14987
|
+
"cost": {
|
|
14988
|
+
"input": 2,
|
|
14989
|
+
"output": 8,
|
|
14990
|
+
"cacheRead": 0.3,
|
|
14991
|
+
"cacheWrite": 0
|
|
14992
|
+
},
|
|
14993
|
+
"contextWindow": 262144,
|
|
14994
|
+
"maxTokens": 32768,
|
|
14995
|
+
"thinking": {
|
|
14996
|
+
"mode": "effort",
|
|
14997
|
+
"efforts": [
|
|
14998
|
+
"minimal",
|
|
14999
|
+
"low",
|
|
15000
|
+
"medium",
|
|
15001
|
+
"high",
|
|
15002
|
+
"xhigh"
|
|
15003
|
+
],
|
|
15004
|
+
"effortMap": {
|
|
15005
|
+
"minimal": "none"
|
|
15006
|
+
}
|
|
15007
|
+
}
|
|
15008
|
+
},
|
|
14950
15009
|
"kimi-k2.7-code": {
|
|
14951
15010
|
"id": "kimi-k2.7-code",
|
|
14952
15011
|
"name": "Kimi K2.7 Code",
|
|
@@ -14980,6 +15039,39 @@
|
|
|
14980
15039
|
}
|
|
14981
15040
|
}
|
|
14982
15041
|
},
|
|
15042
|
+
"kimi-k2.7-code-fast": {
|
|
15043
|
+
"id": "kimi-k2.7-code-fast",
|
|
15044
|
+
"name": "Kimi K2.7 Code Fast",
|
|
15045
|
+
"api": "openai-completions",
|
|
15046
|
+
"provider": "fireworks",
|
|
15047
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
15048
|
+
"reasoning": true,
|
|
15049
|
+
"input": [
|
|
15050
|
+
"text",
|
|
15051
|
+
"image"
|
|
15052
|
+
],
|
|
15053
|
+
"cost": {
|
|
15054
|
+
"input": 1.9,
|
|
15055
|
+
"output": 8,
|
|
15056
|
+
"cacheRead": 0.38,
|
|
15057
|
+
"cacheWrite": 0
|
|
15058
|
+
},
|
|
15059
|
+
"contextWindow": 262144,
|
|
15060
|
+
"maxTokens": 32768,
|
|
15061
|
+
"thinking": {
|
|
15062
|
+
"mode": "effort",
|
|
15063
|
+
"efforts": [
|
|
15064
|
+
"minimal",
|
|
15065
|
+
"low",
|
|
15066
|
+
"medium",
|
|
15067
|
+
"high",
|
|
15068
|
+
"xhigh"
|
|
15069
|
+
],
|
|
15070
|
+
"effortMap": {
|
|
15071
|
+
"minimal": "none"
|
|
15072
|
+
}
|
|
15073
|
+
}
|
|
15074
|
+
},
|
|
14983
15075
|
"minimax-m2.5": {
|
|
14984
15076
|
"id": "minimax-m2.5",
|
|
14985
15077
|
"name": "MiniMax M2.5",
|
|
@@ -20276,11 +20368,9 @@
|
|
|
20276
20368
|
"thinking": {
|
|
20277
20369
|
"mode": "effort",
|
|
20278
20370
|
"efforts": [
|
|
20279
|
-
"minimal",
|
|
20280
20371
|
"low",
|
|
20281
20372
|
"medium",
|
|
20282
|
-
"high",
|
|
20283
|
-
"xhigh"
|
|
20373
|
+
"high"
|
|
20284
20374
|
]
|
|
20285
20375
|
}
|
|
20286
20376
|
},
|
|
@@ -30625,11 +30715,9 @@
|
|
|
30625
30715
|
"thinking": {
|
|
30626
30716
|
"mode": "effort",
|
|
30627
30717
|
"efforts": [
|
|
30628
|
-
"minimal",
|
|
30629
30718
|
"low",
|
|
30630
30719
|
"medium",
|
|
30631
|
-
"high",
|
|
30632
|
-
"xhigh"
|
|
30720
|
+
"high"
|
|
30633
30721
|
]
|
|
30634
30722
|
}
|
|
30635
30723
|
},
|
|
@@ -30655,11 +30743,9 @@
|
|
|
30655
30743
|
"thinking": {
|
|
30656
30744
|
"mode": "effort",
|
|
30657
30745
|
"efforts": [
|
|
30658
|
-
"minimal",
|
|
30659
30746
|
"low",
|
|
30660
30747
|
"medium",
|
|
30661
|
-
"high",
|
|
30662
|
-
"xhigh"
|
|
30748
|
+
"high"
|
|
30663
30749
|
]
|
|
30664
30750
|
}
|
|
30665
30751
|
},
|
|
@@ -30703,11 +30789,9 @@
|
|
|
30703
30789
|
"thinking": {
|
|
30704
30790
|
"mode": "effort",
|
|
30705
30791
|
"efforts": [
|
|
30706
|
-
"minimal",
|
|
30707
30792
|
"low",
|
|
30708
30793
|
"medium",
|
|
30709
|
-
"high",
|
|
30710
|
-
"xhigh"
|
|
30794
|
+
"high"
|
|
30711
30795
|
]
|
|
30712
30796
|
}
|
|
30713
30797
|
},
|
|
@@ -30752,11 +30836,9 @@
|
|
|
30752
30836
|
"thinking": {
|
|
30753
30837
|
"mode": "effort",
|
|
30754
30838
|
"efforts": [
|
|
30755
|
-
"minimal",
|
|
30756
30839
|
"low",
|
|
30757
30840
|
"medium",
|
|
30758
|
-
"high",
|
|
30759
|
-
"xhigh"
|
|
30841
|
+
"high"
|
|
30760
30842
|
]
|
|
30761
30843
|
}
|
|
30762
30844
|
},
|
|
@@ -30781,11 +30863,9 @@
|
|
|
30781
30863
|
"thinking": {
|
|
30782
30864
|
"mode": "effort",
|
|
30783
30865
|
"efforts": [
|
|
30784
|
-
"minimal",
|
|
30785
30866
|
"low",
|
|
30786
30867
|
"medium",
|
|
30787
|
-
"high",
|
|
30788
|
-
"xhigh"
|
|
30868
|
+
"high"
|
|
30789
30869
|
]
|
|
30790
30870
|
}
|
|
30791
30871
|
},
|
|
@@ -31172,7 +31252,7 @@
|
|
|
31172
31252
|
"kimi-code": {
|
|
31173
31253
|
"kimi-for-coding": {
|
|
31174
31254
|
"id": "kimi-for-coding",
|
|
31175
|
-
"name": "
|
|
31255
|
+
"name": "K2.7 Code",
|
|
31176
31256
|
"api": "openai-completions",
|
|
31177
31257
|
"provider": "kimi-code",
|
|
31178
31258
|
"baseUrl": "https://api.kimi.com/coding/v1",
|
|
@@ -49119,11 +49199,9 @@
|
|
|
49119
49199
|
"thinking": {
|
|
49120
49200
|
"mode": "effort",
|
|
49121
49201
|
"efforts": [
|
|
49122
|
-
"minimal",
|
|
49123
49202
|
"low",
|
|
49124
49203
|
"medium",
|
|
49125
|
-
"high",
|
|
49126
|
-
"xhigh"
|
|
49204
|
+
"high"
|
|
49127
49205
|
],
|
|
49128
49206
|
"effortRouting": {
|
|
49129
49207
|
"off": "xiaomi/mimo-v2-flash",
|
|
@@ -49183,11 +49261,9 @@
|
|
|
49183
49261
|
"thinking": {
|
|
49184
49262
|
"mode": "effort",
|
|
49185
49263
|
"efforts": [
|
|
49186
|
-
"minimal",
|
|
49187
49264
|
"low",
|
|
49188
49265
|
"medium",
|
|
49189
|
-
"high",
|
|
49190
|
-
"xhigh"
|
|
49266
|
+
"high"
|
|
49191
49267
|
],
|
|
49192
49268
|
"effortRouting": {
|
|
49193
49269
|
"off": "xiaomi/mimo-v2-flash-original",
|
|
@@ -49248,11 +49324,9 @@
|
|
|
49248
49324
|
"thinking": {
|
|
49249
49325
|
"mode": "effort",
|
|
49250
49326
|
"efforts": [
|
|
49251
|
-
"minimal",
|
|
49252
49327
|
"low",
|
|
49253
49328
|
"medium",
|
|
49254
|
-
"high",
|
|
49255
|
-
"xhigh"
|
|
49329
|
+
"high"
|
|
49256
49330
|
]
|
|
49257
49331
|
}
|
|
49258
49332
|
},
|
|
@@ -49277,11 +49351,9 @@
|
|
|
49277
49351
|
"thinking": {
|
|
49278
49352
|
"mode": "effort",
|
|
49279
49353
|
"efforts": [
|
|
49280
|
-
"minimal",
|
|
49281
49354
|
"low",
|
|
49282
49355
|
"medium",
|
|
49283
|
-
"high",
|
|
49284
|
-
"xhigh"
|
|
49356
|
+
"high"
|
|
49285
49357
|
]
|
|
49286
49358
|
}
|
|
49287
49359
|
},
|
|
@@ -49307,11 +49379,9 @@
|
|
|
49307
49379
|
"thinking": {
|
|
49308
49380
|
"mode": "effort",
|
|
49309
49381
|
"efforts": [
|
|
49310
|
-
"minimal",
|
|
49311
49382
|
"low",
|
|
49312
49383
|
"medium",
|
|
49313
|
-
"high",
|
|
49314
|
-
"xhigh"
|
|
49384
|
+
"high"
|
|
49315
49385
|
]
|
|
49316
49386
|
}
|
|
49317
49387
|
},
|
|
@@ -49336,11 +49406,9 @@
|
|
|
49336
49406
|
"thinking": {
|
|
49337
49407
|
"mode": "effort",
|
|
49338
49408
|
"efforts": [
|
|
49339
|
-
"minimal",
|
|
49340
49409
|
"low",
|
|
49341
49410
|
"medium",
|
|
49342
|
-
"high",
|
|
49343
|
-
"xhigh"
|
|
49411
|
+
"high"
|
|
49344
49412
|
]
|
|
49345
49413
|
}
|
|
49346
49414
|
},
|
|
@@ -56842,11 +56910,9 @@
|
|
|
56842
56910
|
"thinking": {
|
|
56843
56911
|
"mode": "effort",
|
|
56844
56912
|
"efforts": [
|
|
56845
|
-
"minimal",
|
|
56846
56913
|
"low",
|
|
56847
56914
|
"medium",
|
|
56848
|
-
"high",
|
|
56849
|
-
"xhigh"
|
|
56915
|
+
"high"
|
|
56850
56916
|
]
|
|
56851
56917
|
},
|
|
56852
56918
|
"compat": {
|
|
@@ -56874,11 +56940,9 @@
|
|
|
56874
56940
|
"thinking": {
|
|
56875
56941
|
"mode": "effort",
|
|
56876
56942
|
"efforts": [
|
|
56877
|
-
"minimal",
|
|
56878
56943
|
"low",
|
|
56879
56944
|
"medium",
|
|
56880
|
-
"high",
|
|
56881
|
-
"xhigh"
|
|
56945
|
+
"high"
|
|
56882
56946
|
]
|
|
56883
56947
|
},
|
|
56884
56948
|
"compat": {
|
|
@@ -56910,11 +56974,9 @@
|
|
|
56910
56974
|
"thinking": {
|
|
56911
56975
|
"mode": "effort",
|
|
56912
56976
|
"efforts": [
|
|
56913
|
-
"minimal",
|
|
56914
56977
|
"low",
|
|
56915
56978
|
"medium",
|
|
56916
|
-
"high",
|
|
56917
|
-
"xhigh"
|
|
56979
|
+
"high"
|
|
56918
56980
|
]
|
|
56919
56981
|
}
|
|
56920
56982
|
},
|
|
@@ -56942,11 +57004,9 @@
|
|
|
56942
57004
|
"thinking": {
|
|
56943
57005
|
"mode": "effort",
|
|
56944
57006
|
"efforts": [
|
|
56945
|
-
"minimal",
|
|
56946
57007
|
"low",
|
|
56947
57008
|
"medium",
|
|
56948
|
-
"high",
|
|
56949
|
-
"xhigh"
|
|
57009
|
+
"high"
|
|
56950
57010
|
]
|
|
56951
57011
|
}
|
|
56952
57012
|
},
|
|
@@ -58575,11 +58635,9 @@
|
|
|
58575
58635
|
"thinking": {
|
|
58576
58636
|
"mode": "effort",
|
|
58577
58637
|
"efforts": [
|
|
58578
|
-
"minimal",
|
|
58579
58638
|
"low",
|
|
58580
58639
|
"medium",
|
|
58581
|
-
"high",
|
|
58582
|
-
"xhigh"
|
|
58640
|
+
"high"
|
|
58583
58641
|
]
|
|
58584
58642
|
}
|
|
58585
58643
|
},
|
|
@@ -58605,11 +58663,9 @@
|
|
|
58605
58663
|
"thinking": {
|
|
58606
58664
|
"mode": "effort",
|
|
58607
58665
|
"efforts": [
|
|
58608
|
-
"minimal",
|
|
58609
58666
|
"low",
|
|
58610
58667
|
"medium",
|
|
58611
|
-
"high",
|
|
58612
|
-
"xhigh"
|
|
58668
|
+
"high"
|
|
58613
58669
|
]
|
|
58614
58670
|
}
|
|
58615
58671
|
},
|
|
@@ -58634,11 +58690,9 @@
|
|
|
58634
58690
|
"thinking": {
|
|
58635
58691
|
"mode": "effort",
|
|
58636
58692
|
"efforts": [
|
|
58637
|
-
"minimal",
|
|
58638
58693
|
"low",
|
|
58639
58694
|
"medium",
|
|
58640
|
-
"high",
|
|
58641
|
-
"xhigh"
|
|
58695
|
+
"high"
|
|
58642
58696
|
]
|
|
58643
58697
|
}
|
|
58644
58698
|
},
|
|
@@ -58664,11 +58718,9 @@
|
|
|
58664
58718
|
"thinking": {
|
|
58665
58719
|
"mode": "effort",
|
|
58666
58720
|
"efforts": [
|
|
58667
|
-
"minimal",
|
|
58668
58721
|
"low",
|
|
58669
58722
|
"medium",
|
|
58670
|
-
"high",
|
|
58671
|
-
"xhigh"
|
|
58723
|
+
"high"
|
|
58672
58724
|
]
|
|
58673
58725
|
}
|
|
58674
58726
|
},
|
|
@@ -67414,7 +67466,6 @@
|
|
|
67414
67466
|
"thinking": {
|
|
67415
67467
|
"mode": "effort",
|
|
67416
67468
|
"efforts": [
|
|
67417
|
-
"minimal",
|
|
67418
67469
|
"low",
|
|
67419
67470
|
"medium",
|
|
67420
67471
|
"high"
|
|
@@ -67443,7 +67494,6 @@
|
|
|
67443
67494
|
"thinking": {
|
|
67444
67495
|
"mode": "effort",
|
|
67445
67496
|
"efforts": [
|
|
67446
|
-
"minimal",
|
|
67447
67497
|
"low",
|
|
67448
67498
|
"medium",
|
|
67449
67499
|
"high"
|
|
@@ -67471,7 +67521,6 @@
|
|
|
67471
67521
|
"thinking": {
|
|
67472
67522
|
"mode": "effort",
|
|
67473
67523
|
"efforts": [
|
|
67474
|
-
"minimal",
|
|
67475
67524
|
"low",
|
|
67476
67525
|
"medium",
|
|
67477
67526
|
"high"
|
|
@@ -67500,7 +67549,6 @@
|
|
|
67500
67549
|
"thinking": {
|
|
67501
67550
|
"mode": "effort",
|
|
67502
67551
|
"efforts": [
|
|
67503
|
-
"minimal",
|
|
67504
67552
|
"low",
|
|
67505
67553
|
"medium",
|
|
67506
67554
|
"high"
|
|
@@ -67528,7 +67576,6 @@
|
|
|
67528
67576
|
"thinking": {
|
|
67529
67577
|
"mode": "effort",
|
|
67530
67578
|
"efforts": [
|
|
67531
|
-
"minimal",
|
|
67532
67579
|
"low",
|
|
67533
67580
|
"medium",
|
|
67534
67581
|
"high"
|
|
@@ -72124,11 +72171,9 @@
|
|
|
72124
72171
|
"thinking": {
|
|
72125
72172
|
"mode": "effort",
|
|
72126
72173
|
"efforts": [
|
|
72127
|
-
"minimal",
|
|
72128
72174
|
"low",
|
|
72129
72175
|
"medium",
|
|
72130
|
-
"high"
|
|
72131
|
-
"xhigh"
|
|
72176
|
+
"high"
|
|
72132
72177
|
]
|
|
72133
72178
|
}
|
|
72134
72179
|
},
|
|
@@ -77545,64 +77590,6 @@
|
|
|
77545
77590
|
}
|
|
77546
77591
|
}
|
|
77547
77592
|
},
|
|
77548
|
-
"wafer-pass": {
|
|
77549
|
-
"GLM-5.1": {
|
|
77550
|
-
"id": "GLM-5.1",
|
|
77551
|
-
"name": "GLM-5.1",
|
|
77552
|
-
"api": "openai-completions",
|
|
77553
|
-
"provider": "wafer-pass",
|
|
77554
|
-
"baseUrl": "https://pass.wafer.ai/v1",
|
|
77555
|
-
"reasoning": true,
|
|
77556
|
-
"input": [
|
|
77557
|
-
"text"
|
|
77558
|
-
],
|
|
77559
|
-
"cost": {
|
|
77560
|
-
"input": 0,
|
|
77561
|
-
"output": 0,
|
|
77562
|
-
"cacheRead": 0,
|
|
77563
|
-
"cacheWrite": 0
|
|
77564
|
-
},
|
|
77565
|
-
"contextWindow": 202752,
|
|
77566
|
-
"maxTokens": 65536,
|
|
77567
|
-
"compat": {
|
|
77568
|
-
"supportsDeveloperRole": false,
|
|
77569
|
-
"thinkingFormat": "zai",
|
|
77570
|
-
"reasoningContentField": "reasoning_content"
|
|
77571
|
-
},
|
|
77572
|
-
"thinking": {
|
|
77573
|
-
"mode": "effort",
|
|
77574
|
-
"efforts": [
|
|
77575
|
-
"minimal",
|
|
77576
|
-
"low",
|
|
77577
|
-
"medium",
|
|
77578
|
-
"high"
|
|
77579
|
-
]
|
|
77580
|
-
}
|
|
77581
|
-
},
|
|
77582
|
-
"Qwen3.5-397B-A17B": {
|
|
77583
|
-
"id": "Qwen3.5-397B-A17B",
|
|
77584
|
-
"name": "Qwen3.5-397B-A17B",
|
|
77585
|
-
"api": "openai-completions",
|
|
77586
|
-
"provider": "wafer-pass",
|
|
77587
|
-
"baseUrl": "https://pass.wafer.ai/v1",
|
|
77588
|
-
"reasoning": false,
|
|
77589
|
-
"input": [
|
|
77590
|
-
"text",
|
|
77591
|
-
"image"
|
|
77592
|
-
],
|
|
77593
|
-
"cost": {
|
|
77594
|
-
"input": 0,
|
|
77595
|
-
"output": 0,
|
|
77596
|
-
"cacheRead": 0,
|
|
77597
|
-
"cacheWrite": 0
|
|
77598
|
-
},
|
|
77599
|
-
"contextWindow": 262144,
|
|
77600
|
-
"maxTokens": 65536,
|
|
77601
|
-
"compat": {
|
|
77602
|
-
"supportsDeveloperRole": false
|
|
77603
|
-
}
|
|
77604
|
-
}
|
|
77605
|
-
},
|
|
77606
77593
|
"wafer-serverless": {
|
|
77607
77594
|
"deepseek-v4-flash": {
|
|
77608
77595
|
"id": "deepseek-v4-flash",
|
|
@@ -84061,11 +84048,9 @@
|
|
|
84061
84048
|
"thinking": {
|
|
84062
84049
|
"mode": "effort",
|
|
84063
84050
|
"efforts": [
|
|
84064
|
-
"minimal",
|
|
84065
84051
|
"low",
|
|
84066
84052
|
"medium",
|
|
84067
|
-
"high"
|
|
84068
|
-
"xhigh"
|
|
84053
|
+
"high"
|
|
84069
84054
|
]
|
|
84070
84055
|
}
|
|
84071
84056
|
},
|
|
@@ -84090,11 +84075,9 @@
|
|
|
84090
84075
|
"thinking": {
|
|
84091
84076
|
"mode": "effort",
|
|
84092
84077
|
"efforts": [
|
|
84093
|
-
"minimal",
|
|
84094
84078
|
"low",
|
|
84095
84079
|
"medium",
|
|
84096
|
-
"high"
|
|
84097
|
-
"xhigh"
|
|
84080
|
+
"high"
|
|
84098
84081
|
]
|
|
84099
84082
|
}
|
|
84100
84083
|
},
|
|
@@ -84120,11 +84103,9 @@
|
|
|
84120
84103
|
"thinking": {
|
|
84121
84104
|
"mode": "effort",
|
|
84122
84105
|
"efforts": [
|
|
84123
|
-
"minimal",
|
|
84124
84106
|
"low",
|
|
84125
84107
|
"medium",
|
|
84126
|
-
"high"
|
|
84127
|
-
"xhigh"
|
|
84108
|
+
"high"
|
|
84128
84109
|
]
|
|
84129
84110
|
}
|
|
84130
84111
|
},
|
|
@@ -84149,11 +84130,9 @@
|
|
|
84149
84130
|
"thinking": {
|
|
84150
84131
|
"mode": "effort",
|
|
84151
84132
|
"efforts": [
|
|
84152
|
-
"minimal",
|
|
84153
84133
|
"low",
|
|
84154
84134
|
"medium",
|
|
84155
|
-
"high"
|
|
84156
|
-
"xhigh"
|
|
84135
|
+
"high"
|
|
84157
84136
|
]
|
|
84158
84137
|
}
|
|
84159
84138
|
},
|
|
@@ -84179,11 +84158,9 @@
|
|
|
84179
84158
|
"thinking": {
|
|
84180
84159
|
"mode": "effort",
|
|
84181
84160
|
"efforts": [
|
|
84182
|
-
"minimal",
|
|
84183
84161
|
"low",
|
|
84184
84162
|
"medium",
|
|
84185
|
-
"high"
|
|
84186
|
-
"xhigh"
|
|
84163
|
+
"high"
|
|
84187
84164
|
]
|
|
84188
84165
|
}
|
|
84189
84166
|
},
|
|
@@ -84208,11 +84185,9 @@
|
|
|
84208
84185
|
"thinking": {
|
|
84209
84186
|
"mode": "effort",
|
|
84210
84187
|
"efforts": [
|
|
84211
|
-
"minimal",
|
|
84212
84188
|
"low",
|
|
84213
84189
|
"medium",
|
|
84214
|
-
"high"
|
|
84215
|
-
"xhigh"
|
|
84190
|
+
"high"
|
|
84216
84191
|
]
|
|
84217
84192
|
}
|
|
84218
84193
|
},
|
|
@@ -84637,6 +84612,39 @@
|
|
|
84637
84612
|
}
|
|
84638
84613
|
},
|
|
84639
84614
|
"zhipu-coding-plan": {
|
|
84615
|
+
"glm-4.5": {
|
|
84616
|
+
"id": "glm-4.5",
|
|
84617
|
+
"name": "glm-4.5",
|
|
84618
|
+
"api": "openai-completions",
|
|
84619
|
+
"provider": "zhipu-coding-plan",
|
|
84620
|
+
"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
|
|
84621
|
+
"reasoning": true,
|
|
84622
|
+
"input": [
|
|
84623
|
+
"text"
|
|
84624
|
+
],
|
|
84625
|
+
"cost": {
|
|
84626
|
+
"input": 0,
|
|
84627
|
+
"output": 0,
|
|
84628
|
+
"cacheRead": 0,
|
|
84629
|
+
"cacheWrite": 0
|
|
84630
|
+
},
|
|
84631
|
+
"contextWindow": 131072,
|
|
84632
|
+
"maxTokens": 98304,
|
|
84633
|
+
"thinking": {
|
|
84634
|
+
"mode": "effort",
|
|
84635
|
+
"efforts": [
|
|
84636
|
+
"minimal",
|
|
84637
|
+
"low",
|
|
84638
|
+
"medium",
|
|
84639
|
+
"high"
|
|
84640
|
+
]
|
|
84641
|
+
},
|
|
84642
|
+
"compat": {
|
|
84643
|
+
"thinkingFormat": "zai",
|
|
84644
|
+
"reasoningContentField": "reasoning_content",
|
|
84645
|
+
"supportsDeveloperRole": false
|
|
84646
|
+
}
|
|
84647
|
+
},
|
|
84640
84648
|
"glm-4.5-air": {
|
|
84641
84649
|
"id": "glm-4.5-air",
|
|
84642
84650
|
"name": "GLM-4.5-Air",
|
|
@@ -84670,6 +84678,39 @@
|
|
|
84670
84678
|
]
|
|
84671
84679
|
}
|
|
84672
84680
|
},
|
|
84681
|
+
"glm-4.6": {
|
|
84682
|
+
"id": "glm-4.6",
|
|
84683
|
+
"name": "glm-4.6",
|
|
84684
|
+
"api": "openai-completions",
|
|
84685
|
+
"provider": "zhipu-coding-plan",
|
|
84686
|
+
"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
|
|
84687
|
+
"reasoning": true,
|
|
84688
|
+
"input": [
|
|
84689
|
+
"text"
|
|
84690
|
+
],
|
|
84691
|
+
"cost": {
|
|
84692
|
+
"input": 0,
|
|
84693
|
+
"output": 0,
|
|
84694
|
+
"cacheRead": 0,
|
|
84695
|
+
"cacheWrite": 0
|
|
84696
|
+
},
|
|
84697
|
+
"contextWindow": 202752,
|
|
84698
|
+
"maxTokens": 131072,
|
|
84699
|
+
"thinking": {
|
|
84700
|
+
"mode": "effort",
|
|
84701
|
+
"efforts": [
|
|
84702
|
+
"minimal",
|
|
84703
|
+
"low",
|
|
84704
|
+
"medium",
|
|
84705
|
+
"high"
|
|
84706
|
+
]
|
|
84707
|
+
},
|
|
84708
|
+
"compat": {
|
|
84709
|
+
"thinkingFormat": "zai",
|
|
84710
|
+
"reasoningContentField": "reasoning_content",
|
|
84711
|
+
"supportsDeveloperRole": false
|
|
84712
|
+
}
|
|
84713
|
+
},
|
|
84673
84714
|
"glm-4.6v": {
|
|
84674
84715
|
"id": "glm-4.6v",
|
|
84675
84716
|
"name": "GLM-4.6V",
|
|
@@ -84737,6 +84778,39 @@
|
|
|
84737
84778
|
]
|
|
84738
84779
|
}
|
|
84739
84780
|
},
|
|
84781
|
+
"glm-5": {
|
|
84782
|
+
"id": "glm-5",
|
|
84783
|
+
"name": "GLM-5",
|
|
84784
|
+
"api": "openai-completions",
|
|
84785
|
+
"provider": "zhipu-coding-plan",
|
|
84786
|
+
"baseUrl": "https://open.bigmodel.cn/api/coding/paas/v4",
|
|
84787
|
+
"reasoning": true,
|
|
84788
|
+
"input": [
|
|
84789
|
+
"text"
|
|
84790
|
+
],
|
|
84791
|
+
"cost": {
|
|
84792
|
+
"input": 0,
|
|
84793
|
+
"output": 0,
|
|
84794
|
+
"cacheRead": 0,
|
|
84795
|
+
"cacheWrite": 0
|
|
84796
|
+
},
|
|
84797
|
+
"contextWindow": 204800,
|
|
84798
|
+
"maxTokens": 131072,
|
|
84799
|
+
"thinking": {
|
|
84800
|
+
"mode": "effort",
|
|
84801
|
+
"efforts": [
|
|
84802
|
+
"minimal",
|
|
84803
|
+
"low",
|
|
84804
|
+
"medium",
|
|
84805
|
+
"high"
|
|
84806
|
+
]
|
|
84807
|
+
},
|
|
84808
|
+
"compat": {
|
|
84809
|
+
"thinkingFormat": "zai",
|
|
84810
|
+
"reasoningContentField": "reasoning_content",
|
|
84811
|
+
"supportsDeveloperRole": false
|
|
84812
|
+
}
|
|
84813
|
+
},
|
|
84740
84814
|
"glm-5-turbo": {
|
|
84741
84815
|
"id": "glm-5-turbo",
|
|
84742
84816
|
"name": "GLM-5-Turbo",
|
|
@@ -41,7 +41,6 @@ import {
|
|
|
41
41
|
veniceModelManagerOptions,
|
|
42
42
|
vercelAiGatewayModelManagerOptions,
|
|
43
43
|
vllmModelManagerOptions,
|
|
44
|
-
waferPassModelManagerOptions,
|
|
45
44
|
waferServerlessModelManagerOptions,
|
|
46
45
|
xaiModelManagerOptions,
|
|
47
46
|
xaiOAuthModelManagerOptions,
|
|
@@ -219,7 +218,9 @@ export const CATALOG_PROVIDERS = [
|
|
|
219
218
|
{
|
|
220
219
|
id: "moonshot",
|
|
221
220
|
defaultModel: "kimi-k2.7-code",
|
|
222
|
-
|
|
221
|
+
// KIMI_API_KEY is the most intuitive name for a Kimi/Moonshot key; accept it
|
|
222
|
+
// as a fallback so China users need not learn MOONSHOT_API_KEY. (#2883)
|
|
223
|
+
envVars: ["MOONSHOT_API_KEY", "KIMI_API_KEY"],
|
|
223
224
|
createModelManagerOptions: (config: ModelManagerConfig) => moonshotModelManagerOptions(config),
|
|
224
225
|
catalogDiscovery: { label: "Moonshot" },
|
|
225
226
|
},
|
|
@@ -347,13 +348,6 @@ export const CATALOG_PROVIDERS = [
|
|
|
347
348
|
createModelManagerOptions: (config: ModelManagerConfig) => vllmModelManagerOptions(config),
|
|
348
349
|
catalogDiscovery: { label: "vLLM", allowUnauthenticated: true },
|
|
349
350
|
},
|
|
350
|
-
{
|
|
351
|
-
id: "wafer-pass",
|
|
352
|
-
defaultModel: "GLM-5.1",
|
|
353
|
-
envVars: ["WAFER_PASS_API_KEY"],
|
|
354
|
-
createModelManagerOptions: (config: ModelManagerConfig) => waferPassModelManagerOptions(config),
|
|
355
|
-
catalogDiscovery: { label: "Wafer Pass", oauthProvider: "wafer-pass" },
|
|
356
|
-
},
|
|
357
351
|
{
|
|
358
352
|
id: "wafer-serverless",
|
|
359
353
|
defaultModel: "GLM-5.1",
|
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
type OpenAICompatibleModelRecord,
|
|
5
5
|
} from "../discovery/openai-compatible";
|
|
6
6
|
import { Effort } from "../effort";
|
|
7
|
-
import { toFireworksPublicModelId } from "../fireworks-model-id";
|
|
7
|
+
import { FIREWORKS_FAST_SUFFIX, toFireworksPublicModelId } from "../fireworks-model-id";
|
|
8
8
|
import { isGlmVisionModelId, isGrokReasoningEffortCapable, isReasoningGlmModelId } from "../identity/family";
|
|
9
9
|
import type { ModelManagerOptions } from "../model-manager";
|
|
10
10
|
import { getBundledModels } from "../models";
|
|
@@ -197,6 +197,8 @@ function mapWithBundledReference<TApi extends Api>(
|
|
|
197
197
|
...reference,
|
|
198
198
|
id: defaults.id,
|
|
199
199
|
name,
|
|
200
|
+
api: defaults.api,
|
|
201
|
+
provider: defaults.provider,
|
|
200
202
|
baseUrl: defaults.baseUrl,
|
|
201
203
|
contextWindow: toPositiveNumber(entry.context_length, reference.contextWindow),
|
|
202
204
|
maxTokens: toPositiveNumber(entry.max_completion_tokens, reference.maxTokens),
|
|
@@ -1258,6 +1260,51 @@ export function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | n
|
|
|
1258
1260
|
return isKimiK27CodeModelId(modelId) ? Math.min(candidate, KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS) : candidate;
|
|
1259
1261
|
}
|
|
1260
1262
|
|
|
1263
|
+
/**
|
|
1264
|
+
* Fireworks Fast variants we surface. Each inherits the base model's
|
|
1265
|
+
* limits/modalities/thinking and overrides only the cost with the Standard-column
|
|
1266
|
+
* Fast prices from the Serverless pricing table; `cacheWrite` stays 0 (Fireworks
|
|
1267
|
+
* bills no cache-write). Derived from the bundled base entries so metadata stays
|
|
1268
|
+
* in lockstep, and the runtime auto-falls back to the base id on a failed fast
|
|
1269
|
+
* request. See https://docs.fireworks.ai/serverless/pricing.
|
|
1270
|
+
*/
|
|
1271
|
+
const FIREWORKS_FAST_VARIANT_SPECS: ReadonlyArray<{
|
|
1272
|
+
base: string;
|
|
1273
|
+
name: string;
|
|
1274
|
+
cost: { input: number; output: number; cacheRead: number };
|
|
1275
|
+
}> = [
|
|
1276
|
+
{ base: "kimi-k2.7-code", name: "Kimi K2.7 Code Fast", cost: { input: 1.9, output: 8, cacheRead: 0.38 } },
|
|
1277
|
+
{ base: "kimi-k2.6", name: "Kimi K2.6 Fast", cost: { input: 2, output: 8, cacheRead: 0.3 } },
|
|
1278
|
+
{ base: "glm-5.1", name: "GLM-5.1 Fast", cost: { input: 2.8, output: 8.8, cacheRead: 0.52 } },
|
|
1279
|
+
];
|
|
1280
|
+
|
|
1281
|
+
/**
|
|
1282
|
+
* Build the Fireworks Fast seed by projecting each base bundled spec into a
|
|
1283
|
+
* `<id>-fast` variant. Pushed into the generated catalog (Fast routers never
|
|
1284
|
+
* appear in the serverless control-plane list, so discovery cannot surface
|
|
1285
|
+
* them) and deduped behind any identical previous-snapshot entry.
|
|
1286
|
+
*/
|
|
1287
|
+
export function buildFireworksFastSeed(): ModelSpec<"openai-completions">[] {
|
|
1288
|
+
const bundled = createBundledReferenceMap<"openai-completions">("fireworks");
|
|
1289
|
+
const seeds: ModelSpec<"openai-completions">[] = [];
|
|
1290
|
+
for (const variant of FIREWORKS_FAST_VARIANT_SPECS) {
|
|
1291
|
+
const base = bundled.get(variant.base);
|
|
1292
|
+
if (!base) continue;
|
|
1293
|
+
seeds.push({
|
|
1294
|
+
...base,
|
|
1295
|
+
id: `${variant.base}${FIREWORKS_FAST_SUFFIX}`,
|
|
1296
|
+
name: variant.name,
|
|
1297
|
+
cost: {
|
|
1298
|
+
input: variant.cost.input,
|
|
1299
|
+
output: variant.cost.output,
|
|
1300
|
+
cacheRead: variant.cost.cacheRead,
|
|
1301
|
+
cacheWrite: 0,
|
|
1302
|
+
},
|
|
1303
|
+
});
|
|
1304
|
+
}
|
|
1305
|
+
return seeds;
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1261
1308
|
/**
|
|
1262
1309
|
* Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
|
|
1263
1310
|
* DeepSeek-native binary `thinking` toggle when both are present.
|
|
@@ -1523,7 +1570,7 @@ export function firepassModelManagerOptions(
|
|
|
1523
1570
|
}
|
|
1524
1571
|
|
|
1525
1572
|
// ---------------------------------------------------------------------------
|
|
1526
|
-
// 7.7 Wafer
|
|
1573
|
+
// 7.7 Wafer Serverless
|
|
1527
1574
|
// ---------------------------------------------------------------------------
|
|
1528
1575
|
|
|
1529
1576
|
export interface WaferModelManagerConfig {
|
|
@@ -1536,13 +1583,14 @@ const WAFER_DEFAULT_BASE_URL = "https://pass.wafer.ai/v1";
|
|
|
1536
1583
|
const WAFER_MAX_TOKENS_CAP = 65536;
|
|
1537
1584
|
|
|
1538
1585
|
/**
|
|
1539
|
-
*
|
|
1586
|
+
* Mapper for Wafer Serverless `/v1/models` records.
|
|
1540
1587
|
*
|
|
1541
|
-
* Wafer wraps each entry with a `wafer` envelope describing
|
|
1542
|
-
*
|
|
1543
|
-
*
|
|
1544
|
-
* compat when the entry advertises reasoning support
|
|
1545
|
-
*
|
|
1588
|
+
* Wafer wraps each entry with a `wafer` envelope describing capabilities and
|
|
1589
|
+
* pricing. The mapper folds that metadata into the canonical
|
|
1590
|
+
* `ModelSpec<"openai-completions">` shape and applies upstream-specific thinking
|
|
1591
|
+
* compat when the entry advertises reasoning support. Wafer pricing is exposed
|
|
1592
|
+
* through internal wholesale units; the public Serverless rate equals
|
|
1593
|
+
* `cents × 125 / 10000`.
|
|
1546
1594
|
*/
|
|
1547
1595
|
interface WaferRecord {
|
|
1548
1596
|
context_length?: unknown;
|
|
@@ -1563,7 +1611,7 @@ function readWaferRecord(entry: OpenAICompatibleModelRecord): WaferRecord | unde
|
|
|
1563
1611
|
}
|
|
1564
1612
|
|
|
1565
1613
|
function mapWaferModel(
|
|
1566
|
-
providerId: "wafer-
|
|
1614
|
+
providerId: "wafer-serverless",
|
|
1567
1615
|
baseUrl: string,
|
|
1568
1616
|
entry: OpenAICompatibleModelRecord,
|
|
1569
1617
|
defaults: ModelSpec<"openai-completions">,
|
|
@@ -1579,25 +1627,12 @@ function mapWaferModel(
|
|
|
1579
1627
|
);
|
|
1580
1628
|
const maxTokens = contextWindow !== null ? Math.min(contextWindow, WAFER_MAX_TOKENS_CAP) : null;
|
|
1581
1629
|
const pricing = wafer?.pricing ?? {};
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
// For the Pass SKU the per-token rate is bundled in the flat-rate
|
|
1589
|
-
// subscription, so we follow the convention shared with
|
|
1590
|
-
// `kimi-code`/`firepass`/`alibaba-coding-plan` and seed every Pass model with
|
|
1591
|
-
// `cost: 0` regardless of what the upstream envelope says.
|
|
1592
|
-
const isPassSku = providerId === "wafer-pass";
|
|
1593
|
-
const cost = isPassSku
|
|
1594
|
-
? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }
|
|
1595
|
-
: {
|
|
1596
|
-
input: (toPositiveNumber(pricing.input_cents_per_million, 0) * 125) / 10000,
|
|
1597
|
-
output: (toPositiveNumber(pricing.output_cents_per_million, 0) * 125) / 10000,
|
|
1598
|
-
cacheRead: (toPositiveNumber(pricing.cache_read_cents_per_million, 0) * 125) / 10000,
|
|
1599
|
-
cacheWrite: 0,
|
|
1600
|
-
};
|
|
1630
|
+
const cost = {
|
|
1631
|
+
input: (toPositiveNumber(pricing.input_cents_per_million, 0) * 125) / 10000,
|
|
1632
|
+
output: (toPositiveNumber(pricing.output_cents_per_million, 0) * 125) / 10000,
|
|
1633
|
+
cacheRead: (toPositiveNumber(pricing.cache_read_cents_per_million, 0) * 125) / 10000,
|
|
1634
|
+
cacheWrite: 0,
|
|
1635
|
+
};
|
|
1601
1636
|
const name = toModelName(wafer?.display_name, defaults.name);
|
|
1602
1637
|
const base: ModelSpec<"openai-completions"> = {
|
|
1603
1638
|
...defaults,
|
|
@@ -1643,13 +1678,12 @@ function mapWaferModel(
|
|
|
1643
1678
|
};
|
|
1644
1679
|
}
|
|
1645
1680
|
|
|
1646
|
-
function
|
|
1647
|
-
|
|
1648
|
-
config: WaferModelManagerConfig | undefined,
|
|
1681
|
+
export function waferServerlessModelManagerOptions(
|
|
1682
|
+
config?: WaferModelManagerConfig,
|
|
1649
1683
|
): ModelManagerOptions<"openai-completions"> {
|
|
1650
1684
|
const apiKey = config?.apiKey;
|
|
1651
1685
|
const baseUrl = config?.baseUrl ?? WAFER_DEFAULT_BASE_URL;
|
|
1652
|
-
const
|
|
1686
|
+
const providerId = "wafer-serverless" as const;
|
|
1653
1687
|
return {
|
|
1654
1688
|
providerId,
|
|
1655
1689
|
...(apiKey && {
|
|
@@ -1659,11 +1693,6 @@ function createWaferOptions(
|
|
|
1659
1693
|
provider: providerId,
|
|
1660
1694
|
baseUrl,
|
|
1661
1695
|
apiKey,
|
|
1662
|
-
filterModel: entry => {
|
|
1663
|
-
if (!passOnly) return true;
|
|
1664
|
-
const wafer = readWaferRecord(entry);
|
|
1665
|
-
return wafer?.tier === "pass_included";
|
|
1666
|
-
},
|
|
1667
1696
|
mapModel: (entry, defaults) => mapWaferModel(providerId, baseUrl, entry, defaults),
|
|
1668
1697
|
fetch: config?.fetch,
|
|
1669
1698
|
}),
|
|
@@ -1671,18 +1700,6 @@ function createWaferOptions(
|
|
|
1671
1700
|
};
|
|
1672
1701
|
}
|
|
1673
1702
|
|
|
1674
|
-
export function waferPassModelManagerOptions(
|
|
1675
|
-
config?: WaferModelManagerConfig,
|
|
1676
|
-
): ModelManagerOptions<"openai-completions"> {
|
|
1677
|
-
return createWaferOptions("wafer-pass", config);
|
|
1678
|
-
}
|
|
1679
|
-
|
|
1680
|
-
export function waferServerlessModelManagerOptions(
|
|
1681
|
-
config?: WaferModelManagerConfig,
|
|
1682
|
-
): ModelManagerOptions<"openai-completions"> {
|
|
1683
|
-
return createWaferOptions("wafer-serverless", config);
|
|
1684
|
-
}
|
|
1685
|
-
|
|
1686
1703
|
// ---------------------------------------------------------------------------
|
|
1687
1704
|
// 7. Mistral
|
|
1688
1705
|
// ---------------------------------------------------------------------------
|
|
@@ -2448,7 +2465,10 @@ export function moonshotModelManagerOptions(
|
|
|
2448
2465
|
config?: MoonshotModelManagerConfig,
|
|
2449
2466
|
): ModelManagerOptions<"openai-completions"> {
|
|
2450
2467
|
const apiKey = config?.apiKey;
|
|
2451
|
-
|
|
2468
|
+
// `MOONSHOT_BASE_URL` redirects discovery (and the streaming request that
|
|
2469
|
+
// inherits this baseUrl) at the Kimi China platform `api.moonshot.cn`; an
|
|
2470
|
+
// explicit `config.baseUrl` still wins. Mirrors LITELLM_BASE_URL/LM_STUDIO_BASE_URL. (#2883)
|
|
2471
|
+
const baseUrl = config?.baseUrl ?? Bun.env.MOONSHOT_BASE_URL ?? "https://api.moonshot.ai/v1";
|
|
2452
2472
|
const references = createBundledReferenceMap<"openai-completions">("moonshot");
|
|
2453
2473
|
return {
|
|
2454
2474
|
providerId: "moonshot",
|