ugly-app 0.1.430 → 0.1.431
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/version.d.ts +1 -1
- package/dist/cli/version.js +1 -1
- package/dist/shared/FrameworkRequests.d.ts +1 -1
- package/dist/shared/TextGen.d.ts +15 -17
- package/dist/shared/TextGen.d.ts.map +1 -1
- package/dist/shared/TextGen.js +248 -164
- package/dist/shared/TextGen.js.map +1 -1
- package/package.json +1 -1
- package/src/cli/version.ts +1 -1
- package/src/shared/TextGen.ts +248 -164
package/src/shared/TextGen.ts
CHANGED
|
@@ -28,7 +28,6 @@ export type TextGenTogetherModelT = (typeof textGenTogetherModels)[number];
|
|
|
28
28
|
// Note: Llama 4 models require enterprise deployment on Fireworks, use Groq/Together instead
|
|
29
29
|
export const textGenFireworksModels = [
|
|
30
30
|
'fireworks_gpt_oss_120b',
|
|
31
|
-
'fireworks_gpt_oss_20b',
|
|
32
31
|
] as const;
|
|
33
32
|
|
|
34
33
|
export type TextGenFireworksModelT = (typeof textGenFireworksModels)[number];
|
|
@@ -53,11 +52,9 @@ export type TextGenDeepSeekModelT = (typeof textGenDeepSeekModels)[number];
|
|
|
53
52
|
// Groq models
|
|
54
53
|
export const textGenGroqModels = [
|
|
55
54
|
'groq_llama_4_scout',
|
|
56
|
-
'groq_llama_4_maverick',
|
|
57
55
|
'groq_mixtral_8x7b',
|
|
58
56
|
'groq_llama_3_3_70b',
|
|
59
57
|
'groq_gpt_oss_120b',
|
|
60
|
-
'groq_gpt_oss_20b',
|
|
61
58
|
] as const;
|
|
62
59
|
|
|
63
60
|
export type TextGenGroqModelT = (typeof textGenGroqModels)[number];
|
|
@@ -90,7 +87,6 @@ export const textGenOpenAIModels = [
|
|
|
90
87
|
'openai_o1_mini',
|
|
91
88
|
'openai_o3',
|
|
92
89
|
'openai_o4_mini',
|
|
93
|
-
'openai_gpt_41_nano',
|
|
94
90
|
] as const;
|
|
95
91
|
|
|
96
92
|
export type TextGenOpenAIModelT = (typeof textGenOpenAIModels)[number];
|
|
@@ -108,8 +104,11 @@ export type TextGenMiniMaxModelT = (typeof textGenMiniMaxModels)[number];
|
|
|
108
104
|
// us a warm path when the direct upstream is throttled or down.
|
|
109
105
|
export const textGenOpenRouterModels = [
|
|
110
106
|
'openrouter_glm_5_1',
|
|
107
|
+
'openrouter_glm_4_6',
|
|
111
108
|
'openrouter_qwen_36_plus',
|
|
109
|
+
'openrouter_qwen3_6_27b',
|
|
112
110
|
'openrouter_kimi_k2_thinking',
|
|
111
|
+
'openrouter_kimi_k2_6',
|
|
113
112
|
'openrouter_gemma_4_31b',
|
|
114
113
|
'openrouter_gemma_4_26b',
|
|
115
114
|
'openrouter_gemma_4_26b_free',
|
|
@@ -121,6 +120,7 @@ export const textGenOpenRouterModels = [
|
|
|
121
120
|
'openrouter_o4_mini',
|
|
122
121
|
'openrouter_claude_sonnet_4_6',
|
|
123
122
|
'openrouter_claude_haiku_4_5',
|
|
123
|
+
'openrouter_claude_opus_4_7',
|
|
124
124
|
] as const;
|
|
125
125
|
export type TextGenOpenRouterModelT = (typeof textGenOpenRouterModels)[number];
|
|
126
126
|
|
|
@@ -132,9 +132,13 @@ export const textGenDeepInfraModels = [
|
|
|
132
132
|
'deepinfra_gemma_4_31b',
|
|
133
133
|
'deepinfra_gemma_4_26b',
|
|
134
134
|
'deepinfra_gpt_oss_120b',
|
|
135
|
-
'deepinfra_gpt_oss_20b',
|
|
136
135
|
'deepinfra_qwen3_235b',
|
|
137
|
-
'
|
|
136
|
+
'deepinfra_qwen3_6_27b',
|
|
137
|
+
'deepinfra_qwen3_6_35b_a3b',
|
|
138
|
+
'deepinfra_kimi_k2_6',
|
|
139
|
+
'deepinfra_glm_4_6',
|
|
140
|
+
'deepinfra_glm_5_1',
|
|
141
|
+
'deepinfra_minimax_m2_5',
|
|
138
142
|
'deepinfra_deepseek_v4_pro',
|
|
139
143
|
'deepinfra_deepseek_v4_flash',
|
|
140
144
|
] as const;
|
|
@@ -165,24 +169,23 @@ const textGenProviderModelValues = [
|
|
|
165
169
|
'openai_o1_mini',
|
|
166
170
|
'openai_o3',
|
|
167
171
|
'openai_o4_mini',
|
|
168
|
-
'openai_gpt_41_nano',
|
|
169
172
|
// Fireworks (GPT-OSS - Llama 4 requires enterprise deployment)
|
|
170
173
|
'fireworks_gpt_oss_120b',
|
|
171
|
-
'fireworks_gpt_oss_20b',
|
|
172
174
|
// Groq
|
|
173
175
|
'groq_llama_4_scout',
|
|
174
|
-
'groq_llama_4_maverick',
|
|
175
176
|
'groq_mixtral_8x7b',
|
|
176
177
|
'groq_llama_3_3_70b',
|
|
177
178
|
'groq_gpt_oss_120b',
|
|
178
|
-
'groq_gpt_oss_20b',
|
|
179
179
|
// MiniMax
|
|
180
180
|
'minimax_m2_5',
|
|
181
181
|
'minimax_m2_7',
|
|
182
182
|
// OpenRouter
|
|
183
183
|
'openrouter_glm_5_1',
|
|
184
|
+
'openrouter_glm_4_6',
|
|
184
185
|
'openrouter_qwen_36_plus',
|
|
186
|
+
'openrouter_qwen3_6_27b',
|
|
185
187
|
'openrouter_kimi_k2_thinking',
|
|
188
|
+
'openrouter_kimi_k2_6',
|
|
186
189
|
'openrouter_gemma_4_31b',
|
|
187
190
|
'openrouter_gemma_4_26b',
|
|
188
191
|
'openrouter_gemma_4_26b_free',
|
|
@@ -193,13 +196,18 @@ const textGenProviderModelValues = [
|
|
|
193
196
|
'openrouter_o4_mini',
|
|
194
197
|
'openrouter_claude_sonnet_4_6',
|
|
195
198
|
'openrouter_claude_haiku_4_5',
|
|
199
|
+
'openrouter_claude_opus_4_7',
|
|
196
200
|
// DeepInfra
|
|
197
201
|
'deepinfra_gemma_4_31b',
|
|
198
202
|
'deepinfra_gemma_4_26b',
|
|
199
203
|
'deepinfra_gpt_oss_120b',
|
|
200
|
-
'deepinfra_gpt_oss_20b',
|
|
201
204
|
'deepinfra_qwen3_235b',
|
|
202
|
-
'
|
|
205
|
+
'deepinfra_qwen3_6_27b',
|
|
206
|
+
'deepinfra_qwen3_6_35b_a3b',
|
|
207
|
+
'deepinfra_kimi_k2_6',
|
|
208
|
+
'deepinfra_glm_4_6',
|
|
209
|
+
'deepinfra_glm_5_1',
|
|
210
|
+
'deepinfra_minimax_m2_5',
|
|
203
211
|
'deepinfra_deepseek_v4_pro',
|
|
204
212
|
'deepinfra_deepseek_v4_flash',
|
|
205
213
|
// DeepSeek (direct, BYO key, OpenAI-compatible)
|
|
@@ -225,13 +233,11 @@ export const textGenProviderModelsSet = new Set(textGenProviderModels);
|
|
|
225
233
|
const textGenModelValues = [
|
|
226
234
|
// Multi-provider routed models
|
|
227
235
|
'llama_4_scout',
|
|
228
|
-
'llama_4_maverick',
|
|
229
236
|
'llama_3_3_70b',
|
|
230
|
-
// DeepSeek V4 — direct
|
|
237
|
+
// DeepSeek V4 — direct (api.deepseek.com) + DeepInfra fallback
|
|
231
238
|
'deepseek_v4_pro',
|
|
232
239
|
'deepseek_v4_flash',
|
|
233
240
|
'gpt_oss_120b',
|
|
234
|
-
'gpt_oss_20b',
|
|
235
241
|
// Google
|
|
236
242
|
'gemini_2_5_flash',
|
|
237
243
|
'gemini_2_5',
|
|
@@ -239,24 +245,27 @@ const textGenModelValues = [
|
|
|
239
245
|
'gpt_4o',
|
|
240
246
|
'gpt_5',
|
|
241
247
|
'gpt_5_mini',
|
|
242
|
-
'gpt_41_nano',
|
|
243
248
|
'o3',
|
|
244
249
|
'o4_mini',
|
|
245
250
|
// Anthropic — coding-agent tiers (Anthropic direct + OpenRouter fallback)
|
|
246
251
|
'claude_opus_4_7',
|
|
247
252
|
'claude_sonnet_4_6',
|
|
248
|
-
'claude_3_sonnet',
|
|
249
253
|
'claude_haiku_4_5',
|
|
250
254
|
// Multi-provider open-weight (DeepInfra / OpenRouter / Together)
|
|
251
255
|
'gemma_4_31b',
|
|
252
256
|
'gemma_4_26b',
|
|
253
|
-
// MiniMax
|
|
257
|
+
// MiniMax — direct + DeepInfra fallback for M2.5
|
|
254
258
|
'minimax_m2_5',
|
|
255
259
|
'minimax_m2_7',
|
|
256
|
-
// OpenRouter
|
|
260
|
+
// GLM — DeepInfra primary + OpenRouter fallback
|
|
257
261
|
'glm_5_1',
|
|
262
|
+
'glm_4_6',
|
|
263
|
+
// Qwen 3.6 — OpenRouter + DeepInfra
|
|
258
264
|
'qwen3_6_plus',
|
|
265
|
+
'qwen3_6_27b',
|
|
266
|
+
// Kimi — OpenRouter + DeepInfra
|
|
259
267
|
'kimi_k2_thinking',
|
|
268
|
+
'kimi_k2_6',
|
|
260
269
|
// Single-provider (Together — misc)
|
|
261
270
|
'qwen2_72b',
|
|
262
271
|
'qwen2_vision_72b',
|
|
@@ -524,25 +533,6 @@ export const TextGenOpenAIModelData: Record<
|
|
|
524
533
|
smartness: 4,
|
|
525
534
|
compactAt: 0.90,
|
|
526
535
|
},
|
|
527
|
-
// GPT-4.1 Nano — cheapest viable model, 1M context
|
|
528
|
-
openai_gpt_41_nano: {
|
|
529
|
-
model: 'gpt-4.1-nano',
|
|
530
|
-
contextWindow: 1_000_000,
|
|
531
|
-
inputTokenNanoDollar: 100,
|
|
532
|
-
outputTokenNanoDollar: 400,
|
|
533
|
-
vision: true,
|
|
534
|
-
toolCalling: true,
|
|
535
|
-
jsonMode: true,
|
|
536
|
-
streaming: true,
|
|
537
|
-
parallelToolCalls: true,
|
|
538
|
-
// Coding agent
|
|
539
|
-
name: 'GPT-4.1 Nano',
|
|
540
|
-
provider: 'OpenAI',
|
|
541
|
-
speed: 'fast',
|
|
542
|
-
reasoning: 'strong',
|
|
543
|
-
smartness: 2,
|
|
544
|
-
compactAt: 0.90,
|
|
545
|
-
},
|
|
546
536
|
};
|
|
547
537
|
|
|
548
538
|
export const TextGenTogetherModelData: Record<
|
|
@@ -695,17 +685,6 @@ export const TextGenFireworksModelData: Record<
|
|
|
695
685
|
streaming: true,
|
|
696
686
|
parallelToolCalls: false,
|
|
697
687
|
},
|
|
698
|
-
fireworks_gpt_oss_20b: {
|
|
699
|
-
model: 'accounts/fireworks/models/gpt-oss-20b',
|
|
700
|
-
contextWindow: 128000,
|
|
701
|
-
// $0.075 input / $0.30 output per 1M tokens
|
|
702
|
-
inputTokenNanoDollar: 75,
|
|
703
|
-
outputTokenNanoDollar: 300,
|
|
704
|
-
toolCalling: true,
|
|
705
|
-
jsonMode: true,
|
|
706
|
-
streaming: true,
|
|
707
|
-
parallelToolCalls: false,
|
|
708
|
-
},
|
|
709
688
|
};
|
|
710
689
|
|
|
711
690
|
// Groq models - ultra-fast inference
|
|
@@ -724,18 +703,6 @@ export const TextGenGroqModelData: Record<TextGenGroqModelT, TextGenModelData> =
|
|
|
724
703
|
streaming: true,
|
|
725
704
|
parallelToolCalls: false,
|
|
726
705
|
},
|
|
727
|
-
groq_llama_4_maverick: {
|
|
728
|
-
model: 'meta-llama/llama-4-maverick-17b-128e-instruct',
|
|
729
|
-
contextWindow: 128000,
|
|
730
|
-
inputTokenNanoDollar: 200,
|
|
731
|
-
outputTokenNanoDollar: 600,
|
|
732
|
-
vision: true,
|
|
733
|
-
// Note: toolCalling marked 'unreliable' due to known 100% failure rate with structured prompts
|
|
734
|
-
toolCalling: 'unreliable',
|
|
735
|
-
jsonMode: true,
|
|
736
|
-
streaming: true,
|
|
737
|
-
parallelToolCalls: false,
|
|
738
|
-
},
|
|
739
706
|
groq_mixtral_8x7b: {
|
|
740
707
|
model: 'mixtral-8x7b-32768',
|
|
741
708
|
contextWindow: 32768,
|
|
@@ -768,17 +735,6 @@ export const TextGenGroqModelData: Record<TextGenGroqModelT, TextGenModelData> =
|
|
|
768
735
|
streaming: true,
|
|
769
736
|
parallelToolCalls: false,
|
|
770
737
|
},
|
|
771
|
-
groq_gpt_oss_20b: {
|
|
772
|
-
model: 'openai/gpt-oss-20b',
|
|
773
|
-
contextWindow: 128000,
|
|
774
|
-
// $0.075 input / $0.30 output per 1M tokens
|
|
775
|
-
inputTokenNanoDollar: 75,
|
|
776
|
-
outputTokenNanoDollar: 300,
|
|
777
|
-
toolCalling: true,
|
|
778
|
-
jsonMode: true,
|
|
779
|
-
streaming: true,
|
|
780
|
-
parallelToolCalls: false,
|
|
781
|
-
},
|
|
782
738
|
};
|
|
783
739
|
|
|
784
740
|
export const TextGenKieModelData: Record<TextGenKieModelT, TextGenModelData> =
|
|
@@ -1089,6 +1045,60 @@ export const TextGenOpenRouterModelData: Record<
|
|
|
1089
1045
|
// doesn't assume a cache discount that never materializes.
|
|
1090
1046
|
supportsCacheControl: false,
|
|
1091
1047
|
},
|
|
1048
|
+
// Claude Opus 4.7 fallback — primary still Anthropic direct. OpenRouter
|
|
1049
|
+
// list price is ~5% over upstream. Cache passthrough verified to work
|
|
1050
|
+
// for sonnet-4.6; opus-4.7 not yet verified — leave conservative.
|
|
1051
|
+
openrouter_claude_opus_4_7: {
|
|
1052
|
+
model: 'anthropic/claude-opus-4.7',
|
|
1053
|
+
contextWindow: 200_000,
|
|
1054
|
+
inputTokenNanoDollar: 15_750, // ~$15/M × 1.05
|
|
1055
|
+
outputTokenNanoDollar: 78_750, // ~$75/M × 1.05
|
|
1056
|
+
vision: true,
|
|
1057
|
+
toolCalling: true,
|
|
1058
|
+
jsonMode: true,
|
|
1059
|
+
streaming: true,
|
|
1060
|
+
parallelToolCalls: true,
|
|
1061
|
+
supportsCacheControl: false,
|
|
1062
|
+
},
|
|
1063
|
+
// GLM 4.6 OpenRouter route. List price is roughly the same as
|
|
1064
|
+
// DeepInfra's published rate; OpenRouter wins when DeepInfra is
|
|
1065
|
+
// throttled.
|
|
1066
|
+
openrouter_glm_4_6: {
|
|
1067
|
+
model: 'z-ai/glm-4.6',
|
|
1068
|
+
contextWindow: 200_000,
|
|
1069
|
+
inputTokenNanoDollar: 600,
|
|
1070
|
+
outputTokenNanoDollar: 2200,
|
|
1071
|
+
toolCalling: true,
|
|
1072
|
+
jsonMode: true,
|
|
1073
|
+
streaming: true,
|
|
1074
|
+
parallelToolCalls: false,
|
|
1075
|
+
supportsCacheControl: false,
|
|
1076
|
+
},
|
|
1077
|
+
// Qwen 3.6 27B OpenRouter route — dense (non-MoE) variant.
|
|
1078
|
+
openrouter_qwen3_6_27b: {
|
|
1079
|
+
model: 'qwen/qwen3.6-27b',
|
|
1080
|
+
contextWindow: 262_144,
|
|
1081
|
+
inputTokenNanoDollar: 350,
|
|
1082
|
+
outputTokenNanoDollar: 3360,
|
|
1083
|
+
vision: true,
|
|
1084
|
+
toolCalling: true,
|
|
1085
|
+
jsonMode: true,
|
|
1086
|
+
streaming: true,
|
|
1087
|
+
parallelToolCalls: false,
|
|
1088
|
+
supportsCacheControl: false,
|
|
1089
|
+
},
|
|
1090
|
+
// Kimi K2.6 OpenRouter route.
|
|
1091
|
+
openrouter_kimi_k2_6: {
|
|
1092
|
+
model: 'moonshotai/kimi-k2.6',
|
|
1093
|
+
contextWindow: 262_144,
|
|
1094
|
+
inputTokenNanoDollar: 800,
|
|
1095
|
+
outputTokenNanoDollar: 3700,
|
|
1096
|
+
toolCalling: true,
|
|
1097
|
+
jsonMode: true,
|
|
1098
|
+
streaming: true,
|
|
1099
|
+
parallelToolCalls: false,
|
|
1100
|
+
supportsCacheControl: false,
|
|
1101
|
+
},
|
|
1092
1102
|
};
|
|
1093
1103
|
|
|
1094
1104
|
// DeepInfra — direct route, bypasses OpenRouter's shared rate limits.
|
|
@@ -1159,43 +1169,107 @@ export const TextGenDeepInfraModelData: Record<
|
|
|
1159
1169
|
parallelToolCalls: false,
|
|
1160
1170
|
supportsCacheControl: false,
|
|
1161
1171
|
},
|
|
1162
|
-
//
|
|
1163
|
-
|
|
1164
|
-
model: '
|
|
1165
|
-
contextWindow:
|
|
1166
|
-
inputTokenNanoDollar:
|
|
1167
|
-
outputTokenNanoDollar:
|
|
1172
|
+
// Qwen3-235B-A22B-Instruct-2507 (live 2026-05-03 on DeepInfra).
|
|
1173
|
+
deepinfra_qwen3_235b: {
|
|
1174
|
+
model: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
|
|
1175
|
+
contextWindow: 256_000,
|
|
1176
|
+
inputTokenNanoDollar: 71,
|
|
1177
|
+
outputTokenNanoDollar: 100,
|
|
1168
1178
|
toolCalling: true,
|
|
1169
1179
|
jsonMode: true,
|
|
1170
1180
|
streaming: true,
|
|
1171
1181
|
parallelToolCalls: false,
|
|
1172
1182
|
supportsCacheControl: false,
|
|
1173
1183
|
},
|
|
1174
|
-
// Qwen3
|
|
1175
|
-
//
|
|
1176
|
-
|
|
1177
|
-
model: 'Qwen/Qwen3-
|
|
1178
|
-
contextWindow:
|
|
1179
|
-
inputTokenNanoDollar:
|
|
1180
|
-
outputTokenNanoDollar:
|
|
1184
|
+
// Qwen3.6 27B — $0.32/$3.20 per 1M (live 2026-05-03), 256k ctx,
|
|
1185
|
+
// multimodal + reasoning. The dense, non-MoE variant.
|
|
1186
|
+
deepinfra_qwen3_6_27b: {
|
|
1187
|
+
model: 'Qwen/Qwen3.6-27B',
|
|
1188
|
+
contextWindow: 262_144,
|
|
1189
|
+
inputTokenNanoDollar: 320,
|
|
1190
|
+
outputTokenNanoDollar: 3200,
|
|
1191
|
+
vision: true,
|
|
1181
1192
|
toolCalling: true,
|
|
1182
1193
|
jsonMode: true,
|
|
1183
1194
|
streaming: true,
|
|
1184
1195
|
parallelToolCalls: false,
|
|
1185
1196
|
supportsCacheControl: false,
|
|
1186
1197
|
},
|
|
1187
|
-
//
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1198
|
+
// Qwen3.6 35B-A3B — MoE variant, dramatically cheaper.
|
|
1199
|
+
// $0.15/$0.95 per 1M (live 2026-05-03), 256k ctx.
|
|
1200
|
+
deepinfra_qwen3_6_35b_a3b: {
|
|
1201
|
+
model: 'Qwen/Qwen3.6-35B-A3B',
|
|
1202
|
+
contextWindow: 262_144,
|
|
1203
|
+
inputTokenNanoDollar: 150,
|
|
1204
|
+
outputTokenNanoDollar: 950,
|
|
1205
|
+
vision: true,
|
|
1193
1206
|
toolCalling: true,
|
|
1194
1207
|
jsonMode: true,
|
|
1195
1208
|
streaming: true,
|
|
1196
1209
|
parallelToolCalls: false,
|
|
1197
1210
|
supportsCacheControl: false,
|
|
1198
1211
|
},
|
|
1212
|
+
// Kimi K2.6 — newer than the (now-removed) K2-Instruct. $0.75/$3.50
|
|
1213
|
+
// per 1M (live 2026-05-03), 256k ctx, native reasoning. Cache-read
|
|
1214
|
+
// is 20% of input → 150 nanoDollar per token ($0.15 per 1M).
|
|
1215
|
+
deepinfra_kimi_k2_6: {
|
|
1216
|
+
model: 'moonshotai/Kimi-K2.6',
|
|
1217
|
+
contextWindow: 262_144,
|
|
1218
|
+
inputTokenNanoDollar: 750,
|
|
1219
|
+
outputTokenNanoDollar: 3500,
|
|
1220
|
+
toolCalling: true,
|
|
1221
|
+
jsonMode: true,
|
|
1222
|
+
streaming: true,
|
|
1223
|
+
parallelToolCalls: false,
|
|
1224
|
+
supportsCacheControl: true,
|
|
1225
|
+
cacheReadTokenNanoDollar: 150,
|
|
1226
|
+
},
|
|
1227
|
+
// GLM 4.6 — $0.43/$1.74 per 1M (live 2026-05-03), 200k ctx.
|
|
1228
|
+
// Cache-read 18.6% of input → 80 nanoDollar per token ($0.08 per 1M).
|
|
1229
|
+
deepinfra_glm_4_6: {
|
|
1230
|
+
model: 'zai-org/GLM-4.6',
|
|
1231
|
+
contextWindow: 202_752,
|
|
1232
|
+
inputTokenNanoDollar: 430,
|
|
1233
|
+
outputTokenNanoDollar: 1740,
|
|
1234
|
+
toolCalling: true,
|
|
1235
|
+
jsonMode: true,
|
|
1236
|
+
streaming: true,
|
|
1237
|
+
parallelToolCalls: false,
|
|
1238
|
+
supportsCacheControl: true,
|
|
1239
|
+
cacheReadTokenNanoDollar: 80,
|
|
1240
|
+
},
|
|
1241
|
+
// GLM 5.1 fallback route. Pricing $1.05/$3.50 per 1M (live
|
|
1242
|
+
// 2026-05-03), 200k ctx. Slightly higher than the OpenRouter
|
|
1243
|
+
// route's published list, so OpenRouter stays primary on price.
|
|
1244
|
+
// Cache-read 19.5% of input → 205 nanoDollar per token ($0.205 per 1M).
|
|
1245
|
+
deepinfra_glm_5_1: {
|
|
1246
|
+
model: 'zai-org/GLM-5.1',
|
|
1247
|
+
contextWindow: 202_752,
|
|
1248
|
+
inputTokenNanoDollar: 1050,
|
|
1249
|
+
outputTokenNanoDollar: 3500,
|
|
1250
|
+
toolCalling: true,
|
|
1251
|
+
jsonMode: true,
|
|
1252
|
+
streaming: true,
|
|
1253
|
+
parallelToolCalls: false,
|
|
1254
|
+
supportsCacheControl: true,
|
|
1255
|
+
cacheReadTokenNanoDollar: 205,
|
|
1256
|
+
},
|
|
1257
|
+
// MiniMax M2.5 fallback. Pricing $0.15/$1.15 per 1M (live
|
|
1258
|
+
// 2026-05-03) — actually CHEAPER than direct MiniMax ($0.30/$1.20),
|
|
1259
|
+
// so the price-priority sort prefers DeepInfra. Direct stays as a
|
|
1260
|
+
// healthy backup. 196k ctx, cache-read 20% → 30 nanoDollar per token ($0.03 per 1M).
|
|
1261
|
+
deepinfra_minimax_m2_5: {
|
|
1262
|
+
model: 'MiniMaxAI/MiniMax-M2.5',
|
|
1263
|
+
contextWindow: 196_608,
|
|
1264
|
+
inputTokenNanoDollar: 150,
|
|
1265
|
+
outputTokenNanoDollar: 1150,
|
|
1266
|
+
toolCalling: true,
|
|
1267
|
+
jsonMode: true,
|
|
1268
|
+
streaming: true,
|
|
1269
|
+
parallelToolCalls: false,
|
|
1270
|
+
supportsCacheControl: true,
|
|
1271
|
+
cacheReadTokenNanoDollar: 30,
|
|
1272
|
+
},
|
|
1199
1273
|
// DeepSeek V4 Pro fallback route — used when api.deepseek.com is
|
|
1200
1274
|
// throttled or down. Pricing live-verified 2026-05-03 against
|
|
1201
1275
|
// /models/deepseek-ai/DeepSeek-V4-Pro: $1.74 in / $3.48 out per 1M
|
|
@@ -1376,22 +1450,6 @@ export const textGenMultiProviderModels: Record<
|
|
|
1376
1450
|
available: true,
|
|
1377
1451
|
},
|
|
1378
1452
|
],
|
|
1379
|
-
llama_4_maverick: [
|
|
1380
|
-
// Groq deprecated llama-4-maverick-17b-128e effective March 9, 2026
|
|
1381
|
-
{
|
|
1382
|
-
provider: 'groq',
|
|
1383
|
-
providerModel: 'groq_llama_4_maverick',
|
|
1384
|
-
latencyTier: 'fast',
|
|
1385
|
-
available: false,
|
|
1386
|
-
},
|
|
1387
|
-
// Fireworks Llama 4 requires enterprise deployment, not available on serverless
|
|
1388
|
-
{
|
|
1389
|
-
provider: 'together',
|
|
1390
|
-
providerModel: 'together_meta_llama4_400b',
|
|
1391
|
-
latencyTier: 'standard',
|
|
1392
|
-
available: true,
|
|
1393
|
-
},
|
|
1394
|
-
],
|
|
1395
1453
|
// DeepSeek V4 — direct route to api.deepseek.com (BYO key, OpenAI-
|
|
1396
1454
|
// compatible) primary, DeepInfra fallback for failover only. Direct
|
|
1397
1455
|
// is ~6× cheaper on Pro and ~2× on Flash so the price-priority sort
|
|
@@ -1456,26 +1514,6 @@ export const textGenMultiProviderModels: Record<
|
|
|
1456
1514
|
available: true,
|
|
1457
1515
|
},
|
|
1458
1516
|
],
|
|
1459
|
-
gpt_oss_20b: [
|
|
1460
|
-
{
|
|
1461
|
-
provider: 'deepinfra',
|
|
1462
|
-
providerModel: 'deepinfra_gpt_oss_20b',
|
|
1463
|
-
latencyTier: 'standard',
|
|
1464
|
-
available: true,
|
|
1465
|
-
},
|
|
1466
|
-
{
|
|
1467
|
-
provider: 'groq',
|
|
1468
|
-
providerModel: 'groq_gpt_oss_20b',
|
|
1469
|
-
latencyTier: 'fast',
|
|
1470
|
-
available: true,
|
|
1471
|
-
},
|
|
1472
|
-
{
|
|
1473
|
-
provider: 'fireworks',
|
|
1474
|
-
providerModel: 'fireworks_gpt_oss_20b',
|
|
1475
|
-
latencyTier: 'standard',
|
|
1476
|
-
available: true,
|
|
1477
|
-
},
|
|
1478
|
-
],
|
|
1479
1517
|
// Single-provider Google models
|
|
1480
1518
|
gemini_2_5_flash: [
|
|
1481
1519
|
{
|
|
@@ -1569,19 +1607,11 @@ export const textGenMultiProviderModels: Record<
|
|
|
1569
1607
|
available: true,
|
|
1570
1608
|
},
|
|
1571
1609
|
],
|
|
1572
|
-
gpt_41_nano: [
|
|
1573
|
-
{
|
|
1574
|
-
provider: 'openai',
|
|
1575
|
-
providerModel: 'openai_gpt_41_nano',
|
|
1576
|
-
latencyTier: 'fast',
|
|
1577
|
-
available: true,
|
|
1578
|
-
},
|
|
1579
|
-
],
|
|
1580
1610
|
// Anthropic models — direct first, OpenRouter fallback second.
|
|
1581
|
-
//
|
|
1582
|
-
// the
|
|
1583
|
-
//
|
|
1584
|
-
|
|
1611
|
+
// The Anthropic entry ships 4.x behind a wire-dated snapshot, and
|
|
1612
|
+
// OpenRouter mirrors the same model with ~5% markup. Cache_control
|
|
1613
|
+
// passthrough is verified for sonnet but unverified for opus/haiku.
|
|
1614
|
+
claude_sonnet_4_6: [
|
|
1585
1615
|
{
|
|
1586
1616
|
provider: 'anthropic',
|
|
1587
1617
|
providerModel: 'anthrophic_claude_3_sonnet',
|
|
@@ -1595,28 +1625,16 @@ export const textGenMultiProviderModels: Record<
|
|
|
1595
1625
|
available: true,
|
|
1596
1626
|
},
|
|
1597
1627
|
],
|
|
1598
|
-
|
|
1599
|
-
// to 4.6 / 4.7 without depending on claude_3_sonnet's legacy label.
|
|
1600
|
-
// Same offering chain — the Anthropic entry ships 4.x behind the
|
|
1601
|
-
// wire-dated snapshot, and OpenRouter mirrors the same model.
|
|
1602
|
-
claude_sonnet_4_6: [
|
|
1628
|
+
claude_opus_4_7: [
|
|
1603
1629
|
{
|
|
1604
1630
|
provider: 'anthropic',
|
|
1605
|
-
providerModel: '
|
|
1606
|
-
latencyTier: '
|
|
1631
|
+
providerModel: 'anthrophic_claude_3_opus',
|
|
1632
|
+
latencyTier: 'slow',
|
|
1607
1633
|
available: true,
|
|
1608
1634
|
},
|
|
1609
1635
|
{
|
|
1610
1636
|
provider: 'openrouter',
|
|
1611
|
-
providerModel: '
|
|
1612
|
-
latencyTier: 'standard',
|
|
1613
|
-
available: true,
|
|
1614
|
-
},
|
|
1615
|
-
],
|
|
1616
|
-
claude_opus_4_7: [
|
|
1617
|
-
{
|
|
1618
|
-
provider: 'anthropic',
|
|
1619
|
-
providerModel: 'anthrophic_claude_3_opus',
|
|
1637
|
+
providerModel: 'openrouter_claude_opus_4_7',
|
|
1620
1638
|
latencyTier: 'slow',
|
|
1621
1639
|
available: true,
|
|
1622
1640
|
},
|
|
@@ -1669,17 +1687,29 @@ export const textGenMultiProviderModels: Record<
|
|
|
1669
1687
|
available: true,
|
|
1670
1688
|
},
|
|
1671
1689
|
],
|
|
1690
|
+
// Kimi K2 (original Instruct) — DeepInfra retired Kimi-K2-Instruct
|
|
1691
|
+
// 2026-05-03 in favor of K2.5/K2.6 (different model). Together still
|
|
1692
|
+
// serves the original K2-Instruct as the only path.
|
|
1672
1693
|
kimi_k2: [
|
|
1673
|
-
|
|
1694
|
+
{
|
|
1695
|
+
provider: 'together',
|
|
1696
|
+
providerModel: 'together_kimi_k2',
|
|
1697
|
+
latencyTier: 'standard',
|
|
1698
|
+
available: true,
|
|
1699
|
+
},
|
|
1700
|
+
],
|
|
1701
|
+
// Kimi K2.6 — newer model. DeepInfra primary (cheaper), OpenRouter
|
|
1702
|
+
// fallback. Both expose the same wire model id family.
|
|
1703
|
+
kimi_k2_6: [
|
|
1674
1704
|
{
|
|
1675
1705
|
provider: 'deepinfra',
|
|
1676
|
-
providerModel: '
|
|
1706
|
+
providerModel: 'deepinfra_kimi_k2_6',
|
|
1677
1707
|
latencyTier: 'standard',
|
|
1678
1708
|
available: true,
|
|
1679
1709
|
},
|
|
1680
1710
|
{
|
|
1681
|
-
provider: '
|
|
1682
|
-
providerModel: '
|
|
1711
|
+
provider: 'openrouter',
|
|
1712
|
+
providerModel: 'openrouter_kimi_k2_6',
|
|
1683
1713
|
latencyTier: 'standard',
|
|
1684
1714
|
available: true,
|
|
1685
1715
|
},
|
|
@@ -1730,7 +1760,16 @@ export const textGenMultiProviderModels: Record<
|
|
|
1730
1760
|
available: true,
|
|
1731
1761
|
},
|
|
1732
1762
|
],
|
|
1763
|
+
// MiniMax M2.5 — DeepInfra is cheaper than direct ($0.15/$1.15 vs
|
|
1764
|
+
// $0.30/$1.20), so DeepInfra wins the price-priority sort. Direct
|
|
1765
|
+
// stays as a healthy fallback.
|
|
1733
1766
|
minimax_m2_5: [
|
|
1767
|
+
{
|
|
1768
|
+
provider: 'deepinfra',
|
|
1769
|
+
providerModel: 'deepinfra_minimax_m2_5',
|
|
1770
|
+
latencyTier: 'standard',
|
|
1771
|
+
available: true,
|
|
1772
|
+
},
|
|
1734
1773
|
{
|
|
1735
1774
|
provider: 'minimax',
|
|
1736
1775
|
providerModel: 'minimax_m2_5',
|
|
@@ -1738,6 +1777,8 @@ export const textGenMultiProviderModels: Record<
|
|
|
1738
1777
|
available: true,
|
|
1739
1778
|
},
|
|
1740
1779
|
],
|
|
1780
|
+
// MiniMax M2.7 — not on DeepInfra (only M2.5 is hosted there).
|
|
1781
|
+
// Direct route only.
|
|
1741
1782
|
minimax_m2_7: [
|
|
1742
1783
|
{
|
|
1743
1784
|
provider: 'minimax',
|
|
@@ -1746,6 +1787,9 @@ export const textGenMultiProviderModels: Record<
|
|
|
1746
1787
|
available: true,
|
|
1747
1788
|
},
|
|
1748
1789
|
],
|
|
1790
|
+
// GLM 5.1 — OpenRouter is cheaper ($0.95/$3.15 vs DeepInfra
|
|
1791
|
+
// $1.05/$3.50) and has been stable; OpenRouter primary, DeepInfra
|
|
1792
|
+
// fallback when OR is throttled.
|
|
1749
1793
|
glm_5_1: [
|
|
1750
1794
|
{
|
|
1751
1795
|
provider: 'openrouter',
|
|
@@ -1753,7 +1797,31 @@ export const textGenMultiProviderModels: Record<
|
|
|
1753
1797
|
latencyTier: 'standard',
|
|
1754
1798
|
available: true,
|
|
1755
1799
|
},
|
|
1800
|
+
{
|
|
1801
|
+
provider: 'deepinfra',
|
|
1802
|
+
providerModel: 'deepinfra_glm_5_1',
|
|
1803
|
+
latencyTier: 'standard',
|
|
1804
|
+
available: true,
|
|
1805
|
+
},
|
|
1756
1806
|
],
|
|
1807
|
+
// GLM 4.6 — DeepInfra primary at $0.43/$1.74; OpenRouter fallback.
|
|
1808
|
+
glm_4_6: [
|
|
1809
|
+
{
|
|
1810
|
+
provider: 'deepinfra',
|
|
1811
|
+
providerModel: 'deepinfra_glm_4_6',
|
|
1812
|
+
latencyTier: 'standard',
|
|
1813
|
+
available: true,
|
|
1814
|
+
},
|
|
1815
|
+
{
|
|
1816
|
+
provider: 'openrouter',
|
|
1817
|
+
providerModel: 'openrouter_glm_4_6',
|
|
1818
|
+
latencyTier: 'standard',
|
|
1819
|
+
available: true,
|
|
1820
|
+
},
|
|
1821
|
+
],
|
|
1822
|
+
// Qwen 3.6 Plus — OpenRouter only. The "plus" SKU isn't carried by
|
|
1823
|
+
// DeepInfra (DeepInfra exposes 27B and 35B-A3B variants under
|
|
1824
|
+
// separate clean names below).
|
|
1757
1825
|
qwen3_6_plus: [
|
|
1758
1826
|
{
|
|
1759
1827
|
provider: 'openrouter',
|
|
@@ -1762,6 +1830,23 @@ export const textGenMultiProviderModels: Record<
|
|
|
1762
1830
|
available: true,
|
|
1763
1831
|
},
|
|
1764
1832
|
],
|
|
1833
|
+
// Qwen 3.6 27B — dense (non-MoE). DeepInfra primary at $0.32/$3.20.
|
|
1834
|
+
qwen3_6_27b: [
|
|
1835
|
+
{
|
|
1836
|
+
provider: 'deepinfra',
|
|
1837
|
+
providerModel: 'deepinfra_qwen3_6_27b',
|
|
1838
|
+
latencyTier: 'standard',
|
|
1839
|
+
available: true,
|
|
1840
|
+
},
|
|
1841
|
+
{
|
|
1842
|
+
provider: 'openrouter',
|
|
1843
|
+
providerModel: 'openrouter_qwen3_6_27b',
|
|
1844
|
+
latencyTier: 'standard',
|
|
1845
|
+
available: true,
|
|
1846
|
+
},
|
|
1847
|
+
],
|
|
1848
|
+
// Kimi K2 Thinking — OpenRouter only. DeepInfra has K2.5/K2.6 but
|
|
1849
|
+
// no specific "thinking" variant.
|
|
1765
1850
|
kimi_k2_thinking: [
|
|
1766
1851
|
{
|
|
1767
1852
|
provider: 'openrouter',
|
|
@@ -1775,12 +1860,10 @@ export const textGenMultiProviderModels: Record<
|
|
|
1775
1860
|
/** Model data keyed by clean model name. Use for billing/display lookups. */
|
|
1776
1861
|
export const textGenModelData: Record<TextGenModel, TextGenModelData> = {
|
|
1777
1862
|
llama_4_scout: TextGenGroqModelData.groq_llama_4_scout,
|
|
1778
|
-
llama_4_maverick: TextGenTogetherModelData.together_meta_llama4_400b,
|
|
1779
1863
|
llama_3_3_70b: TextGenGroqModelData.groq_llama_3_3_70b,
|
|
1780
1864
|
deepseek_v4_pro: TextGenDeepSeekModelData.deepseek_deepseek_v4_pro,
|
|
1781
1865
|
deepseek_v4_flash: TextGenDeepSeekModelData.deepseek_deepseek_v4_flash,
|
|
1782
1866
|
gpt_oss_120b: TextGenGroqModelData.groq_gpt_oss_120b,
|
|
1783
|
-
gpt_oss_20b: TextGenGroqModelData.groq_gpt_oss_20b,
|
|
1784
1867
|
gemini_2_5_flash: TextGenGoogleModelData.google_gemini_2_5_flash,
|
|
1785
1868
|
gemini_2_5: TextGenGoogleModelData.google_gemini_2_5,
|
|
1786
1869
|
gpt_4o: TextGenOpenAIModelData.openai_gpt_4o,
|
|
@@ -1788,20 +1871,21 @@ export const textGenModelData: Record<TextGenModel, TextGenModelData> = {
|
|
|
1788
1871
|
gpt_5_mini: TextGenOpenAIModelData.openai_gpt_5_mini,
|
|
1789
1872
|
o3: TextGenOpenAIModelData.openai_o3,
|
|
1790
1873
|
o4_mini: TextGenOpenAIModelData.openai_o4_mini,
|
|
1791
|
-
gpt_41_nano: TextGenOpenAIModelData.openai_gpt_41_nano,
|
|
1792
|
-
claude_3_sonnet: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
|
|
1793
1874
|
claude_sonnet_4_6: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
|
|
1794
1875
|
claude_opus_4_7: TextGenAnthropicModelData.anthrophic_claude_3_opus,
|
|
1795
1876
|
claude_haiku_4_5: TextGenAnthropicModelData.anthrophic_claude_haiku_4_5,
|
|
1796
1877
|
qwen2_72b: TextGenTogetherModelData.together_qwen2_72b,
|
|
1797
1878
|
qwen2_vision_72b: TextGenTogetherModelData.together_qwen2_vision_72b,
|
|
1798
1879
|
qwen3_235b: TextGenTogetherModelData.together_qwen3_235b,
|
|
1880
|
+
qwen3_6_27b: TextGenDeepInfraModelData.deepinfra_qwen3_6_27b,
|
|
1799
1881
|
kimi_k2: TextGenTogetherModelData.together_kimi_k2,
|
|
1882
|
+
kimi_k2_6: TextGenDeepInfraModelData.deepinfra_kimi_k2_6,
|
|
1800
1883
|
gemma_4_31b: TextGenTogetherModelData.together_gemma_4_31b,
|
|
1801
1884
|
gemma_4_26b: TextGenDeepInfraModelData.deepinfra_gemma_4_26b,
|
|
1802
|
-
minimax_m2_5:
|
|
1885
|
+
minimax_m2_5: TextGenDeepInfraModelData.deepinfra_minimax_m2_5,
|
|
1803
1886
|
minimax_m2_7: TextGenMiniMaxModelData.minimax_m2_7,
|
|
1804
1887
|
glm_5_1: TextGenOpenRouterModelData.openrouter_glm_5_1,
|
|
1888
|
+
glm_4_6: TextGenDeepInfraModelData.deepinfra_glm_4_6,
|
|
1805
1889
|
qwen3_6_plus: TextGenOpenRouterModelData.openrouter_qwen_36_plus,
|
|
1806
1890
|
kimi_k2_thinking: TextGenOpenRouterModelData.openrouter_kimi_k2_thinking,
|
|
1807
1891
|
};
|