ugly-app 0.1.430 → 0.1.431
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/version.d.ts +1 -1
- package/dist/cli/version.js +1 -1
- package/dist/shared/FrameworkRequests.d.ts +1 -1
- package/dist/shared/TextGen.d.ts +15 -17
- package/dist/shared/TextGen.d.ts.map +1 -1
- package/dist/shared/TextGen.js +248 -164
- package/dist/shared/TextGen.js.map +1 -1
- package/package.json +1 -1
- package/src/cli/version.ts +1 -1
- package/src/shared/TextGen.ts +248 -164
package/dist/shared/TextGen.js
CHANGED
@@ -19,7 +19,6 @@ export const textGenTogetherModels = [
 // Note: Llama 4 models require enterprise deployment on Fireworks, use Groq/Together instead
 export const textGenFireworksModels = [
   'fireworks_gpt_oss_120b',
-  'fireworks_gpt_oss_20b',
 ];
 // DeepSeek — direct route to api.deepseek.com (OpenAI-compatible).
 // Only V4 Pro and V4 Flash are supported; the older R1/V3/V3.1/V3.2
@@ -38,11 +37,9 @@ export const textGenDeepSeekModels = [
 // Groq models
 export const textGenGroqModels = [
   'groq_llama_4_scout',
-  'groq_llama_4_maverick',
   'groq_mixtral_8x7b',
   'groq_llama_3_3_70b',
   'groq_gpt_oss_120b',
-  'groq_gpt_oss_20b',
 ];
 export const textGenGoogleModels = [
   'google_gemini_2_5',
@@ -63,7 +60,6 @@ export const textGenOpenAIModels = [
   'openai_o1_mini',
   'openai_o3',
   'openai_o4_mini',
-  'openai_gpt_41_nano',
 ];
 // MiniMax — agentic coding models (M2.5 / M2.7)
 export const textGenMiniMaxModels = [
@@ -76,8 +72,11 @@ export const textGenMiniMaxModels = [
 // us a warm path when the direct upstream is throttled or down.
 export const textGenOpenRouterModels = [
   'openrouter_glm_5_1',
+  'openrouter_glm_4_6',
   'openrouter_qwen_36_plus',
+  'openrouter_qwen3_6_27b',
   'openrouter_kimi_k2_thinking',
+  'openrouter_kimi_k2_6',
   'openrouter_gemma_4_31b',
   'openrouter_gemma_4_26b',
   'openrouter_gemma_4_26b_free',
@@ -89,6 +88,7 @@ export const textGenOpenRouterModels = [
   'openrouter_o4_mini',
   'openrouter_claude_sonnet_4_6',
   'openrouter_claude_haiku_4_5',
+  'openrouter_claude_opus_4_7',
 ];
 // DeepInfra — direct route to Gemma 4 family (and potentially other
 // open-weight models). Auto-caches on identical prefixes and reports
@@ -98,9 +98,13 @@ export const textGenDeepInfraModels = [
   'deepinfra_gemma_4_31b',
   'deepinfra_gemma_4_26b',
   'deepinfra_gpt_oss_120b',
-  'deepinfra_gpt_oss_20b',
   'deepinfra_qwen3_235b',
-  '
+  'deepinfra_qwen3_6_27b',
+  'deepinfra_qwen3_6_35b_a3b',
+  'deepinfra_kimi_k2_6',
+  'deepinfra_glm_4_6',
+  'deepinfra_glm_5_1',
+  'deepinfra_minimax_m2_5',
   'deepinfra_deepseek_v4_pro',
   'deepinfra_deepseek_v4_flash',
 ];
@@ -129,24 +133,23 @@ const textGenProviderModelValues = [
   'openai_o1_mini',
   'openai_o3',
   'openai_o4_mini',
-  'openai_gpt_41_nano',
   // Fireworks (GPT-OSS - Llama 4 requires enterprise deployment)
   'fireworks_gpt_oss_120b',
-  'fireworks_gpt_oss_20b',
   // Groq
   'groq_llama_4_scout',
-  'groq_llama_4_maverick',
   'groq_mixtral_8x7b',
   'groq_llama_3_3_70b',
   'groq_gpt_oss_120b',
-  'groq_gpt_oss_20b',
   // MiniMax
   'minimax_m2_5',
   'minimax_m2_7',
   // OpenRouter
   'openrouter_glm_5_1',
+  'openrouter_glm_4_6',
   'openrouter_qwen_36_plus',
+  'openrouter_qwen3_6_27b',
   'openrouter_kimi_k2_thinking',
+  'openrouter_kimi_k2_6',
   'openrouter_gemma_4_31b',
   'openrouter_gemma_4_26b',
   'openrouter_gemma_4_26b_free',
@@ -157,13 +160,18 @@ const textGenProviderModelValues = [
   'openrouter_o4_mini',
   'openrouter_claude_sonnet_4_6',
   'openrouter_claude_haiku_4_5',
+  'openrouter_claude_opus_4_7',
   // DeepInfra
   'deepinfra_gemma_4_31b',
   'deepinfra_gemma_4_26b',
   'deepinfra_gpt_oss_120b',
-  'deepinfra_gpt_oss_20b',
   'deepinfra_qwen3_235b',
-  '
+  'deepinfra_qwen3_6_27b',
+  'deepinfra_qwen3_6_35b_a3b',
+  'deepinfra_kimi_k2_6',
+  'deepinfra_glm_4_6',
+  'deepinfra_glm_5_1',
+  'deepinfra_minimax_m2_5',
   'deepinfra_deepseek_v4_pro',
   'deepinfra_deepseek_v4_flash',
   // DeepSeek (direct, BYO key, OpenAI-compatible)
@@ -181,13 +189,11 @@ export const textGenProviderModelsSet = new Set(textGenProviderModels);
 const textGenModelValues = [
   // Multi-provider routed models
   'llama_4_scout',
-  'llama_4_maverick',
   'llama_3_3_70b',
-  // DeepSeek V4 — direct
+  // DeepSeek V4 — direct (api.deepseek.com) + DeepInfra fallback
   'deepseek_v4_pro',
   'deepseek_v4_flash',
   'gpt_oss_120b',
-  'gpt_oss_20b',
   // Google
   'gemini_2_5_flash',
   'gemini_2_5',
@@ -195,24 +201,27 @@ const textGenModelValues = [
   'gpt_4o',
   'gpt_5',
   'gpt_5_mini',
-  'gpt_41_nano',
   'o3',
   'o4_mini',
   // Anthropic — coding-agent tiers (Anthropic direct + OpenRouter fallback)
   'claude_opus_4_7',
   'claude_sonnet_4_6',
-  'claude_3_sonnet',
   'claude_haiku_4_5',
   // Multi-provider open-weight (DeepInfra / OpenRouter / Together)
   'gemma_4_31b',
   'gemma_4_26b',
-  // MiniMax
+  // MiniMax — direct + DeepInfra fallback for M2.5
   'minimax_m2_5',
   'minimax_m2_7',
-  // OpenRouter
+  // GLM — DeepInfra primary + OpenRouter fallback
   'glm_5_1',
+  'glm_4_6',
+  // Qwen 3.6 — OpenRouter + DeepInfra
   'qwen3_6_plus',
+  'qwen3_6_27b',
+  // Kimi — OpenRouter + DeepInfra
   'kimi_k2_thinking',
+  'kimi_k2_6',
   // Single-provider (Together — misc)
   'qwen2_72b',
   'qwen2_vision_72b',
@@ -419,25 +428,6 @@ export const TextGenOpenAIModelData = {
     smartness: 4,
     compactAt: 0.90,
   },
-  // GPT-4.1 Nano — cheapest viable model, 1M context
-  openai_gpt_41_nano: {
-    model: 'gpt-4.1-nano',
-    contextWindow: 1_000_000,
-    inputTokenNanoDollar: 100,
-    outputTokenNanoDollar: 400,
-    vision: true,
-    toolCalling: true,
-    jsonMode: true,
-    streaming: true,
-    parallelToolCalls: true,
-    // Coding agent
-    name: 'GPT-4.1 Nano',
-    provider: 'OpenAI',
-    speed: 'fast',
-    reasoning: 'strong',
-    smartness: 2,
-    compactAt: 0.90,
-  },
 };
 export const TextGenTogetherModelData = {
   together_meta_llama_vision_3_11b: {
@@ -578,17 +568,6 @@ export const TextGenFireworksModelData = {
     streaming: true,
     parallelToolCalls: false,
   },
-  fireworks_gpt_oss_20b: {
-    model: 'accounts/fireworks/models/gpt-oss-20b',
-    contextWindow: 128000,
-    // $0.075 input / $0.30 output per 1M tokens
-    inputTokenNanoDollar: 75,
-    outputTokenNanoDollar: 300,
-    toolCalling: true,
-    jsonMode: true,
-    streaming: true,
-    parallelToolCalls: false,
-  },
 };
 // Groq models - ultra-fast inference
 // Note: Tool calling for Llama 4 has known reliability issues
@@ -605,18 +584,6 @@ export const TextGenGroqModelData = {
     streaming: true,
     parallelToolCalls: false,
   },
-  groq_llama_4_maverick: {
-    model: 'meta-llama/llama-4-maverick-17b-128e-instruct',
-    contextWindow: 128000,
-    inputTokenNanoDollar: 200,
-    outputTokenNanoDollar: 600,
-    vision: true,
-    // Note: toolCalling marked 'unreliable' due to known 100% failure rate with structured prompts
-    toolCalling: 'unreliable',
-    jsonMode: true,
-    streaming: true,
-    parallelToolCalls: false,
-  },
   groq_mixtral_8x7b: {
     model: 'mixtral-8x7b-32768',
     contextWindow: 32768,
@@ -649,17 +616,6 @@ export const TextGenGroqModelData = {
     streaming: true,
     parallelToolCalls: false,
   },
-  groq_gpt_oss_20b: {
-    model: 'openai/gpt-oss-20b',
-    contextWindow: 128000,
-    // $0.075 input / $0.30 output per 1M tokens
-    inputTokenNanoDollar: 75,
-    outputTokenNanoDollar: 300,
-    toolCalling: true,
-    jsonMode: true,
-    streaming: true,
-    parallelToolCalls: false,
-  },
 };
 export const TextGenKieModelData = {};
 // MiniMax — agentic coding line (M2.5 / M2.7).
@@ -959,6 +915,60 @@ export const TextGenOpenRouterModelData = {
     // doesn't assume a cache discount that never materializes.
     supportsCacheControl: false,
   },
+  // Claude Opus 4.7 fallback — primary still Anthropic direct. OpenRouter
+  // list price is ~5% over upstream. Cache passthrough verified to work
+  // for sonnet-4.6; opus-4.7 not yet verified — leave conservative.
+  openrouter_claude_opus_4_7: {
+    model: 'anthropic/claude-opus-4.7',
+    contextWindow: 200_000,
+    inputTokenNanoDollar: 15_750, // ~$15/M × 1.05
+    outputTokenNanoDollar: 78_750, // ~$75/M × 1.05
+    vision: true,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: true,
+    supportsCacheControl: false,
+  },
+  // GLM 4.6 OpenRouter route. List price is roughly the same as
+  // DeepInfra's published rate; OpenRouter wins when DeepInfra is
+  // throttled.
+  openrouter_glm_4_6: {
+    model: 'z-ai/glm-4.6',
+    contextWindow: 200_000,
+    inputTokenNanoDollar: 600,
+    outputTokenNanoDollar: 2200,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: false,
+    supportsCacheControl: false,
+  },
+  // Qwen 3.6 27B OpenRouter route — dense (non-MoE) variant.
+  openrouter_qwen3_6_27b: {
+    model: 'qwen/qwen3.6-27b',
+    contextWindow: 262_144,
+    inputTokenNanoDollar: 350,
+    outputTokenNanoDollar: 3360,
+    vision: true,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: false,
+    supportsCacheControl: false,
+  },
+  // Kimi K2.6 OpenRouter route.
+  openrouter_kimi_k2_6: {
+    model: 'moonshotai/kimi-k2.6',
+    contextWindow: 262_144,
+    inputTokenNanoDollar: 800,
+    outputTokenNanoDollar: 3700,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: false,
+    supportsCacheControl: false,
+  },
 };
 // DeepInfra — direct route, bypasses OpenRouter's shared rate limits.
 // IMPORTANT: DeepInfra does NOT cache Gemma-4 prompts — probed with
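The pricing fields in these entries read as per-token prices in nanodollars (1e-9 USD), so a list price of $X per 1M tokens comes out to X × 1000 nanodollars per token; the Opus 4.7 figures above are the $15/$75 list with the ~5% OpenRouter markup applied. A quick sanity check of that arithmetic, sketched against the field shape visible in this diff rather than taken from the package:

// Reader's sketch, not package code: converts a $/1M-token list price to
// the nanodollar-per-token unit the entries above appear to use.
// 1 nanodollar = 1e-9 USD, so $X per 1e6 tokens = X * 1000 nanodollars/token.
const perMillionToNanoDollar = (usdPerMillion: number, markup = 1): number =>
  Math.round((usdPerMillion * markup * 1e9) / 1e6);

console.log(perMillionToNanoDollar(15, 1.05)); // 15750 (openrouter_claude_opus_4_7 input)
console.log(perMillionToNanoDollar(75, 1.05)); // 78750 (its output rate)
console.log(perMillionToNanoDollar(0.6));      // 600   (openrouter_glm_4_6 input)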
@@ -1025,43 +1035,107 @@ export const TextGenDeepInfraModelData = {
     parallelToolCalls: false,
     supportsCacheControl: false,
   },
-  //
-
-    model: '
-    contextWindow:
-    inputTokenNanoDollar:
-    outputTokenNanoDollar:
+  // Qwen3-235B-A22B-Instruct-2507 (live 2026-05-03 on DeepInfra).
+  deepinfra_qwen3_235b: {
+    model: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
+    contextWindow: 256_000,
+    inputTokenNanoDollar: 71,
+    outputTokenNanoDollar: 100,
     toolCalling: true,
     jsonMode: true,
     streaming: true,
     parallelToolCalls: false,
     supportsCacheControl: false,
   },
-  // Qwen3
-  //
-
-    model: 'Qwen/Qwen3-
-    contextWindow:
-    inputTokenNanoDollar:
-    outputTokenNanoDollar:
+  // Qwen3.6 27B — $0.32/$3.20 per 1M (live 2026-05-03), 256k ctx,
+  // multimodal + reasoning. The dense, non-MoE variant.
+  deepinfra_qwen3_6_27b: {
+    model: 'Qwen/Qwen3.6-27B',
+    contextWindow: 262_144,
+    inputTokenNanoDollar: 320,
+    outputTokenNanoDollar: 3200,
+    vision: true,
     toolCalling: true,
     jsonMode: true,
     streaming: true,
     parallelToolCalls: false,
     supportsCacheControl: false,
   },
-  //
-
-
-
-
-
+  // Qwen3.6 35B-A3B — MoE variant, dramatically cheaper.
+  // $0.15/$0.95 per 1M (live 2026-05-03), 256k ctx.
+  deepinfra_qwen3_6_35b_a3b: {
+    model: 'Qwen/Qwen3.6-35B-A3B',
+    contextWindow: 262_144,
+    inputTokenNanoDollar: 150,
+    outputTokenNanoDollar: 950,
+    vision: true,
     toolCalling: true,
     jsonMode: true,
     streaming: true,
     parallelToolCalls: false,
     supportsCacheControl: false,
   },
+  // Kimi K2.6 — newer than the (now-removed) K2-Instruct. $0.75/$3.50
+  // per 1M (live 2026-05-03), 256k ctx, native reasoning. Cache-read
+  // is 20% of input → 150 nanoDollar/M.
+  deepinfra_kimi_k2_6: {
+    model: 'moonshotai/Kimi-K2.6',
+    contextWindow: 262_144,
+    inputTokenNanoDollar: 750,
+    outputTokenNanoDollar: 3500,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: false,
+    supportsCacheControl: true,
+    cacheReadTokenNanoDollar: 150,
+  },
+  // GLM 4.6 — $0.43/$1.74 per 1M (live 2026-05-03), 200k ctx.
+  // Cache-read 18.6% of input → 80 nanoDollar/M.
+  deepinfra_glm_4_6: {
+    model: 'zai-org/GLM-4.6',
+    contextWindow: 202_752,
+    inputTokenNanoDollar: 430,
+    outputTokenNanoDollar: 1740,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: false,
+    supportsCacheControl: true,
+    cacheReadTokenNanoDollar: 80,
+  },
+  // GLM 5.1 fallback route. Pricing $1.05/$3.50 per 1M (live
+  // 2026-05-03), 200k ctx. Slightly higher than the OpenRouter
+  // route's published list, so OpenRouter stays primary on price.
+  // Cache-read 19.5% of input → 205 nanoDollar/M.
+  deepinfra_glm_5_1: {
+    model: 'zai-org/GLM-5.1',
+    contextWindow: 202_752,
+    inputTokenNanoDollar: 1050,
+    outputTokenNanoDollar: 3500,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: false,
+    supportsCacheControl: true,
+    cacheReadTokenNanoDollar: 205,
+  },
+  // MiniMax M2.5 fallback. Pricing $0.15/$1.15 per 1M (live
+  // 2026-05-03) — actually CHEAPER than direct MiniMax ($0.30/$1.20),
+  // so the price-priority sort prefers DeepInfra. Direct stays as a
+  // healthy backup. 196k ctx, cache-read 20% → 30 nanoDollar/M.
+  deepinfra_minimax_m2_5: {
+    model: 'MiniMaxAI/MiniMax-M2.5',
+    contextWindow: 196_608,
+    inputTokenNanoDollar: 150,
+    outputTokenNanoDollar: 1150,
+    toolCalling: true,
+    jsonMode: true,
+    streaming: true,
+    parallelToolCalls: false,
+    supportsCacheControl: true,
+    cacheReadTokenNanoDollar: 30,
+  },
   // DeepSeek V4 Pro fallback route — used when api.deepseek.com is
   // throttled or down. Pricing live-verified 2026-05-03 against
   // /models/deepseek-ai/DeepSeek-V4-Pro: $1.74 in / $3.48 out per 1M
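The cache-read rates in these new entries are flat percentages of the input rate: 20% for Kimi K2.6 and MiniMax M2.5, roughly 18.6% for GLM 4.6 and 19.5% for GLM 5.1. A hedged sketch of how a billing estimate could consume these fields; the interface mirrors only the keys visible in this hunk, and estimateUsd is illustrative, not a package export:

// Illustrative cost estimate over the entry shape visible in this hunk.
// Only the pricing keys shown above are assumed; the helper is not from
// the package.
interface ModelPricing {
  inputTokenNanoDollar: number;
  outputTokenNanoDollar: number;
  cacheReadTokenNanoDollar?: number; // only present when supportsCacheControl
}

function estimateUsd(
  p: ModelPricing,
  t: { input: number; cachedInput: number; output: number },
): number {
  // Cached input bills at the cache-read rate when one exists,
  // otherwise at the full input rate.
  const cacheRate = p.cacheReadTokenNanoDollar ?? p.inputTokenNanoDollar;
  const nano =
    t.input * p.inputTokenNanoDollar +
    t.cachedInput * cacheRate +
    t.output * p.outputTokenNanoDollar;
  return nano / 1e9; // nanodollars -> USD
}

// deepinfra_minimax_m2_5 above: 150 in / 1150 out / 30 cache-read.
const usd = estimateUsd(
  { inputTokenNanoDollar: 150, outputTokenNanoDollar: 1150, cacheReadTokenNanoDollar: 30 },
  { input: 40_000, cachedInput: 160_000, output: 8_000 },
);
console.log(usd.toFixed(3)); // "0.020" for a 200k-token prompt that is 80% cached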
@@ -1192,22 +1266,6 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
-  llama_4_maverick: [
-    // Groq deprecated llama-4-maverick-17b-128e effective March 9, 2026
-    {
-      provider: 'groq',
-      providerModel: 'groq_llama_4_maverick',
-      latencyTier: 'fast',
-      available: false,
-    },
-    // Fireworks Llama 4 requires enterprise deployment, not available on serverless
-    {
-      provider: 'together',
-      providerModel: 'together_meta_llama4_400b',
-      latencyTier: 'standard',
-      available: true,
-    },
-  ],
   // DeepSeek V4 — direct route to api.deepseek.com (BYO key, OpenAI-
   // compatible) primary, DeepInfra fallback for failover only. Direct
   // is ~6× cheaper on Pro and ~2× on Flash so the price-priority sort
@@ -1272,26 +1330,6 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
-  gpt_oss_20b: [
-    {
-      provider: 'deepinfra',
-      providerModel: 'deepinfra_gpt_oss_20b',
-      latencyTier: 'standard',
-      available: true,
-    },
-    {
-      provider: 'groq',
-      providerModel: 'groq_gpt_oss_20b',
-      latencyTier: 'fast',
-      available: true,
-    },
-    {
-      provider: 'fireworks',
-      providerModel: 'fireworks_gpt_oss_20b',
-      latencyTier: 'standard',
-      available: true,
-    },
-  ],
   // Single-provider Google models
   gemini_2_5_flash: [
     {
@@ -1385,19 +1423,11 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
-  gpt_41_nano: [
-    {
-      provider: 'openai',
-      providerModel: 'openai_gpt_41_nano',
-      latencyTier: 'fast',
-      available: true,
-    },
-  ],
   // Anthropic models — direct first, OpenRouter fallback second.
-  //
-  // the
-  //
-
+  // The Anthropic entry ships 4.x behind a wire-dated snapshot, and
+  // OpenRouter mirrors the same model with ~5% markup. Cache_control
+  // passthrough is verified for sonnet but unverified for opus/haiku.
+  claude_sonnet_4_6: [
     {
       provider: 'anthropic',
       providerModel: 'anthrophic_claude_3_sonnet',
@@ -1411,28 +1441,16 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
-
-  // to 4.6 / 4.7 without depending on claude_3_sonnet's legacy label.
-  // Same offering chain — the Anthropic entry ships 4.x behind the
-  // wire-dated snapshot, and OpenRouter mirrors the same model.
-  claude_sonnet_4_6: [
+  claude_opus_4_7: [
     {
       provider: 'anthropic',
-      providerModel: '
-      latencyTier: '
+      providerModel: 'anthrophic_claude_3_opus',
+      latencyTier: 'slow',
       available: true,
     },
     {
       provider: 'openrouter',
-      providerModel: '
-      latencyTier: 'standard',
-      available: true,
-    },
-  ],
-  claude_opus_4_7: [
-    {
-      provider: 'anthropic',
-      providerModel: 'anthrophic_claude_3_opus',
+      providerModel: 'openrouter_claude_opus_4_7',
       latencyTier: 'slow',
       available: true,
     },
@@ -1485,17 +1503,29 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
+  // Kimi K2 (original Instruct) — DeepInfra retired Kimi-K2-Instruct
+  // 2026-05-03 in favor of K2.5/K2.6 (different model). Together still
+  // serves the original K2-Instruct as the only path.
   kimi_k2: [
-
+    {
+      provider: 'together',
+      providerModel: 'together_kimi_k2',
+      latencyTier: 'standard',
+      available: true,
+    },
+  ],
+  // Kimi K2.6 — newer model. DeepInfra primary (cheaper), OpenRouter
+  // fallback. Both expose the same wire model id family.
+  kimi_k2_6: [
     {
       provider: 'deepinfra',
-      providerModel: '
+      providerModel: 'deepinfra_kimi_k2_6',
       latencyTier: 'standard',
       available: true,
     },
     {
-      provider: '
-      providerModel: '
+      provider: 'openrouter',
+      providerModel: 'openrouter_kimi_k2_6',
       latencyTier: 'standard',
       available: true,
     },
@@ -1546,7 +1576,16 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
+  // MiniMax M2.5 — DeepInfra is cheaper than direct ($0.15/$1.15 vs
+  // $0.30/$1.20), so DeepInfra wins the price-priority sort. Direct
+  // stays as a healthy fallback.
   minimax_m2_5: [
+    {
+      provider: 'deepinfra',
+      providerModel: 'deepinfra_minimax_m2_5',
+      latencyTier: 'standard',
+      available: true,
+    },
     {
       provider: 'minimax',
       providerModel: 'minimax_m2_5',
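Several chain comments here and below lean on a "price-priority sort" (for M2.5, DeepInfra's $0.15 input beats direct MiniMax's $0.30, so DeepInfra goes first). The sort itself is not part of this diff; a minimal sketch of such a router over the entry shape above, where pickProvider and the price lookup are assumptions, not package code:

// Minimal sketch of the price-priority failover the comments describe;
// the entry shape matches the chains above, everything else is assumed.
interface ProviderEntry {
  provider: string;
  providerModel: string;
  latencyTier: 'fast' | 'standard' | 'slow';
  available: boolean;
}

function pickProvider(
  chain: ProviderEntry[],
  inputNanoDollarOf: (providerModel: string) => number,
  unhealthy: ReadonlySet<string> = new Set(),
): ProviderEntry | undefined {
  return chain
    .filter((e) => e.available && !unhealthy.has(e.provider))
    .sort((a, b) => inputNanoDollarOf(a.providerModel) - inputNanoDollarOf(b.providerModel))[0];
}

// On minimax_m2_5's chain, deepinfra (150) sorts ahead of direct minimax
// (300); marking deepinfra unhealthy falls through to the direct route,
// matching the "healthy fallback" note above.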
@@ -1554,6 +1593,8 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
+  // MiniMax M2.7 — not on DeepInfra (only M2.5 is hosted there).
+  // Direct route only.
   minimax_m2_7: [
     {
       provider: 'minimax',
@@ -1562,6 +1603,9 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
+  // GLM 5.1 — OpenRouter is cheaper ($0.95/$3.15 vs DeepInfra
+  // $1.05/$3.50) and has been stable; OpenRouter primary, DeepInfra
+  // fallback when OR is throttled.
   glm_5_1: [
     {
       provider: 'openrouter',
@@ -1569,7 +1613,31 @@ export const textGenMultiProviderModels = {
       latencyTier: 'standard',
       available: true,
     },
+    {
+      provider: 'deepinfra',
+      providerModel: 'deepinfra_glm_5_1',
+      latencyTier: 'standard',
+      available: true,
+    },
   ],
+  // GLM 4.6 — DeepInfra primary at $0.43/$1.74; OpenRouter fallback.
+  glm_4_6: [
+    {
+      provider: 'deepinfra',
+      providerModel: 'deepinfra_glm_4_6',
+      latencyTier: 'standard',
+      available: true,
+    },
+    {
+      provider: 'openrouter',
+      providerModel: 'openrouter_glm_4_6',
+      latencyTier: 'standard',
+      available: true,
+    },
+  ],
+  // Qwen 3.6 Plus — OpenRouter only. The "plus" SKU isn't carried by
+  // DeepInfra (DeepInfra exposes 27B and 35B-A3B variants under
+  // separate clean names below).
   qwen3_6_plus: [
     {
       provider: 'openrouter',
@@ -1578,6 +1646,23 @@ export const textGenMultiProviderModels = {
       available: true,
     },
   ],
+  // Qwen 3.6 27B — dense (non-MoE). DeepInfra primary at $0.32/$3.20.
+  qwen3_6_27b: [
+    {
+      provider: 'deepinfra',
+      providerModel: 'deepinfra_qwen3_6_27b',
+      latencyTier: 'standard',
+      available: true,
+    },
+    {
+      provider: 'openrouter',
+      providerModel: 'openrouter_qwen3_6_27b',
+      latencyTier: 'standard',
+      available: true,
+    },
+  ],
+  // Kimi K2 Thinking — OpenRouter only. DeepInfra has K2.5/K2.6 but
+  // no specific "thinking" variant.
   kimi_k2_thinking: [
     {
       provider: 'openrouter',
@@ -1590,12 +1675,10 @@ export const textGenMultiProviderModels = {
 /** Model data keyed by clean model name. Use for billing/display lookups. */
 export const textGenModelData = {
   llama_4_scout: TextGenGroqModelData.groq_llama_4_scout,
-  llama_4_maverick: TextGenTogetherModelData.together_meta_llama4_400b,
   llama_3_3_70b: TextGenGroqModelData.groq_llama_3_3_70b,
   deepseek_v4_pro: TextGenDeepSeekModelData.deepseek_deepseek_v4_pro,
   deepseek_v4_flash: TextGenDeepSeekModelData.deepseek_deepseek_v4_flash,
   gpt_oss_120b: TextGenGroqModelData.groq_gpt_oss_120b,
-  gpt_oss_20b: TextGenGroqModelData.groq_gpt_oss_20b,
   gemini_2_5_flash: TextGenGoogleModelData.google_gemini_2_5_flash,
   gemini_2_5: TextGenGoogleModelData.google_gemini_2_5,
   gpt_4o: TextGenOpenAIModelData.openai_gpt_4o,
@@ -1603,20 +1686,21 @@ export const textGenModelData = {
   gpt_5_mini: TextGenOpenAIModelData.openai_gpt_5_mini,
   o3: TextGenOpenAIModelData.openai_o3,
   o4_mini: TextGenOpenAIModelData.openai_o4_mini,
-  gpt_41_nano: TextGenOpenAIModelData.openai_gpt_41_nano,
-  claude_3_sonnet: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
   claude_sonnet_4_6: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
   claude_opus_4_7: TextGenAnthropicModelData.anthrophic_claude_3_opus,
   claude_haiku_4_5: TextGenAnthropicModelData.anthrophic_claude_haiku_4_5,
   qwen2_72b: TextGenTogetherModelData.together_qwen2_72b,
   qwen2_vision_72b: TextGenTogetherModelData.together_qwen2_vision_72b,
   qwen3_235b: TextGenTogetherModelData.together_qwen3_235b,
+  qwen3_6_27b: TextGenDeepInfraModelData.deepinfra_qwen3_6_27b,
   kimi_k2: TextGenTogetherModelData.together_kimi_k2,
+  kimi_k2_6: TextGenDeepInfraModelData.deepinfra_kimi_k2_6,
   gemma_4_31b: TextGenTogetherModelData.together_gemma_4_31b,
   gemma_4_26b: TextGenDeepInfraModelData.deepinfra_gemma_4_26b,
-  minimax_m2_5:
+  minimax_m2_5: TextGenDeepInfraModelData.deepinfra_minimax_m2_5,
   minimax_m2_7: TextGenMiniMaxModelData.minimax_m2_7,
   glm_5_1: TextGenOpenRouterModelData.openrouter_glm_5_1,
+  glm_4_6: TextGenDeepInfraModelData.deepinfra_glm_4_6,
   qwen3_6_plus: TextGenOpenRouterModelData.openrouter_qwen_36_plus,
   kimi_k2_thinking: TextGenOpenRouterModelData.openrouter_kimi_k2_thinking,
 };
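Per its doc comment, the clean-name map is meant for billing/display lookups, so each new key resolves straight to its primary provider's pricing entry, while the provider chains keep the fallback order. A consumer sketch, with a hypothetical import path:

// Sketch of a consumer of the maps changed above. The import path is
// hypothetical; the keys and targets are the ones visible in this diff.
import { textGenModelData, textGenMultiProviderModels } from 'ugly-app/dist/shared/TextGen';

// New in 0.1.431: kimi_k2_6 billing data comes from the DeepInfra entry...
const kimi = textGenModelData.kimi_k2_6;
console.log(kimi.model, kimi.inputTokenNanoDollar); // 'moonshotai/Kimi-K2.6' 750

// ...while the provider chain still lists OpenRouter as the fallback route.
for (const entry of textGenMultiProviderModels.kimi_k2_6) {
  console.log(entry.provider, entry.providerModel, entry.available);
}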