ugly-app 0.1.430 → 0.1.431

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,6 @@ export const textGenTogetherModels = [
  // Note: Llama 4 models require enterprise deployment on Fireworks, use Groq/Together instead
  export const textGenFireworksModels = [
  'fireworks_gpt_oss_120b',
- 'fireworks_gpt_oss_20b',
  ];
  // DeepSeek — direct route to api.deepseek.com (OpenAI-compatible).
  // Only V4 Pro and V4 Flash are supported; the older R1/V3/V3.1/V3.2
@@ -38,11 +37,9 @@ export const textGenDeepSeekModels = [
  // Groq models
  export const textGenGroqModels = [
  'groq_llama_4_scout',
- 'groq_llama_4_maverick',
  'groq_mixtral_8x7b',
  'groq_llama_3_3_70b',
  'groq_gpt_oss_120b',
- 'groq_gpt_oss_20b',
  ];
  export const textGenGoogleModels = [
  'google_gemini_2_5',
@@ -63,7 +60,6 @@ export const textGenOpenAIModels = [
  'openai_o1_mini',
  'openai_o3',
  'openai_o4_mini',
- 'openai_gpt_41_nano',
  ];
  // MiniMax — agentic coding models (M2.5 / M2.7)
  export const textGenMiniMaxModels = [
@@ -76,8 +72,11 @@ export const textGenMiniMaxModels = [
  // us a warm path when the direct upstream is throttled or down.
  export const textGenOpenRouterModels = [
  'openrouter_glm_5_1',
+ 'openrouter_glm_4_6',
  'openrouter_qwen_36_plus',
+ 'openrouter_qwen3_6_27b',
  'openrouter_kimi_k2_thinking',
+ 'openrouter_kimi_k2_6',
  'openrouter_gemma_4_31b',
  'openrouter_gemma_4_26b',
  'openrouter_gemma_4_26b_free',
@@ -89,6 +88,7 @@ export const textGenOpenRouterModels = [
  'openrouter_o4_mini',
  'openrouter_claude_sonnet_4_6',
  'openrouter_claude_haiku_4_5',
+ 'openrouter_claude_opus_4_7',
  ];
  // DeepInfra — direct route to Gemma 4 family (and potentially other
  // open-weight models). Auto-caches on identical prefixes and reports
@@ -98,9 +98,13 @@ export const textGenDeepInfraModels = [
  'deepinfra_gemma_4_31b',
  'deepinfra_gemma_4_26b',
  'deepinfra_gpt_oss_120b',
- 'deepinfra_gpt_oss_20b',
  'deepinfra_qwen3_235b',
- 'deepinfra_kimi_k2',
+ 'deepinfra_qwen3_6_27b',
+ 'deepinfra_qwen3_6_35b_a3b',
+ 'deepinfra_kimi_k2_6',
+ 'deepinfra_glm_4_6',
+ 'deepinfra_glm_5_1',
+ 'deepinfra_minimax_m2_5',
  'deepinfra_deepseek_v4_pro',
  'deepinfra_deepseek_v4_flash',
  ];
@@ -129,24 +133,23 @@ const textGenProviderModelValues = [
  'openai_o1_mini',
  'openai_o3',
  'openai_o4_mini',
- 'openai_gpt_41_nano',
  // Fireworks (GPT-OSS - Llama 4 requires enterprise deployment)
  'fireworks_gpt_oss_120b',
- 'fireworks_gpt_oss_20b',
  // Groq
  'groq_llama_4_scout',
- 'groq_llama_4_maverick',
  'groq_mixtral_8x7b',
  'groq_llama_3_3_70b',
  'groq_gpt_oss_120b',
- 'groq_gpt_oss_20b',
  // MiniMax
  'minimax_m2_5',
  'minimax_m2_7',
  // OpenRouter
  'openrouter_glm_5_1',
+ 'openrouter_glm_4_6',
  'openrouter_qwen_36_plus',
+ 'openrouter_qwen3_6_27b',
  'openrouter_kimi_k2_thinking',
+ 'openrouter_kimi_k2_6',
  'openrouter_gemma_4_31b',
  'openrouter_gemma_4_26b',
  'openrouter_gemma_4_26b_free',
@@ -157,13 +160,18 @@ const textGenProviderModelValues = [
  'openrouter_o4_mini',
  'openrouter_claude_sonnet_4_6',
  'openrouter_claude_haiku_4_5',
+ 'openrouter_claude_opus_4_7',
  // DeepInfra
  'deepinfra_gemma_4_31b',
  'deepinfra_gemma_4_26b',
  'deepinfra_gpt_oss_120b',
- 'deepinfra_gpt_oss_20b',
  'deepinfra_qwen3_235b',
- 'deepinfra_kimi_k2',
+ 'deepinfra_qwen3_6_27b',
+ 'deepinfra_qwen3_6_35b_a3b',
+ 'deepinfra_kimi_k2_6',
+ 'deepinfra_glm_4_6',
+ 'deepinfra_glm_5_1',
+ 'deepinfra_minimax_m2_5',
  'deepinfra_deepseek_v4_pro',
  'deepinfra_deepseek_v4_flash',
  // DeepSeek (direct, BYO key, OpenAI-compatible)
@@ -181,13 +189,11 @@ export const textGenProviderModelsSet = new Set(textGenProviderModels);
  const textGenModelValues = [
  // Multi-provider routed models
  'llama_4_scout',
- 'llama_4_maverick',
  'llama_3_3_70b',
- // DeepSeek V4 — direct route only (api.deepseek.com)
+ // DeepSeek V4 — direct (api.deepseek.com) + DeepInfra fallback
  'deepseek_v4_pro',
  'deepseek_v4_flash',
  'gpt_oss_120b',
- 'gpt_oss_20b',
  // Google
  'gemini_2_5_flash',
  'gemini_2_5',
@@ -195,24 +201,27 @@ const textGenModelValues = [
  'gpt_4o',
  'gpt_5',
  'gpt_5_mini',
- 'gpt_41_nano',
  'o3',
  'o4_mini',
  // Anthropic — coding-agent tiers (Anthropic direct + OpenRouter fallback)
  'claude_opus_4_7',
  'claude_sonnet_4_6',
- 'claude_3_sonnet',
  'claude_haiku_4_5',
  // Multi-provider open-weight (DeepInfra / OpenRouter / Together)
  'gemma_4_31b',
  'gemma_4_26b',
- // MiniMax (direct)
+ // MiniMax direct + DeepInfra fallback for M2.5
  'minimax_m2_5',
  'minimax_m2_7',
- // OpenRouter-only frontier
+ // GLM — DeepInfra primary + OpenRouter fallback
  'glm_5_1',
+ 'glm_4_6',
+ // Qwen 3.6 — OpenRouter + DeepInfra
  'qwen3_6_plus',
+ 'qwen3_6_27b',
+ // Kimi — OpenRouter + DeepInfra
  'kimi_k2_thinking',
+ 'kimi_k2_6',
  // Single-provider (Together — misc)
  'qwen2_72b',
  'qwen2_vision_72b',
@@ -419,25 +428,6 @@ export const TextGenOpenAIModelData = {
  smartness: 4,
  compactAt: 0.90,
  },
- // GPT-4.1 Nano — cheapest viable model, 1M context
- openai_gpt_41_nano: {
- model: 'gpt-4.1-nano',
- contextWindow: 1_000_000,
- inputTokenNanoDollar: 100,
- outputTokenNanoDollar: 400,
- vision: true,
- toolCalling: true,
- jsonMode: true,
- streaming: true,
- parallelToolCalls: true,
- // Coding agent
- name: 'GPT-4.1 Nano',
- provider: 'OpenAI',
- speed: 'fast',
- reasoning: 'strong',
- smartness: 2,
- compactAt: 0.90,
- },
  };
  export const TextGenTogetherModelData = {
  together_meta_llama_vision_3_11b: {
@@ -578,17 +568,6 @@ export const TextGenFireworksModelData = {
  streaming: true,
  parallelToolCalls: false,
  },
- fireworks_gpt_oss_20b: {
- model: 'accounts/fireworks/models/gpt-oss-20b',
- contextWindow: 128000,
- // $0.075 input / $0.30 output per 1M tokens
- inputTokenNanoDollar: 75,
- outputTokenNanoDollar: 300,
- toolCalling: true,
- jsonMode: true,
- streaming: true,
- parallelToolCalls: false,
- },
  };
  // Groq models - ultra-fast inference
  // Note: Tool calling for Llama 4 has known reliability issues
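
The removed fireworks_gpt_oss_20b entry makes the pricing convention visible: the comment "$0.075 input / $0.30 output per 1M tokens" sits next to `inputTokenNanoDollar: 75` / `outputTokenNanoDollar: 300`, which implies a nanoDollar here is 1e-9 USD charged per token. A minimal sketch of the conversion and a per-request cost estimate; the helper names are illustrative, not exports of this package:

```ts
// Assumption (inferred from the comments in this file): 1 nanoDollar = 1e-9 USD
// per token, so "$X per 1M tokens" maps to X * 1000 nanoDollars per token.
const NANO_TO_USD = 1e-9;

function dollarsPerMillionToNano(dollarsPerMillion: number): number {
  // $0.075 per 1M tokens -> 75 nanoDollars per token, matching the removed entry.
  return dollarsPerMillion * 1_000;
}

function requestCostUsd(
  inputTokens: number,
  outputTokens: number,
  inputTokenNanoDollar: number,
  outputTokenNanoDollar: number,
): number {
  const nano =
    inputTokens * inputTokenNanoDollar + outputTokens * outputTokenNanoDollar;
  return nano * NANO_TO_USD;
}

console.log(dollarsPerMillionToNano(0.075)); // 75
console.log(requestCostUsd(10_000, 2_000, 75, 300)); // 0.00135 (USD)
```
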
@@ -605,18 +584,6 @@ export const TextGenGroqModelData = {
  streaming: true,
  parallelToolCalls: false,
  },
- groq_llama_4_maverick: {
- model: 'meta-llama/llama-4-maverick-17b-128e-instruct',
- contextWindow: 128000,
- inputTokenNanoDollar: 200,
- outputTokenNanoDollar: 600,
- vision: true,
- // Note: toolCalling marked 'unreliable' due to known 100% failure rate with structured prompts
- toolCalling: 'unreliable',
- jsonMode: true,
- streaming: true,
- parallelToolCalls: false,
- },
  groq_mixtral_8x7b: {
  model: 'mixtral-8x7b-32768',
  contextWindow: 32768,
@@ -649,17 +616,6 @@ export const TextGenGroqModelData = {
  streaming: true,
  parallelToolCalls: false,
  },
- groq_gpt_oss_20b: {
- model: 'openai/gpt-oss-20b',
- contextWindow: 128000,
- // $0.075 input / $0.30 output per 1M tokens
- inputTokenNanoDollar: 75,
- outputTokenNanoDollar: 300,
- toolCalling: true,
- jsonMode: true,
- streaming: true,
- parallelToolCalls: false,
- },
  };
  export const TextGenKieModelData = {};
  // MiniMax — agentic coding line (M2.5 / M2.7).
@@ -959,6 +915,60 @@ export const TextGenOpenRouterModelData = {
  // doesn't assume a cache discount that never materializes.
  supportsCacheControl: false,
  },
+ // Claude Opus 4.7 fallback — primary still Anthropic direct. OpenRouter
+ // list price is ~5% over upstream. Cache passthrough verified to work
+ // for sonnet-4.6; opus-4.7 not yet verified — leave conservative.
+ openrouter_claude_opus_4_7: {
+ model: 'anthropic/claude-opus-4.7',
+ contextWindow: 200_000,
+ inputTokenNanoDollar: 15_750, // ~$15/M × 1.05
+ outputTokenNanoDollar: 78_750, // ~$75/M × 1.05
+ vision: true,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: true,
+ supportsCacheControl: false,
+ },
+ // GLM 4.6 OpenRouter route. List price is roughly the same as
+ // DeepInfra's published rate; OpenRouter wins when DeepInfra is
+ // throttled.
+ openrouter_glm_4_6: {
+ model: 'z-ai/glm-4.6',
+ contextWindow: 200_000,
+ inputTokenNanoDollar: 600,
+ outputTokenNanoDollar: 2200,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: false,
+ supportsCacheControl: false,
+ },
+ // Qwen 3.6 27B OpenRouter route — dense (non-MoE) variant.
+ openrouter_qwen3_6_27b: {
+ model: 'qwen/qwen3.6-27b',
+ contextWindow: 262_144,
+ inputTokenNanoDollar: 350,
+ outputTokenNanoDollar: 3360,
+ vision: true,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: false,
+ supportsCacheControl: false,
+ },
+ // Kimi K2.6 OpenRouter route.
+ openrouter_kimi_k2_6: {
+ model: 'moonshotai/kimi-k2.6',
+ contextWindow: 262_144,
+ inputTokenNanoDollar: 800,
+ outputTokenNanoDollar: 3700,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: false,
+ supportsCacheControl: false,
+ },
  };
  // DeepInfra — direct route, bypasses OpenRouter's shared rate limits.
  // IMPORTANT: DeepInfra does NOT cache Gemma-4 prompts — probed with
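
All four added OpenRouter entries share one record shape, and the opus entry derives its prices as the upstream list rate times OpenRouter's ~5% markup. A sketch of the shape as inferred from the fields visible in this diff; this is not the package's actual exported type, and the OpenAI entries carry extra coding-agent fields (name, provider, speed, reasoning, smartness, compactAt) that this sketch omits:

```ts
// Inferred shape only; field meanings taken from the comments in this diff.
interface TextGenModelEntry {
  model: string;                      // provider wire id, e.g. 'anthropic/claude-opus-4.7'
  contextWindow: number;              // tokens
  inputTokenNanoDollar: number;       // 1e-9 USD per input token
  outputTokenNanoDollar: number;      // 1e-9 USD per output token
  vision?: boolean;
  toolCalling?: boolean | 'unreliable';
  jsonMode?: boolean;
  streaming?: boolean;
  parallelToolCalls?: boolean;
  supportsCacheControl?: boolean;
  cacheReadTokenNanoDollar?: number;  // present only when supportsCacheControl is true
}

// The opus fallback's prices are upstream list x 1.05 (the ~5% markup noted above):
const opusInputNano = 15_000 * 1.05;  // 15_750
const opusOutputNano = 75_000 * 1.05; // 78_750
```
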
@@ -1025,43 +1035,107 @@ export const TextGenDeepInfraModelData = {
  parallelToolCalls: false,
  supportsCacheControl: false,
  },
- // GPT-OSS 20B — same model as Groq/Fireworks at a lower price.
- deepinfra_gpt_oss_20b: {
- model: 'openai/gpt-oss-20b',
- contextWindow: 128_000,
- inputTokenNanoDollar: 30,
- outputTokenNanoDollar: 150,
+ // Qwen3-235B-A22B-Instruct-2507 (live 2026-05-03 on DeepInfra).
+ deepinfra_qwen3_235b: {
+ model: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
+ contextWindow: 256_000,
+ inputTokenNanoDollar: 71,
+ outputTokenNanoDollar: 100,
  toolCalling: true,
  jsonMode: true,
  streaming: true,
  parallelToolCalls: false,
  supportsCacheControl: false,
  },
- // Qwen3-235B — DeepInfra at $0.071/$0.10 vs Together at $0.20/$0.60.
- // ~3× cheaper input, ~6× cheaper output.
- deepinfra_qwen3_235b: {
- model: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
- contextWindow: 256_000,
- inputTokenNanoDollar: 71,
- outputTokenNanoDollar: 100,
+ // Qwen3.6 27B — $0.32/$3.20 per 1M (live 2026-05-03), 256k ctx,
+ // multimodal + reasoning. The dense, non-MoE variant.
+ deepinfra_qwen3_6_27b: {
+ model: 'Qwen/Qwen3.6-27B',
+ contextWindow: 262_144,
+ inputTokenNanoDollar: 320,
+ outputTokenNanoDollar: 3200,
+ vision: true,
  toolCalling: true,
  jsonMode: true,
  streaming: true,
  parallelToolCalls: false,
  supportsCacheControl: false,
  },
- // Kimi K2 — DeepInfra at $0.50/$2.00 vs Together at $1.00/$3.00.
- deepinfra_kimi_k2: {
- model: 'moonshotai/Kimi-K2-Instruct',
- contextWindow: 128_000,
- inputTokenNanoDollar: 500,
- outputTokenNanoDollar: 2000,
+ // Qwen3.6 35B-A3B — MoE variant, dramatically cheaper.
+ // $0.15/$0.95 per 1M (live 2026-05-03), 256k ctx.
+ deepinfra_qwen3_6_35b_a3b: {
+ model: 'Qwen/Qwen3.6-35B-A3B',
+ contextWindow: 262_144,
+ inputTokenNanoDollar: 150,
+ outputTokenNanoDollar: 950,
+ vision: true,
  toolCalling: true,
  jsonMode: true,
  streaming: true,
  parallelToolCalls: false,
  supportsCacheControl: false,
  },
+ // Kimi K2.6 — newer than the (now-removed) K2-Instruct. $0.75/$3.50
+ // per 1M (live 2026-05-03), 256k ctx, native reasoning. Cache-read
+ // is 20% of input → 150 nanoDollar/M.
+ deepinfra_kimi_k2_6: {
+ model: 'moonshotai/Kimi-K2.6',
+ contextWindow: 262_144,
+ inputTokenNanoDollar: 750,
+ outputTokenNanoDollar: 3500,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: false,
+ supportsCacheControl: true,
+ cacheReadTokenNanoDollar: 150,
+ },
+ // GLM 4.6 — $0.43/$1.74 per 1M (live 2026-05-03), 200k ctx.
+ // Cache-read 18.6% of input → 80 nanoDollar/M.
+ deepinfra_glm_4_6: {
+ model: 'zai-org/GLM-4.6',
+ contextWindow: 202_752,
+ inputTokenNanoDollar: 430,
+ outputTokenNanoDollar: 1740,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: false,
+ supportsCacheControl: true,
+ cacheReadTokenNanoDollar: 80,
+ },
+ // GLM 5.1 fallback route. Pricing $1.05/$3.50 per 1M (live
+ // 2026-05-03), 200k ctx. Slightly higher than the OpenRouter
+ // route's published list, so OpenRouter stays primary on price.
+ // Cache-read 19.5% of input → 205 nanoDollar/M.
+ deepinfra_glm_5_1: {
+ model: 'zai-org/GLM-5.1',
+ contextWindow: 202_752,
+ inputTokenNanoDollar: 1050,
+ outputTokenNanoDollar: 3500,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: false,
+ supportsCacheControl: true,
+ cacheReadTokenNanoDollar: 205,
+ },
+ // MiniMax M2.5 fallback. Pricing $0.15/$1.15 per 1M (live
+ // 2026-05-03) — actually CHEAPER than direct MiniMax ($0.30/$1.20),
+ // so the price-priority sort prefers DeepInfra. Direct stays as a
+ // healthy backup. 196k ctx, cache-read 20% → 30 nanoDollar/M.
+ deepinfra_minimax_m2_5: {
+ model: 'MiniMaxAI/MiniMax-M2.5',
+ contextWindow: 196_608,
+ inputTokenNanoDollar: 150,
+ outputTokenNanoDollar: 1150,
+ toolCalling: true,
+ jsonMode: true,
+ streaming: true,
+ parallelToolCalls: false,
+ supportsCacheControl: true,
+ cacheReadTokenNanoDollar: 30,
+ },
  // DeepSeek V4 Pro fallback route — used when api.deepseek.com is
  // throttled or down. Pricing live-verified 2026-05-03 against
  // /models/deepseek-ai/DeepSeek-V4-Pro: $1.74 in / $3.48 out per 1M
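
Several of the new DeepInfra entries pair `supportsCacheControl: true` with a `cacheReadTokenNanoDollar` rate (20% of input for K2.6 and M2.5, 18.6% for GLM 4.6, 19.5% for GLM 5.1). The effective input price is then a blend of the two rates, weighted by how much of the prompt is served from the prefix cache. A small sketch with illustrative names; the rates come from the K2.6 entry above:

```ts
// Blended input price per token when part of the prompt hits the prefix cache.
function blendedInputNano(
  inputTokenNanoDollar: number,
  cacheReadTokenNanoDollar: number,
  cachedFraction: number, // share of input tokens read from cache, 0..1
): number {
  return (
    inputTokenNanoDollar * (1 - cachedFraction) +
    cacheReadTokenNanoDollar * cachedFraction
  );
}

// With half the prompt cached, K2.6 input drops from 750 to 450 nanoDollars/token:
console.log(blendedInputNano(750, 150, 0.5)); // 450
```
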
@@ -1192,22 +1266,6 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
- llama_4_maverick: [
- // Groq deprecated llama-4-maverick-17b-128e effective March 9, 2026
- {
- provider: 'groq',
- providerModel: 'groq_llama_4_maverick',
- latencyTier: 'fast',
- available: false,
- },
- // Fireworks Llama 4 requires enterprise deployment, not available on serverless
- {
- provider: 'together',
- providerModel: 'together_meta_llama4_400b',
- latencyTier: 'standard',
- available: true,
- },
- ],
  // DeepSeek V4 — direct route to api.deepseek.com (BYO key, OpenAI-
  // compatible) primary, DeepInfra fallback for failover only. Direct
  // is ~6× cheaper on Pro and ~2× on Flash so the price-priority sort
@@ -1272,26 +1330,6 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
- gpt_oss_20b: [
- {
- provider: 'deepinfra',
- providerModel: 'deepinfra_gpt_oss_20b',
- latencyTier: 'standard',
- available: true,
- },
- {
- provider: 'groq',
- providerModel: 'groq_gpt_oss_20b',
- latencyTier: 'fast',
- available: true,
- },
- {
- provider: 'fireworks',
- providerModel: 'fireworks_gpt_oss_20b',
- latencyTier: 'standard',
- available: true,
- },
- ],
  // Single-provider Google models
  gemini_2_5_flash: [
  {
@@ -1385,19 +1423,11 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
- gpt_41_nano: [
- {
- provider: 'openai',
- providerModel: 'openai_gpt_41_nano',
- latencyTier: 'fast',
- available: true,
- },
- ],
  // Anthropic models — direct first, OpenRouter fallback second.
- // claude_3_sonnet historically maps to claude-sonnet-4-20250514;
- // the OpenRouter fallback uses claude-sonnet-4.6 (current Anthropic
- // live model) and has verified cache_control passthrough.
- claude_3_sonnet: [
+ // The Anthropic entry ships 4.x behind a wire-dated snapshot, and
+ // OpenRouter mirrors the same model with ~5% markup. Cache_control
+ // passthrough is verified for sonnet but unverified for opus/haiku.
+ claude_sonnet_4_6: [
  {
  provider: 'anthropic',
  providerModel: 'anthrophic_claude_3_sonnet',
@@ -1411,28 +1441,16 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
- // Explicit current-version aliases so coding-agent callers can pin
- // to 4.6 / 4.7 without depending on claude_3_sonnet's legacy label.
- // Same offering chain — the Anthropic entry ships 4.x behind the
- // wire-dated snapshot, and OpenRouter mirrors the same model.
- claude_sonnet_4_6: [
+ claude_opus_4_7: [
  {
  provider: 'anthropic',
- providerModel: 'anthrophic_claude_3_sonnet',
- latencyTier: 'standard',
+ providerModel: 'anthrophic_claude_3_opus',
+ latencyTier: 'slow',
  available: true,
  },
  {
  provider: 'openrouter',
- providerModel: 'openrouter_claude_sonnet_4_6',
- latencyTier: 'standard',
- available: true,
- },
- ],
- claude_opus_4_7: [
- {
- provider: 'anthropic',
- providerModel: 'anthrophic_claude_3_opus',
+ providerModel: 'openrouter_claude_opus_4_7',
  latencyTier: 'slow',
  available: true,
  },
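
Per the comments in these hunks, cache_control passthrough is verified for the sonnet route but unverified for opus and haiku, which is why the new openrouter_claude_opus_4_7 entry keeps `supportsCacheControl: false`. A caller-side sketch of gating prompt-cache breakpoints on that flag; the block shape follows Anthropic-style cache_control, but this is an illustration, not the package's request builder:

```ts
// Only attach a cache breakpoint when the selected provider model is known to
// pass cache_control through; otherwise send the block unmodified, so billing
// never assumes a discount that may not materialize.
type ContentBlock = {
  type: 'text';
  text: string;
  cache_control?: { type: 'ephemeral' };
};

function maybeCache(
  block: ContentBlock,
  entry: { supportsCacheControl?: boolean },
): ContentBlock {
  return entry.supportsCacheControl
    ? { ...block, cache_control: { type: 'ephemeral' } }
    : block;
}
```
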
@@ -1485,17 +1503,29 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
+ // Kimi K2 (original Instruct) — DeepInfra retired Kimi-K2-Instruct
+ // 2026-05-03 in favor of K2.5/K2.6 (different model). Together still
+ // serves the original K2-Instruct as the only path.
  kimi_k2: [
- // DeepInfra at $0.50/$2.00 vs Together at $1.00/$3.00 — ~2× cheaper.
+ {
+ provider: 'together',
+ providerModel: 'together_kimi_k2',
+ latencyTier: 'standard',
+ available: true,
+ },
+ ],
+ // Kimi K2.6 — newer model. DeepInfra primary (cheaper), OpenRouter
+ // fallback. Both expose the same wire model id family.
+ kimi_k2_6: [
  {
  provider: 'deepinfra',
- providerModel: 'deepinfra_kimi_k2',
+ providerModel: 'deepinfra_kimi_k2_6',
  latencyTier: 'standard',
  available: true,
  },
  {
- provider: 'together',
- providerModel: 'together_kimi_k2',
+ provider: 'openrouter',
+ providerModel: 'openrouter_kimi_k2_6',
  latencyTier: 'standard',
  available: true,
  },
@@ -1546,7 +1576,16 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
+ // MiniMax M2.5 — DeepInfra is cheaper than direct ($0.15/$1.15 vs
+ // $0.30/$1.20), so DeepInfra wins the price-priority sort. Direct
+ // stays as a healthy fallback.
  minimax_m2_5: [
+ {
+ provider: 'deepinfra',
+ providerModel: 'deepinfra_minimax_m2_5',
+ latencyTier: 'standard',
+ available: true,
+ },
  {
  provider: 'minimax',
  providerModel: 'minimax_m2_5',
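
This hunk shows the "price-priority sort" the comments keep referencing in its simplest form: each clean model name maps to a list of offerings, and the cheapest available one wins (DeepInfra's 150 nanoDollar input beats direct MiniMax's 300 here, so DeepInfra is listed first). A sketch of that selection, reconstructed from the described behavior rather than copied from the package:

```ts
// Offering shape as it appears in this diff; the selection function is my
// reconstruction of the described "price-priority sort", not package code.
interface Offering {
  provider: string;
  providerModel: string;
  latencyTier: 'fast' | 'standard' | 'slow';
  available: boolean;
}

function pickCheapestAvailable(
  offerings: Offering[],
  inputNanoOf: (providerModel: string) => number, // inputTokenNanoDollar lookup
): Offering | undefined {
  return [...offerings]
    .filter((o) => o.available)
    .sort((a, b) => inputNanoOf(a.providerModel) - inputNanoOf(b.providerModel))[0];
}
// For minimax_m2_5 this picks the DeepInfra offering (150 vs 300 nanoDollars).
```
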
@@ -1554,6 +1593,8 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
+ // MiniMax M2.7 — not on DeepInfra (only M2.5 is hosted there).
+ // Direct route only.
  minimax_m2_7: [
  {
  provider: 'minimax',
@@ -1562,6 +1603,9 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
+ // GLM 5.1 — OpenRouter is cheaper ($0.95/$3.15 vs DeepInfra
+ // $1.05/$3.50) and has been stable; OpenRouter primary, DeepInfra
+ // fallback when OR is throttled.
  glm_5_1: [
  {
  provider: 'openrouter',
@@ -1569,7 +1613,31 @@ export const textGenMultiProviderModels = {
  latencyTier: 'standard',
  available: true,
  },
+ {
+ provider: 'deepinfra',
+ providerModel: 'deepinfra_glm_5_1',
+ latencyTier: 'standard',
+ available: true,
+ },
  ],
+ // GLM 4.6 — DeepInfra primary at $0.43/$1.74; OpenRouter fallback.
+ glm_4_6: [
+ {
+ provider: 'deepinfra',
+ providerModel: 'deepinfra_glm_4_6',
+ latencyTier: 'standard',
+ available: true,
+ },
+ {
+ provider: 'openrouter',
+ providerModel: 'openrouter_glm_4_6',
+ latencyTier: 'standard',
+ available: true,
+ },
+ ],
+ // Qwen 3.6 Plus — OpenRouter only. The "plus" SKU isn't carried by
+ // DeepInfra (DeepInfra exposes 27B and 35B-A3B variants under
+ // separate clean names below).
  qwen3_6_plus: [
  {
  provider: 'openrouter',
@@ -1578,6 +1646,23 @@ export const textGenMultiProviderModels = {
  available: true,
  },
  ],
+ // Qwen 3.6 27B — dense (non-MoE). DeepInfra primary at $0.32/$3.20.
+ qwen3_6_27b: [
+ {
+ provider: 'deepinfra',
+ providerModel: 'deepinfra_qwen3_6_27b',
+ latencyTier: 'standard',
+ available: true,
+ },
+ {
+ provider: 'openrouter',
+ providerModel: 'openrouter_qwen3_6_27b',
+ latencyTier: 'standard',
+ available: true,
+ },
+ ],
+ // Kimi K2 Thinking — OpenRouter only. DeepInfra has K2.5/K2.6 but
+ // no specific "thinking" variant.
  kimi_k2_thinking: [
  {
  provider: 'openrouter',
@@ -1590,12 +1675,10 @@ export const textGenMultiProviderModels = {
  /** Model data keyed by clean model name. Use for billing/display lookups. */
  export const textGenModelData = {
  llama_4_scout: TextGenGroqModelData.groq_llama_4_scout,
- llama_4_maverick: TextGenTogetherModelData.together_meta_llama4_400b,
  llama_3_3_70b: TextGenGroqModelData.groq_llama_3_3_70b,
  deepseek_v4_pro: TextGenDeepSeekModelData.deepseek_deepseek_v4_pro,
  deepseek_v4_flash: TextGenDeepSeekModelData.deepseek_deepseek_v4_flash,
  gpt_oss_120b: TextGenGroqModelData.groq_gpt_oss_120b,
- gpt_oss_20b: TextGenGroqModelData.groq_gpt_oss_20b,
  gemini_2_5_flash: TextGenGoogleModelData.google_gemini_2_5_flash,
  gemini_2_5: TextGenGoogleModelData.google_gemini_2_5,
  gpt_4o: TextGenOpenAIModelData.openai_gpt_4o,
@@ -1603,20 +1686,21 @@ export const textGenModelData = {
  gpt_5_mini: TextGenOpenAIModelData.openai_gpt_5_mini,
  o3: TextGenOpenAIModelData.openai_o3,
  o4_mini: TextGenOpenAIModelData.openai_o4_mini,
- gpt_41_nano: TextGenOpenAIModelData.openai_gpt_41_nano,
- claude_3_sonnet: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
  claude_sonnet_4_6: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
  claude_opus_4_7: TextGenAnthropicModelData.anthrophic_claude_3_opus,
  claude_haiku_4_5: TextGenAnthropicModelData.anthrophic_claude_haiku_4_5,
  qwen2_72b: TextGenTogetherModelData.together_qwen2_72b,
  qwen2_vision_72b: TextGenTogetherModelData.together_qwen2_vision_72b,
  qwen3_235b: TextGenTogetherModelData.together_qwen3_235b,
+ qwen3_6_27b: TextGenDeepInfraModelData.deepinfra_qwen3_6_27b,
  kimi_k2: TextGenTogetherModelData.together_kimi_k2,
+ kimi_k2_6: TextGenDeepInfraModelData.deepinfra_kimi_k2_6,
  gemma_4_31b: TextGenTogetherModelData.together_gemma_4_31b,
  gemma_4_26b: TextGenDeepInfraModelData.deepinfra_gemma_4_26b,
- minimax_m2_5: TextGenMiniMaxModelData.minimax_m2_5,
+ minimax_m2_5: TextGenDeepInfraModelData.deepinfra_minimax_m2_5,
  minimax_m2_7: TextGenMiniMaxModelData.minimax_m2_7,
  glm_5_1: TextGenOpenRouterModelData.openrouter_glm_5_1,
+ glm_4_6: TextGenDeepInfraModelData.deepinfra_glm_4_6,
  qwen3_6_plus: TextGenOpenRouterModelData.openrouter_qwen_36_plus,
  kimi_k2_thinking: TextGenOpenRouterModelData.openrouter_kimi_k2_thinking,
  };
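
A release like this one removes provider keys (deepinfra_kimi_k2, groq_gpt_oss_20b, openai_gpt_41_nano, and others) while rewiring the routing table, so a cheap regression check is to assert that every providerModel the table still references exists in textGenProviderModelsSet. A sketch assuming the exports named in this diff live in one module; the import path is hypothetical:

```ts
import {
  textGenMultiProviderModels,
  textGenProviderModelsSet,
} from './textGenModels'; // hypothetical path; the real module name isn't shown in this diff

// Fail fast on dangling routes left behind by a removal like deepinfra_kimi_k2.
for (const [model, offerings] of Object.entries(textGenMultiProviderModels)) {
  for (const offering of offerings as Array<{ providerModel: string }>) {
    if (!textGenProviderModelsSet.has(offering.providerModel)) {
      throw new Error(
        `${model} references unknown providerModel ${offering.providerModel}`,
      );
    }
  }
}
```
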