ugly-app 0.1.430 → 0.1.431

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,6 @@ export type TextGenTogetherModelT = (typeof textGenTogetherModels)[number];
28
28
  // Note: Llama 4 models require enterprise deployment on Fireworks, use Groq/Together instead
29
29
  export const textGenFireworksModels = [
30
30
  'fireworks_gpt_oss_120b',
31
- 'fireworks_gpt_oss_20b',
32
31
  ] as const;
33
32
 
34
33
  export type TextGenFireworksModelT = (typeof textGenFireworksModels)[number];
@@ -53,11 +52,9 @@ export type TextGenDeepSeekModelT = (typeof textGenDeepSeekModels)[number];
53
52
  // Groq models
54
53
  export const textGenGroqModels = [
55
54
  'groq_llama_4_scout',
56
- 'groq_llama_4_maverick',
57
55
  'groq_mixtral_8x7b',
58
56
  'groq_llama_3_3_70b',
59
57
  'groq_gpt_oss_120b',
60
- 'groq_gpt_oss_20b',
61
58
  ] as const;
62
59
 
63
60
  export type TextGenGroqModelT = (typeof textGenGroqModels)[number];
@@ -90,7 +87,6 @@ export const textGenOpenAIModels = [
90
87
  'openai_o1_mini',
91
88
  'openai_o3',
92
89
  'openai_o4_mini',
93
- 'openai_gpt_41_nano',
94
90
  ] as const;
95
91
 
96
92
  export type TextGenOpenAIModelT = (typeof textGenOpenAIModels)[number];
@@ -108,8 +104,11 @@ export type TextGenMiniMaxModelT = (typeof textGenMiniMaxModels)[number];
108
104
  // us a warm path when the direct upstream is throttled or down.
109
105
  export const textGenOpenRouterModels = [
110
106
  'openrouter_glm_5_1',
107
+ 'openrouter_glm_4_6',
111
108
  'openrouter_qwen_36_plus',
109
+ 'openrouter_qwen3_6_27b',
112
110
  'openrouter_kimi_k2_thinking',
111
+ 'openrouter_kimi_k2_6',
113
112
  'openrouter_gemma_4_31b',
114
113
  'openrouter_gemma_4_26b',
115
114
  'openrouter_gemma_4_26b_free',
@@ -121,6 +120,7 @@ export const textGenOpenRouterModels = [
121
120
  'openrouter_o4_mini',
122
121
  'openrouter_claude_sonnet_4_6',
123
122
  'openrouter_claude_haiku_4_5',
123
+ 'openrouter_claude_opus_4_7',
124
124
  ] as const;
125
125
  export type TextGenOpenRouterModelT = (typeof textGenOpenRouterModels)[number];
126
126
 
@@ -132,9 +132,13 @@ export const textGenDeepInfraModels = [
132
132
  'deepinfra_gemma_4_31b',
133
133
  'deepinfra_gemma_4_26b',
134
134
  'deepinfra_gpt_oss_120b',
135
- 'deepinfra_gpt_oss_20b',
136
135
  'deepinfra_qwen3_235b',
137
- 'deepinfra_kimi_k2',
136
+ 'deepinfra_qwen3_6_27b',
137
+ 'deepinfra_qwen3_6_35b_a3b',
138
+ 'deepinfra_kimi_k2_6',
139
+ 'deepinfra_glm_4_6',
140
+ 'deepinfra_glm_5_1',
141
+ 'deepinfra_minimax_m2_5',
138
142
  'deepinfra_deepseek_v4_pro',
139
143
  'deepinfra_deepseek_v4_flash',
140
144
  ] as const;
@@ -165,24 +169,23 @@ const textGenProviderModelValues = [
165
169
  'openai_o1_mini',
166
170
  'openai_o3',
167
171
  'openai_o4_mini',
168
- 'openai_gpt_41_nano',
169
172
  // Fireworks (GPT-OSS - Llama 4 requires enterprise deployment)
170
173
  'fireworks_gpt_oss_120b',
171
- 'fireworks_gpt_oss_20b',
172
174
  // Groq
173
175
  'groq_llama_4_scout',
174
- 'groq_llama_4_maverick',
175
176
  'groq_mixtral_8x7b',
176
177
  'groq_llama_3_3_70b',
177
178
  'groq_gpt_oss_120b',
178
- 'groq_gpt_oss_20b',
179
179
  // MiniMax
180
180
  'minimax_m2_5',
181
181
  'minimax_m2_7',
182
182
  // OpenRouter
183
183
  'openrouter_glm_5_1',
184
+ 'openrouter_glm_4_6',
184
185
  'openrouter_qwen_36_plus',
186
+ 'openrouter_qwen3_6_27b',
185
187
  'openrouter_kimi_k2_thinking',
188
+ 'openrouter_kimi_k2_6',
186
189
  'openrouter_gemma_4_31b',
187
190
  'openrouter_gemma_4_26b',
188
191
  'openrouter_gemma_4_26b_free',
@@ -193,13 +196,18 @@ const textGenProviderModelValues = [
193
196
  'openrouter_o4_mini',
194
197
  'openrouter_claude_sonnet_4_6',
195
198
  'openrouter_claude_haiku_4_5',
199
+ 'openrouter_claude_opus_4_7',
196
200
  // DeepInfra
197
201
  'deepinfra_gemma_4_31b',
198
202
  'deepinfra_gemma_4_26b',
199
203
  'deepinfra_gpt_oss_120b',
200
- 'deepinfra_gpt_oss_20b',
201
204
  'deepinfra_qwen3_235b',
202
- 'deepinfra_kimi_k2',
205
+ 'deepinfra_qwen3_6_27b',
206
+ 'deepinfra_qwen3_6_35b_a3b',
207
+ 'deepinfra_kimi_k2_6',
208
+ 'deepinfra_glm_4_6',
209
+ 'deepinfra_glm_5_1',
210
+ 'deepinfra_minimax_m2_5',
203
211
  'deepinfra_deepseek_v4_pro',
204
212
  'deepinfra_deepseek_v4_flash',
205
213
  // DeepSeek (direct, BYO key, OpenAI-compatible)
@@ -225,13 +233,11 @@ export const textGenProviderModelsSet = new Set(textGenProviderModels);
225
233
  const textGenModelValues = [
226
234
  // Multi-provider routed models
227
235
  'llama_4_scout',
228
- 'llama_4_maverick',
229
236
  'llama_3_3_70b',
230
- // DeepSeek V4 — direct route only (api.deepseek.com)
237
+ // DeepSeek V4 — direct (api.deepseek.com) + DeepInfra fallback
231
238
  'deepseek_v4_pro',
232
239
  'deepseek_v4_flash',
233
240
  'gpt_oss_120b',
234
- 'gpt_oss_20b',
235
241
  // Google
236
242
  'gemini_2_5_flash',
237
243
  'gemini_2_5',
@@ -239,24 +245,27 @@ const textGenModelValues = [
239
245
  'gpt_4o',
240
246
  'gpt_5',
241
247
  'gpt_5_mini',
242
- 'gpt_41_nano',
243
248
  'o3',
244
249
  'o4_mini',
245
250
  // Anthropic — coding-agent tiers (Anthropic direct + OpenRouter fallback)
246
251
  'claude_opus_4_7',
247
252
  'claude_sonnet_4_6',
248
- 'claude_3_sonnet',
249
253
  'claude_haiku_4_5',
250
254
  // Multi-provider open-weight (DeepInfra / OpenRouter / Together)
251
255
  'gemma_4_31b',
252
256
  'gemma_4_26b',
253
- // MiniMax (direct)
257
+ // MiniMax direct + DeepInfra fallback for M2.5
254
258
  'minimax_m2_5',
255
259
  'minimax_m2_7',
256
- // OpenRouter-only frontier
260
+ // GLM — 5.1: OpenRouter primary + DeepInfra fallback; 4.6: DeepInfra primary + OpenRouter fallback
257
261
  'glm_5_1',
262
+ 'glm_4_6',
263
+ // Qwen 3.6 — OpenRouter + DeepInfra
258
264
  'qwen3_6_plus',
265
+ 'qwen3_6_27b',
266
+ // Kimi — OpenRouter + DeepInfra
259
267
  'kimi_k2_thinking',
268
+ 'kimi_k2_6',
260
269
  // Single-provider (Together — misc)
261
270
  'qwen2_72b',
262
271
  'qwen2_vision_72b',
@@ -524,25 +533,6 @@ export const TextGenOpenAIModelData: Record<
524
533
  smartness: 4,
525
534
  compactAt: 0.90,
526
535
  },
527
- // GPT-4.1 Nano — cheapest viable model, 1M context
528
- openai_gpt_41_nano: {
529
- model: 'gpt-4.1-nano',
530
- contextWindow: 1_000_000,
531
- inputTokenNanoDollar: 100,
532
- outputTokenNanoDollar: 400,
533
- vision: true,
534
- toolCalling: true,
535
- jsonMode: true,
536
- streaming: true,
537
- parallelToolCalls: true,
538
- // Coding agent
539
- name: 'GPT-4.1 Nano',
540
- provider: 'OpenAI',
541
- speed: 'fast',
542
- reasoning: 'strong',
543
- smartness: 2,
544
- compactAt: 0.90,
545
- },
546
536
  };
547
537
 
548
538
  export const TextGenTogetherModelData: Record<
@@ -695,17 +685,6 @@ export const TextGenFireworksModelData: Record<
695
685
  streaming: true,
696
686
  parallelToolCalls: false,
697
687
  },
698
- fireworks_gpt_oss_20b: {
699
- model: 'accounts/fireworks/models/gpt-oss-20b',
700
- contextWindow: 128000,
701
- // $0.075 input / $0.30 output per 1M tokens
702
- inputTokenNanoDollar: 75,
703
- outputTokenNanoDollar: 300,
704
- toolCalling: true,
705
- jsonMode: true,
706
- streaming: true,
707
- parallelToolCalls: false,
708
- },
709
688
  };
710
689
 
711
690
  // Groq models - ultra-fast inference
@@ -724,18 +703,6 @@ export const TextGenGroqModelData: Record<TextGenGroqModelT, TextGenModelData> =
724
703
  streaming: true,
725
704
  parallelToolCalls: false,
726
705
  },
727
- groq_llama_4_maverick: {
728
- model: 'meta-llama/llama-4-maverick-17b-128e-instruct',
729
- contextWindow: 128000,
730
- inputTokenNanoDollar: 200,
731
- outputTokenNanoDollar: 600,
732
- vision: true,
733
- // Note: toolCalling marked 'unreliable' due to known 100% failure rate with structured prompts
734
- toolCalling: 'unreliable',
735
- jsonMode: true,
736
- streaming: true,
737
- parallelToolCalls: false,
738
- },
739
706
  groq_mixtral_8x7b: {
740
707
  model: 'mixtral-8x7b-32768',
741
708
  contextWindow: 32768,
@@ -768,17 +735,6 @@ export const TextGenGroqModelData: Record<TextGenGroqModelT, TextGenModelData> =
768
735
  streaming: true,
769
736
  parallelToolCalls: false,
770
737
  },
771
- groq_gpt_oss_20b: {
772
- model: 'openai/gpt-oss-20b',
773
- contextWindow: 128000,
774
- // $0.075 input / $0.30 output per 1M tokens
775
- inputTokenNanoDollar: 75,
776
- outputTokenNanoDollar: 300,
777
- toolCalling: true,
778
- jsonMode: true,
779
- streaming: true,
780
- parallelToolCalls: false,
781
- },
782
738
  };
783
739
 
784
740
  export const TextGenKieModelData: Record<TextGenKieModelT, TextGenModelData> =
@@ -1089,6 +1045,60 @@ export const TextGenOpenRouterModelData: Record<
1089
1045
  // doesn't assume a cache discount that never materializes.
1090
1046
  supportsCacheControl: false,
1091
1047
  },
1048
+ // Claude Opus 4.7 fallback — primary still Anthropic direct. OpenRouter
1049
+ // list price is ~5% over upstream. Cache passthrough verified to work
1050
+ // for sonnet-4.6; opus-4.7 not yet verified — leave conservative.
1051
+ openrouter_claude_opus_4_7: {
1052
+ model: 'anthropic/claude-opus-4.7',
1053
+ contextWindow: 200_000,
1054
+ inputTokenNanoDollar: 15_750, // ~$15/M × 1.05
1055
+ outputTokenNanoDollar: 78_750, // ~$75/M × 1.05
1056
+ vision: true,
1057
+ toolCalling: true,
1058
+ jsonMode: true,
1059
+ streaming: true,
1060
+ parallelToolCalls: true,
1061
+ supportsCacheControl: false,
1062
+ },
1063
+ // GLM 4.6 OpenRouter route. List price is roughly the same as
1064
+ // DeepInfra's published rate; OpenRouter wins when DeepInfra is
1065
+ // throttled.
1066
+ openrouter_glm_4_6: {
1067
+ model: 'z-ai/glm-4.6',
1068
+ contextWindow: 200_000,
1069
+ inputTokenNanoDollar: 600,
1070
+ outputTokenNanoDollar: 2200,
1071
+ toolCalling: true,
1072
+ jsonMode: true,
1073
+ streaming: true,
1074
+ parallelToolCalls: false,
1075
+ supportsCacheControl: false,
1076
+ },
1077
+ // Qwen 3.6 27B OpenRouter route — dense (non-MoE) variant.
1078
+ openrouter_qwen3_6_27b: {
1079
+ model: 'qwen/qwen3.6-27b',
1080
+ contextWindow: 262_144,
1081
+ inputTokenNanoDollar: 350,
1082
+ outputTokenNanoDollar: 3360,
1083
+ vision: true,
1084
+ toolCalling: true,
1085
+ jsonMode: true,
1086
+ streaming: true,
1087
+ parallelToolCalls: false,
1088
+ supportsCacheControl: false,
1089
+ },
1090
+ // Kimi K2.6 OpenRouter route.
1091
+ openrouter_kimi_k2_6: {
1092
+ model: 'moonshotai/kimi-k2.6',
1093
+ contextWindow: 262_144,
1094
+ inputTokenNanoDollar: 800,
1095
+ outputTokenNanoDollar: 3700,
1096
+ toolCalling: true,
1097
+ jsonMode: true,
1098
+ streaming: true,
1099
+ parallelToolCalls: false,
1100
+ supportsCacheControl: false,
1101
+ },
1092
1102
  };
1093
1103
 
1094
1104
  // DeepInfra — direct route, bypasses OpenRouter's shared rate limits.
@@ -1159,43 +1169,107 @@ export const TextGenDeepInfraModelData: Record<
1159
1169
  parallelToolCalls: false,
1160
1170
  supportsCacheControl: false,
1161
1171
  },
1162
- // GPT-OSS 20B — same model as Groq/Fireworks at lower price.
1163
- deepinfra_gpt_oss_20b: {
1164
- model: 'openai/gpt-oss-20b',
1165
- contextWindow: 128_000,
1166
- inputTokenNanoDollar: 30,
1167
- outputTokenNanoDollar: 150,
1172
+ // Qwen3-235B-A22B-Instruct-2507 (live 2026-05-03 on DeepInfra).
1173
+ deepinfra_qwen3_235b: {
1174
+ model: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
1175
+ contextWindow: 256_000,
1176
+ inputTokenNanoDollar: 71,
1177
+ outputTokenNanoDollar: 100,
1168
1178
  toolCalling: true,
1169
1179
  jsonMode: true,
1170
1180
  streaming: true,
1171
1181
  parallelToolCalls: false,
1172
1182
  supportsCacheControl: false,
1173
1183
  },
1174
- // Qwen3-235B — DeepInfra at $0.071/$0.10 vs Together at $0.20/$0.60.
1175
- // ~3× cheaper input, ~6× cheaper output.
1176
- deepinfra_qwen3_235b: {
1177
- model: 'Qwen/Qwen3-235B-A22B-Instruct-2507',
1178
- contextWindow: 256_000,
1179
- inputTokenNanoDollar: 71,
1180
- outputTokenNanoDollar: 100,
1184
+ // Qwen3.6 27B — $0.32/$3.20 per 1M (live 2026-05-03), 256k ctx,
1185
+ // multimodal + reasoning. The dense, non-MoE variant.
1186
+ deepinfra_qwen3_6_27b: {
1187
+ model: 'Qwen/Qwen3.6-27B',
1188
+ contextWindow: 262_144,
1189
+ inputTokenNanoDollar: 320,
1190
+ outputTokenNanoDollar: 3200,
1191
+ vision: true,
1181
1192
  toolCalling: true,
1182
1193
  jsonMode: true,
1183
1194
  streaming: true,
1184
1195
  parallelToolCalls: false,
1185
1196
  supportsCacheControl: false,
1186
1197
  },
1187
- // Kimi K2 — DeepInfra at $0.50/$2.00 vs Together at $1.00/$3.00.
1188
- deepinfra_kimi_k2: {
1189
- model: 'moonshotai/Kimi-K2-Instruct',
1190
- contextWindow: 128_000,
1191
- inputTokenNanoDollar: 500,
1192
- outputTokenNanoDollar: 2000,
1198
+ // Qwen3.6 35B-A3B — MoE variant, dramatically cheaper.
1199
+ // $0.15/$0.95 per 1M (live 2026-05-03), 256k ctx.
1200
+ deepinfra_qwen3_6_35b_a3b: {
1201
+ model: 'Qwen/Qwen3.6-35B-A3B',
1202
+ contextWindow: 262_144,
1203
+ inputTokenNanoDollar: 150,
1204
+ outputTokenNanoDollar: 950,
1205
+ vision: true,
1193
1206
  toolCalling: true,
1194
1207
  jsonMode: true,
1195
1208
  streaming: true,
1196
1209
  parallelToolCalls: false,
1197
1210
  supportsCacheControl: false,
1198
1211
  },
1212
+ // Kimi K2.6 — newer than the (now-removed) K2-Instruct. $0.75/$3.50
1213
+ // per 1M (live 2026-05-03), 256k ctx, native reasoning. Cache-read
1214
+ // is 20% of input → 150 nanoDollar/token ($0.15 per 1M).
1215
+ deepinfra_kimi_k2_6: {
1216
+ model: 'moonshotai/Kimi-K2.6',
1217
+ contextWindow: 262_144,
1218
+ inputTokenNanoDollar: 750,
1219
+ outputTokenNanoDollar: 3500,
1220
+ toolCalling: true,
1221
+ jsonMode: true,
1222
+ streaming: true,
1223
+ parallelToolCalls: false,
1224
+ supportsCacheControl: true,
1225
+ cacheReadTokenNanoDollar: 150,
1226
+ },
1227
+ // GLM 4.6 — $0.43/$1.74 per 1M (live 2026-05-03), 200k ctx.
1228
+ // Cache-read 18.6% of input → 80 nanoDollar/token ($0.08 per 1M).
1229
+ deepinfra_glm_4_6: {
1230
+ model: 'zai-org/GLM-4.6',
1231
+ contextWindow: 202_752,
1232
+ inputTokenNanoDollar: 430,
1233
+ outputTokenNanoDollar: 1740,
1234
+ toolCalling: true,
1235
+ jsonMode: true,
1236
+ streaming: true,
1237
+ parallelToolCalls: false,
1238
+ supportsCacheControl: true,
1239
+ cacheReadTokenNanoDollar: 80,
1240
+ },
1241
+ // GLM 5.1 fallback route. Pricing $1.05/$3.50 per 1M (live
1242
+ // 2026-05-03), 200k ctx. Slightly higher than the OpenRouter
1243
+ // route's published list, so OpenRouter stays primary on price.
1244
+ // Cache-read 19.5% of input → 205 nanoDollar/token ($0.205 per 1M).
1245
+ deepinfra_glm_5_1: {
1246
+ model: 'zai-org/GLM-5.1',
1247
+ contextWindow: 202_752,
1248
+ inputTokenNanoDollar: 1050,
1249
+ outputTokenNanoDollar: 3500,
1250
+ toolCalling: true,
1251
+ jsonMode: true,
1252
+ streaming: true,
1253
+ parallelToolCalls: false,
1254
+ supportsCacheControl: true,
1255
+ cacheReadTokenNanoDollar: 205,
1256
+ },
1257
+ // MiniMax M2.5 fallback. Pricing $0.15/$1.15 per 1M (live
1258
+ // 2026-05-03) — actually CHEAPER than direct MiniMax ($0.30/$1.20),
1259
+ // so the price-priority sort prefers DeepInfra. Direct stays as a
1260
+ // healthy backup. 196k ctx, cache-read 20% → 30 nanoDollar/token ($0.03 per 1M).
1261
+ deepinfra_minimax_m2_5: {
1262
+ model: 'MiniMaxAI/MiniMax-M2.5',
1263
+ contextWindow: 196_608,
1264
+ inputTokenNanoDollar: 150,
1265
+ outputTokenNanoDollar: 1150,
1266
+ toolCalling: true,
1267
+ jsonMode: true,
1268
+ streaming: true,
1269
+ parallelToolCalls: false,
1270
+ supportsCacheControl: true,
1271
+ cacheReadTokenNanoDollar: 30,
1272
+ },
1199
1273
  // DeepSeek V4 Pro fallback route — used when api.deepseek.com is
1200
1274
  // throttled or down. Pricing live-verified 2026-05-03 against
1201
1275
  // /models/deepseek-ai/DeepSeek-V4-Pro: $1.74 in / $3.48 out per 1M
@@ -1376,22 +1450,6 @@ export const textGenMultiProviderModels: Record<
1376
1450
  available: true,
1377
1451
  },
1378
1452
  ],
1379
- llama_4_maverick: [
1380
- // Groq deprecated llama-4-maverick-17b-128e effective March 9, 2026
1381
- {
1382
- provider: 'groq',
1383
- providerModel: 'groq_llama_4_maverick',
1384
- latencyTier: 'fast',
1385
- available: false,
1386
- },
1387
- // Fireworks Llama 4 requires enterprise deployment, not available on serverless
1388
- {
1389
- provider: 'together',
1390
- providerModel: 'together_meta_llama4_400b',
1391
- latencyTier: 'standard',
1392
- available: true,
1393
- },
1394
- ],
1395
1453
  // DeepSeek V4 — direct route to api.deepseek.com (BYO key, OpenAI-
1396
1454
  // compatible) primary, DeepInfra fallback for failover only. Direct
1397
1455
  // is ~6× cheaper on Pro and ~2× on Flash so the price-priority sort
@@ -1456,26 +1514,6 @@ export const textGenMultiProviderModels: Record<
1456
1514
  available: true,
1457
1515
  },
1458
1516
  ],
1459
- gpt_oss_20b: [
1460
- {
1461
- provider: 'deepinfra',
1462
- providerModel: 'deepinfra_gpt_oss_20b',
1463
- latencyTier: 'standard',
1464
- available: true,
1465
- },
1466
- {
1467
- provider: 'groq',
1468
- providerModel: 'groq_gpt_oss_20b',
1469
- latencyTier: 'fast',
1470
- available: true,
1471
- },
1472
- {
1473
- provider: 'fireworks',
1474
- providerModel: 'fireworks_gpt_oss_20b',
1475
- latencyTier: 'standard',
1476
- available: true,
1477
- },
1478
- ],
1479
1517
  // Single-provider Google models
1480
1518
  gemini_2_5_flash: [
1481
1519
  {
@@ -1569,19 +1607,11 @@ export const textGenMultiProviderModels: Record<
1569
1607
  available: true,
1570
1608
  },
1571
1609
  ],
1572
- gpt_41_nano: [
1573
- {
1574
- provider: 'openai',
1575
- providerModel: 'openai_gpt_41_nano',
1576
- latencyTier: 'fast',
1577
- available: true,
1578
- },
1579
- ],
1580
1610
  // Anthropic models — direct first, OpenRouter fallback second.
1581
- // claude_3_sonnet historically maps to claude-sonnet-4-20250514;
1582
- // the OpenRouter fallback uses claude-sonnet-4.6 (current Anthropic
1583
- // live model) and has verified cache_control passthrough.
1584
- claude_3_sonnet: [
1611
+ // The Anthropic entry ships 4.x behind a wire-dated snapshot, and
1612
+ // OpenRouter mirrors the same model with ~5% markup. Cache_control
1613
+ // passthrough is verified for sonnet but unverified for opus/haiku.
1614
+ claude_sonnet_4_6: [
1585
1615
  {
1586
1616
  provider: 'anthropic',
1587
1617
  providerModel: 'anthrophic_claude_3_sonnet',
@@ -1595,28 +1625,16 @@ export const textGenMultiProviderModels: Record<
1595
1625
  available: true,
1596
1626
  },
1597
1627
  ],
1598
- // Explicit current-version aliases so coding-agent callers can pin
1599
- // to 4.6 / 4.7 without depending on claude_3_sonnet's legacy label.
1600
- // Same offering chain — the Anthropic entry ships 4.x behind the
1601
- // wire-dated snapshot, and OpenRouter mirrors the same model.
1602
- claude_sonnet_4_6: [
1628
+ claude_opus_4_7: [
1603
1629
  {
1604
1630
  provider: 'anthropic',
1605
- providerModel: 'anthrophic_claude_3_sonnet',
1606
- latencyTier: 'standard',
1631
+ providerModel: 'anthrophic_claude_3_opus',
1632
+ latencyTier: 'slow',
1607
1633
  available: true,
1608
1634
  },
1609
1635
  {
1610
1636
  provider: 'openrouter',
1611
- providerModel: 'openrouter_claude_sonnet_4_6',
1612
- latencyTier: 'standard',
1613
- available: true,
1614
- },
1615
- ],
1616
- claude_opus_4_7: [
1617
- {
1618
- provider: 'anthropic',
1619
- providerModel: 'anthrophic_claude_3_opus',
1637
+ providerModel: 'openrouter_claude_opus_4_7',
1620
1638
  latencyTier: 'slow',
1621
1639
  available: true,
1622
1640
  },
@@ -1669,17 +1687,29 @@ export const textGenMultiProviderModels: Record<
1669
1687
  available: true,
1670
1688
  },
1671
1689
  ],
1690
+ // Kimi K2 (original Instruct) — DeepInfra retired Kimi-K2-Instruct
1691
+ // 2026-05-03 in favor of K2.5/K2.6 (different model). Together still
1692
+ // serves the original K2-Instruct as the only path.
1672
1693
  kimi_k2: [
1673
- // DeepInfra at $0.50/$2.00 vs Together at $1.00/$3.00 — ~2× cheaper.
1694
+ {
1695
+ provider: 'together',
1696
+ providerModel: 'together_kimi_k2',
1697
+ latencyTier: 'standard',
1698
+ available: true,
1699
+ },
1700
+ ],
1701
+ // Kimi K2.6 — newer model. DeepInfra primary (cheaper), OpenRouter
1702
+ // fallback. Both expose the same wire model id family.
1703
+ kimi_k2_6: [
1674
1704
  {
1675
1705
  provider: 'deepinfra',
1676
- providerModel: 'deepinfra_kimi_k2',
1706
+ providerModel: 'deepinfra_kimi_k2_6',
1677
1707
  latencyTier: 'standard',
1678
1708
  available: true,
1679
1709
  },
1680
1710
  {
1681
- provider: 'together',
1682
- providerModel: 'together_kimi_k2',
1711
+ provider: 'openrouter',
1712
+ providerModel: 'openrouter_kimi_k2_6',
1683
1713
  latencyTier: 'standard',
1684
1714
  available: true,
1685
1715
  },
@@ -1730,7 +1760,16 @@ export const textGenMultiProviderModels: Record<
1730
1760
  available: true,
1731
1761
  },
1732
1762
  ],
1763
+ // MiniMax M2.5 — DeepInfra is cheaper than direct ($0.15/$1.15 vs
1764
+ // $0.30/$1.20), so DeepInfra wins the price-priority sort. Direct
1765
+ // stays as a healthy fallback.
1733
1766
  minimax_m2_5: [
1767
+ {
1768
+ provider: 'deepinfra',
1769
+ providerModel: 'deepinfra_minimax_m2_5',
1770
+ latencyTier: 'standard',
1771
+ available: true,
1772
+ },
1734
1773
  {
1735
1774
  provider: 'minimax',
1736
1775
  providerModel: 'minimax_m2_5',
@@ -1738,6 +1777,8 @@ export const textGenMultiProviderModels: Record<
1738
1777
  available: true,
1739
1778
  },
1740
1779
  ],
1780
+ // MiniMax M2.7 — not on DeepInfra (only M2.5 is hosted there).
1781
+ // Direct route only.
1741
1782
  minimax_m2_7: [
1742
1783
  {
1743
1784
  provider: 'minimax',
@@ -1746,6 +1787,9 @@ export const textGenMultiProviderModels: Record<
1746
1787
  available: true,
1747
1788
  },
1748
1789
  ],
1790
+ // GLM 5.1 — OpenRouter is cheaper ($0.95/$3.15 vs DeepInfra
1791
+ // $1.05/$3.50) and has been stable; OpenRouter primary, DeepInfra
1792
+ // fallback when OR is throttled.
1749
1793
  glm_5_1: [
1750
1794
  {
1751
1795
  provider: 'openrouter',
@@ -1753,7 +1797,31 @@ export const textGenMultiProviderModels: Record<
1753
1797
  latencyTier: 'standard',
1754
1798
  available: true,
1755
1799
  },
1800
+ {
1801
+ provider: 'deepinfra',
1802
+ providerModel: 'deepinfra_glm_5_1',
1803
+ latencyTier: 'standard',
1804
+ available: true,
1805
+ },
1756
1806
  ],
1807
+ // GLM 4.6 — DeepInfra primary at $0.43/$1.74; OpenRouter fallback.
1808
+ glm_4_6: [
1809
+ {
1810
+ provider: 'deepinfra',
1811
+ providerModel: 'deepinfra_glm_4_6',
1812
+ latencyTier: 'standard',
1813
+ available: true,
1814
+ },
1815
+ {
1816
+ provider: 'openrouter',
1817
+ providerModel: 'openrouter_glm_4_6',
1818
+ latencyTier: 'standard',
1819
+ available: true,
1820
+ },
1821
+ ],
1822
+ // Qwen 3.6 Plus — OpenRouter only. The "plus" SKU isn't carried by
1823
+ // DeepInfra (DeepInfra exposes 27B and 35B-A3B variants under
1824
+ // separate clean names below).
1757
1825
  qwen3_6_plus: [
1758
1826
  {
1759
1827
  provider: 'openrouter',
@@ -1762,6 +1830,23 @@ export const textGenMultiProviderModels: Record<
1762
1830
  available: true,
1763
1831
  },
1764
1832
  ],
1833
+ // Qwen 3.6 27B — dense (non-MoE). DeepInfra primary at $0.32/$3.20.
1834
+ qwen3_6_27b: [
1835
+ {
1836
+ provider: 'deepinfra',
1837
+ providerModel: 'deepinfra_qwen3_6_27b',
1838
+ latencyTier: 'standard',
1839
+ available: true,
1840
+ },
1841
+ {
1842
+ provider: 'openrouter',
1843
+ providerModel: 'openrouter_qwen3_6_27b',
1844
+ latencyTier: 'standard',
1845
+ available: true,
1846
+ },
1847
+ ],
1848
+ // Kimi K2 Thinking — OpenRouter only. DeepInfra has K2.5/K2.6 but
1849
+ // no specific "thinking" variant.
1765
1850
  kimi_k2_thinking: [
1766
1851
  {
1767
1852
  provider: 'openrouter',
@@ -1775,12 +1860,10 @@ export const textGenMultiProviderModels: Record<
1775
1860
  /** Model data keyed by clean model name. Use for billing/display lookups. */
1776
1861
  export const textGenModelData: Record<TextGenModel, TextGenModelData> = {
1777
1862
  llama_4_scout: TextGenGroqModelData.groq_llama_4_scout,
1778
- llama_4_maverick: TextGenTogetherModelData.together_meta_llama4_400b,
1779
1863
  llama_3_3_70b: TextGenGroqModelData.groq_llama_3_3_70b,
1780
1864
  deepseek_v4_pro: TextGenDeepSeekModelData.deepseek_deepseek_v4_pro,
1781
1865
  deepseek_v4_flash: TextGenDeepSeekModelData.deepseek_deepseek_v4_flash,
1782
1866
  gpt_oss_120b: TextGenGroqModelData.groq_gpt_oss_120b,
1783
- gpt_oss_20b: TextGenGroqModelData.groq_gpt_oss_20b,
1784
1867
  gemini_2_5_flash: TextGenGoogleModelData.google_gemini_2_5_flash,
1785
1868
  gemini_2_5: TextGenGoogleModelData.google_gemini_2_5,
1786
1869
  gpt_4o: TextGenOpenAIModelData.openai_gpt_4o,
@@ -1788,20 +1871,21 @@ export const textGenModelData: Record<TextGenModel, TextGenModelData> = {
1788
1871
  gpt_5_mini: TextGenOpenAIModelData.openai_gpt_5_mini,
1789
1872
  o3: TextGenOpenAIModelData.openai_o3,
1790
1873
  o4_mini: TextGenOpenAIModelData.openai_o4_mini,
1791
- gpt_41_nano: TextGenOpenAIModelData.openai_gpt_41_nano,
1792
- claude_3_sonnet: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
1793
1874
  claude_sonnet_4_6: TextGenAnthropicModelData.anthrophic_claude_3_sonnet,
1794
1875
  claude_opus_4_7: TextGenAnthropicModelData.anthrophic_claude_3_opus,
1795
1876
  claude_haiku_4_5: TextGenAnthropicModelData.anthrophic_claude_haiku_4_5,
1796
1877
  qwen2_72b: TextGenTogetherModelData.together_qwen2_72b,
1797
1878
  qwen2_vision_72b: TextGenTogetherModelData.together_qwen2_vision_72b,
1798
1879
  qwen3_235b: TextGenTogetherModelData.together_qwen3_235b,
1880
+ qwen3_6_27b: TextGenDeepInfraModelData.deepinfra_qwen3_6_27b,
1799
1881
  kimi_k2: TextGenTogetherModelData.together_kimi_k2,
1882
+ kimi_k2_6: TextGenDeepInfraModelData.deepinfra_kimi_k2_6,
1800
1883
  gemma_4_31b: TextGenTogetherModelData.together_gemma_4_31b,
1801
1884
  gemma_4_26b: TextGenDeepInfraModelData.deepinfra_gemma_4_26b,
1802
- minimax_m2_5: TextGenMiniMaxModelData.minimax_m2_5,
1885
+ minimax_m2_5: TextGenDeepInfraModelData.deepinfra_minimax_m2_5,
1803
1886
  minimax_m2_7: TextGenMiniMaxModelData.minimax_m2_7,
1804
1887
  glm_5_1: TextGenOpenRouterModelData.openrouter_glm_5_1,
1888
+ glm_4_6: TextGenDeepInfraModelData.deepinfra_glm_4_6,
1805
1889
  qwen3_6_plus: TextGenOpenRouterModelData.openrouter_qwen_36_plus,
1806
1890
  kimi_k2_thinking: TextGenOpenRouterModelData.openrouter_kimi_k2_thinking,
1807
1891
  };