lm-deluge 0.0.35__py3-none-any.whl → 0.0.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -5,1284 +5,21 @@ from dataclasses import dataclass, field

  from ..request_context import RequestContext

- BUILTIN_MODELS = {
-     # `7MMM. ,MMF' mm
-     # MMMb dPMM MM
-     # M YM ,M MM .gP"Ya mmMMmm ,6"Yb.
-     # M Mb M' MM ,M' Yb MM 8) MM
-     # M YM.P' MM 8M"""""" MM ,pm9MM
-     # M `YM' MM YM. , MM 8M MM
-     # .JML. `' .JMML.`Mbmmd' `Mbmo`Moo9^Yo.
-     "llama-4-scout": {
-         "id": "llama-4-scout",
-         "name": "Llama-4-Scout-17B-16E-Instruct-FP8",
-         "api_base": "https://api.llama.com/compat/v1",
-         "api_key_env_var": "META_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "api_spec": "openai",
-         "input_cost": 0.0,
-         "output_cost": 0.0,
-         "requests_per_minute": 3_000,
-         "tokens_per_minute": 1_000_000,
-         "reasoning_model": False,
-     },
-     "llama-4-maverick": {
-         "id": "llama-4-maverick",
-         "name": "Llama-4-Maverick-17B-128E-Instruct-FP8",
-         "api_base": "https://api.llama.com/compat/v1",
-         "api_key_env_var": "META_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "api_spec": "openai",
-         "input_cost": 0.0,
-         "output_cost": 0.0,
-         "requests_per_minute": 3_000,
-         "tokens_per_minute": 1_000_000,
-         "reasoning_model": False,
-     },
-     "llama-3.3-70b": {
-         "id": "llama-3.3-70b",
-         "name": "Llama-3.3-70B-Instruct",
-         "api_base": "https://api.llama.com/compat/v1",
-         "api_key_env_var": "META_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "api_spec": "openai",
-         "input_cost": 0.0,
-         "output_cost": 0.0,
-         "requests_per_minute": 3_000,
-         "tokens_per_minute": 1_000_000,
-         "reasoning_model": False,
-     },
-     "llama-3.3-8b": {
-         "id": "llama-3.3-8b",
-         "name": "Llama-3.3-8B-Instruct",
-         "api_base": "https://api.llama.com/compat/v1",
-         "api_key_env_var": "META_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "api_spec": "openai",
-         "input_cost": 0.0,
-         "output_cost": 0.0,
-         "requests_per_minute": 3_000,
-         "tokens_per_minute": 1_000_000,
-         "reasoning_model": False,
-     },
-     # .d8888b. 888
-     # d88P Y88b 888
-     # 888 888 888
-     # 888 888d888 .d88b. 888 888
-     # 888 88888 888P" d88""88b 888 .88P
-     # 888 888 888 888 888 888888K
-     # Y88b d88P 888 Y88..88P 888 "88b
-     # "Y8888P88 888 "Y88P" 888 888
-     "grok-3": {
-         "id": "grok-3",
-         "name": "grok-3-latest",
-         "api_base": "https://api.x.ai/v1",
-         "api_key_env_var": "GROK_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "api_spec": "openai",
-         "input_cost": 2.0,
-         "output_cost": 8.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "grok-3-mini": {
-         "id": "grok-3-mini",
-         "name": "grok-3-mini-latest",
-         "api_base": "https://api.x.ai/v1",
-         "api_key_env_var": "GROK_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "api_spec": "openai",
-         "input_cost": 2.0,
-         "output_cost": 8.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     # .oooooo. oooo .o. ooooo
-     # d8P' `Y8b `888 .888. `888'
-     # 888 .ooooo. .ooooo. .oooooooo 888 .ooooo. .8"888. 888
-     # 888 d88' `88b d88' `88b 888' `88b 888 d88' `88b .8' `888. 888
-     # 888 ooooo 888 888 888 888 888 888 888 888ooo888 .88ooo8888. 888
-     # `88. .88' 888 888 888 888 `88bod8P' 888 888 .o .8' `888. 888
-     # `Y8bood8P' `Y8bod8P' `Y8bod8P' `8oooooo. o888o `Y8bod8P' o88o o8888o o888o
-     # d" YD
-     # "Y88888P'
-     # these are through AI studio rather than Vertex, and using the OpenAI-compatible endpoints
-     "gemini-2.0-flash": {
-         "id": "gemini-2.0-flash",
-         "name": "gemini-2.0-flash",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "openai",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "gemini-2.0-flash-lite": {
-         "id": "gemini-2.0-flash-lite",
-         "name": "gemini-2.0-flash-lite",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "openai",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "gemini-2.5-pro": {
-         "id": "gemini-2.5-pro",
-         "name": "gemini-2.5-pro",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "openai",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "gemini-2.5-flash": {
-         "id": "gemini-2.5-flash",
-         "name": "gemini-2.5-flash",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "openai",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "gemini-2.5-flash-lite": {
-         "id": "gemini-2.5-flash-lite",
-         "name": "gemini-2.5-flash-lite",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "openai",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     # Native Gemini API versions with file support
-     "gemini-2.0-flash-gemini": {
-         "id": "gemini-2.0-flash-gemini",
-         "name": "gemini-2.0-flash",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "gemini",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "gemini-2.0-flash-lite-gemini": {
-         "id": "gemini-2.0-flash-lite-gemini",
-         "name": "gemini-2.0-flash-lite",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "gemini",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "gemini-2.5-pro-gemini": {
-         "id": "gemini-2.5-pro-gemini",
-         "name": "gemini-2.5-pro",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "gemini",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "gemini-2.5-flash-gemini": {
-         "id": "gemini-2.5-flash-gemini",
-         "name": "gemini-2.5-flash",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "gemini",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "gemini-2.5-flash-lite-gemini": {
-         "id": "gemini-2.5-flash-lite-gemini",
-         "name": "gemini-2.5-flash-lite",
-         "api_base": "https://generativelanguage.googleapis.com/v1beta",
-         "api_key_env_var": "GEMINI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "api_spec": "gemini",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     # ███████ █████████ █████
-     # ███░░░░░███ ███░░░░░███ ░░███
-     # ███ ░░███ ████████ ██████ ████████ ░███ ░███ ░███
-     # ░███ ░███░░███░░███ ███░░███░░███░░███ ░███████████ ░███
-     # ░███ ░███ ░███ ░███░███████ ░███ ░███ ░███░░░░░███ ░███
-     # ░░███ ███ ░███ ░███░███░░░ ░███ ░███ ░███ ░███ ░███
-     # ░░░███████░ ░███████ ░░██████ ████ █████ █████ █████ █████
-     # ░░░░░░░ ░███░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░░
-     # ░███
-     # █████
-     # ░░░░░
-     "gpt-5": {
-         "id": "gpt-5",
-         "name": "gpt-5",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 1.25,
-         "cached_input_cost": 0.125,
-         "output_cost": 10.0,
-         "reasoning_model": True,
-     },
-     "gpt-5-chat": {
-         "id": "gpt-5-chat",
-         "name": "gpt-5-chat-latest",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 1.25,
-         "cached_input_cost": 0.125,
-         "output_cost": 10.0,
-         "reasoning_model": False,
-     },
-     "gpt-5-mini": {
-         "id": "gpt-5-mini",
-         "name": "gpt-5-mini",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 0.25,
-         "cached_input_cost": 0.025,
-         "output_cost": 2.0,
-         "reasoning_model": True,
-     },
-     "gpt-5-nano": {
-         "id": "gpt-5-nano",
-         "name": "gpt-5-nano",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 0.05,
-         "cached_input_cost": 0.005,
-         "output_cost": 0.40,
-         "reasoning_model": True,
-     },
-     "openai-computer-use-preview": {
-         "id": "openai-computer-use-preview",
-         "name": "computer-use-preview",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": False,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 2.0,
-         "output_cost": 8.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "o3": {
-         "id": "o3",
-         "name": "o3-2025-04-16",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 10.0,
-         "output_cost": 40.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "o4-mini": {
-         "id": "o4-mini",
-         "name": "o4-mini-2025-04-16",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 1.1,
-         "output_cost": 4.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "gpt-4.1": {
-         "id": "gpt-4.1",
-         "name": "gpt-4.1-2025-04-14",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 2.0,
-         "output_cost": 8.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "gpt-4.1-mini": {
-         "id": "gpt-4.1-mini",
-         "name": "gpt-4.1-mini-2025-04-14",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 0.4,
-         "output_cost": 1.6,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "gpt-4.1-nano": {
-         "id": "gpt-4.1-nano",
-         "name": "gpt-4.1-nano-2025-04-14",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": True,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 0.1,
-         "output_cost": 0.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "gpt-4.5": {
-         "id": "gpt-4.5",
-         "name": "gpt-4.5-preview-2025-02-27",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 75.0,
-         "output_cost": 150.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": False,
-     },
-     "o3-mini": {
-         "id": "o3-mini",
-         "name": "o3-mini-2025-01-31",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 1.1,
-         "output_cost": 4.4,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "o1": {
-         "id": "o1",
-         "name": "o1-2024-12-17",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 15.0,
-         "output_cost": 60.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "o1-preview": {
-         "id": "o1-preview",
-         "name": "o1-preview-2024-09-12",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 15.0,
-         "output_cost": 60.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
-     "o1-mini": {
-         "id": "o1-mini",
-         "name": "o1-mini-2024-09-12",
-         "api_base": "https://api.openai.com/v1",
-         "api_key_env_var": "OPENAI_API_KEY",
-         "supports_json": False,
-         "supports_logprobs": True,
-         "supports_responses": True,
-         "api_spec": "openai",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 20,
-         "tokens_per_minute": 100_000,
-         "reasoning_model": True,
-     },
- "gpt-4o": {
492
- "id": "gpt-4o",
493
- "name": "gpt-4o-2024-08-06",
494
- "api_base": "https://api.openai.com/v1",
495
- "api_key_env_var": "OPENAI_API_KEY",
496
- "supports_json": True,
497
- "supports_logprobs": True,
498
- "supports_responses": True,
499
- "api_spec": "openai",
500
- "input_cost": 5.0,
501
- "output_cost": 15.0,
502
- "requests_per_minute": 10_000,
503
- "tokens_per_minute": 30_000_000,
504
- },
505
- "gpt-4o-mini": {
506
- "id": "gpt-4o-mini",
507
- "name": "gpt-4o-mini-2024-07-18",
508
- "api_base": "https://api.openai.com/v1",
509
- "api_key_env_var": "OPENAI_API_KEY",
510
- "supports_json": True,
511
- "supports_logprobs": True,
512
- "supports_responses": True,
513
- "api_spec": "openai",
514
- "input_cost": 0.15,
515
- "output_cost": 0.6,
516
- "requests_per_minute": 60_000,
517
- "tokens_per_minute": 250_000_000,
518
- },
519
- "gpt-4o-mini-free": {
520
- "id": "gpt-4o-mini-free",
521
- "name": "gpt-4o-mini-2024-07-18-free",
522
- "api_base": "https://api.openai.com/v1",
523
- "api_key_env_var": "OPENAI_API_KEY",
524
- "supports_json": True,
525
- "supports_logprobs": True,
526
- "supports_responses": True,
527
- "api_spec": "openai",
528
- "input_cost": 0.0,
529
- "output_cost": 0.0,
530
- "requests_per_minute": 20_000,
531
- "tokens_per_minute": 50_000_000,
532
- },
533
- "gpt-3.5-turbo": {
534
- "id": "gpt-3.5-turbo",
535
- "name": "gpt-3.5-turbo-0125",
536
- "api_base": "https://api.openai.com/v1",
537
- "api_key_env_var": "OPENAI_API_KEY",
538
- "supports_json": True,
539
- "supports_logprobs": True,
540
- "supports_responses": True,
541
- "api_spec": "openai",
542
- "input_cost": 0.5,
543
- "output_cost": 1.5,
544
- "requests_per_minute": 40_000,
545
- "tokens_per_minute": 75_000_000,
546
- },
547
- "gpt-4-turbo": {
548
- "id": "gpt-4-turbo",
549
- "name": "gpt-4-turbo-2024-04-09",
550
- "api_base": "https://api.openai.com/v1",
551
- "api_key_env_var": "OPENAI_API_KEY",
552
- "supports_json": True,
553
- "supports_logprobs": True,
554
- "supports_responses": True,
555
- "api_spec": "openai",
556
- "input_cost": 10.0,
557
- "output_cost": 30.0,
558
- "requests_per_minute": 10_000,
559
- "tokens_per_minute": 1_500_000,
560
- },
561
- "gpt-4": {
562
- "id": "gpt-4",
563
- "name": "gpt-4-0613",
564
- "api_base": "https://api.openai.com/v1",
565
- "api_key_env_var": "OPENAI_API_KEY",
566
- "supports_json": False,
567
- "supports_logprobs": False,
568
- "supports_responses": True,
569
- "api_spec": "openai",
570
- "input_cost": 30.0,
571
- "output_cost": 60.0,
572
- "requests_per_minute": 10_000,
573
- "tokens_per_minute": 300_000,
574
- },
575
- "gpt-4-32k": {
576
- "id": "gpt-4-32k",
577
- "name": "gpt-4-32k-0613",
578
- "api_base": "https://api.openai.com/v1",
579
- "api_key_env_var": "OPENAI_API_KEY",
580
- "supports_json": False,
581
- "supports_logprobs": False,
582
- "supports_responses": True,
583
- "api_spec": "openai",
584
- "input_cost": 60.0,
585
- "output_cost": 120.0,
586
- "requests_per_minute": 1_000,
587
- "tokens_per_minute": 150_000,
588
- },
589
-     # █████████ █████ █████ ███
-     # ███░░░░░███ ░░███ ░░███ ░░░
-     # ░███ ░███ ████████ ███████ ░███████ ████████ ██████ ████████ ████ ██████
-     # ░███████████ ░░███░░███ ░░░███░ ░███░░███ ░░███░░███ ███░░███░░███░░███░░███ ███░░███
-     # ░███░░░░░███ ░███ ░███ ░███ ░███ ░███ ░███ ░░░ ░███ ░███ ░███ ░███ ░███ ░███ ░░░
-     # ░███ ░███ ░███ ░███ ░███ ███ ░███ ░███ ░███ ░███ ░███ ░███ ░███ ░███ ░███ ███
-     # █████ █████ ████ █████ ░░█████ ████ █████ █████ ░░██████ ░███████ █████░░██████
-     # ░░░░░ ░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░░░ ░███░░░ ░░░░░ ░░░░░░
-     # ░███
-     # █████
-     # ░░░░░
-     "claude-4-opus": {
-         "id": "claude-4-opus",
-         "name": "claude-opus-4-20250514",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-         "reasoning_model": True,
-     },
-     "claude-4-sonnet": {
-         "id": "claude-4-sonnet",
-         "name": "claude-sonnet-4-20250514",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-     },
-     "claude-3.7-sonnet": {
-         "id": "claude-3.7-sonnet",
-         "name": "claude-3-7-sonnet-20250219",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-         "reasoning_model": True,
-     },
-     "claude-3.6-sonnet": {
-         "id": "claude-3.6-sonnet",
-         "name": "claude-3-5-sonnet-20241022",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-     },
-     "claude-3.5-sonnet": {
-         "id": "claude-3.5-sonnet",
-         "name": "claude-3-5-sonnet-20240620",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-     },
-     "claude-3-opus": {
-         "id": "claude-3-opus",
-         "name": "claude-3-opus-20240229",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 15.0,
-         "output_cost": 75.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-     },
-     "claude-3-sonnet": {
-         "id": "claude-3-sonnet",
-         "name": "claude-3-sonnet-20240229",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 15.0,
-         "output_cost": 75.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-     },
-     "claude-3.5-haiku": {
-         "id": "claude-3.5-haiku",
-         "name": "claude-3-5-haiku-20241022",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 1.00,
-         "output_cost": 5.00,
-         "requests_per_minute": 20_000,
-         "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
-     },
-     "claude-3-haiku": {
-         "id": "claude-3-haiku",
-         "name": "claude-3-haiku-20240307",
-         "api_base": "https://api.anthropic.com/v1",
-         "api_key_env_var": "ANTHROPIC_API_KEY",
-         "supports_json": False,
-         "api_spec": "anthropic",
-         "input_cost": 0.25,
-         "output_cost": 1.25,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
-     },
-     # █████ █████ █████
-     # ░░███ ░░███ ░░███
-     # ░███ ░███ ██████ ████████ ███████ ██████ █████ █████
-     # ░███ ░███ ███░░███░░███░░███░░░███░ ███░░███░░███ ░░███
-     # ░░███ ███ ░███████ ░███ ░░░ ░███ ░███████ ░░░█████░
-     # ░░░█████░ ░███░░░ ░███ ░███ ███░███░░░ ███░░░███
-     # ░░███ ░░██████ █████ ░░█████ ░░██████ █████ █████
-     # ░░░ ░░░░░░ ░░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░
-     # "claude-haiku-vertex": {
-     #     "id": "claude-haiku-vertex",
-     #     "name": "claude-3-haiku@20240307",
-     #     "regions": ["europe-west4", "us-central1"],
-     #     "api_base": "",
-     #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-     #     "supports_json": False,
-     #     "api_spec": "vertex_anthropic",
-     #     "input_cost": 0.25,
-     #     "output_cost": 1.25,
-     #     "requests_per_minute": 120,
-     #     "tokens_per_minute": None,
-     # },
-     # "claude-sonnet-vertex": {
-     #     "id": "claude-sonnet-vertex",
-     #     "name": "claude-3-sonnet@20240229",
-     #     "regions": ["us-central1", "asia-southeast1"],
-     #     "api_base": "",
-     #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-     #     "supports_json": False,
-     #     "api_spec": "vertex_anthropic",
-     #     "input_cost": 3.0,
-     #     "output_cost": 15.0,
-     #     "requests_per_minute": 120,
-     #     "tokens_per_minute": None,
-     # },
-     # "claude-opus-vertex": {
-     #     "id": "claude-opus-vertex",
-     #     "name": "claude-3-opus@20240229",
-     #     "regions": ["us-east5"],
-     #     "api_base": "",
-     #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-     #     "supports_json": False,
-     #     "api_spec": "vertex_anthropic",
-     #     "input_cost": 15.0,
-     #     "output_cost": 75.0,
-     #     "requests_per_minute": 120,
-     #     "tokens_per_minute": None,
-     # },
-     # "gemini-2.5-pro-vertex": {
-     #     "id": "gemini-2.5-pro",
-     #     "name": "gemini-2.5-pro-preview-05-06",
-     #     "api_base": "",
-     #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-     #     "supports_json": True,
-     #     "supports_logprobs": False,
-     #     "api_spec": "vertex_gemini",
-     #     "input_cost": 1.25,
-     #     "output_cost": 10.0,
-     #     "requests_per_minute": 20,
-     #     "tokens_per_minute": 100_000,
-     #     "reasoning_model": True,
-     # },
-     # "gemini-2.5-flash-vertex": {
-     #     "id": "gemini-2.5-flash",
-     #     "name": "gemini-2.5-flash-preview-05-20",
-     #     "api_base": "",
-     #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-     #     "supports_json": True,
-     #     "supports_logprobs": False,
-     #     "api_spec": "vertex_gemini",
-     #     "input_cost": 0.15,
-     #     "output_cost": 0.6,
-     #     "requests_per_minute": 20,
-     #     "tokens_per_minute": 100_000,
-     #     "reasoning_model": True,
-     # },
-     # "gemini-2.0-flash-vertex": {
-     #     "id": "gemini-2.0-flash",
-     #     "name": "gemini-2.0-flash",
-     #     "api_base": "",
-     #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-     #     "supports_json": True,
-     #     "supports_logprobs": False,
-     #     "api_spec": "vertex_gemini",
-     #     "input_cost": 0.10,
-     #     "output_cost": 0.40,
-     #     "requests_per_minute": 20,
-     #     "tokens_per_minute": 100_000,
-     #     "reasoning_model": False,
-     # },
-     # "gemini-2.0-flash-lite-vertex": {
-     #     "id": "gemini-2.0-flash-lite",
-     #     "name": "gemini-2.0-flash-lite",
-     #     "api_base": "",
-     #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-     #     "supports_json": True,
-     #     "supports_logprobs": False,
-     #     "api_spec": "vertex_gemini",
-     #     "input_cost": 0.075,
-     #     "output_cost": 0.30,
-     #     "requests_per_minute": 20,
-     #     "tokens_per_minute": 100_000,
-     #     "reasoning_model": False,
-     # },
-     # ███████████ █████ █████
-     # ░░███░░░░░███ ░░███ ░░███
-     # ░███ ░███ ██████ ███████ ████████ ██████ ██████ ░███ █████
-     # ░██████████ ███░░███ ███░░███ ░░███░░███ ███░░███ ███░░███ ░███░░███
-     # ░███░░░░░███░███████ ░███ ░███ ░███ ░░░ ░███ ░███░███ ░░░ ░██████░
-     # ░███ ░███░███░░░ ░███ ░███ ░███ ░███ ░███░███ ███ ░███░░███
-     # ███████████ ░░██████ ░░████████ █████ ░░██████ ░░██████ ████ █████
-     # ░░░░░░░░░░░ ░░░░░░ ░░░░░░░░ ░░░░░ ░░░░░░ ░░░░░░ ░░░░ ░░░░░
-     "claude-3-haiku-bedrock": {
-         "id": "claude-3-haiku-bedrock",
-         "name": "us.anthropic.claude-3-haiku-20240307-v1:0",
-         "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-         "api_base": "",
-         "api_key_env_var": "",
-         "api_spec": "bedrock",
-         "input_cost": 0.25,
-         "output_cost": 1.25,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 8_000_000,
-     },
-     "claude-3.5-haiku-bedrock": {
-         "id": "claude-3.5-haiku-bedrock",
-         "name": "us.anthropic.claude-3-5-haiku-20241022-v1:0",
-         "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-         "api_base": "",
-         "api_key_env_var": "",
-         "api_spec": "bedrock",
-         "input_cost": 0.25,
-         "output_cost": 1.25,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 8_000_000,
-     },
-     "claude-3.5-sonnet-bedrock": {
-         "id": "claude-3.5-sonnet-bedrock",
-         "name": "us.anthropic.claude-3-5-sonnet-20240620-v1:0",
-         "regions": ["us-east-1", "us-west-2"],
-         "api_base": "",
-         "api_key_env_var": "",
-         "api_spec": "bedrock",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-         "reasoning_model": False,
-     },
-     "claude-3.6-sonnet-bedrock": {
-         "id": "claude-3.6-sonnet-bedrock",
-         "name": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
-         "regions": ["us-east-1", "us-west-2", "us-east-2"],
-         "api_base": "",
-         "api_key_env_var": "",
-         "api_spec": "bedrock",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-         "reasoning_model": False,
-     },
-     "claude-3.7-sonnet-bedrock": {
-         "id": "claude-3.7-sonnet-bedrock",
-         "name": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
-         "regions": ["us-east-1", "us-west-2", "eu-west-1"],
-         "api_base": "",
-         "api_key_env_var": "",
-         "api_spec": "bedrock",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-         "reasoning_model": True,
-     },
-     "claude-4-sonnet-bedrock": {
-         "id": "claude-4-sonnet-bedrock",
-         "name": "us.anthropic.claude-sonnet-4-20250514-v1:0",
-         "regions": ["us-east-1", "us-west-2", "us-east-2"],
-         "api_base": "",
-         "api_key_env_var": "",
-         "api_spec": "bedrock",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-         "reasoning_model": True,
-     },
-     "claude-4-opus-bedrock": {
-         "id": "claude-4-opus-bedrock",
-         "name": "us.anthropic.claude-opus-4-20250514-v1:0",
-         "regions": ["us-east-1", "us-west-2", "us-east-2"],
-         "api_base": "",
-         "api_key_env_var": "",
-         "api_spec": "bedrock",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
-         "reasoning_model": True,
-     },
-     # "mistral-7b-bedrock": {
-     #     "id": "mistral-7b-bedrock",
-     #     "name": "mistral.mistral-7b-instruct-v0:2",
-     #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-     #     "api_base": "",
-     #     "api_key_env_var": "",
-     #     "api_spec": "bedrock_mistral",
-     #     "input_cost": 0.15,
-     #     "output_cost": 0.2,
-     #     "requests_per_minute": 3_200,
-     #     "tokens_per_minute": 1_200_000,
-     # },
-     # "mixtral-8x7b-bedrock": {
-     #     "id": "mixtral-8x7b-bedrock",
-     #     "name": "mistral.mixtral-8x7b-instruct-v0:1",
-     #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-     #     "api_base": "",
-     #     "api_key_env_var": "",
-     #     "api_spec": "bedrock_mistral",
-     #     "input_cost": 0.45,
-     #     "output_cost": 0.7,
-     #     "requests_per_minute": 1_600,
-     #     "tokens_per_minute": 1_200_000,
-     # },
-     # "mistral-large-bedrock": {
-     #     "id": "mistral-large-bedrock",
-     #     "name": "mistral.mistral-large-2402-v1:0",
-     #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-     #     "api_base": "",
-     #     "api_key_env_var": "",
-     #     "api_spec": "bedrock_mistral",
-     #     "input_cost": 8.0,
-     #     "output_cost": 24.0,
-     #     "requests_per_minute": 1_600,
-     #     "tokens_per_minute": 1_200_000,
-     # },
-     # ███████████ █████ █████
-     # ░█░░░███░░░█ ░░███ ░░███
-     # ░ ░███ ░ ██████ ███████ ██████ ███████ ░███████ ██████ ████████
-     # ░███ ███░░███ ███░░███ ███░░███░░░███░ ░███░░███ ███░░███░░███░░███
-     # ░███ ░███ ░███░███ ░███░███████ ░███ ░███ ░███ ░███████ ░███ ░░░
-     # ░███ ░███ ░███░███ ░███░███░░░ ░███ ███ ░███ ░███ ░███░░░ ░███
-     # █████ ░░██████ ░░███████░░██████ ░░█████ ████ █████░░██████ █████
-     # ░░░░░ ░░░░░░ ░░░░░███ ░░░░░░ ░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░
-     # ███ ░███
-     # ░░██████
-     # ░░░░░░
-     # tbh only reason to use these are that they're cheap, but all worse than haiku
-     "deepseek-r1-together": {
-         "id": "deepseek-r1-together",
-         "name": "deepseek-ai/DeepSeek-R1",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 3.0,
-         "output_cost": 7.0,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-     },
-     "deepseek-v3-together": {
-         "id": "deepseek-v3-together",
-         "name": "deepseek-ai/DeepSeek-V3",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 1.25,
-         "output_cost": 1.25,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-     },
-     "qwen-3-235b-together": {
-         "id": "qwen-3-235b-together",
-         "name": "Qwen/Qwen3-235B-A22B-fp8",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 0.2,
-         "output_cost": 0.6,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-     },
-     "qwen-2.5-vl-together": {
-         "id": "qwen-2.5-vl-together",
-         "name": "Qwen/Qwen2.5-VL-72B-Instruct",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 1.95,
-         "output_cost": 8.0,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-     },
-     "llama-4-maverick-together": {
-         "id": "llama-4-maverick-together",
-         "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 0.27,
-         "output_cost": 0.85,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-     },
-     "llama-4-scout-together": {
-         "id": "llama-4-scout-together",
-         "name": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 0.18,
-         "output_cost": 0.59,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-     },
-     "gpt-oss-120b-together": {
-         "id": "gpt-oss-120b-together",
-         "name": "openai/gpt-oss-120b",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 0.18,
-         "output_cost": 0.59,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-         "reasoning_model": True
-     },
-     "gpt-oss-20b-together": {
-         "id": "gpt-oss-20b-together",
-         "name": "openai/gpt-oss-20b",
-         "api_base": "https://api.together.xyz/v1",
-         "api_key_env_var": "TOGETHER_API_KEY",
-         "supports_json": False,
-         "api_spec": "openai",
-         "input_cost": 0.18,
-         "output_cost": 0.59,
-         "requests_per_minute": None,
-         "tokens_per_minute": None,
-         "reasoning_model": True
-     },
-     # █████████ █████
-     # ███░░░░░███ ░░███
-     # ███ ░░░ ██████ ░███████ ██████ ████████ ██████
-     # ░███ ███░░███ ░███░░███ ███░░███░░███░░███ ███░░███
-     # ░███ ░███ ░███ ░███ ░███ ░███████ ░███ ░░░ ░███████
-     # ░░███ ███░███ ░███ ░███ ░███ ░███░░░ ░███ ░███░░░
-     # ░░█████████ ░░██████ ████ █████░░██████ █████ ░░██████
-     # ░░░░░░░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░░
-     "aya-expanse-8b": {
-         "id": "aya-expanse-8b",
-         "name": "c4ai-aya-expanse-8b",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.5,
-         "output_cost": 1.5,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     "aya-expanse-32b": {
-         "id": "aya-expanse-32b",
-         "name": "c4ai-aya-expanse-32b",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.5,
-         "output_cost": 1.5,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     "aya-vision-8b": {
-         "id": "aya-vision-8b",
-         "name": "c4ai-aya-vision-8b",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.5,
-         "output_cost": 1.5,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     "aya-vision-32b": {
-         "id": "aya-vision-32b",
-         "name": "c4ai-aya-vision-32b",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.5,
-         "output_cost": 1.5,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     "command-a": {
-         "id": "command-a",
-         "name": "command-a-03-2025",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.5,
-         "output_cost": 1.5,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     "command-r-7b": {
-         "id": "command-r-cohere",
-         "name": "command-r7b-12-2024",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.5,
-         "output_cost": 1.5,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     "command-r": {
-         "id": "command-r",
-         "name": "command-r-08-2024",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.5,
-         "output_cost": 1.5,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     "command-r-plus": {
-         "id": "command-r-plus",
-         "name": "command-r-plus-04-2024",
-         "api_base": "https://api.cohere.ai/compatibility/v1",
-         "api_key_env_var": "COHERE_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 3.0,
-         "output_cost": 15.0,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": None,
-     },
-     # ██████ ██████ ███ █████ ████
-     # ░░██████ ██████ ░░░ ░░███ ░░███
-     # ░███░█████░███ ████ █████ ███████ ████████ ██████ ░███
-     # ░███░░███ ░███ ░░███ ███░░ ░░░███░ ░░███░░███ ░░░░░███ ░███
-     # ░███ ░░░ ░███ ░███ ░░█████ ░███ ░███ ░░░ ███████ ░███
-     # ░███ ░███ ░███ ░░░░███ ░███ ███ ░███ ███░░███ ░███
-     # █████ █████ █████ ██████ ░░█████ █████ ░░████████ █████
-     # ░░░░░ ░░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░ ░░░░░░░░ ░░░░░
-     "mistral-medium": {
-         "id": "mistral-medium",
-         "name": "mistral-medium-latest",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 0.4,
-         "output_cost": 2.0,
-     },
-     "mistral-large": {
-         "id": "mistral-large",
-         "name": "mistral-large-latest",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 2.0,
-         "output_cost": 6.0,
-     },
-     "pixtral-large": {
-         "id": "pixtral-large",
-         "name": "pixtral-large-latest",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 2.0,
-         "output_cost": 6.0,
-     },
-     "mistral-small": {
-         "id": "mistral-small",
-         "name": "mistral-small-latest",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 0.1,
-         "output_cost": 0.3,
-     },
-     "devstral-small": {
-         "id": "devstral-small",
-         "name": "devstral-small-2505",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 0.1,
-         "output_cost": 0.3,
-     },
-     "codestral": {
-         "id": "codestral",
-         "name": "codestral-latest",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 0.2,
-         "output_cost": 0.6,
-     },
-     "pixtral-12b": {
-         "id": "pixtral-12b",
-         "name": "pixtral-12b",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 0.1,
-         "output_cost": 0.3,
-     },
-     "mistral-nemo": {
-         "id": "mistral-nemo",
-         "name": "open-mistral-nemo",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 0.1,
-         "output_cost": 0.3,
-     },
-     "ministral-8b": {
-         "id": "ministral-8b",
-         "name": "ministral-8b-latest",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 0.7,
-         "output_cost": 0.7,
-     },
-     "mixtral-8x22b": {
-         "id": "mixtral-8x22b",
-         "name": "open-mixtral-8x22b",
-         "api_base": "https://api.mistral.ai/v1",
-         "api_key_env_var": "MISTRAL_API_KEY",
-         "supports_json": True,
-         "api_spec": "mistral",
-         "input_cost": 2.0,
-         "output_cost": 6.0,
-     },
-     # ______ _
-     # (______) | |
-     # _ _ _____ _____ ____ ___ _____ _____| | _
-     # | | | | ___ | ___ | _ \ /___) ___ | ___ | |_/ )
-     # | |__/ /| ____| ____| |_| |___ | ____| ____| _ (
-     # |_____/ |_____)_____) __/(___/|_____)_____)_| \_)
-     # |_|
-     "deepseek-chat": {
-         "id": "deepseek-chat",
-         "name": "deepseek-chat",
-         "api_base": "https://api.deepseek.com/v1",
-         "api_key_env_var": "DEEPSEEK_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.27,
-         "output_cost": 1.10,
-     },
-     "deepseek-r1": {
-         "id": "deepseek-r1",
-         "name": "deepseek-reasoner",
-         "api_base": "https://api.deepseek.com/v1",
-         "api_key_env_var": "DEEPSEEK_API_KEY",
-         "api_spec": "openai",
-         "input_cost": 0.55,
-         "output_cost": 2.19,
-     },
- }
+ # Import and register all provider models
+ from .anthropic import ANTHROPIC_MODELS
+ from .bedrock import BEDROCK_MODELS
+ from .cerebras import CEREBRAS_MODELS
+ from .cohere import COHERE_MODELS
+ from .deepseek import DEEPSEEK_MODELS
+ from .fireworks import FIREWORKS_MODELS
+ from .google import GOOGLE_MODELS
+ from .grok import XAI_MODELS
+ from .groq import GROQ_MODELS
+ from .meta import META_MODELS
+ from .mistral import MISTRAL_MODELS
+ from .openai import OPENAI_MODELS
+ from .openrouter import OPENROUTER_MODELS
+ from .together import TOGETHER_MODELS


  @dataclass
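The monolithic BUILTIN_MODELS dict above is replaced by one module per provider, each exporting a plain dict with the same entry shape. Those modules themselves are not part of this diff, so the following is only a hedged sketch of what, say, the new .openai module presumably exports; the module path and which entries it contains are assumptions, while the field names and values are copied from the removed BUILTIN_MODELS entry.

# Hypothetical sketch of a provider module (e.g. lm_deluge/models/openai.py);
# not shown in this diff. Entry shape mirrors the removed BUILTIN_MODELS dict.
OPENAI_MODELS = {
    "gpt-4.1-mini": {
        "id": "gpt-4.1-mini",
        "name": "gpt-4.1-mini-2025-04-14",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "supports_responses": True,
        "api_spec": "openai",
        "input_cost": 0.4,
        "output_cost": 1.6,
        "reasoning_model": False,
    },
    # ...remaining OpenAI entries, one per model id
}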
@@ -1349,7 +86,7 @@ def register_model(
      name: str,
      api_base: str,
      api_key_env_var: str,
-     api_spec: str,
+     api_spec: str = "openai",
      input_cost: float | None = 0, # $ per million input tokens
      cached_input_cost: float | None = 0,
      output_cost: float | None = 0, # $ per million output tokens
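With api_spec now defaulting to "openai", registering a custom OpenAI-compatible endpoint no longer requires passing the spec explicitly. A hedged example of such a call (the model id, endpoint, and env var below are illustrative, not values shipped with the package):

# Sketch: register a self-hosted OpenAI-compatible endpoint.
register_model(
    id="my-local-llama",                      # illustrative values only
    name="meta-llama/Llama-3.3-70B-Instruct",
    api_base="http://localhost:8000/v1",
    api_key_env_var="LOCAL_API_KEY",
    input_cost=0.0,
    output_cost=0.0,
    # api_spec defaults to "openai" after this change
)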
@@ -1383,8 +120,25 @@ def register_model(
      return model


- # Populate registry with builtin models
- for cfg in BUILTIN_MODELS.values():
-     register_model(**cfg)
+ # Register all models from all providers
+ for model_dict in [
+     ANTHROPIC_MODELS,
+     BEDROCK_MODELS,
+     COHERE_MODELS,
+     DEEPSEEK_MODELS,
+     FIREWORKS_MODELS,
+     GOOGLE_MODELS,
+     XAI_MODELS,
+     META_MODELS,
+     MISTRAL_MODELS,
+     OPENAI_MODELS,
+     OPENROUTER_MODELS,
+     TOGETHER_MODELS,
+     GROQ_MODELS,
+     CEREBRAS_MODELS,
+ ]:
+     for cfg in model_dict.values():
+         register_model(**cfg)
+

  # print("Valid models:", registry.keys())
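The new loop splats every provider entry into register_model, so the net effect matches the old BUILTIN_MODELS loop, only sourced from fourteen provider dicts instead of one. Roughly equivalent, as a sketch (assuming the entry ids from the removed dict carry over into the new modules):

register_model(**OPENAI_MODELS["gpt-4.1-mini"])
register_model(**ANTHROPIC_MODELS["claude-4-sonnet"])
# ...and so on for every entry in every provider dict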