lm-deluge 0.0.34__py3-none-any.whl → 0.0.36__py3-none-any.whl

This diff shows the differences between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge has been flagged as potentially problematic; see the registry's advisory page for more details.

lm_deluge/models.py DELETED
@@ -1,1305 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import random
4
- from dataclasses import dataclass, field
5
-
6
- from .request_context import RequestContext
7
-
8
- BUILTIN_MODELS = {
9
- # `7MMM. ,MMF' mm
10
- # MMMb dPMM MM
11
- # M YM ,M MM .gP"Ya mmMMmm ,6"Yb.
12
- # M Mb M' MM ,M' Yb MM 8) MM
13
- # M YM.P' MM 8M"""""" MM ,pm9MM
14
- # M `YM' MM YM. , MM 8M MM
15
- # .JML. `' .JMML.`Mbmmd' `Mbmo`Moo9^Yo.
16
- "llama-4-scout": {
17
- "id": "llama-4-scout",
18
- "name": "Llama-4-Scout-17B-16E-Instruct-FP8",
19
- "api_base": "https://api.llama.com/compat/v1",
20
- "api_key_env_var": "META_API_KEY",
21
- "supports_json": True,
22
- "supports_logprobs": True,
23
- "api_spec": "openai",
24
- "input_cost": 0.0,
25
- "output_cost": 0.0,
26
- "requests_per_minute": 3_000,
27
- "tokens_per_minute": 1_000_000,
28
- "reasoning_model": False,
29
- },
30
- "llama-4-maverick": {
31
- "id": "llama-4-maverick",
32
- "name": "Llama-4-Maverick-17B-128E-Instruct-FP8",
33
- "api_base": "https://api.llama.com/compat/v1",
34
- "api_key_env_var": "META_API_KEY",
35
- "supports_json": True,
36
- "supports_logprobs": True,
37
- "api_spec": "openai",
38
- "input_cost": 0.0,
39
- "output_cost": 0.0,
40
- "requests_per_minute": 3_000,
41
- "tokens_per_minute": 1_000_000,
42
- "reasoning_model": False,
43
- },
44
- "llama-3.3-70b": {
45
- "id": "llama-3.3-70b",
46
- "name": "Llama-3.3-70B-Instruct",
47
- "api_base": "https://api.llama.com/compat/v1",
48
- "api_key_env_var": "META_API_KEY",
49
- "supports_json": True,
50
- "supports_logprobs": True,
51
- "api_spec": "openai",
52
- "input_cost": 0.0,
53
- "output_cost": 0.0,
54
- "requests_per_minute": 3_000,
55
- "tokens_per_minute": 1_000_000,
56
- "reasoning_model": False,
57
- },
58
- "llama-3.3-8b": {
59
- "id": "llama-3.3-8b",
60
- "name": "Llama-3.3-8B-Instruct",
61
- "api_base": "https://api.llama.com/compat/v1",
62
- "api_key_env_var": "META_API_KEY",
63
- "supports_json": True,
64
- "supports_logprobs": True,
65
- "api_spec": "openai",
66
- "input_cost": 0.0,
67
- "output_cost": 0.0,
68
- "requests_per_minute": 3_000,
69
- "tokens_per_minute": 1_000_000,
70
- "reasoning_model": False,
71
- },
72
- # .d8888b. 888
73
- # d88P Y88b 888
74
- # 888 888 888
75
- # 888 888d888 .d88b. 888 888
76
- # 888 88888 888P" d88""88b 888 .88P
77
- # 888 888 888 888 888 888888K
78
- # Y88b d88P 888 Y88..88P 888 "88b
79
- # "Y8888P88 888 "Y88P" 888 888
80
- "grok-3": {
81
- "id": "grok-3",
82
- "name": "grok-3-latest",
83
- "api_base": "https://api.x.ai/v1",
84
- "api_key_env_var": "GROK_API_KEY",
85
- "supports_json": True,
86
- "supports_logprobs": True,
87
- "api_spec": "openai",
88
- "input_cost": 2.0,
89
- "output_cost": 8.0,
90
- "requests_per_minute": 20,
91
- "tokens_per_minute": 100_000,
92
- "reasoning_model": False,
93
- },
94
- "grok-3-mini": {
95
- "id": "grok-3-mini",
96
- "name": "grok-3-mini-latest",
97
- "api_base": "https://api.x.ai/v1",
98
- "api_key_env_var": "GROK_API_KEY",
99
- "supports_json": True,
100
- "supports_logprobs": True,
101
- "api_spec": "openai",
102
- "input_cost": 2.0,
103
- "output_cost": 8.0,
104
- "requests_per_minute": 20,
105
- "tokens_per_minute": 100_000,
106
- "reasoning_model": True,
107
- },
108
- # .oooooo. oooo .o. ooooo
109
- # d8P' `Y8b `888 .888. `888'
110
- # 888 .ooooo. .ooooo. .oooooooo 888 .ooooo. .8"888. 888
111
- # 888 d88' `88b d88' `88b 888' `88b 888 d88' `88b .8' `888. 888
112
- # 888 ooooo 888 888 888 888 888 888 888 888ooo888 .88ooo8888. 888
113
- # `88. .88' 888 888 888 888 `88bod8P' 888 888 .o .8' `888. 888
114
- # `Y8bood8P' `Y8bod8P' `Y8bod8P' `8oooooo. o888o `Y8bod8P' o88o o8888o o888o
115
- # d" YD
116
- # "Y88888P'
117
- # these are through AI studio rather than Vertex, and using the OpenAI-compatible endpoints
118
- "gemini-2.0-flash": {
119
- "id": "gemini-2.0-flash",
120
- "name": "gemini-2.0-flash",
121
- "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
122
- "api_key_env_var": "GEMINI_API_KEY",
123
- "supports_json": True,
124
- "supports_logprobs": False,
125
- "api_spec": "openai",
126
- "input_cost": 0.1,
127
- "output_cost": 0.4,
128
- "requests_per_minute": 20,
129
- "tokens_per_minute": 100_000,
130
- "reasoning_model": False,
131
- },
132
- "gemini-2.0-flash-lite": {
133
- "id": "gemini-2.0-flash-lite",
134
- "name": "gemini-2.0-flash-lite",
135
- "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
136
- "api_key_env_var": "GEMINI_API_KEY",
137
- "supports_json": True,
138
- "supports_logprobs": False,
139
- "api_spec": "openai",
140
- "input_cost": 0.1,
141
- "output_cost": 0.4,
142
- "requests_per_minute": 20,
143
- "tokens_per_minute": 100_000,
144
- "reasoning_model": False,
145
- },
146
- "gemini-2.5-pro": {
147
- "id": "gemini-2.5-pro",
148
- "name": "gemini-2.5-pro",
149
- "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
150
- "api_key_env_var": "GEMINI_API_KEY",
151
- "supports_json": True,
152
- "supports_logprobs": False,
153
- "api_spec": "openai",
154
- "input_cost": 0.1,
155
- "output_cost": 0.4,
156
- "requests_per_minute": 20,
157
- "tokens_per_minute": 100_000,
158
- "reasoning_model": True,
159
- },
160
- "gemini-2.5-flash": {
161
- "id": "gemini-2.5-flash",
162
- "name": "gemini-2.5-flash",
163
- "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
164
- "api_key_env_var": "GEMINI_API_KEY",
165
- "supports_json": True,
166
- "supports_logprobs": False,
167
- "api_spec": "openai",
168
- "input_cost": 0.1,
169
- "output_cost": 0.4,
170
- "requests_per_minute": 20,
171
- "tokens_per_minute": 100_000,
172
- "reasoning_model": True,
173
- },
174
- "gemini-2.5-flash-lite": {
175
- "id": "gemini-2.5-flash-lite",
176
- "name": "gemini-2.5-flash-lite",
177
- "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
178
- "api_key_env_var": "GEMINI_API_KEY",
179
- "supports_json": True,
180
- "supports_logprobs": False,
181
- "api_spec": "openai",
182
- "input_cost": 0.1,
183
- "output_cost": 0.4,
184
- "requests_per_minute": 20,
185
- "tokens_per_minute": 100_000,
186
- "reasoning_model": True,
187
- },
188
- # Native Gemini API versions with file support
189
- "gemini-2.0-flash-gemini": {
190
- "id": "gemini-2.0-flash-gemini",
191
- "name": "gemini-2.0-flash",
192
- "api_base": "https://generativelanguage.googleapis.com/v1beta",
193
- "api_key_env_var": "GEMINI_API_KEY",
194
- "supports_json": True,
195
- "supports_logprobs": False,
196
- "api_spec": "gemini",
197
- "input_cost": 0.1,
198
- "output_cost": 0.4,
199
- "requests_per_minute": 20,
200
- "tokens_per_minute": 100_000,
201
- "reasoning_model": False,
202
- },
203
- "gemini-2.0-flash-lite-gemini": {
204
- "id": "gemini-2.0-flash-lite-gemini",
205
- "name": "gemini-2.0-flash-lite",
206
- "api_base": "https://generativelanguage.googleapis.com/v1beta",
207
- "api_key_env_var": "GEMINI_API_KEY",
208
- "supports_json": True,
209
- "supports_logprobs": False,
210
- "api_spec": "gemini",
211
- "input_cost": 0.1,
212
- "output_cost": 0.4,
213
- "requests_per_minute": 20,
214
- "tokens_per_minute": 100_000,
215
- "reasoning_model": False,
216
- },
217
- "gemini-2.5-pro-gemini": {
218
- "id": "gemini-2.5-pro-gemini",
219
- "name": "gemini-2.5-pro",
220
- "api_base": "https://generativelanguage.googleapis.com/v1beta",
221
- "api_key_env_var": "GEMINI_API_KEY",
222
- "supports_json": True,
223
- "supports_logprobs": False,
224
- "api_spec": "gemini",
225
- "input_cost": 0.1,
226
- "output_cost": 0.4,
227
- "requests_per_minute": 20,
228
- "tokens_per_minute": 100_000,
229
- "reasoning_model": True,
230
- },
231
- "gemini-2.5-flash-gemini": {
232
- "id": "gemini-2.5-flash-gemini",
233
- "name": "gemini-2.5-flash",
234
- "api_base": "https://generativelanguage.googleapis.com/v1beta",
235
- "api_key_env_var": "GEMINI_API_KEY",
236
- "supports_json": True,
237
- "supports_logprobs": False,
238
- "api_spec": "gemini",
239
- "input_cost": 0.1,
240
- "output_cost": 0.4,
241
- "requests_per_minute": 20,
242
- "tokens_per_minute": 100_000,
243
- "reasoning_model": True,
244
- },
245
- "gemini-2.5-flash-lite-gemini": {
246
- "id": "gemini-2.5-flash-lite-gemini",
247
- "name": "gemini-2.5-flash-lite",
248
- "api_base": "https://generativelanguage.googleapis.com/v1beta",
249
- "api_key_env_var": "GEMINI_API_KEY",
250
- "supports_json": True,
251
- "supports_logprobs": False,
252
- "api_spec": "gemini",
253
- "input_cost": 0.1,
254
- "output_cost": 0.4,
255
- "requests_per_minute": 20,
256
- "tokens_per_minute": 100_000,
257
- "reasoning_model": True,
258
- },
259
- # ███████ █████████ █████
260
- # ███░░░░░███ ███░░░░░███ ░░███
261
- # ███ ░░███ ████████ ██████ ████████ ░███ ░███ ░███
262
- # ░███ ░███░░███░░███ ███░░███░░███░░███ ░███████████ ░███
263
- # ░███ ░███ ░███ ░███░███████ ░███ ░███ ░███░░░░░███ ░███
264
- # ░░███ ███ ░███ ░███░███░░░ ░███ ░███ ░███ ░███ ░███
265
- # ░░░███████░ ░███████ ░░██████ ████ █████ █████ █████ █████
266
- # ░░░░░░░ ░███░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░░
267
- # ░███
268
- # █████
269
- # ░░░░░
270
- "openai-computer-use-preview": {
271
- "id": "openai-computer-use-preview",
272
- "name": "computer-use-preview",
273
- "api_base": "https://api.openai.com/v1",
274
- "api_key_env_var": "OPENAI_API_KEY",
275
- "supports_json": True,
276
- "supports_logprobs": False,
277
- "supports_responses": True,
278
- "api_spec": "openai",
279
- "input_cost": 2.0,
280
- "output_cost": 8.0,
281
- "requests_per_minute": 20,
282
- "tokens_per_minute": 100_000,
283
- "reasoning_model": False,
284
- },
285
- "o3": {
286
- "id": "o3",
287
- "name": "o3-2025-04-16",
288
- "api_base": "https://api.openai.com/v1",
289
- "api_key_env_var": "OPENAI_API_KEY",
290
- "supports_json": False,
291
- "supports_logprobs": True,
292
- "supports_responses": True,
293
- "api_spec": "openai",
294
- "input_cost": 10.0,
295
- "output_cost": 40.0,
296
- "requests_per_minute": 20,
297
- "tokens_per_minute": 100_000,
298
- "reasoning_model": True,
299
- },
300
- "o4-mini": {
301
- "id": "o4-mini",
302
- "name": "o4-mini-2025-04-16",
303
- "api_base": "https://api.openai.com/v1",
304
- "api_key_env_var": "OPENAI_API_KEY",
305
- "supports_json": False,
306
- "supports_logprobs": True,
307
- "supports_responses": True,
308
- "api_spec": "openai",
309
- "input_cost": 1.1,
310
- "output_cost": 4.4,
311
- "requests_per_minute": 20,
312
- "tokens_per_minute": 100_000,
313
- "reasoning_model": True,
314
- },
315
- "gpt-4.1": {
316
- "id": "gpt-4.1",
317
- "name": "gpt-4.1-2025-04-14",
318
- "api_base": "https://api.openai.com/v1",
319
- "api_key_env_var": "OPENAI_API_KEY",
320
- "supports_json": True,
321
- "supports_logprobs": True,
322
- "supports_responses": True,
323
- "api_spec": "openai",
324
- "input_cost": 2.0,
325
- "output_cost": 8.0,
326
- "requests_per_minute": 20,
327
- "tokens_per_minute": 100_000,
328
- "reasoning_model": False,
329
- },
330
- "gpt-4.1-mini": {
331
- "id": "gpt-4.1-mini",
332
- "name": "gpt-4.1-mini-2025-04-14",
333
- "api_base": "https://api.openai.com/v1",
334
- "api_key_env_var": "OPENAI_API_KEY",
335
- "supports_json": True,
336
- "supports_logprobs": True,
337
- "supports_responses": True,
338
- "api_spec": "openai",
339
- "input_cost": 0.4,
340
- "output_cost": 1.6,
341
- "requests_per_minute": 20,
342
- "tokens_per_minute": 100_000,
343
- "reasoning_model": False,
344
- },
345
- "gpt-4.1-nano": {
346
- "id": "gpt-4.1-nano",
347
- "name": "gpt-4.1-nano-2025-04-14",
348
- "api_base": "https://api.openai.com/v1",
349
- "api_key_env_var": "OPENAI_API_KEY",
350
- "supports_json": True,
351
- "supports_logprobs": True,
352
- "supports_responses": True,
353
- "api_spec": "openai",
354
- "input_cost": 0.1,
355
- "output_cost": 0.4,
356
- "requests_per_minute": 20,
357
- "tokens_per_minute": 100_000,
358
- "reasoning_model": False,
359
- },
360
- "gpt-4.5": {
361
- "id": "gpt-4.5",
362
- "name": "gpt-4.5-preview-2025-02-27",
363
- "api_base": "https://api.openai.com/v1",
364
- "api_key_env_var": "OPENAI_API_KEY",
365
- "supports_json": False,
366
- "supports_logprobs": True,
367
- "supports_responses": True,
368
- "api_spec": "openai",
369
- "input_cost": 75.0,
370
- "output_cost": 150.0,
371
- "requests_per_minute": 20,
372
- "tokens_per_minute": 100_000,
373
- "reasoning_model": False,
374
- },
375
- "o3-mini": {
376
- "id": "o3-mini",
377
- "name": "o3-mini-2025-01-31",
378
- "api_base": "https://api.openai.com/v1",
379
- "api_key_env_var": "OPENAI_API_KEY",
380
- "supports_json": False,
381
- "supports_logprobs": True,
382
- "supports_responses": True,
383
- "api_spec": "openai",
384
- "input_cost": 1.1,
385
- "output_cost": 4.4,
386
- "requests_per_minute": 20,
387
- "tokens_per_minute": 100_000,
388
- "reasoning_model": True,
389
- },
390
- "o1": {
391
- "id": "o1",
392
- "name": "o1-2024-12-17",
393
- "api_base": "https://api.openai.com/v1",
394
- "api_key_env_var": "OPENAI_API_KEY",
395
- "supports_json": False,
396
- "supports_logprobs": True,
397
- "supports_responses": True,
398
- "api_spec": "openai",
399
- "input_cost": 15.0,
400
- "output_cost": 60.0,
401
- "requests_per_minute": 20,
402
- "tokens_per_minute": 100_000,
403
- "reasoning_model": True,
404
- },
405
- "o1-preview": {
406
- "id": "o1-preview",
407
- "name": "o1-preview-2024-09-12",
408
- "api_base": "https://api.openai.com/v1",
409
- "api_key_env_var": "OPENAI_API_KEY",
410
- "supports_json": False,
411
- "supports_logprobs": True,
412
- "supports_responses": True,
413
- "api_spec": "openai",
414
- "input_cost": 15.0,
415
- "output_cost": 60.0,
416
- "requests_per_minute": 20,
417
- "tokens_per_minute": 100_000,
418
- "reasoning_model": True,
419
- },
420
- "o1-mini": {
421
- "id": "o1-mini",
422
- "name": "o1-mini-2024-09-12",
423
- "api_base": "https://api.openai.com/v1",
424
- "api_key_env_var": "OPENAI_API_KEY",
425
- "supports_json": False,
426
- "supports_logprobs": True,
427
- "supports_responses": True,
428
- "api_spec": "openai",
429
- "input_cost": 3.0,
430
- "output_cost": 15.0,
431
- "requests_per_minute": 20,
432
- "tokens_per_minute": 100_000,
433
- "reasoning_model": True,
434
- },
435
- "gpt-4o": {
436
- "id": "gpt-4o",
437
- "name": "gpt-4o-2024-08-06",
438
- "api_base": "https://api.openai.com/v1",
439
- "api_key_env_var": "OPENAI_API_KEY",
440
- "supports_json": True,
441
- "supports_logprobs": True,
442
- "supports_responses": True,
443
- "api_spec": "openai",
444
- "input_cost": 5.0,
445
- "output_cost": 15.0,
446
- "requests_per_minute": 10_000,
447
- "tokens_per_minute": 30_000_000,
448
- },
449
- "gpt-4o-mini": {
450
- "id": "gpt-4o-mini",
451
- "name": "gpt-4o-mini-2024-07-18",
452
- "api_base": "https://api.openai.com/v1",
453
- "api_key_env_var": "OPENAI_API_KEY",
454
- "supports_json": True,
455
- "supports_logprobs": True,
456
- "supports_responses": True,
457
- "api_spec": "openai",
458
- "input_cost": 0.15,
459
- "output_cost": 0.6,
460
- "requests_per_minute": 60_000,
461
- "tokens_per_minute": 250_000_000,
462
- },
463
- "gpt-4o-mini-free": {
464
- "id": "gpt-4o-mini-free",
465
- "name": "gpt-4o-mini-2024-07-18-free",
466
- "api_base": "https://api.openai.com/v1",
467
- "api_key_env_var": "OPENAI_API_KEY",
468
- "supports_json": True,
469
- "supports_logprobs": True,
470
- "supports_responses": True,
471
- "api_spec": "openai",
472
- "input_cost": 0.0,
473
- "output_cost": 0.0,
474
- "requests_per_minute": 20_000,
475
- "tokens_per_minute": 50_000_000,
476
- },
477
- "gpt-3.5-turbo": {
478
- "id": "gpt-3.5-turbo",
479
- "name": "gpt-3.5-turbo-0125",
480
- "api_base": "https://api.openai.com/v1",
481
- "api_key_env_var": "OPENAI_API_KEY",
482
- "supports_json": True,
483
- "supports_logprobs": True,
484
- "supports_responses": True,
485
- "api_spec": "openai",
486
- "input_cost": 0.5,
487
- "output_cost": 1.5,
488
- "requests_per_minute": 40_000,
489
- "tokens_per_minute": 75_000_000,
490
- },
491
- "gpt-4-turbo": {
492
- "id": "gpt-4-turbo",
493
- "name": "gpt-4-turbo-2024-04-09",
494
- "api_base": "https://api.openai.com/v1",
495
- "api_key_env_var": "OPENAI_API_KEY",
496
- "supports_json": True,
497
- "supports_logprobs": True,
498
- "supports_responses": True,
499
- "api_spec": "openai",
500
- "input_cost": 10.0,
501
- "output_cost": 30.0,
502
- "requests_per_minute": 10_000,
503
- "tokens_per_minute": 1_500_000,
504
- },
505
- "gpt-4": {
506
- "id": "gpt-4",
507
- "name": "gpt-4-0613",
508
- "api_base": "https://api.openai.com/v1",
509
- "api_key_env_var": "OPENAI_API_KEY",
510
- "supports_json": False,
511
- "supports_logprobs": False,
512
- "supports_responses": True,
513
- "api_spec": "openai",
514
- "input_cost": 30.0,
515
- "output_cost": 60.0,
516
- "requests_per_minute": 10_000,
517
- "tokens_per_minute": 300_000,
518
- },
519
- "gpt-4-32k": {
520
- "id": "gpt-4-32k",
521
- "name": "gpt-4-32k-0613",
522
- "api_base": "https://api.openai.com/v1",
523
- "api_key_env_var": "OPENAI_API_KEY",
524
- "supports_json": False,
525
- "supports_logprobs": False,
526
- "supports_responses": True,
527
- "api_spec": "openai",
528
- "input_cost": 60.0,
529
- "output_cost": 120.0,
530
- "requests_per_minute": 1_000,
531
- "tokens_per_minute": 150_000,
532
- },
533
- # █████████ █████ █████ ███
534
- # ███░░░░░███ ░░███ ░░███ ░░░
535
- # ░███ ░███ ████████ ███████ ░███████ ████████ ██████ ████████ ████ ██████
536
- # ░███████████ ░░███░░███ ░░░███░ ░███░░███ ░░███░░███ ███░░███░░███░░███░░███ ███░░███
537
- # ░███░░░░░███ ░███ ░███ ░███ ░███ ░███ ░███ ░░░ ░███ ░███ ░███ ░███ ░███ ░███ ░░░
538
- # ░███ ░███ ░███ ░███ ░███ ███ ░███ ░███ ░███ ░███ ░███ ░███ ░███ ░███ ░███ ███
539
- # █████ █████ ████ █████ ░░█████ ████ █████ █████ ░░██████ ░███████ █████░░██████
540
- # ░░░░░ ░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░░░ ░███░░░ ░░░░░ ░░░░░░
541
- # ░███
542
- # █████
543
- # ░░░░░
544
- "claude-4-opus": {
545
- "id": "claude-4-opus",
546
- "name": "claude-opus-4-20250514",
547
- "api_base": "https://api.anthropic.com/v1",
548
- "api_key_env_var": "ANTHROPIC_API_KEY",
549
- "supports_json": False,
550
- "api_spec": "anthropic",
551
- "input_cost": 3.0,
552
- "output_cost": 15.0,
553
- "requests_per_minute": 4_000,
554
- "tokens_per_minute": 400_000,
555
- "reasoning_model": True,
556
- },
557
- "claude-4-sonnet": {
558
- "id": "claude-4-sonnet",
559
- "name": "claude-sonnet-4-20250514",
560
- "api_base": "https://api.anthropic.com/v1",
561
- "api_key_env_var": "ANTHROPIC_API_KEY",
562
- "supports_json": False,
563
- "api_spec": "anthropic",
564
- "input_cost": 3.0,
565
- "output_cost": 15.0,
566
- "requests_per_minute": 4_000,
567
- "tokens_per_minute": 400_000,
568
- },
569
- "claude-3.7-sonnet": {
570
- "id": "claude-3.7-sonnet",
571
- "name": "claude-3-7-sonnet-20250219",
572
- "api_base": "https://api.anthropic.com/v1",
573
- "api_key_env_var": "ANTHROPIC_API_KEY",
574
- "supports_json": False,
575
- "api_spec": "anthropic",
576
- "input_cost": 3.0,
577
- "output_cost": 15.0,
578
- "requests_per_minute": 4_000,
579
- "tokens_per_minute": 400_000,
580
- "reasoning_model": True,
581
- },
582
- "claude-3.6-sonnet": {
583
- "id": "claude-3.6-sonnet",
584
- "name": "claude-3-5-sonnet-20241022",
585
- "api_base": "https://api.anthropic.com/v1",
586
- "api_key_env_var": "ANTHROPIC_API_KEY",
587
- "supports_json": False,
588
- "api_spec": "anthropic",
589
- "input_cost": 3.0,
590
- "output_cost": 15.0,
591
- "requests_per_minute": 4_000,
592
- "tokens_per_minute": 400_000,
593
- },
594
- "claude-3.5-sonnet": {
595
- "id": "claude-3.5-sonnet",
596
- "name": "claude-3-5-sonnet-20240620",
597
- "api_base": "https://api.anthropic.com/v1",
598
- "api_key_env_var": "ANTHROPIC_API_KEY",
599
- "supports_json": False,
600
- "api_spec": "anthropic",
601
- "input_cost": 3.0,
602
- "output_cost": 15.0,
603
- "requests_per_minute": 4_000,
604
- "tokens_per_minute": 400_000,
605
- },
606
- "claude-3-opus": {
607
- "id": "claude-3-opus",
608
- "name": "claude-3-opus-20240229",
609
- "api_base": "https://api.anthropic.com/v1",
610
- "api_key_env_var": "ANTHROPIC_API_KEY",
611
- "supports_json": False,
612
- "api_spec": "anthropic",
613
- "input_cost": 15.0,
614
- "output_cost": 75.0,
615
- "requests_per_minute": 4_000,
616
- "tokens_per_minute": 400_000,
617
- },
618
- "claude-3-sonnet": {
619
- "id": "claude-3-sonnet",
620
- "name": "claude-3-sonnet-20240229",
621
- "api_base": "https://api.anthropic.com/v1",
622
- "api_key_env_var": "ANTHROPIC_API_KEY",
623
- "supports_json": False,
624
- "api_spec": "anthropic",
625
- "input_cost": 15.0,
626
- "output_cost": 75.0,
627
- "requests_per_minute": 4_000,
628
- "tokens_per_minute": 400_000,
629
- },
630
- "claude-3.5-haiku": {
631
- "id": "claude-3.5-haiku",
632
- "name": "claude-3-5-haiku-20241022",
633
- "api_base": "https://api.anthropic.com/v1",
634
- "api_key_env_var": "ANTHROPIC_API_KEY",
635
- "supports_json": False,
636
- "api_spec": "anthropic",
637
- "input_cost": 1.00,
638
- "output_cost": 5.00,
639
- "requests_per_minute": 20_000,
640
- "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
641
- },
642
- "claude-3-haiku": {
643
- "id": "claude-3-haiku",
644
- "name": "claude-3-haiku-20240307",
645
- "api_base": "https://api.anthropic.com/v1",
646
- "api_key_env_var": "ANTHROPIC_API_KEY",
647
- "supports_json": False,
648
- "api_spec": "anthropic",
649
- "input_cost": 0.25,
650
- "output_cost": 1.25,
651
- "requests_per_minute": 10_000,
652
- "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
653
- },
654
- # █████ █████ █████
655
- # ░░███ ░░███ ░░███
656
- # ░███ ░███ ██████ ████████ ███████ ██████ █████ █████
657
- # ░███ ░███ ███░░███░░███░░███░░░███░ ███░░███░░███ ░░███
658
- # ░░███ ███ ░███████ ░███ ░░░ ░███ ░███████ ░░░█████░
659
- # ░░░█████░ ░███░░░ ░███ ░███ ███░███░░░ ███░░░███
660
- # ░░███ ░░██████ █████ ░░█████ ░░██████ █████ █████
661
- # ░░░ ░░░░░░ ░░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░
662
- # "claude-haiku-vertex": {
663
- # "id": "claude-haiku-vertex",
664
- # "name": "claude-3-haiku@20240307",
665
- # "regions": ["europe-west4", "us-central1"],
666
- # "api_base": "",
667
- # "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
668
- # "supports_json": False,
669
- # "api_spec": "vertex_anthropic",
670
- # "input_cost": 0.25,
671
- # "output_cost": 1.25,
672
- # "requests_per_minute": 120,
673
- # "tokens_per_minute": None,
674
- # },
675
- # "claude-sonnet-vertex": {
676
- # "id": "claude-sonnet-vertex",
677
- # "name": "claude-3-sonnet@20240229",
678
- # "regions": ["us-central1", "asia-southeast1"],
679
- # "api_base": "",
680
- # "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
681
- # "supports_json": False,
682
- # "api_spec": "vertex_anthropic",
683
- # "input_cost": 3.0,
684
- # "output_cost": 15.0,
685
- # "requests_per_minute": 120,
686
- # "tokens_per_minute": None,
687
- # },
688
- # "claude-opus-vertex": {
689
- # "id": "claude-opus-vertex",
690
- # "name": "claude-3-opus@20240229",
691
- # "regions": ["us-east5"],
692
- # "api_base": "",
693
- # "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
694
- # "supports_json": False,
695
- # "api_spec": "vertex_anthropic",
696
- # "input_cost": 15.0,
697
- # "output_cost": 75.0,
698
- # "requests_per_minute": 120,
699
- # "tokens_per_minute": None,
700
- # },
701
- # "gemini-2.5-pro-vertex": {
702
- # "id": "gemini-2.5-pro",
703
- # "name": "gemini-2.5-pro-preview-05-06",
704
- # "api_base": "",
705
- # "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
706
- # "supports_json": True,
707
- # "supports_logprobs": False,
708
- # "api_spec": "vertex_gemini",
709
- # "input_cost": 1.25,
710
- # "output_cost": 10.0,
711
- # "requests_per_minute": 20,
712
- # "tokens_per_minute": 100_000,
713
- # "reasoning_model": True,
714
- # },
715
- # "gemini-2.5-flash-vertex": {
716
- # "id": "gemini-2.5-flash",
717
- # "name": "gemini-2.5-flash-preview-05-20",
718
- # "api_base": "",
719
- # "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
720
- # "supports_json": True,
721
- # "supports_logprobs": False,
722
- # "api_spec": "vertex_gemini",
723
- # "input_cost": 0.15,
724
- # "output_cost": 0.6,
725
- # "requests_per_minute": 20,
726
- # "tokens_per_minute": 100_000,
727
- # "reasoning_model": True,
728
- # },
729
- # "gemini-2.0-flash-vertex": {
730
- # "id": "gemini-2.0-flash",
731
- # "name": "gemini-2.0-flash",
732
- # "api_base": "",
733
- # "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
734
- # "supports_json": True,
735
- # "supports_logprobs": False,
736
- # "api_spec": "vertex_gemini",
737
- # "input_cost": 0.10,
738
- # "output_cost": 0.40,
739
- # "requests_per_minute": 20,
740
- # "tokens_per_minute": 100_000,
741
- # "reasoning_model": False,
742
- # },
743
- # "gemini-2.0-flash-lite-vertex": {
744
- # "id": "gemini-2.0-flash-lite",
745
- # "name": "gemini-2.0-flash-lite",
746
- # "api_base": "",
747
- # "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
748
- # "supports_json": True,
749
- # "supports_logprobs": False,
750
- # "api_spec": "vertex_gemini",
751
- # "input_cost": 0.075,
752
- # "output_cost": 0.30,
753
- # "requests_per_minute": 20,
754
- # "tokens_per_minute": 100_000,
755
- # "reasoning_model": False,
756
- # },
757
- # ███████████ █████ █████
758
- # ░░███░░░░░███ ░░███ ░░███
759
- # ░███ ░███ ██████ ███████ ████████ ██████ ██████ ░███ █████
760
- # ░██████████ ███░░███ ███░░███ ░░███░░███ ███░░███ ███░░███ ░███░░███
761
- # ░███░░░░░███░███████ ░███ ░███ ░███ ░░░ ░███ ░███░███ ░░░ ░██████░
762
- # ░███ ░███░███░░░ ░███ ░███ ░███ ░███ ░███░███ ███ ░███░░███
763
- # ███████████ ░░██████ ░░████████ █████ ░░██████ ░░██████ ████ █████
764
- # ░░░░░░░░░░░ ░░░░░░ ░░░░░░░░ ░░░░░ ░░░░░░ ░░░░░░ ░░░░ ░░░░░
765
- "claude-3-haiku-bedrock": {
766
- "id": "claude-3-haiku-bedrock",
767
- "name": "us.anthropic.claude-3-haiku-20240307-v1:0",
768
- "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
769
- "api_base": "",
770
- "api_key_env_var": "",
771
- "api_spec": "bedrock",
772
- "input_cost": 0.25,
773
- "output_cost": 1.25,
774
- "requests_per_minute": 4_000,
775
- "tokens_per_minute": 8_000_000,
776
- },
777
- "claude-3.5-haiku-bedrock": {
778
- "id": "claude-3.5-haiku-bedrock",
779
- "name": "us.anthropic.claude-3-5-haiku-20241022-v1:0",
780
- "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
781
- "api_base": "",
782
- "api_key_env_var": "",
783
- "api_spec": "bedrock",
784
- "input_cost": 0.25,
785
- "output_cost": 1.25,
786
- "requests_per_minute": 4_000,
787
- "tokens_per_minute": 8_000_000,
788
- },
789
- "claude-3.5-sonnet-bedrock": {
790
- "id": "claude-3.5-sonnet-bedrock",
791
- "name": "us.anthropic.claude-3-5-sonnet-20240620-v1:0",
792
- "regions": ["us-east-1", "us-west-2"],
793
- "api_base": "",
794
- "api_key_env_var": "",
795
- "api_spec": "bedrock",
796
- "input_cost": 3.0,
797
- "output_cost": 15.0,
798
- "requests_per_minute": 4_000,
799
- "tokens_per_minute": 400_000,
800
- "reasoning_model": False,
801
- },
802
- "claude-3.6-sonnet-bedrock": {
803
- "id": "claude-3.6-sonnet-bedrock",
804
- "name": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
805
- "regions": ["us-east-1", "us-west-2", "us-east-2"],
806
- "api_base": "",
807
- "api_key_env_var": "",
808
- "api_spec": "bedrock",
809
- "input_cost": 3.0,
810
- "output_cost": 15.0,
811
- "requests_per_minute": 4_000,
812
- "tokens_per_minute": 400_000,
813
- "reasoning_model": False,
814
- },
815
- "claude-3.7-sonnet-bedrock": {
816
- "id": "claude-3.7-sonnet-bedrock",
817
- "name": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
818
- "regions": ["us-east-1", "us-west-2", "eu-west-1"],
819
- "api_base": "",
820
- "api_key_env_var": "",
821
- "api_spec": "bedrock",
822
- "input_cost": 3.0,
823
- "output_cost": 15.0,
824
- "requests_per_minute": 4_000,
825
- "tokens_per_minute": 400_000,
826
- "reasoning_model": True,
827
- },
828
- "claude-4-sonnet-bedrock": {
829
- "id": "claude-4-sonnet-bedrock",
830
- "name": "us.anthropic.claude-sonnet-4-20250514-v1:0",
831
- "regions": ["us-east-1", "us-west-2", "us-east-2"],
832
- "api_base": "",
833
- "api_key_env_var": "",
834
- "api_spec": "bedrock",
835
- "input_cost": 3.0,
836
- "output_cost": 15.0,
837
- "requests_per_minute": 4_000,
838
- "tokens_per_minute": 400_000,
839
- "reasoning_model": True,
840
- },
841
- "claude-4-opus-bedrock": {
842
- "id": "claude-4-opus-bedrock",
843
- "name": "us.anthropic.claude-opus-4-20250514-v1:0",
844
- "regions": ["us-east-1", "us-west-2", "us-east-2"],
845
- "api_base": "",
846
- "api_key_env_var": "",
847
- "api_spec": "bedrock",
848
- "input_cost": 3.0,
849
- "output_cost": 15.0,
850
- "requests_per_minute": 4_000,
851
- "tokens_per_minute": 400_000,
852
- "reasoning_model": True,
853
- },
854
- # "mistral-7b-bedrock": {
855
- # "id": "mistral-7b-bedrock",
856
- # "name": "mistral.mistral-7b-instruct-v0:2",
857
- # "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
858
- # "api_base": "",
859
- # "api_key_env_var": "",
860
- # "api_spec": "bedrock_mistral",
861
- # "input_cost": 0.15,
862
- # "output_cost": 0.2,
863
- # "requests_per_minute": 3_200,
864
- # "tokens_per_minute": 1_200_000,
865
- # },
866
- # "mixtral-8x7b-bedrock": {
867
- # "id": "mixtral-8x7b-bedrock",
868
- # "name": "mistral.mixtral-8x7b-instruct-v0:1",
869
- # "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
870
- # "api_base": "",
871
- # "api_key_env_var": "",
872
- # "api_spec": "bedrock_mistral",
873
- # "input_cost": 0.45,
874
- # "output_cost": 0.7,
875
- # "requests_per_minute": 1_600,
876
- # "tokens_per_minute": 1_200_000,
877
- # },
878
- # "mistral-large-bedrock": {
879
- # "id": "mistral-large-bedrock",
880
- # "name": "mistral.mistral-large-2402-v1:0",
881
- # "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
882
- # "api_base": "",
883
- # "api_key_env_var": "",
884
- # "api_spec": "bedrock_mistral",
885
- # "input_cost": 8.0,
886
- # "output_cost": 24.0,
887
- # "requests_per_minute": 1_600,
888
- # "tokens_per_minute": 1_200_000,
889
- # },
890
- # ███████████ █████ █████
891
- # ░█░░░███░░░█ ░░███ ░░███
892
- # ░ ░███ ░ ██████ ███████ ██████ ███████ ░███████ ██████ ████████
893
- # ░███ ███░░███ ███░░███ ███░░███░░░███░ ░███░░███ ███░░███░░███░░███
894
- # ░███ ░███ ░███░███ ░███░███████ ░███ ░███ ░███ ░███████ ░███ ░░░
895
- # ░███ ░███ ░███░███ ░███░███░░░ ░███ ███ ░███ ░███ ░███░░░ ░███
896
- # █████ ░░██████ ░░███████░░██████ ░░█████ ████ █████░░██████ █████
897
- # ░░░░░ ░░░░░░ ░░░░░███ ░░░░░░ ░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░
898
- # ███ ░███
899
- # ░░██████
900
- # ░░░░░░
901
- # tbh only reason to use these are that they're cheap, but all worse than haiku
902
- "deepseek-r1-together": {
903
- "id": "deepseek-r1-together",
904
- "name": "deepseek-ai/DeepSeek-R1",
905
- "api_base": "https://api.together.xyz/v1",
906
- "api_key_env_var": "TOGETHER_API_KEY",
907
- "supports_json": False,
908
- "api_spec": "openai",
909
- "input_cost": 3.0,
910
- "output_cost": 7.0,
911
- "requests_per_minute": None,
912
- "tokens_per_minute": None,
913
- },
914
- "deepseek-v3-together": {
915
- "id": "deepseek-v3-together",
916
- "name": "deepseek-ai/DeepSeek-V3",
917
- "api_base": "https://api.together.xyz/v1",
918
- "api_key_env_var": "TOGETHER_API_KEY",
919
- "supports_json": False,
920
- "api_spec": "openai",
921
- "input_cost": 1.25,
922
- "output_cost": 1.25,
923
- "requests_per_minute": None,
924
- "tokens_per_minute": None,
925
- },
926
- "qwen-3-235b-together": {
927
- "id": "qwen-3-235b-together",
928
- "name": "Qwen/Qwen3-235B-A22B-fp8",
929
- "api_base": "https://api.together.xyz/v1",
930
- "api_key_env_var": "TOGETHER_API_KEY",
931
- "supports_json": False,
932
- "api_spec": "openai",
933
- "input_cost": 0.2,
934
- "output_cost": 0.6,
935
- "requests_per_minute": None,
936
- "tokens_per_minute": None,
937
- },
938
- "qwen-2.5-vl-together": {
939
- "id": "qwen-2.5-vl-together",
940
- "name": "Qwen/Qwen2.5-VL-72B-Instruct",
941
- "api_base": "https://api.together.xyz/v1",
942
- "api_key_env_var": "TOGETHER_API_KEY",
943
- "supports_json": False,
944
- "api_spec": "openai",
945
- "input_cost": 1.95,
946
- "output_cost": 8.0,
947
- "requests_per_minute": None,
948
- "tokens_per_minute": None,
949
- },
950
- "llama-4-maverick-together": {
951
- "id": "llama-4-maverick-together",
952
- "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
953
- "api_base": "https://api.together.xyz/v1",
954
- "api_key_env_var": "TOGETHER_API_KEY",
955
- "supports_json": False,
956
- "api_spec": "openai",
957
- "input_cost": 0.27,
958
- "output_cost": 0.85,
959
- "requests_per_minute": None,
960
- "tokens_per_minute": None,
961
- },
962
- "llama-4-scout-together": {
963
- "id": "llama-4-scout-together",
964
- "name": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
965
- "api_base": "https://api.together.xyz/v1",
966
- "api_key_env_var": "TOGETHER_API_KEY",
967
- "supports_json": False,
968
- "api_spec": "openai",
969
- "input_cost": 0.18,
970
- "output_cost": 0.59,
971
- "requests_per_minute": None,
972
- "tokens_per_minute": None,
973
- },
974
- # █████████ █████
975
- # ███░░░░░███ ░░███
976
- # ███ ░░░ ██████ ░███████ ██████ ████████ ██████
977
- # ░███ ███░░███ ░███░░███ ███░░███░░███░░███ ███░░███
978
- # ░███ ░███ ░███ ░███ ░███ ░███████ ░███ ░░░ ░███████
979
- # ░░███ ███░███ ░███ ░███ ░███ ░███░░░ ░███ ░███░░░
980
- # ░░█████████ ░░██████ ████ █████░░██████ █████ ░░██████
981
- # ░░░░░░░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░░
982
- "aya-expanse-8b": {
983
- "id": "aya-expanse-8b",
984
- "name": "c4ai-aya-expanse-8b",
985
- "api_base": "https://api.cohere.ai/compatibility/v1",
986
- "api_key_env_var": "COHERE_API_KEY",
987
- "api_spec": "openai",
988
- "input_cost": 0.5,
989
- "output_cost": 1.5,
990
- "requests_per_minute": 10_000,
991
- "tokens_per_minute": None,
992
- },
993
- "aya-expanse-32b": {
994
- "id": "aya-expanse-32b",
995
- "name": "c4ai-aya-expanse-32b",
996
- "api_base": "https://api.cohere.ai/compatibility/v1",
997
- "api_key_env_var": "COHERE_API_KEY",
998
- "api_spec": "openai",
999
- "input_cost": 0.5,
1000
- "output_cost": 1.5,
1001
- "requests_per_minute": 10_000,
1002
- "tokens_per_minute": None,
1003
- },
1004
- "aya-vision-8b": {
1005
- "id": "aya-vision-8b",
1006
- "name": "c4ai-aya-vision-8b",
1007
- "api_base": "https://api.cohere.ai/compatibility/v1",
1008
- "api_key_env_var": "COHERE_API_KEY",
1009
- "api_spec": "openai",
1010
- "input_cost": 0.5,
1011
- "output_cost": 1.5,
1012
- "requests_per_minute": 10_000,
1013
- "tokens_per_minute": None,
1014
- },
1015
- "aya-vision-32b": {
1016
- "id": "aya-vision-32b",
1017
- "name": "c4ai-aya-vision-32b",
1018
- "api_base": "https://api.cohere.ai/compatibility/v1",
1019
- "api_key_env_var": "COHERE_API_KEY",
1020
- "api_spec": "openai",
1021
- "input_cost": 0.5,
1022
- "output_cost": 1.5,
1023
- "requests_per_minute": 10_000,
1024
- "tokens_per_minute": None,
1025
- },
1026
- "command-a": {
1027
- "id": "command-a",
1028
- "name": "command-a-03-2025",
1029
- "api_base": "https://api.cohere.ai/compatibility/v1",
1030
- "api_key_env_var": "COHERE_API_KEY",
1031
- "api_spec": "openai",
1032
- "input_cost": 0.5,
1033
- "output_cost": 1.5,
1034
- "requests_per_minute": 10_000,
1035
- "tokens_per_minute": None,
1036
- },
1037
- "command-r-7b": {
1038
- "id": "command-r-cohere",
1039
- "name": "command-r7b-12-2024",
1040
- "api_base": "https://api.cohere.ai/compatibility/v1",
1041
- "api_key_env_var": "COHERE_API_KEY",
1042
- "api_spec": "openai",
1043
- "input_cost": 0.5,
1044
- "output_cost": 1.5,
1045
- "requests_per_minute": 10_000,
1046
- "tokens_per_minute": None,
1047
- },
1048
- "command-r": {
1049
- "id": "command-r",
1050
- "name": "command-r-08-2024",
1051
- "api_base": "https://api.cohere.ai/compatibility/v1",
1052
- "api_key_env_var": "COHERE_API_KEY",
1053
- "api_spec": "openai",
1054
- "input_cost": 0.5,
1055
- "output_cost": 1.5,
1056
- "requests_per_minute": 10_000,
1057
- "tokens_per_minute": None,
1058
- },
1059
- "command-r-plus": {
1060
- "id": "command-r-plus",
1061
- "name": "command-r-plus-04-2024",
1062
- "api_base": "https://api.cohere.ai/compatibility/v1",
1063
- "api_key_env_var": "COHERE_API_KEY",
1064
- "api_spec": "openai",
1065
- "input_cost": 3.0,
1066
- "output_cost": 15.0,
1067
- "requests_per_minute": 10_000,
1068
- "tokens_per_minute": None,
1069
- },
1070
- # ██████ ██████ ███ █████ ████
1071
- # ░░██████ ██████ ░░░ ░░███ ░░███
1072
- # ░███░█████░███ ████ █████ ███████ ████████ ██████ ░███
1073
- # ░███░░███ ░███ ░░███ ███░░ ░░░███░ ░░███░░███ ░░░░░███ ░███
1074
- # ░███ ░░░ ░███ ░███ ░░█████ ░███ ░███ ░░░ ███████ ░███
1075
- # ░███ ░███ ░███ ░░░░███ ░███ ███ ░███ ███░░███ ░███
1076
- # █████ █████ █████ ██████ ░░█████ █████ ░░████████ █████
1077
- # ░░░░░ ░░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░ ░░░░░░░░ ░░░░░
1078
- "mistral-medium": {
1079
- "id": "mistral-medium",
1080
- "name": "mistral-medium-latest",
1081
- "api_base": "https://api.mistral.ai/v1",
1082
- "api_key_env_var": "MISTRAL_API_KEY",
1083
- "supports_json": True,
1084
- "api_spec": "mistral",
1085
- "input_cost": 0.4,
1086
- "output_cost": 2.0,
1087
- },
1088
- "mistral-large": {
1089
- "id": "mistral-large",
1090
- "name": "mistral-large-latest",
1091
- "api_base": "https://api.mistral.ai/v1",
1092
- "api_key_env_var": "MISTRAL_API_KEY",
1093
- "supports_json": True,
1094
- "api_spec": "mistral",
1095
- "input_cost": 2.0,
1096
- "output_cost": 6.0,
1097
- },
1098
- "pixtral-large": {
1099
- "id": "pixtral-large",
1100
- "name": "pixtral-large-latest",
1101
- "api_base": "https://api.mistral.ai/v1",
1102
- "api_key_env_var": "MISTRAL_API_KEY",
1103
- "supports_json": True,
1104
- "api_spec": "mistral",
1105
- "input_cost": 2.0,
1106
- "output_cost": 6.0,
1107
- },
1108
- "mistral-small": {
1109
- "id": "mistral-small",
1110
- "name": "mistral-small-latest",
1111
- "api_base": "https://api.mistral.ai/v1",
1112
- "api_key_env_var": "MISTRAL_API_KEY",
1113
- "supports_json": True,
1114
- "api_spec": "mistral",
1115
- "input_cost": 0.1,
1116
- "output_cost": 0.3,
1117
- },
1118
- "devstral-small": {
1119
- "id": "devstral-small",
1120
- "name": "devstral-small-2505",
1121
- "api_base": "https://api.mistral.ai/v1",
1122
- "api_key_env_var": "MISTRAL_API_KEY",
1123
- "supports_json": True,
1124
- "api_spec": "mistral",
1125
- "input_cost": 0.1,
1126
- "output_cost": 0.3,
1127
- },
1128
- "codestral": {
1129
- "id": "codestral",
1130
- "name": "codestral-latest",
1131
- "api_base": "https://api.mistral.ai/v1",
1132
- "api_key_env_var": "MISTRAL_API_KEY",
1133
- "supports_json": True,
1134
- "api_spec": "mistral",
1135
- "input_cost": 0.2,
1136
- "output_cost": 0.6,
1137
- },
1138
- "pixtral-12b": {
1139
- "id": "pixtral-12b",
1140
- "name": "pixtral-12b",
1141
- "api_base": "https://api.mistral.ai/v1",
1142
- "api_key_env_var": "MISTRAL_API_KEY",
1143
- "supports_json": True,
1144
- "api_spec": "mistral",
1145
- "input_cost": 0.1,
1146
- "output_cost": 0.3,
1147
- },
1148
- "mistral-nemo": {
1149
- "id": "mistral-nemo",
1150
- "name": "open-mistral-nemo",
1151
- "api_base": "https://api.mistral.ai/v1",
1152
- "api_key_env_var": "MISTRAL_API_KEY",
1153
- "supports_json": True,
1154
- "api_spec": "mistral",
1155
- "input_cost": 0.1,
1156
- "output_cost": 0.3,
1157
- },
1158
- "ministral-8b": {
1159
- "id": "ministral-8b",
1160
- "name": "ministral-8b-latest",
1161
- "api_base": "https://api.mistral.ai/v1",
1162
- "api_key_env_var": "MISTRAL_API_KEY",
1163
- "supports_json": True,
1164
- "api_spec": "mistral",
1165
- "input_cost": 0.7,
1166
- "output_cost": 0.7,
1167
- },
1168
- "mixtral-8x22b": {
1169
- "id": "mixtral-8x22b",
1170
- "name": "open-mixtral-8x22b",
1171
- "api_base": "https://api.mistral.ai/v1",
1172
- "api_key_env_var": "MISTRAL_API_KEY",
1173
- "supports_json": True,
1174
- "api_spec": "mistral",
1175
- "input_cost": 2.0,
1176
- "output_cost": 6.0,
1177
- },
1178
- # ______ _
1179
- # (______) | |
1180
- # _ _ _____ _____ ____ ___ _____ _____| | _
1181
- # | | | | ___ | ___ | _ \ /___) ___ | ___ | |_/ )
1182
- # | |__/ /| ____| ____| |_| |___ | ____| ____| _ (
1183
- # |_____/ |_____)_____) __/(___/|_____)_____)_| \_)
1184
- # |_|
1185
- "deepseek-chat": {
1186
- "id": "deepseek-chat",
1187
- "name": "deepseek-chat",
1188
- "api_base": "https://api.deepseek.com/v1",
1189
- "api_key_env_var": "DEEPSEEK_API_KEY",
1190
- "api_spec": "openai",
1191
- "input_cost": 0.27,
1192
- "output_cost": 1.10,
1193
- },
1194
- "deepseek-r1": {
1195
- "id": "deepseek-r1",
1196
- "name": "deepseek-reasoner",
1197
- "api_base": "https://api.deepseek.com/v1",
1198
- "api_key_env_var": "DEEPSEEK_API_KEY",
1199
- "api_spec": "openai",
1200
- "input_cost": 0.55,
1201
- "output_cost": 2.19,
1202
- },
1203
- }
1204
-
1205
-
1206
- @dataclass
1207
- class APIModel:
1208
- id: str
1209
- name: str
1210
- api_base: str
1211
- api_key_env_var: str
1212
- api_spec: str
1213
- input_cost: float | None = 0 # $ per million input tokens
1214
- output_cost: float | None = 0 # $ per million output tokens
1215
- supports_json: bool = False
1216
- supports_logprobs: bool = False
1217
- supports_responses: bool = False
1218
- reasoning_model: bool = False
1219
- regions: list[str] | dict[str, int] = field(default_factory=list)
1220
- tokens_per_minute: int | None = None
1221
- requests_per_minute: int | None = None
1222
- gpus: list[str] | None = None
1223
-
1224
- @classmethod
1225
- def from_registry(cls, name: str):
1226
- if name not in registry:
1227
- raise ValueError(f"Model {name} not found in registry")
1228
- cfg = registry[name]
1229
- if isinstance(cfg, APIModel):
1230
- return cfg
1231
- return cls(**cfg)
1232
-
1233
- def sample_region(self):
1234
- if isinstance(self.regions, list):
1235
- regions = self.regions
1236
- weights = [1] * len(regions)
1237
- elif isinstance(self.regions, dict):
1238
- regions = list(self.regions.keys())
1239
- weights = self.regions.values()
1240
- else:
1241
- raise ValueError("no regions to sample")
1242
- random.sample(regions, 1, counts=weights)[0]
1243
-
1244
- def make_request(self, context: RequestContext): # -> "APIRequestBase"
1245
- from .api_requests.common import CLASSES
1246
-
1247
- api_spec = self.api_spec
1248
- if (
1249
- context.use_responses_api
1250
- and self.supports_responses
1251
- and api_spec == "openai"
1252
- ):
1253
- api_spec = "openai-responses"
1254
-
1255
- request_class = CLASSES.get(api_spec, None)
1256
- if request_class is None:
1257
- raise ValueError(f"Unsupported API spec: {api_spec}")
1258
- return request_class(context=context)
1259
-
1260
-
1261
# Global model registry mapping model id -> APIModel. Populated at import
# time from BUILTIN_MODELS (see the loop at the bottom of this module) and
# extended at runtime via register_model().
registry: dict[str, APIModel] = {}
1262
-
1263
-
1264
def register_model(
    id: str,
    name: str,
    api_base: str,
    api_key_env_var: str,
    api_spec: str,
    input_cost: float | None = 0,  # $ per million input tokens
    output_cost: float | None = 0,  # $ per million output tokens
    supports_json: bool = False,
    supports_logprobs: bool = False,
    supports_responses: bool = False,
    reasoning_model: bool = False,
    regions: list[str] | dict[str, int] | None = None,
    tokens_per_minute: int | None = None,
    requests_per_minute: int | None = None,
    gpus: list[str] | None = None,
) -> APIModel:
    """Register a model configuration and return the created APIModel.

    The model is stored in the module-level ``registry`` keyed by ``id``,
    replacing any existing entry with the same id.

    Args:
        regions: optional region list (uniform sampling) or mapping of
            region -> integer weight; defaults to no regions.
        gpus: optional GPU list, passed through to ``APIModel.gpus``
            (new, backward-compatible parameter).

    BUG FIX: the original default was ``field(default_factory=list)``.
    ``dataclasses.field()`` used as a plain function default returns a
    Field sentinel object — so callers omitting ``regions`` stored a
    Field instance on the model, breaking ``sample_region``'s isinstance
    checks. Use ``None`` and substitute a fresh list per call instead.
    """
    model = APIModel(
        id=id,
        name=name,
        api_base=api_base,
        api_key_env_var=api_key_env_var,
        api_spec=api_spec,
        input_cost=input_cost,
        output_cost=output_cost,
        supports_json=supports_json,
        supports_logprobs=supports_logprobs,
        supports_responses=supports_responses,
        reasoning_model=reasoning_model,
        regions=regions if regions is not None else [],
        tokens_per_minute=tokens_per_minute,
        requests_per_minute=requests_per_minute,
        gpus=gpus,
    )
    registry[model.id] = model
    return model
1299
-
1300
-
1301
# Seed the global registry with every built-in model at import time.
for _builtin_cfg in BUILTIN_MODELS.values():
    register_model(**_builtin_cfg)
1304
-
1305
- # print("Valid models:", registry.keys())