lm-deluge 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic. Click here for more details.

lm_deluge/models.py ADDED
@@ -0,0 +1,957 @@
1
+ import random
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+ from .gemini_limits import gemini_1_5_pro_limits, gemini_flash_limits
5
+
6
# Registry of known models. Each entry maps a short key to the keyword
# arguments for APIModel (see the dataclass below): provider endpoint,
# credential env var, capabilities, $/1M-token pricing, and rate limits.
registry = {
    # ------------------------------------------------------------------
    # Meta (Llama API, OpenAI-compatible endpoint)
    # ------------------------------------------------------------------
    "llama-4-scout": {
        "id": "llama-4-scout",
        "name": "Llama-4-Scout-17B-16E-Instruct-FP8",
        "api_base": "https://api.llama.com/compat/v1",
        "api_key_env_var": "META_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.0,
        "output_cost": 0.0,
        "requests_per_minute": 3_000,
        "tokens_per_minute": 1_000_000,
        "reasoning_model": False,
    },
    "llama-4-maverick": {
        # fixed: previously carried the copy-pasted id "llama-4-scout"
        "id": "llama-4-maverick",
        "name": "Llama-4-Maverick-17B-128E-Instruct-FP8",
        "api_base": "https://api.llama.com/compat/v1",
        "api_key_env_var": "META_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.0,
        "output_cost": 0.0,
        "requests_per_minute": 3_000,
        "tokens_per_minute": 1_000_000,
        "reasoning_model": False,
    },
    "llama-3.3-70B": {
        "id": "llama-3.3-70B",
        "name": "Llama-3.3-70B-Instruct",
        "api_base": "https://api.llama.com/compat/v1",
        "api_key_env_var": "META_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.0,
        "output_cost": 0.0,
        "requests_per_minute": 3_000,
        "tokens_per_minute": 1_000_000,
        "reasoning_model": False,
    },
    "llama-3.3-8B": {
        "id": "llama-3.3-8B",
        "name": "Llama-3.3-8B-Instruct",
        "api_base": "https://api.llama.com/compat/v1",
        "api_key_env_var": "META_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.0,
        "output_cost": 0.0,
        "requests_per_minute": 3_000,
        "tokens_per_minute": 1_000_000,
        "reasoning_model": False,
    },
    # ------------------------------------------------------------------
    # xAI (Grok)
    # ------------------------------------------------------------------
    "grok-3": {
        "id": "grok-3",
        "name": "grok-3-latest",
        "api_base": "https://api.x.ai/v1",
        "api_key_env_var": "GROK_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 2.0,
        "output_cost": 8.0,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": False,
    },
    "grok-3-mini": {
        "id": "grok-3-mini",
        "name": "grok-3-mini-latest",
        "api_base": "https://api.x.ai/v1",
        "api_key_env_var": "GROK_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 2.0,
        "output_cost": 8.0,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": True,
    },
    # ------------------------------------------------------------------
    # Google AI Studio (Gemini) — OpenAI-compatible endpoints,
    # NOT Vertex (Vertex entries are further down).
    # ------------------------------------------------------------------
    "gemini-2.0-flash": {
        "id": "gemini-2.0-flash",
        "name": "gemini-2.0-flash",
        "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
        "api_key_env_var": "GEMINI_API_KEY",
        "supports_json": True,
        "supports_logprobs": False,
        "api_spec": "openai",
        "input_cost": 0.1,
        "output_cost": 0.4,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": False,
    },
    "gemini-2.0-flash-lite": {
        "id": "gemini-2.0-flash-lite",
        "name": "gemini-2.0-flash-lite",
        "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
        "api_key_env_var": "GEMINI_API_KEY",
        "supports_json": True,
        "supports_logprobs": False,
        "api_spec": "openai",
        "input_cost": 0.1,
        "output_cost": 0.4,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": False,
    },
    "gemini-2.5-pro": {
        "id": "gemini-2.5-pro-exp-03-25",
        "name": "gemini-2.5-pro-exp-03-25",
        "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
        "api_key_env_var": "GEMINI_API_KEY",
        "supports_json": True,
        "supports_logprobs": False,
        "api_spec": "openai",
        "input_cost": 0.1,
        "output_cost": 0.4,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        # NOTE(review): 2.5-pro is marketed as a thinking model — confirm
        # whether reasoning_model should be True before relying on it.
        "reasoning_model": False,
    },
    # ------------------------------------------------------------------
    # OpenAI
    # ------------------------------------------------------------------
    "gpt-4.1": {
        "id": "gpt-4.1",
        "name": "gpt-4.1-2025-04-14",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 2.0,
        "output_cost": 8.0,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": False,
    },
    "gpt-4.1-mini": {
        "id": "gpt-4.1-mini",
        "name": "gpt-4.1-mini-2025-04-14",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.4,
        "output_cost": 1.6,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": False,
    },
    "gpt-4.1-nano": {
        "id": "gpt-4.1-nano",
        "name": "gpt-4.1-nano-2025-04-14",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.1,
        "output_cost": 0.4,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": False,
    },
    "gpt-4.5": {
        "id": "gpt-4.5",
        "name": "gpt-4.5-preview-2025-02-27",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": False,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 75.0,
        "output_cost": 150.0,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": False,
    },
    "o3-mini": {
        "id": "o3-mini",
        "name": "o3-mini-2025-01-31",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": False,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 1.1,
        "output_cost": 4.4,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": True,
    },
    "o1": {
        "id": "o1",
        "name": "o1-2024-12-17",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": False,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 15.0,
        "output_cost": 60.0,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": True,
    },
    "o1-preview": {
        "id": "o1-preview",
        "name": "o1-preview-2024-09-12",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": False,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 15.0,
        "output_cost": 60.0,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": True,
    },
    "o1-mini": {
        "id": "o1-mini",
        "name": "o1-mini-2024-09-12",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": False,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 3.0,
        "output_cost": 15.0,
        "requests_per_minute": 20,
        "tokens_per_minute": 100_000,
        "reasoning_model": True,
    },
    "gpt-4o": {
        "id": "gpt-4o",
        "name": "gpt-4o-2024-08-06",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 5.0,
        "output_cost": 15.0,
        "requests_per_minute": 10_000,
        "tokens_per_minute": 30_000_000,
    },
    "gpt-4o-mini": {
        "id": "gpt-4o-mini",
        "name": "gpt-4o-mini-2024-07-18",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.15,
        "output_cost": 0.6,
        "requests_per_minute": 60_000,
        "tokens_per_minute": 250_000_000,
    },
    "gpt-4o-mini-free": {
        "id": "gpt-4o-mini-free",
        "name": "gpt-4o-mini-2024-07-18-free",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.0,
        "output_cost": 0.0,
        "requests_per_minute": 20_000,
        "tokens_per_minute": 50_000_000,
    },
    "gpt-3.5-turbo": {
        "id": "gpt-3.5-turbo",
        "name": "gpt-3.5-turbo-0125",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 0.5,
        "output_cost": 1.5,
        "requests_per_minute": 40_000,
        "tokens_per_minute": 75_000_000,
    },
    "gpt-4-turbo": {
        "id": "gpt-4-turbo",
        "name": "gpt-4-turbo-2024-04-09",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": True,
        "supports_logprobs": True,
        "api_spec": "openai",
        "input_cost": 10.0,
        "output_cost": 30.0,
        "requests_per_minute": 10_000,
        "tokens_per_minute": 1_500_000,
    },
    "gpt-4": {
        "id": "gpt-4",
        "name": "gpt-4-0613",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": False,
        "supports_logprobs": False,
        "api_spec": "openai",
        "input_cost": 30.0,
        "output_cost": 60.0,
        "requests_per_minute": 10_000,
        "tokens_per_minute": 300_000,
    },
    "gpt-4-32k": {
        "id": "gpt-4-32k",
        "name": "gpt-4-32k-0613",
        "api_base": "https://api.openai.com/v1",
        "api_key_env_var": "OPENAI_API_KEY",
        "supports_json": False,
        "supports_logprobs": False,
        "api_spec": "openai",
        "input_cost": 60.0,
        "output_cost": 120.0,
        "requests_per_minute": 1_000,
        "tokens_per_minute": 150_000,
    },
    # ------------------------------------------------------------------
    # Anthropic (first-party API)
    # ------------------------------------------------------------------
    "claude-haiku-anthropic": {
        "id": "claude-haiku-anthropic",
        "name": "claude-3-haiku-20240307",
        "api_base": "https://api.anthropic.com/v1",
        "api_key_env_var": "ANTHROPIC_API_KEY",
        "supports_json": False,
        "api_spec": "anthropic",
        "input_cost": 0.25,
        "output_cost": 1.25,
        "requests_per_minute": 10_000,
        "tokens_per_minute": 4_000_000,  # advertised limit; enforcement observed lower
    },
    "claude-haiku-anthropic-expensive": {
        "id": "claude-haiku-anthropic-expensive",
        "name": "claude-3-5-haiku-20241022",
        "api_base": "https://api.anthropic.com/v1",
        "api_key_env_var": "ANTHROPIC_API_KEY",
        "supports_json": False,
        "api_spec": "anthropic",
        "input_cost": 1.00,
        "output_cost": 5.00,
        "requests_per_minute": 20_000,
        "tokens_per_minute": 4_000_000,  # advertised limit; enforcement observed lower
    },
    "claude-sonnet-anthropic": {
        "id": "claude-sonnet-anthropic",
        # earlier snapshots: claude-3-5-sonnet-20241022 / -20240620 / claude-3-sonnet-20240229
        "name": "claude-3-7-sonnet-20250219",
        "api_base": "https://api.anthropic.com/v1",
        "api_key_env_var": "ANTHROPIC_API_KEY",
        "supports_json": False,
        "api_spec": "anthropic",
        "input_cost": 3.0,
        "output_cost": 15.0,
        "requests_per_minute": 4_000,
        "tokens_per_minute": 400_000,
        "reasoning_model": True,
    },
    "claude-3-6-sonnet-anthropic": {
        # NOTE(review): shares id "claude-sonnet-anthropic" with the entry above —
        # presumably a deliberate dated alias of the same logical model; confirm.
        "id": "claude-sonnet-anthropic",
        "name": "claude-3-5-sonnet-20241022",
        "api_base": "https://api.anthropic.com/v1",
        "api_key_env_var": "ANTHROPIC_API_KEY",
        "supports_json": False,
        "api_spec": "anthropic",
        "input_cost": 3.0,
        "output_cost": 15.0,
        "requests_per_minute": 4_000,
        "tokens_per_minute": 400_000,
    },
    "claude-3-5-sonnet-anthropic": {
        # NOTE(review): shares id "claude-sonnet-anthropic" — see note above; confirm.
        "id": "claude-sonnet-anthropic",
        "name": "claude-3-5-sonnet-20240620",
        "api_base": "https://api.anthropic.com/v1",
        "api_key_env_var": "ANTHROPIC_API_KEY",
        "supports_json": False,
        "api_spec": "anthropic",
        "input_cost": 3.0,
        "output_cost": 15.0,
        "requests_per_minute": 4_000,
        "tokens_per_minute": 400_000,
    },
    "claude-opus-anthropic": {
        "id": "claude-opus-anthropic",
        "name": "claude-3-opus-20240229",
        "api_base": "https://api.anthropic.com/v1",
        "api_key_env_var": "ANTHROPIC_API_KEY",
        "supports_json": False,
        "api_spec": "anthropic",
        "input_cost": 15.0,
        "output_cost": 75.0,
        "requests_per_minute": 4_000,
        "tokens_per_minute": 400_000,
    },
    # ------------------------------------------------------------------
    # Google Vertex AI (Anthropic + Gemini models)
    # ------------------------------------------------------------------
    "claude-haiku-vertex": {
        "id": "claude-haiku-vertex",
        "name": "claude-3-haiku@20240307",
        "regions": ["europe-west4", "us-central1"],
        "api_base": "",
        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
        "supports_json": False,
        "api_spec": "vertex_anthropic",
        "input_cost": 0.25,
        "output_cost": 1.25,
        "requests_per_minute": 120,
        "tokens_per_minute": None,
    },
    "claude-sonnet-vertex": {
        "id": "claude-sonnet-vertex",
        "name": "claude-3-sonnet@20240229",
        "regions": ["us-central1", "asia-southeast1"],
        "api_base": "",
        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
        "supports_json": False,
        "api_spec": "vertex_anthropic",
        "input_cost": 3.0,
        "output_cost": 15.0,
        "requests_per_minute": 120,
        "tokens_per_minute": None,
    },
    "claude-opus-vertex": {
        "id": "claude-opus-vertex",
        "name": "claude-3-opus@20240229",
        "regions": ["us-east5"],
        "api_base": "",
        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
        "supports_json": False,
        "api_spec": "vertex_anthropic",
        "input_cost": 15.0,
        "output_cost": 75.0,
        "requests_per_minute": 120,
        "tokens_per_minute": None,
    },
    "gemini-1.5-flash": {
        "id": "gemini-1.5-flash",
        "name": "gemini-1.5-flash-002",  # previously gemini-1.5-flash-001
        # region -> per-region request quota; RPM below is the sum across regions
        "regions": gemini_flash_limits,
        "api_base": "",
        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
        "supports_json": True,
        "api_spec": "vertex_gemini",
        "input_cost": 0.35,
        "output_cost": 0.35,
        "requests_per_minute": sum(gemini_flash_limits.values()),
        "tokens_per_minute": None,
    },
    "gemini-1.5-pro": {
        "id": "gemini-1.5-pro",
        "name": "gemini-1.5-pro-002",  # previously gemini-1.5-pro-001
        "regions": gemini_1_5_pro_limits,
        "api_base": "",
        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
        "supports_json": True,
        "api_spec": "vertex_gemini",
        "input_cost": 3.5,
        "output_cost": 10.5,
        "requests_per_minute": sum(gemini_1_5_pro_limits.values()),
        "tokens_per_minute": None,
    },
    "gemini-2.0-flash-vertex": {
        "id": "gemini-2.0-flash",
        "name": "gemini-2.0-flash-exp",
        "regions": gemini_flash_limits,
        "api_base": "",
        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
        "supports_json": True,
        "api_spec": "vertex_gemini",
        "input_cost": 0.35,
        "output_cost": 0.35,
        "requests_per_minute": sum(gemini_flash_limits.values()),
        "tokens_per_minute": None,
    },
    # ------------------------------------------------------------------
    # AWS Bedrock
    # ------------------------------------------------------------------
    "claude-haiku-bedrock": {
        "id": "claude-haiku-bedrock",
        "name": "anthropic.claude-3-haiku-20240307-v1:0",
        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
        "api_base": "",
        "api_key_env_var": "",
        "api_spec": "bedrock_anthropic",
        "input_cost": 0.25,
        "output_cost": 1.25,
        "requests_per_minute": 4_000,
        "tokens_per_minute": 8_000_000,
    },
    "claude-sonnet-bedrock": {
        "id": "claude-sonnet-bedrock",
        "name": "anthropic.claude-3-sonnet-20240229-v1:0",
        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
        "api_base": "",
        "api_key_env_var": "",
        "api_spec": "bedrock_anthropic",
        "input_cost": 3.0,
        "output_cost": 15.0,
        "requests_per_minute": 2_000,
        "tokens_per_minute": 4_000_000,
    },
    "mistral-7b-bedrock": {
        "id": "mistral-7b-bedrock",
        "name": "mistral.mistral-7b-instruct-v0:2",
        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
        "api_base": "",
        "api_key_env_var": "",
        "api_spec": "bedrock_mistral",
        "input_cost": 0.15,
        "output_cost": 0.2,
        "requests_per_minute": 3_200,
        "tokens_per_minute": 1_200_000,
    },
    "mixtral-8x7b-bedrock": {
        "id": "mixtral-8x7b-bedrock",
        "name": "mistral.mixtral-8x7b-instruct-v0:1",
        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
        "api_base": "",
        "api_key_env_var": "",
        "api_spec": "bedrock_mistral",
        "input_cost": 0.45,
        "output_cost": 0.7,
        "requests_per_minute": 1_600,
        "tokens_per_minute": 1_200_000,
    },
    "mistral-large-bedrock": {
        "id": "mistral-large-bedrock",
        "name": "mistral.mistral-large-2402-v1:0",
        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
        "api_base": "",
        "api_key_env_var": "",
        "api_spec": "bedrock_mistral",
        "input_cost": 8.0,
        "output_cost": 24.0,
        "requests_per_minute": 1_600,
        "tokens_per_minute": 1_200_000,
    },
    # ------------------------------------------------------------------
    # Together AI — cheap open-weights hosting; small models below are
    # generally weaker than Claude Haiku.
    # ------------------------------------------------------------------
    "gemma-7b-together": {
        "id": "gemma-7b-together",
        "name": "google/gemma-7b-it",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.2,
        "output_cost": 0.2,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "gemma-2b-together": {
        "id": "gemma-2b-together",
        "name": "google/gemma-2b-it",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.1,
        "output_cost": 0.1,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "phi2-together": {
        "id": "phi2-together",
        "name": "microsoft/phi-2",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.1,
        "output_cost": 0.1,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "mistral-7b-together": {
        "id": "mistral-7b-together",
        "name": "mistralai/Mistral-7B-Instruct-v0.2",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.2,
        "output_cost": 0.2,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "nous-mistral-7b-together": {
        "id": "nous-mistral-7b-together",
        "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.2,
        "output_cost": 0.2,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "qwen-4b-together": {
        "id": "qwen-4b-together",
        "name": "Qwen/Qwen1.5-4B-Chat",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.1,
        "output_cost": 0.1,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "llama3-8b-together": {
        "id": "llama3-8b-together",
        "name": "meta-llama/Llama-3-8b-chat-hf",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.2,
        "output_cost": 0.2,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    # larger, stronger, pricier Together-hosted models
    "llama3-70b-together": {
        "id": "llama3-70b-together",
        "name": "meta-llama/Llama-3-70b-chat-hf",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.9,
        "output_cost": 0.9,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "dbrx-together": {
        "id": "dbrx-together",
        "name": "databricks/dbrx-instruct",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 1.20,
        "output_cost": 1.20,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "mistral-8x7b-together": {
        "id": "mistral-8x7b-together",
        "name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 0.6,
        "output_cost": 0.6,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    "mistral-8x22b-together": {
        "id": "mistral-8x22b-together",
        "name": "mistralai/Mixtral-8x22B-Instruct-v0.1",
        "api_base": "https://api.together.xyz/v1",
        "api_key_env_var": "TOGETHER_API_KEY",
        "supports_json": False,
        "api_spec": "openai",
        "input_cost": 1.20,
        "output_cost": 1.20,
        "requests_per_minute": 6000,
        "tokens_per_minute": None,
    },
    # ------------------------------------------------------------------
    # Cohere
    # ------------------------------------------------------------------
    "command-a": {
        "id": "command-a",
        "name": "command-a-03-2025",
        "api_base": "https://api.cohere.ai/v2",
        "api_key_env_var": "COHERE_API_KEY",
        "api_spec": "cohere",
        "input_cost": 0.5,
        "output_cost": 1.5,
        "requests_per_minute": 10_000,
        "tokens_per_minute": None,
    },
    "command-r-7b": {
        "id": "command-r-cohere",
        "name": "command-r7b-12-2024",
        "api_base": "https://api.cohere.ai/v2",
        "api_key_env_var": "COHERE_API_KEY",
        "api_spec": "cohere",
        "input_cost": 0.5,
        "output_cost": 1.5,
        "requests_per_minute": 10_000,
        "tokens_per_minute": None,
    },
    "command-r": {
        "id": "command-r",
        "name": "command-r-08-2024",
        "api_base": "https://api.cohere.ai/v2",
        "api_key_env_var": "COHERE_API_KEY",
        "api_spec": "cohere",
        "input_cost": 0.5,
        "output_cost": 1.5,
        "requests_per_minute": 10_000,
        "tokens_per_minute": None,
    },
    "command-r-plus": {
        "id": "command-r-plus",
        "name": "command-r-plus-04-2024",
        "api_base": "https://api.cohere.ai/v2",
        "api_key_env_var": "COHERE_API_KEY",
        "api_spec": "cohere",
        "input_cost": 3.0,
        "output_cost": 15.0,
        "requests_per_minute": 10_000,
        "tokens_per_minute": None,
    },
    # ------------------------------------------------------------------
    # Mistral (La Plateforme)
    # ------------------------------------------------------------------
    "mistral-7b-mistral": {
        "id": "mistral-7b-mistral",
        "name": "open-mistral-7b",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 0.25,
        "output_cost": 0.25,
    },
    "mistral-8x7b-mistral": {
        "id": "mistral-8x7b-mistral",
        "name": "open-mixtral-8x7b",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 0.7,
        "output_cost": 0.7,
    },
    # alias of the entry above — "mixtral" spelling is easy to mix up
    "mixtral-8x7b-mistral": {
        "id": "mixtral-8x7b-mistral",
        "name": "open-mixtral-8x7b",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 0.7,
        "output_cost": 0.7,
    },
    "mistral-small-mistral": {
        "id": "mistral-small-mistral",
        "name": "mistral-small-latest",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 2.0,
        "output_cost": 6.0,
    },
    "mistral-8x22b-mistral": {
        "id": "mistral-8x22b-mistral",
        "name": "open-mixtral-8x22b",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 2.0,
        "output_cost": 6.0,
    },
    # alias of the entry above — "mixtral" spelling variant
    "mixtral-8x22b-mistral": {
        "id": "mixtral-8x22b-mistral",
        "name": "open-mixtral-8x22b",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 2.0,
        "output_cost": 6.0,
    },
    "mistral-medium-mistral": {  # WILL BE DEPRECATED SOON
        "id": "mistral-medium-mistral",
        "name": "mistral-medium-latest",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 2.7,
        "output_cost": 8.1,
    },
    "mistral-large-mistral": {
        "id": "mistral-large-mistral",
        "name": "mistral-large-latest",
        "api_base": "https://api.mistral.ai/v1",
        "api_key_env_var": "MISTRAL_API_KEY",
        "supports_json": True,
        "api_spec": "mistral",
        "input_cost": 8.0,
        "output_cost": 24.0,
    },
    # ------------------------------------------------------------------
    # DeepSeek
    # ------------------------------------------------------------------
    "deepseek-chat": {
        "id": "deepseek-chat",
        "name": "deepseek-chat",
        "api_base": "https://api.deepseek.com/v1",
        "api_key_env_var": "DEEPSEEK_API_KEY",
        "api_spec": "deepseek",
        "input_cost": 0.14,
        "output_cost": 0.28,
    },
    "deepseek-coder": {
        "id": "deepseek-coder",
        "name": "deepseek-coder",
        "api_base": "https://api.deepseek.com/v1",
        "api_key_env_var": "DEEPSEEK_API_KEY",
        "api_spec": "deepseek",
        "input_cost": 0.14,
        "output_cost": 0.28,
    },
}
922
+
923
+
924
+ @dataclass
925
+ class APIModel:
926
+ id: str
927
+ name: str
928
+ api_base: str
929
+ api_key_env_var: str
930
+ api_spec: str
931
+ input_cost: Optional[float] = 0 # $ per million input tokens
932
+ output_cost: Optional[float] = 0 # $ per million output tokens
933
+ supports_json: bool = False
934
+ supports_logprobs: bool = False
935
+ reasoning_model: bool = False
936
+ regions: list[str] | dict[str, int] = field(default_factory=list)
937
+ tokens_per_minute: int | None = None
938
+ requests_per_minute: int | None = None
939
+ gpus: Optional[list[str]] = None
940
+
941
+ @classmethod
942
+ def from_registry(cls, name: str):
943
+ if name not in registry:
944
+ raise ValueError(f"Model {name} not found in registry")
945
+ cfg = registry[name]
946
+ return cls(**cfg)
947
+
948
+ def sample_region(self):
949
+ if isinstance(self.regions, list):
950
+ regions = self.regions
951
+ weights = [1] * len(regions)
952
+ elif isinstance(self.regions, dict):
953
+ regions = self.regions.keys()
954
+ weights = self.regions.values()
955
+ else:
956
+ raise ValueError("no regions to sample")
957
+ random.sample(regions, 1, counts=weights)[0]