lm-deluge 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic.

lm_deluge/models.py CHANGED
@@ -1,6 +1,5 @@
 import random
 from dataclasses import dataclass, field
-from .gemini_limits import gemini_1_5_pro_limits, gemini_flash_limits

 registry = {
     # `7MMM. ,MMF' mm
@@ -38,7 +37,7 @@ registry = {
         "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
-    "llama-3.3-70B": {
+    "llama-3.3-70b": {
         "id": "llama-3.3-70B",
         "name": "Llama-3.3-70B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
@@ -52,7 +51,7 @@ registry = {
         "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
-    "llama-3.3-8B": {
+    "llama-3.3-8b": {
         "id": "llama-3.3-8B",
         "name": "Llama-3.3-8B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
@@ -141,8 +140,8 @@ registry = {
         "reasoning_model": False,
     },
     "gemini-2.5-pro": {
-        "id": "gemini-2.5-pro-exp-03-25",
-        "name": "gemini-2.5-pro-exp-03-25",
+        "id": "gemini-2.5-pro",
+        "name": "gemini-2.5-pro-preview-05-06",
         "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
         "api_key_env_var": "GEMINI_API_KEY",
         "supports_json": True,
@@ -152,7 +151,21 @@ registry = {
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
-        "reasoning_model": False,
+        "reasoning_model": True,
+    },
+    "gemini-2.5-flash": {
+        "id": "gemini-2.5-flash",
+        "name": "gemini-2.5-flash-preview-05-20",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta/openai",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "openai",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
     },
     # ███████ █████████ █████
     # ███░░░░░███ ███░░░░░███ ░░███
@@ -165,6 +178,34 @@ registry = {
     # ░███
     # █████
     # ░░░░░
+    "o3": {
+        "id": "o3",
+        "name": "o3-2025-04-16",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 10.0,
+        "output_cost": 40.0,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
+    "o4-mini": {
+        "id": "o4-mini",
+        "name": "o4-mini-2025-04-16",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 1.1,
+        "output_cost": 4.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
     "gpt-4.1": {
         "id": "gpt-4.1",
         "name": "gpt-4.1-2025-04-14",
@@ -379,8 +420,8 @@ registry = {
     # ░███
     # █████
     # ░░░░░
-    "claude-haiku-anthropic": {
-        "id": "claude-haiku-anthropic",
+    "claude-3-haiku": {
+        "id": "claude-3-haiku",
         "name": "claude-3-haiku-20240307",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
@@ -391,8 +432,8 @@ registry = {
         "requests_per_minute": 10_000,
         "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
     },
-    "claude-haiku-anthropic-expensive": {
-        "id": "claude-haiku-anthropic-expensive",
+    "claude-3.5-haiku": {
+        "id": "claude-3.5-haiku",
         "name": "claude-3-5-haiku-20241022",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
@@ -403,9 +444,9 @@ registry = {
         "requests_per_minute": 20_000,
         "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
     },
-    "claude-sonnet-anthropic": {
-        "id": "claude-sonnet-anthropic",
-        "name": "claude-3-7-sonnet-20250219", # "claude-3-5-sonnet-20241022", # "claude-3-5-sonnet-20240620", # "claude-3-sonnet-20240229",
+    "claude-3.7-sonnet": {
+        "id": "claude-3.7-sonnet",
+        "name": "claude-3-7-sonnet-20250219",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
         "supports_json": False,
@@ -416,8 +457,8 @@ registry = {
         "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
-    "claude-3-6-sonnet-anthropic": {
-        "id": "claude-sonnet-anthropic",
+    "claude-3.6-sonnet": {
+        "id": "claude-3.6-sonnet",
         "name": "claude-3-5-sonnet-20241022",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
@@ -428,8 +469,8 @@ registry = {
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
     },
-    "claude-3-5-sonnet-anthropic": {
-        "id": "claude-sonnet-anthropic",
+    "claude-3.5-sonnet": {
+        "id": "claude-3.5-sonnet",
         "name": "claude-3-5-sonnet-20240620",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
@@ -440,8 +481,8 @@ registry = {
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
     },
-    "claude-opus-anthropic": {
-        "id": "claude-opus-anthropic",
+    "claude-3-opus": {
+        "id": "claude-3-opus",
         "name": "claude-3-opus-20240229",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
@@ -460,83 +501,100 @@ registry = {
     # ░░░█████░ ░███░░░ ░███ ░███ ███░███░░░ ███░░░███
     # ░░███ ░░██████ █████ ░░█████ ░░██████ █████ █████
     # ░░░ ░░░░░░ ░░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░
-    "claude-haiku-vertex": {
-        "id": "claude-haiku-vertex",
-        "name": "claude-3-haiku@20240307",
-        "regions": ["europe-west4", "us-central1"],
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": False,
-        "api_spec": "vertex_anthropic",
-        "input_cost": 0.25,
-        "output_cost": 1.25,
-        "requests_per_minute": 120,
-        "tokens_per_minute": None,
-    },
-    "claude-sonnet-vertex": {
-        "id": "claude-sonnet-vertex",
-        "name": "claude-3-sonnet@20240229",
-        "regions": ["us-central1", "asia-southeast1"],
+    # "claude-haiku-vertex": {
+    #     "id": "claude-haiku-vertex",
+    #     "name": "claude-3-haiku@20240307",
+    #     "regions": ["europe-west4", "us-central1"],
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": False,
+    #     "api_spec": "vertex_anthropic",
+    #     "input_cost": 0.25,
+    #     "output_cost": 1.25,
+    #     "requests_per_minute": 120,
+    #     "tokens_per_minute": None,
+    # },
+    # "claude-sonnet-vertex": {
+    #     "id": "claude-sonnet-vertex",
+    #     "name": "claude-3-sonnet@20240229",
+    #     "regions": ["us-central1", "asia-southeast1"],
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": False,
+    #     "api_spec": "vertex_anthropic",
+    #     "input_cost": 3.0,
+    #     "output_cost": 15.0,
+    #     "requests_per_minute": 120,
+    #     "tokens_per_minute": None,
+    # },
+    # "claude-opus-vertex": {
+    #     "id": "claude-opus-vertex",
+    #     "name": "claude-3-opus@20240229",
+    #     "regions": ["us-east5"],
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": False,
+    #     "api_spec": "vertex_anthropic",
+    #     "input_cost": 15.0,
+    #     "output_cost": 75.0,
+    #     "requests_per_minute": 120,
+    #     "tokens_per_minute": None,
+    # },
+    "gemini-2.5-pro-vertex": {
+        "id": "gemini-2.5-pro",
+        "name": "gemini-2.5-pro-preview-05-06",
         "api_base": "",
         "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": False,
-        "api_spec": "vertex_anthropic",
-        "input_cost": 3.0,
-        "output_cost": 15.0,
-        "requests_per_minute": 120,
-        "tokens_per_minute": None,
-    },
-    "claude-opus-vertex": {
-        "id": "claude-opus-vertex",
-        "name": "claude-3-opus@20240229",
-        "regions": ["us-east5"],
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": False,
-        "api_spec": "vertex_anthropic",
-        "input_cost": 15.0,
-        "output_cost": 75.0,
-        "requests_per_minute": 120,
-        "tokens_per_minute": None,
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "vertex_gemini",
+        "input_cost": 1.25,
+        "output_cost": 10.0,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
     },
-    "gemini-1.5-flash": {
-        "id": "gemini-1.5-flash",
-        "name": "gemini-1.5-flash-002", # "gemini-1.5-flash-001",
-        "regions": gemini_flash_limits,
+    "gemini-2.5-flash-vertex": {
+        "id": "gemini-2.5-flash",
+        "name": "gemini-2.5-flash-preview-05-20",
         "api_base": "",
         "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
         "supports_json": True,
+        "supports_logprobs": False,
         "api_spec": "vertex_gemini",
-        "input_cost": 0.35,
-        "output_cost": 0.35,
-        "requests_per_minute": sum(gemini_flash_limits.values()),
-        "tokens_per_minute": None,
+        "input_cost": 0.15,
+        "output_cost": 0.6,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
     },
-    "gemini-1.5-pro": {
-        "id": "gemini-1.5-pro",
-        "name": "gemini-1.5-pro-002", # "gemini-1.5-pro-001",
-        "regions": gemini_1_5_pro_limits,
+    "gemini-2.0-flash-vertex": {
+        "id": "gemini-2.0-flash",
+        "name": "gemini-2.0-flash",
         "api_base": "",
         "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
         "supports_json": True,
+        "supports_logprobs": False,
         "api_spec": "vertex_gemini",
-        "input_cost": 3.5,
-        "output_cost": 10.5,
-        "requests_per_minute": sum(gemini_1_5_pro_limits.values()),
-        "tokens_per_minute": None,
+        "input_cost": 0.10,
+        "output_cost": 0.40,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
     },
-    "gemini-2.0-flash-vertex": {
-        "id": "gemini-2.0-flash",
-        "name": "gemini-2.0-flash-exp", # "gemini-1.5-flash-001",
-        "regions": gemini_flash_limits,
+    "gemini-2.0-flash-lite-vertex": {
+        "id": "gemini-2.0-flash-lite",
+        "name": "gemini-2.0-flash-lite",
         "api_base": "",
         "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
         "supports_json": True,
+        "supports_logprobs": False,
         "api_spec": "vertex_gemini",
-        "input_cost": 0.35,
-        "output_cost": 0.35,
-        "requests_per_minute": sum(gemini_flash_limits.values()),
-        "tokens_per_minute": None,
+        "input_cost": 0.075,
+        "output_cost": 0.30,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
     },
     # ███████████ █████ █████
     # ░░███░░░░░███ ░░███ ░░███
@@ -546,66 +604,66 @@ registry = {
     # ░███ ░███░███░░░ ░███ ░███ ░███ ░███ ░███░███ ███ ░███░░███
     # ███████████ ░░██████ ░░████████ █████ ░░██████ ░░██████ ████ █████
     # ░░░░░░░░░░░ ░░░░░░ ░░░░░░░░ ░░░░░ ░░░░░░ ░░░░░░ ░░░░ ░░░░░
-    "claude-haiku-bedrock": {
-        "id": "claude-haiku-bedrock",
-        "name": "anthropic.claude-3-haiku-20240307-v1:0",
-        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-        "api_base": "",
-        "api_key_env_var": "",
-        "api_spec": "bedrock_anthropic",
-        "input_cost": 0.25,
-        "output_cost": 1.25,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 8_000_000,
-    },
-    "claude-sonnet-bedrock": {
-        "id": "claude-sonnet-bedrock",
-        "name": "anthropic.claude-3-sonnet-20240229-v1:0",
-        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-        "api_base": "",
-        "api_key_env_var": "",
-        "api_spec": "bedrock_anthropic",
-        "input_cost": 3.0,
-        "output_cost": 15.0,
-        "requests_per_minute": 2_000,
-        "tokens_per_minute": 4_000_000,
-    },
-    "mistral-7b-bedrock": {
-        "id": "mistral-7b-bedrock",
-        "name": "mistral.mistral-7b-instruct-v0:2",
-        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-        "api_base": "",
-        "api_key_env_var": "",
-        "api_spec": "bedrock_mistral",
-        "input_cost": 0.15,
-        "output_cost": 0.2,
-        "requests_per_minute": 3_200,
-        "tokens_per_minute": 1_200_000,
-    },
-    "mixtral-8x7b-bedrock": {
-        "id": "mixtral-8x7b-bedrock",
-        "name": "mistral.mixtral-8x7b-instruct-v0:1",
-        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-        "api_base": "",
-        "api_key_env_var": "",
-        "api_spec": "bedrock_mistral",
-        "input_cost": 0.45,
-        "output_cost": 0.7,
-        "requests_per_minute": 1_600,
-        "tokens_per_minute": 1_200_000,
-    },
-    "mistral-large-bedrock": {
-        "id": "mistral-large-bedrock",
-        "name": "mistral.mistral-large-2402-v1:0",
-        "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
-        "api_base": "",
-        "api_key_env_var": "",
-        "api_spec": "bedrock_mistral",
-        "input_cost": 8.0,
-        "output_cost": 24.0,
-        "requests_per_minute": 1_600,
-        "tokens_per_minute": 1_200_000,
-    },
+    # "claude-haiku-bedrock": {
+    #     "id": "claude-haiku-bedrock",
+    #     "name": "anthropic.claude-3-haiku-20240307-v1:0",
+    #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
+    #     "api_base": "",
+    #     "api_key_env_var": "",
+    #     "api_spec": "bedrock_anthropic",
+    #     "input_cost": 0.25,
+    #     "output_cost": 1.25,
+    #     "requests_per_minute": 4_000,
+    #     "tokens_per_minute": 8_000_000,
+    # },
+    # "claude-sonnet-bedrock": {
+    #     "id": "claude-sonnet-bedrock",
+    #     "name": "anthropic.claude-3-sonnet-20240229-v1:0",
+    #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
+    #     "api_base": "",
+    #     "api_key_env_var": "",
+    #     "api_spec": "bedrock_anthropic",
+    #     "input_cost": 3.0,
+    #     "output_cost": 15.0,
+    #     "requests_per_minute": 2_000,
+    #     "tokens_per_minute": 4_000_000,
+    # },
+    # "mistral-7b-bedrock": {
+    #     "id": "mistral-7b-bedrock",
+    #     "name": "mistral.mistral-7b-instruct-v0:2",
+    #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
+    #     "api_base": "",
+    #     "api_key_env_var": "",
+    #     "api_spec": "bedrock_mistral",
+    #     "input_cost": 0.15,
+    #     "output_cost": 0.2,
+    #     "requests_per_minute": 3_200,
+    #     "tokens_per_minute": 1_200_000,
+    # },
+    # "mixtral-8x7b-bedrock": {
+    #     "id": "mixtral-8x7b-bedrock",
+    #     "name": "mistral.mixtral-8x7b-instruct-v0:1",
+    #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
+    #     "api_base": "",
+    #     "api_key_env_var": "",
+    #     "api_spec": "bedrock_mistral",
+    #     "input_cost": 0.45,
+    #     "output_cost": 0.7,
+    #     "requests_per_minute": 1_600,
+    #     "tokens_per_minute": 1_200_000,
+    # },
+    # "mistral-large-bedrock": {
+    #     "id": "mistral-large-bedrock",
+    #     "name": "mistral.mistral-large-2402-v1:0",
+    #     "regions": ["us-east-1", "us-west-2", "ap-southeast-2", "eu-west-3"],
+    #     "api_base": "",
+    #     "api_key_env_var": "",
+    #     "api_spec": "bedrock_mistral",
+    #     "input_cost": 8.0,
+    #     "output_cost": 24.0,
+    #     "requests_per_minute": 1_600,
+    #     "tokens_per_minute": 1_200_000,
+    # },
     # ███████████ █████ █████
     # ░█░░░███░░░█ ░░███ ░░███
     # ░ ░███ ░ ██████ ███████ ██████ ███████ ░███████ ██████ ████████
@@ -618,153 +676,136 @@ registry = {
     # ░░██████
     # ░░░░░░
     # tbh only reason to use these are that they're cheap, but all worse than haiku
-    "gemma-7b-together": {
-        "id": "gemma-7b-together",
-        "name": "google/gemma-7b-it",
-        "api_base": "https://api.together.xyz/v1",
-        "api_key_env_var": "TOGETHER_API_KEY",
-        "supports_json": False,
-        "api_spec": "openai",
-        "input_cost": 0.2,
-        "output_cost": 0.2,
-        "requests_per_minute": 6000,
-        "tokens_per_minute": None,
-    },
-    "gemma-2b-together": {
-        "id": "gemma-2b-together",
-        "name": "google/gemma-2b-it",
+    "deepseek-r1-together": {
+        "id": "deepseek-r1-together",
+        "name": "deepseek-ai/DeepSeek-R1",
         "api_base": "https://api.together.xyz/v1",
         "api_key_env_var": "TOGETHER_API_KEY",
         "supports_json": False,
         "api_spec": "openai",
-        "input_cost": 0.1,
-        "output_cost": 0.1,
-        "requests_per_minute": 6000,
+        "input_cost": 3.0,
+        "output_cost": 7.0,
+        "requests_per_minute": None,
         "tokens_per_minute": None,
     },
-    "phi2-together": {
-        "id": "phi2-together",
-        "name": "microsoft/phi-2",
+    "deepseek-v3-together": {
+        "id": "deepseek-v3-together",
+        "name": "deepseek-ai/DeepSeek-V3",
         "api_base": "https://api.together.xyz/v1",
         "api_key_env_var": "TOGETHER_API_KEY",
         "supports_json": False,
         "api_spec": "openai",
-        "input_cost": 0.1,
-        "output_cost": 0.1,
-        "requests_per_minute": 6000,
+        "input_cost": 1.25,
+        "output_cost": 1.25,
+        "requests_per_minute": None,
        "tokens_per_minute": None,
     },
-    "mistral-7b-together": {
-        "id": "mistral-7b-together",
-        "name": "mistralai/Mistral-7B-Instruct-v0.2",
+    "qwen-3-235b-together": {
+        "id": "qwen-3-235b-together",
+        "name": "Qwen/Qwen3-235B-A22B-fp8",
         "api_base": "https://api.together.xyz/v1",
         "api_key_env_var": "TOGETHER_API_KEY",
         "supports_json": False,
         "api_spec": "openai",
         "input_cost": 0.2,
-        "output_cost": 0.2,
-        "requests_per_minute": 6000,
+        "output_cost": 0.6,
+        "requests_per_minute": None,
         "tokens_per_minute": None,
     },
-    "nous-mistral-7b-together": {
-        "id": "nous-mistral-7b-together",
-        "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
+    "qwen-2.5-vl-together": {
+        "id": "qwen-2.5-vl-together",
+        "name": "Qwen/Qwen2.5-VL-72B-Instruct",
         "api_base": "https://api.together.xyz/v1",
         "api_key_env_var": "TOGETHER_API_KEY",
         "supports_json": False,
         "api_spec": "openai",
-        "input_cost": 0.2,
-        "output_cost": 0.2,
-        "requests_per_minute": 6000,
+        "input_cost": 1.95,
+        "output_cost": 8.0,
+        "requests_per_minute": None,
         "tokens_per_minute": None,
     },
-    "qwen-4b-together": {
-        "id": "qwen-4b-together",
-        "name": "Qwen/Qwen1.5-4B-Chat",
+    "llama-4-maverick-together": {
+        "id": "llama-4-maverick-together",
+        "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
         "api_base": "https://api.together.xyz/v1",
         "api_key_env_var": "TOGETHER_API_KEY",
         "supports_json": False,
         "api_spec": "openai",
-        "input_cost": 0.1,
-        "output_cost": 0.1,
-        "requests_per_minute": 6000,
+        "input_cost": 0.27,
+        "output_cost": 0.85,
+        "requests_per_minute": None,
         "tokens_per_minute": None,
     },
-    "llama3-8b-together": {
-        "id": "llama3-8b-together",
-        "name": "meta-llama/Llama-3-8b-chat-hf",
+    "llama-4-scout-together": {
+        "id": "llama-4-scout-together",
+        "name": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
         "api_base": "https://api.together.xyz/v1",
         "api_key_env_var": "TOGETHER_API_KEY",
         "supports_json": False,
         "api_spec": "openai",
-        "input_cost": 0.2,
-        "output_cost": 0.2,
-        "requests_per_minute": 6000,
+        "input_cost": 0.18,
+        "output_cost": 0.59,
+        "requests_per_minute": None,
         "tokens_per_minute": None,
     },
-    # then these ones are big and pretty good, but more expensive
-    "llama3-70b-together": {
-        "id": "llama3-70b-together",
-        "name": "meta-llama/Llama-3-70b-chat-hf",
-        "api_base": "https://api.together.xyz/v1",
-        "api_key_env_var": "TOGETHER_API_KEY",
-        "supports_json": False,
+    # █████████ █████
+    # ███░░░░░███ ░░███
+    # ███ ░░░ ██████ ░███████ ██████ ████████ ██████
+    # ░███ ███░░███ ░███░░███ ███░░███░░███░░███ ███░░███
+    # ░███ ░███ ░███ ░███ ░███ ░███████ ░███ ░░░ ░███████
+    # ░░███ ███░███ ░███ ░███ ░███ ░███░░░ ░███ ░███░░░
+    # ░░█████████ ░░██████ ████ █████░░██████ █████ ░░██████
+    # ░░░░░░░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░░
+    "aya-expanse-8b": {
+        "id": "aya-expanse-8b",
+        "name": "c4ai-aya-expanse-8b",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
+        "api_key_env_var": "COHERE_API_KEY",
         "api_spec": "openai",
-        "input_cost": 0.9,
-        "output_cost": 0.9,
-        "requests_per_minute": 6000,
+        "input_cost": 0.5,
+        "output_cost": 1.5,
+        "requests_per_minute": 10_000,
         "tokens_per_minute": None,
     },
-    "dbrx-together": {
-        "id": "dbrx-together",
-        "name": "databricks/dbrx-instruct",
-        "api_base": "https://api.together.xyz/v1",
-        "api_key_env_var": "TOGETHER_API_KEY",
-        "supports_json": False,
+    "aya-expanse-32b": {
+        "id": "aya-expanse-32b",
+        "name": "c4ai-aya-expanse-32b",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
+        "api_key_env_var": "COHERE_API_KEY",
         "api_spec": "openai",
-        "input_cost": 1.20,
-        "output_cost": 1.20,
-        "requests_per_minute": 6000,
+        "input_cost": 0.5,
+        "output_cost": 1.5,
+        "requests_per_minute": 10_000,
         "tokens_per_minute": None,
     },
-    "mistral-8x7b-together": {
-        "id": "mistral-8x7b-together",
-        "name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-        "api_base": "https://api.together.xyz/v1",
-        "api_key_env_var": "TOGETHER_API_KEY",
-        "supports_json": False,
+    "aya-vision-8b": {
+        "id": "aya-vision-8b",
+        "name": "c4ai-aya-vision-8b",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
+        "api_key_env_var": "COHERE_API_KEY",
         "api_spec": "openai",
-        "input_cost": 0.6,
-        "output_cost": 0.6,
-        "requests_per_minute": 6000,
+        "input_cost": 0.5,
+        "output_cost": 1.5,
+        "requests_per_minute": 10_000,
         "tokens_per_minute": None,
     },
-    "mistral-8x22b-together": {
-        "id": "mistral-8x22b-together",
-        "name": "mistralai/Mixtral-8x22B-Instruct-v0.1",
-        "api_base": "https://api.together.xyz/v1",
-        "api_key_env_var": "TOGETHER_API_KEY",
-        "supports_json": False,
+    "aya-vision-32b": {
+        "id": "aya-vision-32b",
+        "name": "c4ai-aya-vision-32b",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
+        "api_key_env_var": "COHERE_API_KEY",
         "api_spec": "openai",
-        "input_cost": 1.20,
-        "output_cost": 1.20,
-        "requests_per_minute": 6000,
+        "input_cost": 0.5,
+        "output_cost": 1.5,
+        "requests_per_minute": 10_000,
         "tokens_per_minute": None,
     },
-    # █████████ █████
-    # ███░░░░░███ ░░███
-    # ███ ░░░ ██████ ░███████ ██████ ████████ ██████
-    # ░███ ███░░███ ░███░░███ ███░░███░░███░░███ ███░░███
-    # ░███ ░███ ░███ ░███ ░███ ░███████ ░███ ░░░ ░███████
-    # ░░███ ███░███ ░███ ░███ ░███ ░███░░░ ░███ ░███░░░
-    # ░░█████████ ░░██████ ████ █████░░██████ █████ ░░██████
-    # ░░░░░░░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░░
     "command-a": {
         "id": "command-a",
         "name": "command-a-03-2025",
-        "api_base": "https://api.cohere.ai/v2",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
         "api_key_env_var": "COHERE_API_KEY",
-        "api_spec": "cohere",
+        "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
         "requests_per_minute": 10_000,
@@ -773,9 +814,9 @@ registry = {
     "command-r-7b": {
         "id": "command-r-cohere",
         "name": "command-r7b-12-2024",
-        "api_base": "https://api.cohere.ai/v2",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
         "api_key_env_var": "COHERE_API_KEY",
-        "api_spec": "cohere",
+        "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
         "requests_per_minute": 10_000,
@@ -784,9 +825,9 @@ registry = {
     "command-r": {
         "id": "command-r",
         "name": "command-r-08-2024",
-        "api_base": "https://api.cohere.ai/v2",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
         "api_key_env_var": "COHERE_API_KEY",
-        "api_spec": "cohere",
+        "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
         "requests_per_minute": 10_000,
@@ -795,9 +836,9 @@ registry = {
     "command-r-plus": {
         "id": "command-r-plus",
         "name": "command-r-plus-04-2024",
-        "api_base": "https://api.cohere.ai/v2",
+        "api_base": "https://api.cohere.ai/compatibility/v1",
         "api_key_env_var": "COHERE_API_KEY",
-        "api_spec": "cohere",
+        "api_spec": "openai",
         "input_cost": 3.0,
         "output_cost": 15.0,
         "requests_per_minute": 10_000,
@@ -811,86 +852,105 @@ registry = {
     # ░███ ░███ ░███ ░░░░███ ░███ ███ ░███ ███░░███ ░███
     # █████ █████ █████ ██████ ░░█████ █████ ░░████████ █████
     # ░░░░░ ░░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░ ░░░░░░░░ ░░░░░
-    "mistral-7b-mistral": {
-        "id": "mistral-7b-mistral",
-        "name": "open-mistral-7b",
+    "mistral-medium": {
+        "id": "mistral-medium",
+        "name": "mistral-medium-latest",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 0.25,
-        "output_cost": 0.25,
+        "input_cost": 0.4,
+        "output_cost": 2.0,
     },
-    "mistral-8x7b-mistral": {
-        "id": "mistral-8x7b-mistral",
-        "name": "open-mixtral-8x7b",
+    "mistral-large": {
+        "id": "mistral-large",
+        "name": "mistral-large-latest",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 0.7,
-        "output_cost": 0.7,
+        "input_cost": 2.0,
+        "output_cost": 6.0,
     },
-    # same as above but mixtral name is easy to mix up
-    "mixtral-8x7b-mistral": {
-        "id": "mixtral-8x7b-mistral",
-        "name": "open-mixtral-8x7b",
+    "pixtral-large": {
+        "id": "pixtral-large",
+        "name": "pixtral-large-latest",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 0.7,
-        "output_cost": 0.7,
+        "input_cost": 2.0,
+        "output_cost": 6.0,
     },
-    "mistral-small-mistral": {
-        "id": "mistral-small-mistral",
+    "mistral-small": {
+        "id": "mistral-small",
         "name": "mistral-small-latest",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 2.0,
-        "output_cost": 6.0,
+        "input_cost": 0.1,
+        "output_cost": 0.3,
     },
-    "mistral-8x22b-mistral": {
-        "id": "mistral-8x22b-mistral",
-        "name": "open-mixtral-8x22b",
+    "devstral-small": {
+        "id": "devstral-small",
+        "name": "devstral-small-2505",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 2.0,
-        "output_cost": 6.0,
+        "input_cost": 0.1,
+        "output_cost": 0.3,
     },
-    "mixtral-8x22b-mistral": {
-        "id": "mixtral-8x22b-mistral",
-        "name": "open-mixtral-8x22b",
+    "codestral": {
+        "id": "codestral",
+        "name": "codestral-latest",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 2.0,
-        "output_cost": 6.0,
+        "input_cost": 0.2,
+        "output_cost": 0.6,
     },
-    "mistral-medium-mistral": { # WILL BE DEPRECATED SOON
-        "id": "mistral-medium-mistral",
-        "name": "mistral-medium-latest",
+    "pixtral-12b": {
+        "id": "pixtral-12b",
+        "name": "pixtral-12b",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 2.7,
-        "output_cost": 8.1,
+        "input_cost": 0.1,
+        "output_cost": 0.3,
     },
-    "mistral-large-mistral": {
-        "id": "mistral-large-mistral",
-        "name": "mistral-large-latest",
+    "mistral-nemo": {
+        "id": "mistral-nemo",
+        "name": "open-mistral-nemo",
+        "api_base": "https://api.mistral.ai/v1",
+        "api_key_env_var": "MISTRAL_API_KEY",
+        "supports_json": True,
+        "api_spec": "mistral",
+        "input_cost": 0.1,
+        "output_cost": 0.3,
+    },
+    "ministral-8b": {
+        "id": "ministral-8b",
+        "name": "ministral-8b-latest",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
         "supports_json": True,
         "api_spec": "mistral",
-        "input_cost": 8.0,
-        "output_cost": 24.0,
+        "input_cost": 0.7,
+        "output_cost": 0.7,
+    },
+    "mixtral-8x22b": {
+        "id": "mistral-8x22b",
+        "name": "open-mixtral-8x22b",
+        "api_base": "https://api.mistral.ai/v1",
+        "api_key_env_var": "MISTRAL_API_KEY",
+        "supports_json": True,
+        "api_spec": "mistral",
+        "input_cost": 2.0,
+        "output_cost": 6.0,
     },
     # ______ _
     # (______) | |
@@ -904,18 +964,18 @@ registry = {
         "name": "deepseek-chat",
         "api_base": "https://api.deepseek.com/v1",
         "api_key_env_var": "DEEPSEEK_API_KEY",
-        "api_spec": "deepseek",
-        "input_cost": 0.14,
-        "output_cost": 0.28,
+        "api_spec": "openai",
+        "input_cost": 0.27,
+        "output_cost": 1.10,
     },
-    "deepseek-coder": {
-        "id": "deepseek-coder",
-        "name": "deepseek-coder",
+    "deepseek-r1": {
+        "id": "deepseek-r1",
+        "name": "deepseek-reasoner",
         "api_base": "https://api.deepseek.com/v1",
         "api_key_env_var": "DEEPSEEK_API_KEY",
-        "api_spec": "deepseek",
-        "input_cost": 0.14,
-        "output_cost": 0.28,
+        "api_spec": "openai",
+        "input_cost": 0.55,
+        "output_cost": 2.19,
     },
 }
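
For orientation, here is a minimal sketch (not part of the package) of how the reworked registry might be consumed after the renames in this release. It assumes `input_cost` / `output_cost` are USD per million tokens and that code using the old 0.0.4 keys needs an explicit alias table; the `RENAMED` mapping, `resolve`, and `estimate_cost` below are hypothetical helpers, not lm-deluge API.

# sketch only: lm_deluge.models exposes a module-level `registry` dict (see diff above)
import os
from lm_deluge.models import registry

# hypothetical aliases from pre-0.0.6 keys to the renamed entries shown in this diff
RENAMED = {
    "claude-haiku-anthropic": "claude-3-haiku",
    "claude-sonnet-anthropic": "claude-3.7-sonnet",
    "claude-opus-anthropic": "claude-3-opus",
}

def resolve(key: str) -> dict:
    """Look up a model entry, following the 0.0.6 renames if an old key is given."""
    key = RENAMED.get(key, key)
    try:
        return registry[key]
    except KeyError:
        raise KeyError(f"unknown model {key!r}") from None

def estimate_cost(key: str, input_tokens: int, output_tokens: int) -> float:
    """Rough dollar estimate, assuming costs are quoted in USD per 1M tokens."""
    m = resolve(key)
    return (input_tokens * m["input_cost"] + output_tokens * m["output_cost"]) / 1_000_000

if __name__ == "__main__":
    entry = resolve("claude-sonnet-anthropic")  # resolves to "claude-3.7-sonnet"
    print(entry["name"], "key set:", os.environ.get(entry["api_key_env_var"]) is not None)
    print(f"~${estimate_cost('gemini-2.5-flash', 50_000, 10_000):.4f}")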