lm-deluge 0.0.56__tar.gz → 0.0.58__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: the registry flags this version of lm-deluge as potentially problematic.

Files changed (82)
  1. {lm_deluge-0.0.56/src/lm_deluge.egg-info → lm_deluge-0.0.58}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/__init__.py +2 -1
  4. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/anthropic.py +1 -1
  5. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/client.py +34 -21
  6. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/__init__.py +7 -7
  7. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/anthropic.py +12 -20
  8. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/bedrock.py +0 -14
  9. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/cohere.py +0 -16
  10. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/google.py +0 -20
  11. lm_deluge-0.0.58/src/lm_deluge/models/grok.py +82 -0
  12. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/groq.py +2 -2
  13. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/meta.py +0 -8
  14. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/openai.py +0 -34
  15. lm_deluge-0.0.58/src/lm_deluge/models/openrouter.py +64 -0
  16. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/together.py +0 -16
  17. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/prompt.py +6 -7
  18. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/tool.py +338 -18
  19. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/tracker.py +11 -5
  20. {lm_deluge-0.0.56 → lm_deluge-0.0.58/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  21. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/SOURCES.txt +0 -2
  22. lm_deluge-0.0.56/src/lm_deluge/agent.py +0 -0
  23. lm_deluge-0.0.56/src/lm_deluge/gemini_limits.py +0 -65
  24. lm_deluge-0.0.56/src/lm_deluge/models/grok.py +0 -38
  25. lm_deluge-0.0.56/src/lm_deluge/models/openrouter.py +0 -1
  26. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/LICENSE +0 -0
  27. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/README.md +0 -0
  28. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/setup.cfg +0 -0
  29. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/__init__.py +0 -0
  30. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/base.py +0 -0
  31. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/bedrock.py +0 -0
  32. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/common.py +0 -0
  33. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  34. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  35. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  36. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  37. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  38. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/gemini.py +0 -0
  39. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/mistral.py +0 -0
  40. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/openai.py +0 -0
  41. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/response.py +0 -0
  42. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/batches.py +0 -0
  43. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  44. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  45. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  46. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  47. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/base.py +0 -0
  48. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/openai.py +0 -0
  49. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/cache.py +0 -0
  50. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/cli.py +0 -0
  51. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/config.py +0 -0
  52. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/embed.py +0 -0
  53. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/errors.py +0 -0
  54. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/file.py +0 -0
  55. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/image.py +0 -0
  56. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/__init__.py +0 -0
  57. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/classify.py +0 -0
  58. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/extract.py +0 -0
  59. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/locate.py +0 -0
  60. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/ocr.py +0 -0
  61. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/score.py +0 -0
  62. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/translate.py +0 -0
  63. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/cerebras.py +0 -0
  64. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/deepseek.py +0 -0
  65. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/fireworks.py +0 -0
  66. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/mistral.py +0 -0
  67. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/presets/cerebras.py +0 -0
  68. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/presets/meta.py +0 -0
  69. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/request_context.py +0 -0
  70. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/rerank.py +0 -0
  71. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/usage.py +0 -0
  72. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/harmony.py +0 -0
  73. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/json.py +0 -0
  74. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/logprobs.py +0 -0
  75. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/spatial.py +0 -0
  76. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/validation.py +0 -0
  77. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/xml.py +0 -0
  78. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  79. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/requires.txt +0 -0
  80. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/top_level.txt +0 -0
  81. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/tests/test_builtin_tools.py +0 -0
  82. {lm_deluge-0.0.56 → lm_deluge-0.0.58}/tests/test_native_mcp_server.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.56
+Version: 0.0.58
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.56"
+version = "0.0.58"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

src/lm_deluge/__init__.py
@@ -1,7 +1,7 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool, ToolParams
 
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -12,5 +12,6 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
+    "ToolParams",
     "File",
 ]

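The practical effect of this hunk is that ToolParams is now importable from the package root alongside Tool. A minimal sketch; only the import path is confirmed by this diff, so the shape of ToolParams itself is not assumed here:

```python
# Hedged sketch: in 0.0.58 ToolParams is re-exported at the top level.
# Its internal structure is not shown in this diff; only the name is confirmed.
from lm_deluge import Tool, ToolParams  # 0.0.56 exported only Tool

print(Tool.__name__, ToolParams.__name__)  # both resolve from the package root
```
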
src/lm_deluge/api_requests/anthropic.py
@@ -72,7 +72,7 @@ def _build_anthropic_request(
     request_json["system"] = system_message
 
     # handle temp + top_p for opus 4.1/sonnet 4.5
-    if model.name in ["claude-sonnet-4-5-20250929", "claude-opus-4-1-20250805"]:
+    if "4-1" in model.name or "4-5" in model.name:
         if "temperature" in request_json and "top_p" in request_json:
             request_json.pop("top_p")

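The gate is now a substring check rather than an exact-name list, so any Anthropic model whose name contains "4-1" or "4-5" (including the newly registered claude-haiku-4-5-20251001) has top_p dropped whenever both temperature and top_p are set. A small sketch of the effective behavior, lifted from the hunk above into a standalone helper for illustration:

```python
# Illustrative helper reproducing the new gating logic from _build_anthropic_request.
def drop_conflicting_top_p(model_name: str, request_json: dict) -> dict:
    # Substring match replaces the old exact-name list, so 4.1/4.5 point releases
    # are covered without updating the list.
    if "4-1" in model_name or "4-5" in model_name:
        if "temperature" in request_json and "top_p" in request_json:
            request_json.pop("top_p")  # these models don't accept both being set
    return request_json

print(drop_conflicting_top_p(
    "claude-haiku-4-5-20251001", {"temperature": 0.7, "top_p": 0.9}
))
# {'temperature': 0.7}
```
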
src/lm_deluge/client.py
@@ -369,7 +369,7 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
         use_responses_api: bool = ...,
-    ) -> list[APIResponse | None]: ...
+    ) -> list[APIResponse]: ...
 
     async def process_prompts_async(
         self,
@@ -380,7 +380,7 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
         use_responses_api: bool = False,
-    ) -> list[APIResponse | None] | list[str | None] | dict[str, int]:
+    ) -> list[APIResponse] | list[str | None] | dict[str, int]:
         """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.
 
         This implementation creates all tasks upfront and waits for them to complete,
@@ -516,28 +516,40 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
         use_responses_api: bool = False,
-    ) -> APIResponse | None:
+    ) -> APIResponse:
         task_id = self.start_nowait(
             prompt, tools=tools, cache=cache, use_responses_api=use_responses_api
         )
         return await self.wait_for(task_id)
 
-    async def wait_for(self, task_id: int) -> APIResponse | None:
+    async def wait_for(self, task_id: int) -> APIResponse:
         task = self._tasks.get(task_id)
         if task:
             return await task
-        return self._results.get(task_id)
+        res = self._results.get(task_id)
+        if res:
+            return res
+        else:
+            return APIResponse(
+                id=-1,
+                model_internal="",
+                prompt=Conversation([]),
+                sampling_params=SamplingParams(),
+                status_code=500,
+                is_error=True,
+                error_message="Task not found",
+            )
 
     async def wait_for_all(
         self, task_ids: Sequence[int] | None = None
-    ) -> list[APIResponse | None]:
+    ) -> list[APIResponse]:
         if task_ids is None:
             task_ids = list(self._tasks.keys())
         return [await self.wait_for(tid) for tid in task_ids]
 
     async def as_completed(
         self, task_ids: Sequence[int] | None = None
-    ) -> AsyncGenerator[tuple[int, APIResponse | None], None]:
+    ) -> AsyncGenerator[tuple[int, APIResponse], None]:
         """Yield ``(task_id, result)`` pairs as tasks complete.
 
         Args:
@@ -561,7 +573,9 @@ class _LLMClient(BaseModel):
         for task in list(tasks_map.keys()):
             if task.done():
                 tid = tasks_map.pop(task)
-                yield tid, self._results.get(tid, await task)
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result
 
         while tasks_map:
             done, _ = await asyncio.wait(
@@ -569,7 +583,9 @@ class _LLMClient(BaseModel):
             )
             for task in done:
                 tid = tasks_map.pop(task)
-                yield tid, self._results.get(tid, await task)
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result
 
     async def stream(
         self,

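With the changes above, wait_for, wait_for_all, and as_completed are typed to always produce an APIResponse: an unknown task id now yields an error-shaped response (status_code=500, is_error=True) instead of None, so callers no longer need None checks. A hedged usage sketch; the LLMClient constructor shown here is an assumption, since its signature is not part of this diff, and passing plain strings to start_nowait is assumed from the library's usual prompt handling:

```python
import asyncio
from lm_deluge import LLMClient

async def main():
    # Assumption: LLMClient is built from a registry model id; the exact
    # constructor signature is not shown in this diff.
    client = LLMClient("claude-4.5-haiku")

    # start_nowait returns an integer task id (confirmed by wait_for's signature).
    ids = [client.start_nowait(p) for p in ["hello", "what is 2 + 2?"]]

    # wait_for_all now returns list[APIResponse] with no Nones to filter out.
    for resp in await client.wait_for_all(ids):
        print(resp.status_code, resp.is_error)

    # as_completed yields (task_id, APIResponse) pairs as tasks finish.
    more = [client.start_nowait("tell me a joke")]
    async for tid, resp in client.as_completed(more):
        print(tid, resp.is_error)

asyncio.run(main())
```
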
src/lm_deluge/client.py (continued)
@@ -618,23 +634,20 @@ class _LLMClient(BaseModel):
                 mcp_tools = await tool.to_tools()
                 expanded_tools.extend(mcp_tools)
 
-        last_response: APIResponse | None = None
+        response: APIResponse | None = None
 
         for _ in range(max_rounds):
-            responses = await self.process_prompts_async(
-                [conversation],
+            response = await self.start(
+                conversation,
                 tools=tools,  # type: ignore
-                return_completions_only=False,
-                show_progress=show_progress,
             )
 
-            last_response = responses[0]
-            if last_response is None or last_response.content is None:
+            if response is None or response.content is None:
                 break
 
-            conversation = conversation.with_message(last_response.content)
+            conversation = conversation.with_message(response.content)
 
-            tool_calls = last_response.content.tool_calls
+            tool_calls = response.content.tool_calls
             if not tool_calls:
                 break
 
@@ -657,12 +670,12 @@ class _LLMClient(BaseModel):
             if not isinstance(result, (str, dict, list)):
                 result = str(result)
 
-            conversation.add_tool_result(call.id, result)  # type: ignore
+            conversation.with_tool_result(call.id, result)  # type: ignore
 
-        if last_response is None:
+        if response is None:
             raise RuntimeError("model did not return a response")
 
-        return conversation, last_response
+        return conversation, response
 
     def run_agent_loop_sync(
         self,

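The agent loop now drives a single conversation through self.start instead of wrapping it in a one-element process_prompts_async call, appends tool results via with_tool_result, and still returns the final (conversation, response) pair. A hedged usage sketch; the async entry point is assumed to be named run_agent_loop (only run_agent_loop_sync's signature is visible in this hunk), and the keyword arguments shown are illustrative:

```python
import asyncio
from lm_deluge import LLMClient

async def main():
    # Assumptions: the async agent-loop method is run_agent_loop and it accepts
    # a prompt plus optional tools / max_rounds; only max_rounds and the
    # (conversation, response) return value are confirmed by this diff.
    client = LLMClient("claude-4.5-sonnet")
    conversation, response = await client.run_agent_loop(
        "What is 7 * 13?",
        tools=None,
        max_rounds=3,
    )
    # The returned Conversation includes any tool results added during the loop;
    # response is the APIResponse from the final round.
    print(response.is_error, response.content)

asyncio.run(main())
```
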
src/lm_deluge/models/__init__.py
@@ -38,9 +38,9 @@ class APIModel:
     supports_responses: bool = False
     reasoning_model: bool = False
     regions: list[str] | dict[str, int] = field(default_factory=list)
-    tokens_per_minute: int | None = None
-    requests_per_minute: int | None = None
-    gpus: list[str] | None = None
+    # tokens_per_minute: int | None = None
+    # requests_per_minute: int | None = None
+    # gpus: list[str] | None = None
 
     @classmethod
     def from_registry(cls, name: str):
@@ -97,8 +97,8 @@ def register_model(
     supports_responses: bool = False,
     reasoning_model: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
-    tokens_per_minute: int | None = None,
-    requests_per_minute: int | None = None,
+    # tokens_per_minute: int | None = None,
+    # requests_per_minute: int | None = None,
 ) -> APIModel:
     """Register a model configuration and return the created APIModel."""
     model = APIModel(
@@ -116,8 +116,8 @@ def register_model(
         supports_responses=supports_responses,
         reasoning_model=reasoning_model,
         regions=regions,
-        tokens_per_minute=tokens_per_minute,
-        requests_per_minute=requests_per_minute,
+        # tokens_per_minute=tokens_per_minute,
+        # requests_per_minute=requests_per_minute,
     )
     registry[model.id] = model
     return model

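Because the per-model rate-limit fields are commented out of the APIModel dataclass (and stripped from every registry entry in the hunks below), code that read tokens_per_minute, requests_per_minute, or gpus from a registry entry will no longer find them. A hedged sketch of the difference, assuming the registry is populated at import time and that from_registry looks entries up by their id, as register_model's registry[model.id] suggests:

```python
from lm_deluge.models import APIModel  # APIModel and from_registry shown in this diff

model = APIModel.from_registry("claude-4.5-sonnet")
print(model.input_cost, model.output_cost)  # cost fields are still dataclass fields

# In 0.0.56 these were real fields; in 0.0.58 they are commented out, so direct
# attribute access would raise AttributeError.
print(getattr(model, "tokens_per_minute", "removed in 0.0.58"))
```
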
src/lm_deluge/models/anthropic.py
@@ -10,6 +10,18 @@ ANTHROPIC_MODELS = {
     # ░███
     # █████
     #
+    "claude-4.5-haiku": {
+        "id": "claude-4.5-haiku",
+        "name": "claude-haiku-4-5-20251001",
+        "api_base": "https://api.anthropic.com/v1",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+        "input_cost": 1.0,
+        "cached_input_cost": 0.10,
+        "cache_write_cost": 1.25,
+        "output_cost": 3.0,
+    },
     "claude-4.5-sonnet": {
         "id": "claude-4.5-sonnet",
         "name": "claude-sonnet-4-5-20250929",
@@ -21,8 +33,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-4.1-opus": {
         "id": "claude-4.1-opus",
@@ -35,8 +45,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 1.50,
         "cache_write_cost": 18.75,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-opus": {
@@ -50,8 +58,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 1.50,
         "cache_write_cost": 18.75,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-sonnet": {
@@ -65,8 +71,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.7-sonnet": {
         "id": "claude-3.7-sonnet",
@@ -79,8 +83,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-3.6-sonnet": {
@@ -94,8 +96,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.5-sonnet": {
         "id": "claude-3.5-sonnet",
@@ -108,8 +108,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3-opus": {
         "id": "claude-3-opus",
@@ -120,8 +118,6 @@ ANTHROPIC_MODELS = {
         "api_spec": "anthropic",
         "input_cost": 15.0,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.5-haiku": {
         "id": "claude-3.5-haiku",
@@ -134,8 +130,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.08,
         "cache_write_cost": 1.00,
         "output_cost": 4.00,
-        "requests_per_minute": 20_000,
-        "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
     "claude-3-haiku": {
         "id": "claude-3-haiku",
@@ -148,7 +142,5 @@ ANTHROPIC_MODELS = {
         "cache_write_cost": 0.30,
         "cached_input_cost": 0.03,
         "output_cost": 1.25,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
 }

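The new claude-4.5-haiku entry carries the same cost fields as the existing entries, so a registry consumer can estimate request cost directly from them. A hedged sketch of that arithmetic; treating the values as USD per million tokens is an assumption inferred from the other entries (e.g. claude-4.5-sonnet at 3.0 / 15.0, matching Anthropic's published $3 / $15 per MTok pricing):

```python
# Hedged cost estimate from the claude-4.5-haiku registry fields added above.
# Assumes input_cost / cached_input_cost / output_cost are USD per million tokens.
entry = {
    "input_cost": 1.0,          # uncached input, $/MTok
    "cached_input_cost": 0.10,  # cache-hit input, $/MTok
    "output_cost": 3.0,         # output, $/MTok
}

def estimate_cost(uncached_in: int, cached_in: int, out: int, e: dict = entry) -> float:
    return (
        uncached_in / 1e6 * e["input_cost"]
        + cached_in / 1e6 * e["cached_input_cost"]
        + out / 1e6 * e["output_cost"]
    )

# e.g. 200k uncached input tokens, 800k cached input tokens, 50k output tokens:
print(round(estimate_cost(200_000, 800_000, 50_000), 4))  # 0.43
```
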
src/lm_deluge/models/bedrock.py
@@ -16,8 +16,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 0.25,
         "output_cost": 1.25,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 8_000_000,
     },
     "claude-3.5-haiku-bedrock": {
         "id": "claude-3.5-haiku-bedrock",
@@ -28,8 +26,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 0.25,
         "output_cost": 1.25,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 8_000_000,
     },
     "claude-3.5-sonnet-bedrock": {
         "id": "claude-3.5-sonnet-bedrock",
@@ -40,8 +36,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": False,
     },
     "claude-3.6-sonnet-bedrock": {
@@ -53,8 +47,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": False,
     },
     "claude-3.7-sonnet-bedrock": {
@@ -66,8 +58,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-sonnet-bedrock": {
@@ -79,8 +69,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-opus-bedrock": {
@@ -92,8 +80,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     # GPT-OSS on AWS Bedrock

src/lm_deluge/models/cohere.py
@@ -15,8 +15,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-expanse-32b": {
         "id": "aya-expanse-32b",
@@ -26,8 +24,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-vision-8b": {
         "id": "aya-vision-8b",
@@ -37,8 +33,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-vision-32b": {
         "id": "aya-vision-32b",
@@ -48,8 +42,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-a": {
         "id": "command-a",
@@ -59,8 +51,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r-7b": {
         "id": "command-r-cohere",
@@ -70,8 +60,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r": {
         "id": "command-r",
@@ -81,8 +69,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r-plus": {
         "id": "command-r-plus",
@@ -92,7 +78,5 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
 }

src/lm_deluge/models/google.py
@@ -20,8 +20,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.0-flash-lite-compat": {
@@ -34,8 +32,6 @@ GOOGLE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.075,
         "output_cost": 0.3,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.5-pro-compat": {
@@ -49,8 +45,6 @@ GOOGLE_MODELS = {
         "input_cost": 1.25,
         "cached_input_cost": 0.31,
         "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-compat": {
@@ -64,8 +58,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.3,
         "cached_input_cost": 0.075,
         "output_cost": 2.5,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-lite-compat": {
@@ -79,8 +71,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     # Native Gemini API versions with file support
@@ -95,8 +85,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.0-flash-lite": {
@@ -109,8 +97,6 @@ GOOGLE_MODELS = {
         "api_spec": "gemini",
         "input_cost": 0.075,
         "output_cost": 0.3,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.5-pro": {
@@ -124,8 +110,6 @@ GOOGLE_MODELS = {
         "input_cost": 1.25,
         "cached_input_cost": 0.31,
         "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash": {
@@ -139,8 +123,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.3,
         "cached_input_cost": 0.075,
         "output_cost": 2.5,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-lite": {
@@ -154,8 +136,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
 }

src/lm_deluge/models/grok.py (new file)
@@ -0,0 +1,82 @@
+XAI_MODELS = {
+    # .d8888b. 888
+    # d88P Y88b 888
+    # 888 888 888
+    # 888 888d888 .d88b. 888 888
+    # 888 88888 888P" d88""88b 888 .88P
+    # 888 888 888 888 888 888888K
+    # Y88b d88P 888 Y88..88P 888 "88b
+    # "Y8888P88 888 "Y88P" 888 888
+    "grok-code-fast-1": {
+        "id": "grok-code-fast-1",
+        "name": "grok-code-fast-1",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
+    "grok-4-fast-reasoning": {
+        "id": "grok-4-fast-reasoning",
+        "name": "grok-4-fast-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 0.5,
+        "reasoning_model": False,
+    },
+    "grok-4-fast-non-reasoning": {
+        "id": "grok-4-fast-non-reasoning",
+        "name": "grok-4-fast-non-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 0.5,
+        "reasoning_model": False,
+    },
+    "grok-4": {
+        "id": "grok-4",
+        "name": "grok-4-0709",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 2.0,
+        "output_cost": 8.0,
+        "reasoning_model": False,
+    },
+    "grok-3": {
+        "id": "grok-3",
+        "name": "grok-3-latest",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 2.0,
+        "output_cost": 8.0,
+        "reasoning_model": False,
+    },
+    "grok-3-mini": {
+        "id": "grok-3-mini",
+        "name": "grok-3-mini-latest",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 2.0,
+        "output_cost": 8.0,
+        "reasoning_model": True,
+    },
+}

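The xAI entries are ordinary OpenAI-spec registry entries keyed by id: they point at the OpenAI-compatible endpoint https://api.x.ai/v1 and read credentials from GROK_API_KEY. A hedged sketch of resolving one of them; it assumes from_registry accepts these ids directly (as it does for the other registries in this package) and that APIModel exposes the name/api_base fields populated from the entry:

```python
import os
from lm_deluge.models import APIModel

# api_base and api_key_env_var come straight from the new grok.py entries.
os.environ.setdefault("GROK_API_KEY", "xai-...")  # placeholder, not a real key

model = APIModel.from_registry("grok-4")
print(model.name, model.api_base)  # grok-4-0709 https://api.x.ai/v1
```
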
src/lm_deluge/models/groq.py
@@ -41,10 +41,10 @@ GROQ_MODELS = {
     },
     "kimi-k2-groq": {
         "id": "kimi-k2-groq",
-        "name": "moonshotai/kimi-k2-instruct",
+        "name": "moonshotai/kimi-k2-instruct-0905",
         "api_base": "https://api.groq.com/openai/v1",
         "api_key_env_var": "GROQ_API_KEY",
-        "supports_json": False,
+        "supports_json": True,
         "api_spec": "openai",
     },
     "gpt-oss-120b-groq": {

src/lm_deluge/models/meta.py
@@ -16,8 +16,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-4-maverick": {
@@ -30,8 +28,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-3.3-70b": {
@@ -44,8 +40,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-3.3-8b": {
@@ -58,8 +52,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
 }