lm-deluge 0.0.56__tar.gz → 0.0.58__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- {lm_deluge-0.0.56/src/lm_deluge.egg-info → lm_deluge-0.0.58}/PKG-INFO +1 -1
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/pyproject.toml +1 -1
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/__init__.py +2 -1
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/anthropic.py +1 -1
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/client.py +34 -21
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/__init__.py +7 -7
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/anthropic.py +12 -20
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/bedrock.py +0 -14
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/cohere.py +0 -16
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/google.py +0 -20
- lm_deluge-0.0.58/src/lm_deluge/models/grok.py +82 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/groq.py +2 -2
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/meta.py +0 -8
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/openai.py +0 -34
- lm_deluge-0.0.58/src/lm_deluge/models/openrouter.py +64 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/together.py +0 -16
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/prompt.py +6 -7
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/tool.py +338 -18
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/tracker.py +11 -5
- {lm_deluge-0.0.56 → lm_deluge-0.0.58/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/SOURCES.txt +0 -2
- lm_deluge-0.0.56/src/lm_deluge/agent.py +0 -0
- lm_deluge-0.0.56/src/lm_deluge/gemini_limits.py +0 -65
- lm_deluge-0.0.56/src/lm_deluge/models/grok.py +0 -38
- lm_deluge-0.0.56/src/lm_deluge/models/openrouter.py +0 -1
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/LICENSE +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/README.md +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/setup.cfg +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/gemini.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.56 → lm_deluge-0.0.58}/tests/test_native_mcp_server.py +0 -0
src/lm_deluge/__init__.py

```diff
@@ -1,7 +1,7 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool, ToolParams

 # dotenv.load_dotenv() - don't do this, fucks with other packages

@@ -12,5 +12,6 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
+    "ToolParams",
     "File",
 ]
```
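The only change at the package root is that `ToolParams` is now exported alongside `Tool`. A quick sanity check of the new export (a sketch; the `ToolParams` constructor itself is not shown in this diff, so only the import is exercised):

```python
# Sketch: verify the new top-level export added in 0.0.58. Only the import is
# checked here; ToolParams' fields/constructor are not part of this diff.
import lm_deluge
from lm_deluge import Tool, ToolParams

assert "ToolParams" in lm_deluge.__all__
print(Tool, ToolParams)
```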
src/lm_deluge/api_requests/anthropic.py

```diff
@@ -72,7 +72,7 @@ def _build_anthropic_request(
         request_json["system"] = system_message

     # handle temp + top_p for opus 4.1/sonnet 4.5
-    if model.name
+    if "4-1" in model.name or "4-5" in model.name:
         if "temperature" in request_json and "top_p" in request_json:
             request_json.pop("top_p")
```
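The guard that previously matched a hard-coded model name (the removed line is truncated in this diff view) now applies to any model whose name contains `4-1` or `4-5`, i.e. Opus 4.1, Sonnet 4.5, and the new Haiku 4.5, which appears to be why `top_p` is dropped when both sampling knobs are set. A standalone re-implementation of the same logic for illustration:

```python
# Standalone sketch of the new guard in _build_anthropic_request
# (illustrative only; the real function builds the full request body).
def drop_conflicting_top_p(model_name: str, request_json: dict) -> dict:
    # Opus 4.1 / Sonnet 4.5 / Haiku 4.5 style names contain "4-1" or "4-5"
    if "4-1" in model_name or "4-5" in model_name:
        if "temperature" in request_json and "top_p" in request_json:
            request_json.pop("top_p")
    return request_json


print(drop_conflicting_top_p(
    "claude-sonnet-4-5-20250929",
    {"temperature": 0.7, "top_p": 0.9},
))  # {'temperature': 0.7}
```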
src/lm_deluge/client.py

```diff
@@ -369,7 +369,7 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
         use_responses_api: bool = ...,
-    ) -> list[APIResponse
+    ) -> list[APIResponse]: ...

     async def process_prompts_async(
         self,
@@ -380,7 +380,7 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
         use_responses_api: bool = False,
-    ) -> list[APIResponse
+    ) -> list[APIResponse] | list[str | None] | dict[str, int]:
         """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.

         This implementation creates all tasks upfront and waits for them to complete,
@@ -516,28 +516,40 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
         use_responses_api: bool = False,
-    ) -> APIResponse
+    ) -> APIResponse:
         task_id = self.start_nowait(
             prompt, tools=tools, cache=cache, use_responses_api=use_responses_api
         )
         return await self.wait_for(task_id)

-    async def wait_for(self, task_id: int) -> APIResponse
+    async def wait_for(self, task_id: int) -> APIResponse:
         task = self._tasks.get(task_id)
         if task:
             return await task
-
+        res = self._results.get(task_id)
+        if res:
+            return res
+        else:
+            return APIResponse(
+                id=-1,
+                model_internal="",
+                prompt=Conversation([]),
+                sampling_params=SamplingParams(),
+                status_code=500,
+                is_error=True,
+                error_message="Task not found",
+            )

     async def wait_for_all(
         self, task_ids: Sequence[int] | None = None
-    ) -> list[APIResponse
+    ) -> list[APIResponse]:
         if task_ids is None:
             task_ids = list(self._tasks.keys())
         return [await self.wait_for(tid) for tid in task_ids]

     async def as_completed(
         self, task_ids: Sequence[int] | None = None
-    ) -> AsyncGenerator[tuple[int, APIResponse
+    ) -> AsyncGenerator[tuple[int, APIResponse], None]:
         """Yield ``(task_id, result)`` pairs as tasks complete.

         Args:
@@ -561,7 +573,9 @@ class _LLMClient(BaseModel):
         for task in list(tasks_map.keys()):
             if task.done():
                 tid = tasks_map.pop(task)
-
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result

         while tasks_map:
             done, _ = await asyncio.wait(
@@ -569,7 +583,9 @@ class _LLMClient(BaseModel):
             )
             for task in done:
                 tid = tasks_map.pop(task)
-
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result

     async def stream(
         self,
@@ -618,23 +634,20 @@ class _LLMClient(BaseModel):
                 mcp_tools = await tool.to_tools()
                 expanded_tools.extend(mcp_tools)

-
+        response: APIResponse | None = None

         for _ in range(max_rounds):
-
-
+            response = await self.start(
+                conversation,
                 tools=tools,  # type: ignore
-                return_completions_only=False,
-                show_progress=show_progress,
             )

-
-            if last_response is None or last_response.content is None:
+            if response is None or response.content is None:
                 break

-            conversation = conversation.with_message(
+            conversation = conversation.with_message(response.content)

-            tool_calls =
+            tool_calls = response.content.tool_calls
             if not tool_calls:
                 break

@@ -657,12 +670,12 @@ class _LLMClient(BaseModel):
             if not isinstance(result, (str, dict, list)):
                 result = str(result)

-            conversation.
+            conversation.with_tool_result(call.id, result)  # type: ignore

-        if
+        if response is None:
             raise RuntimeError("model did not return a response")

-        return conversation,
+        return conversation, response

     def run_agent_loop_sync(
         self,
```
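Taken together, the client changes tighten the task-based API: the previously truncated return annotations are completed, `wait_for` now returns a synthetic 500 `APIResponse` with `error_message="Task not found"` instead of falling through, `as_completed` pulls finished results from `self._results`, and the agent loop threads a single `response` variable through each round and returns `(conversation, response)`. A minimal usage sketch of that surface (the `LLMClient` constructor arguments and the plain-string prompt form are assumptions not shown in this diff):

```python
# Sketch of the start_nowait / wait_for / as_completed flow touched above.
import asyncio
from lm_deluge import LLMClient


async def main() -> None:
    client = LLMClient("claude-4.5-haiku")  # assumed constructor form

    task_ids = [client.start_nowait(p) for p in ["2+2?", "Capital of France?"]]

    # results arrive in completion order, as (task_id, APIResponse) pairs
    async for tid, resp in client.as_completed(task_ids):
        print(tid, resp.is_error)

    # unknown task ids now produce an error APIResponse rather than None
    missing = await client.wait_for(999_999)
    print(missing.status_code, missing.error_message)  # 500 Task not found


asyncio.run(main())
```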
src/lm_deluge/models/__init__.py

```diff
@@ -38,9 +38,9 @@ class APIModel:
     supports_responses: bool = False
     reasoning_model: bool = False
     regions: list[str] | dict[str, int] = field(default_factory=list)
-    tokens_per_minute: int | None = None
-    requests_per_minute: int | None = None
-    gpus: list[str] | None = None
+    # tokens_per_minute: int | None = None
+    # requests_per_minute: int | None = None
+    # gpus: list[str] | None = None

     @classmethod
     def from_registry(cls, name: str):
@@ -97,8 +97,8 @@ def register_model(
     supports_responses: bool = False,
     reasoning_model: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
-    tokens_per_minute: int | None = None,
-    requests_per_minute: int | None = None,
+    # tokens_per_minute: int | None = None,
+    # requests_per_minute: int | None = None,
 ) -> APIModel:
     """Register a model configuration and return the created APIModel."""
     model = APIModel(
@@ -116,8 +116,8 @@ def register_model(
         supports_responses=supports_responses,
         reasoning_model=reasoning_model,
         regions=regions,
-        tokens_per_minute=tokens_per_minute,
-        requests_per_minute=requests_per_minute,
+        # tokens_per_minute=tokens_per_minute,
+        # requests_per_minute=requests_per_minute,
     )
     registry[model.id] = model
     return model
```
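With these fields commented out, `APIModel` and `register_model` no longer carry per-model `tokens_per_minute` / `requests_per_minute` / `gpus`; throttling presumably lives with the client and tracker configuration instead (tracker.py also changed in this release). A small check of the new shape, using the `claude-4.5-haiku` entry added further down (a sketch; the import path is inferred from the package layout):

```python
# Sketch: registry models no longer expose per-model rate limits in 0.0.58.
from lm_deluge.models import APIModel  # path inferred from src/lm_deluge/models/__init__.py

model = APIModel.from_registry("claude-4.5-haiku")
print(model.id, model.name)

# these attributes are commented out of the dataclass in this release
assert not hasattr(model, "tokens_per_minute")
assert not hasattr(model, "requests_per_minute")
```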
src/lm_deluge/models/anthropic.py

```diff
@@ -10,6 +10,18 @@ ANTHROPIC_MODELS = {
     # ░███
     # █████
     #
+    "claude-4.5-haiku": {
+        "id": "claude-4.5-haiku",
+        "name": "claude-haiku-4-5-20251001",
+        "api_base": "https://api.anthropic.com/v1",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+        "input_cost": 1.0,
+        "cached_input_cost": 0.10,
+        "cache_write_cost": 1.25,
+        "output_cost": 3.0,
+    },
     "claude-4.5-sonnet": {
         "id": "claude-4.5-sonnet",
         "name": "claude-sonnet-4-5-20250929",
@@ -21,8 +33,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-4.1-opus": {
         "id": "claude-4.1-opus",
@@ -35,8 +45,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 1.50,
         "cache_write_cost": 18.75,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-opus": {
@@ -50,8 +58,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 1.50,
         "cache_write_cost": 18.75,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-sonnet": {
@@ -65,8 +71,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.7-sonnet": {
         "id": "claude-3.7-sonnet",
@@ -79,8 +83,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-3.6-sonnet": {
@@ -94,8 +96,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.5-sonnet": {
         "id": "claude-3.5-sonnet",
@@ -108,8 +108,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3-opus": {
         "id": "claude-3-opus",
@@ -120,8 +118,6 @@ ANTHROPIC_MODELS = {
         "api_spec": "anthropic",
         "input_cost": 15.0,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.5-haiku": {
         "id": "claude-3.5-haiku",
@@ -134,8 +130,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.08,
         "cache_write_cost": 1.00,
         "output_cost": 4.00,
-        "requests_per_minute": 20_000,
-        "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
     "claude-3-haiku": {
         "id": "claude-3-haiku",
@@ -148,7 +142,5 @@ ANTHROPIC_MODELS = {
         "cache_write_cost": 0.30,
         "cached_input_cost": 0.03,
         "output_cost": 1.25,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
 }
```
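Besides dropping the per-model rate limits, this release adds a `claude-4.5-haiku` entry priced at $1.00/M input, $0.10/M cached input, $1.25/M cache write and $3.00/M output tokens. Rough cost arithmetic with those numbers (a sketch; how lm-deluge itself splits cached vs. uncached tokens is not shown here):

```python
# Back-of-the-envelope cost for claude-4.5-haiku using the registry prices
# above (USD per million tokens). Token counts are made up for illustration.
INPUT, CACHED_INPUT, OUTPUT = 1.0, 0.10, 3.0

prompt_tokens = 200_000
cached_tokens = 50_000       # subset of prompt_tokens served from cache
completion_tokens = 30_000

cost = (
    (prompt_tokens - cached_tokens) / 1e6 * INPUT
    + cached_tokens / 1e6 * CACHED_INPUT
    + completion_tokens / 1e6 * OUTPUT
)
print(f"${cost:.3f}")  # $0.245
```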
src/lm_deluge/models/bedrock.py

```diff
@@ -16,8 +16,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 0.25,
         "output_cost": 1.25,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 8_000_000,
     },
     "claude-3.5-haiku-bedrock": {
         "id": "claude-3.5-haiku-bedrock",
@@ -28,8 +26,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 0.25,
         "output_cost": 1.25,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 8_000_000,
     },
     "claude-3.5-sonnet-bedrock": {
         "id": "claude-3.5-sonnet-bedrock",
@@ -40,8 +36,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": False,
     },
     "claude-3.6-sonnet-bedrock": {
@@ -53,8 +47,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": False,
     },
     "claude-3.7-sonnet-bedrock": {
@@ -66,8 +58,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-sonnet-bedrock": {
@@ -79,8 +69,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-opus-bedrock": {
@@ -92,8 +80,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     # GPT-OSS on AWS Bedrock
```
src/lm_deluge/models/cohere.py

```diff
@@ -15,8 +15,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-expanse-32b": {
         "id": "aya-expanse-32b",
@@ -26,8 +24,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-vision-8b": {
         "id": "aya-vision-8b",
@@ -37,8 +33,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-vision-32b": {
         "id": "aya-vision-32b",
@@ -48,8 +42,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-a": {
         "id": "command-a",
@@ -59,8 +51,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r-7b": {
         "id": "command-r-cohere",
@@ -70,8 +60,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r": {
         "id": "command-r",
@@ -81,8 +69,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r-plus": {
         "id": "command-r-plus",
@@ -92,7 +78,5 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
 }
```
src/lm_deluge/models/google.py

```diff
@@ -20,8 +20,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.0-flash-lite-compat": {
@@ -34,8 +32,6 @@ GOOGLE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.075,
         "output_cost": 0.3,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.5-pro-compat": {
@@ -49,8 +45,6 @@ GOOGLE_MODELS = {
         "input_cost": 1.25,
         "cached_input_cost": 0.31,
         "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-compat": {
@@ -64,8 +58,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.3,
         "cached_input_cost": 0.075,
         "output_cost": 2.5,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-lite-compat": {
@@ -79,8 +71,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     # Native Gemini API versions with file support
@@ -95,8 +85,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.0-flash-lite": {
@@ -109,8 +97,6 @@ GOOGLE_MODELS = {
         "api_spec": "gemini",
         "input_cost": 0.075,
         "output_cost": 0.3,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.5-pro": {
@@ -124,8 +110,6 @@ GOOGLE_MODELS = {
         "input_cost": 1.25,
         "cached_input_cost": 0.31,
         "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash": {
@@ -139,8 +123,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.3,
         "cached_input_cost": 0.075,
         "output_cost": 2.5,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-lite": {
@@ -154,8 +136,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
 }
```
src/lm_deluge/models/grok.py (new file)

```diff
@@ -0,0 +1,82 @@
+XAI_MODELS = {
+    # .d8888b. 888
+    # d88P Y88b 888
+    # 888 888 888
+    # 888 888d888 .d88b. 888 888
+    # 888 88888 888P" d88""88b 888 .88P
+    # 888 888 888 888 888 888888K
+    # Y88b d88P 888 Y88..88P 888 "88b
+    # "Y8888P88 888 "Y88P" 888 888
+    "grok-code-fast-1": {
+        "id": "grok-code-fast-1",
+        "name": "grok-code-fast-1",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
+    "grok-4-fast-reasoning": {
+        "id": "grok-4-fast-reasoning",
+        "name": "grok-4-fast-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 0.5,
+        "reasoning_model": False,
+    },
+    "grok-4-fast-non-reasoning": {
+        "id": "grok-4-fast-non-reasoning",
+        "name": "grok-4-fast-non-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 0.5,
+        "reasoning_model": False,
+    },
+    "grok-4": {
+        "id": "grok-4",
+        "name": "grok-4-0709",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 2.0,
+        "output_cost": 8.0,
+        "reasoning_model": False,
+    },
+    "grok-3": {
+        "id": "grok-3",
+        "name": "grok-3-latest",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 2.0,
+        "output_cost": 8.0,
+        "reasoning_model": False,
+    },
+    "grok-3-mini": {
+        "id": "grok-3-mini",
+        "name": "grok-3-mini-latest",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 2.0,
+        "output_cost": 8.0,
+        "reasoning_model": True,
+    },
+}
```
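models/grok.py is rewritten from scratch (+82 lines, replacing the old 38-line file): every xAI entry is OpenAI-compatible (`"api_spec": "openai"`), reads its key from `GROK_API_KEY`, and the registry now covers grok-code-fast-1, the grok-4-fast pair, grok-4, grok-3 and grok-3-mini. Assuming they plug into the client like any other registry model, usage would look roughly like this (the constructor and plain-string prompt form are assumptions):

```python
# Sketch: calling one of the new xAI registry entries. Requires GROK_API_KEY
# in the environment; the LLMClient constructor form is assumed.
import asyncio
from lm_deluge import LLMClient


async def main() -> None:
    client = LLMClient("grok-4-fast-reasoning")
    resp = await client.start("Reply with one word: ping or pong?")
    print(resp.is_error, resp.content)


asyncio.run(main())
```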
src/lm_deluge/models/groq.py

```diff
@@ -41,10 +41,10 @@ GROQ_MODELS = {
     },
     "kimi-k2-groq": {
         "id": "kimi-k2-groq",
-        "name": "moonshotai/kimi-k2-instruct",
+        "name": "moonshotai/kimi-k2-instruct-0905",
         "api_base": "https://api.groq.com/openai/v1",
         "api_key_env_var": "GROQ_API_KEY",
-        "supports_json":
+        "supports_json": True,
         "api_spec": "openai",
     },
     "gpt-oss-120b-groq": {
```
src/lm_deluge/models/meta.py

```diff
@@ -16,8 +16,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-4-maverick": {
@@ -30,8 +28,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-3.3-70b": {
@@ -44,8 +40,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-3.3-8b": {
@@ -58,8 +52,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
 }
```