lm-deluge 0.0.57__py3-none-any.whl → 0.0.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lm-deluge has been flagged as possibly problematic.
- lm_deluge/api_requests/anthropic.py +1 -1
- lm_deluge/client.py +25 -9
- lm_deluge/models/__init__.py +7 -7
- lm_deluge/models/anthropic.py +12 -20
- lm_deluge/models/bedrock.py +0 -14
- lm_deluge/models/cohere.py +0 -16
- lm_deluge/models/google.py +0 -20
- lm_deluge/models/grok.py +48 -4
- lm_deluge/models/groq.py +2 -2
- lm_deluge/models/meta.py +0 -8
- lm_deluge/models/openai.py +0 -34
- lm_deluge/models/openrouter.py +64 -1
- lm_deluge/models/together.py +0 -16
- {lm_deluge-0.0.57.dist-info → lm_deluge-0.0.58.dist-info}/METADATA +1 -1
- {lm_deluge-0.0.57.dist-info → lm_deluge-0.0.58.dist-info}/RECORD +18 -18
- {lm_deluge-0.0.57.dist-info → lm_deluge-0.0.58.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.57.dist-info → lm_deluge-0.0.58.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.57.dist-info → lm_deluge-0.0.58.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/anthropic.py
CHANGED

@@ -72,7 +72,7 @@ def _build_anthropic_request(
         request_json["system"] = system_message

     # handle temp + top_p for opus 4.1/sonnet 4.5
-    if model.name
+    if "4-1" in model.name or "4-5" in model.name:
         if "temperature" in request_json and "top_p" in request_json:
             request_json.pop("top_p")

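The rewritten guard only drops top_p when both sampling parameters are present and the model's version string contains "4-1" or "4-5" (the Opus 4.1 / Sonnet 4.5 snapshots), which reject requests that set temperature and top_p together. A minimal sketch of the behavior, with hypothetical names standing in for the function's locals:

# Illustrative only: mirrors the guard above; `model_name` and
# `request_json` are stand-ins, not lm_deluge's actual API.
def drop_top_p_if_conflicting(model_name: str, request_json: dict) -> dict:
    if "4-1" in model_name or "4-5" in model_name:
        if "temperature" in request_json and "top_p" in request_json:
            request_json.pop("top_p")  # prefer temperature, discard top_p
    return request_json

assert "top_p" not in drop_top_p_if_conflicting(
    "claude-sonnet-4-5-20250929", {"temperature": 0.7, "top_p": 0.9}
)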
lm_deluge/client.py
CHANGED

@@ -369,7 +369,7 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
         use_responses_api: bool = ...,
-    ) -> list[APIResponse
+    ) -> list[APIResponse]: ...

     async def process_prompts_async(
         self,
@@ -380,7 +380,7 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
         use_responses_api: bool = False,
-    ) -> list[APIResponse
+    ) -> list[APIResponse] | list[str | None] | dict[str, int]:
         """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.

         This implementation creates all tasks upfront and waits for them to complete,
@@ -516,28 +516,40 @@ class _LLMClient(BaseModel):
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
         use_responses_api: bool = False,
-    ) -> APIResponse
+    ) -> APIResponse:
         task_id = self.start_nowait(
             prompt, tools=tools, cache=cache, use_responses_api=use_responses_api
         )
         return await self.wait_for(task_id)

-    async def wait_for(self, task_id: int) -> APIResponse
+    async def wait_for(self, task_id: int) -> APIResponse:
         task = self._tasks.get(task_id)
         if task:
             return await task
-
+        res = self._results.get(task_id)
+        if res:
+            return res
+        else:
+            return APIResponse(
+                id=-1,
+                model_internal="",
+                prompt=Conversation([]),
+                sampling_params=SamplingParams(),
+                status_code=500,
+                is_error=True,
+                error_message="Task not found",
+            )

     async def wait_for_all(
         self, task_ids: Sequence[int] | None = None
-    ) -> list[APIResponse
+    ) -> list[APIResponse]:
         if task_ids is None:
             task_ids = list(self._tasks.keys())
         return [await self.wait_for(tid) for tid in task_ids]

     async def as_completed(
         self, task_ids: Sequence[int] | None = None
-    ) -> AsyncGenerator[tuple[int, APIResponse
+    ) -> AsyncGenerator[tuple[int, APIResponse], None]:
         """Yield ``(task_id, result)`` pairs as tasks complete.

         Args:
@@ -561,7 +573,9 @@ class _LLMClient(BaseModel):
         for task in list(tasks_map.keys()):
             if task.done():
                 tid = tasks_map.pop(task)
-
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result

         while tasks_map:
             done, _ = await asyncio.wait(
@@ -569,7 +583,9 @@ class _LLMClient(BaseModel):
             )
             for task in done:
                 tid = tasks_map.pop(task)
-
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result

     async def stream(
         self,
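The net effect of these changes is that wait_for (and everything built on it, including as_completed) now always yields an APIResponse: finished tasks are read back from the _results map, and an unknown task id produces a synthetic status_code=500 response rather than None. A hedged usage sketch, assuming the public client exposes the same start_nowait/as_completed surface as _LLMClient and that the constructor takes a model name:

# Sketch only; the import path and constructor are assumptions.
import asyncio
from lm_deluge import LLMClient

async def main():
    client = LLMClient("gpt-4.1-mini")  # assumed constructor
    ids = [client.start_nowait(p) for p in ("first prompt", "second prompt")]
    async for task_id, resp in client.as_completed(ids):
        # resp is always an APIResponse now, never None.
        print(task_id, resp.status_code, resp.is_error)

asyncio.run(main())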
lm_deluge/models/__init__.py
CHANGED

@@ -38,9 +38,9 @@ class APIModel:
     supports_responses: bool = False
     reasoning_model: bool = False
     regions: list[str] | dict[str, int] = field(default_factory=list)
-    tokens_per_minute: int | None = None
-    requests_per_minute: int | None = None
-    gpus: list[str] | None = None
+    # tokens_per_minute: int | None = None
+    # requests_per_minute: int | None = None
+    # gpus: list[str] | None = None

     @classmethod
     def from_registry(cls, name: str):
@@ -97,8 +97,8 @@ def register_model(
     supports_responses: bool = False,
     reasoning_model: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
-    tokens_per_minute: int | None = None,
-    requests_per_minute: int | None = None,
+    # tokens_per_minute: int | None = None,
+    # requests_per_minute: int | None = None,
 ) -> APIModel:
     """Register a model configuration and return the created APIModel."""
     model = APIModel(
@@ -116,8 +116,8 @@ def register_model(
         supports_responses=supports_responses,
         reasoning_model=reasoning_model,
         regions=regions,
-        tokens_per_minute=tokens_per_minute,
-        requests_per_minute=requests_per_minute,
+        # tokens_per_minute=tokens_per_minute,
+        # requests_per_minute=requests_per_minute,
     )
     registry[model.id] = model
     return model
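With tokens_per_minute, requests_per_minute, and gpus commented out, APIModel instances and register_model calls no longer carry per-model rate limits, and passing either keyword would now raise a TypeError. A sketch of a registration call after this change, using only field names visible elsewhere in this diff (treat the exact required-argument set as an assumption):

from lm_deluge.models import register_model  # assumed import path

model = register_model(
    id="my-model",                      # registry key
    name="vendor/my-model",             # wire name sent to the API
    api_base="https://api.example.com/v1",
    api_key_env_var="MY_API_KEY",
    api_spec="openai",
    input_cost=0.5,
    output_cost=1.5,
    # tokens_per_minute=..., requests_per_minute=...  # no longer accepted
)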
lm_deluge/models/anthropic.py
CHANGED

@@ -10,6 +10,18 @@ ANTHROPIC_MODELS = {
     # ░███
     # █████
     #
+    "claude-4.5-haiku": {
+        "id": "claude-4.5-haiku",
+        "name": "claude-haiku-4-5-20251001",
+        "api_base": "https://api.anthropic.com/v1",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+        "input_cost": 1.0,
+        "cached_input_cost": 0.10,
+        "cache_write_cost": 1.25,
+        "output_cost": 3.0,
+    },
     "claude-4.5-sonnet": {
         "id": "claude-4.5-sonnet",
         "name": "claude-sonnet-4-5-20250929",
@@ -21,8 +33,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-4.1-opus": {
         "id": "claude-4.1-opus",
@@ -35,8 +45,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 1.50,
         "cache_write_cost": 18.75,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-opus": {
@@ -50,8 +58,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 1.50,
         "cache_write_cost": 18.75,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-sonnet": {
@@ -65,8 +71,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.7-sonnet": {
         "id": "claude-3.7-sonnet",
@@ -79,8 +83,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-3.6-sonnet": {
@@ -94,8 +96,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.5-sonnet": {
         "id": "claude-3.5-sonnet",
@@ -108,8 +108,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3-opus": {
         "id": "claude-3-opus",
@@ -120,8 +118,6 @@ ANTHROPIC_MODELS = {
         "api_spec": "anthropic",
         "input_cost": 15.0,
         "output_cost": 75.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
     },
     "claude-3.5-haiku": {
         "id": "claude-3.5-haiku",
@@ -134,8 +130,6 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.08,
         "cache_write_cost": 1.00,
         "output_cost": 4.00,
-        "requests_per_minute": 20_000,
-        "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
     "claude-3-haiku": {
         "id": "claude-3-haiku",
@@ -148,7 +142,5 @@ ANTHROPIC_MODELS = {
         "cache_write_cost": 0.30,
         "cached_input_cost": 0.03,
         "output_cost": 1.25,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
 }
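The four cost fields on each entry feed per-request cost accounting. Assuming the figures are USD per million tokens (the convention that matches Anthropic's published pricing for these models), a worked example for the new claude-4.5-haiku entry:

# Illustrative cost math; assumes prices are USD per 1M tokens.
entry = {
    "input_cost": 1.0,          # fresh input tokens
    "cached_input_cost": 0.10,  # cache-read tokens
    "cache_write_cost": 1.25,   # cache-write tokens
    "output_cost": 3.0,         # output tokens
}

def request_cost(fresh, cached, written, out, e=entry):
    return (fresh * e["input_cost"] + cached * e["cached_input_cost"]
            + written * e["cache_write_cost"] + out * e["output_cost"]) / 1_000_000

# 10k fresh input + 50k cached input + 2k output tokens:
print(f"${request_cost(10_000, 50_000, 0, 2_000):.4f}")  # -> $0.0210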
lm_deluge/models/bedrock.py
CHANGED

@@ -16,8 +16,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 0.25,
         "output_cost": 1.25,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 8_000_000,
     },
     "claude-3.5-haiku-bedrock": {
         "id": "claude-3.5-haiku-bedrock",
@@ -28,8 +26,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 0.25,
         "output_cost": 1.25,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 8_000_000,
     },
     "claude-3.5-sonnet-bedrock": {
         "id": "claude-3.5-sonnet-bedrock",
@@ -40,8 +36,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": False,
     },
     "claude-3.6-sonnet-bedrock": {
@@ -53,8 +47,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": False,
     },
     "claude-3.7-sonnet-bedrock": {
@@ -66,8 +58,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-sonnet-bedrock": {
@@ -79,8 +69,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     "claude-4-opus-bedrock": {
@@ -92,8 +80,6 @@ BEDROCK_MODELS = {
         "api_spec": "bedrock",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 4_000,
-        "tokens_per_minute": 400_000,
         "reasoning_model": True,
     },
     # GPT-OSS on AWS Bedrock
lm_deluge/models/cohere.py
CHANGED

@@ -15,8 +15,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-expanse-32b": {
         "id": "aya-expanse-32b",
@@ -26,8 +24,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-vision-8b": {
         "id": "aya-vision-8b",
@@ -37,8 +33,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "aya-vision-32b": {
         "id": "aya-vision-32b",
@@ -48,8 +42,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-a": {
         "id": "command-a",
@@ -59,8 +51,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r-7b": {
         "id": "command-r-cohere",
@@ -70,8 +60,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r": {
         "id": "command-r",
@@ -81,8 +69,6 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
     "command-r-plus": {
         "id": "command-r-plus",
@@ -92,7 +78,5 @@ COHERE_MODELS = {
         "api_spec": "openai",
         "input_cost": 3.0,
         "output_cost": 15.0,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": None,
     },
 }
lm_deluge/models/google.py
CHANGED

@@ -20,8 +20,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.0-flash-lite-compat": {
@@ -34,8 +32,6 @@ GOOGLE_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.075,
         "output_cost": 0.3,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.5-pro-compat": {
@@ -49,8 +45,6 @@ GOOGLE_MODELS = {
         "input_cost": 1.25,
         "cached_input_cost": 0.31,
         "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-compat": {
@@ -64,8 +58,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.3,
         "cached_input_cost": 0.075,
         "output_cost": 2.5,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-lite-compat": {
@@ -79,8 +71,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     # Native Gemini API versions with file support
@@ -95,8 +85,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.0-flash-lite": {
@@ -109,8 +97,6 @@ GOOGLE_MODELS = {
         "api_spec": "gemini",
         "input_cost": 0.075,
         "output_cost": 0.3,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gemini-2.5-pro": {
@@ -124,8 +110,6 @@ GOOGLE_MODELS = {
         "input_cost": 1.25,
         "cached_input_cost": 0.31,
         "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash": {
@@ -139,8 +123,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.3,
         "cached_input_cost": 0.075,
         "output_cost": 2.5,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gemini-2.5-flash-lite": {
@@ -154,8 +136,6 @@ GOOGLE_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
 }
lm_deluge/models/grok.py
CHANGED

@@ -7,6 +7,54 @@ XAI_MODELS = {
     # 888 888 888 888 888 888888K
     # Y88b d88P 888 Y88..88P 888 "88b
     # "Y8888P88 888 "Y88P" 888 888
+    "grok-code-fast-1": {
+        "id": "grok-code-fast-1",
+        "name": "grok-code-fast-1",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 1.5,
+        "reasoning_model": False,
+    },
+    "grok-4-fast-reasoning": {
+        "id": "grok-4-fast-reasoning",
+        "name": "grok-4-fast-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 0.5,
+        "reasoning_model": False,
+    },
+    "grok-4-fast-non-reasoning": {
+        "id": "grok-4-fast-non-reasoning",
+        "name": "grok-4-fast-non-reasoning",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 0.2,
+        "output_cost": 0.5,
+        "reasoning_model": False,
+    },
+    "grok-4": {
+        "id": "grok-4",
+        "name": "grok-4-0709",
+        "api_base": "https://api.x.ai/v1",
+        "api_key_env_var": "GROK_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": True,
+        "api_spec": "openai",
+        "input_cost": 2.0,
+        "output_cost": 8.0,
+        "reasoning_model": False,
+    },
     "grok-3": {
         "id": "grok-3",
         "name": "grok-3-latest",
@@ -17,8 +65,6 @@ XAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 2.0,
         "output_cost": 8.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "grok-3-mini": {
@@ -31,8 +77,6 @@ XAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 2.0,
         "output_cost": 8.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
 }
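Note the distinction these new entries make between "id" (the registry key users pass) and "name" (the model string sent over the wire): "grok-4" resolves to the pinned "grok-4-0709" snapshot. A sketch using the APIModel.from_registry classmethod shown earlier in this diff (import path assumed):

from lm_deluge.models import APIModel  # assumed import path

m = APIModel.from_registry("grok-4")
print(m.name)      # "grok-4-0709" -- pinned snapshot sent to the API
print(m.api_base)  # "https://api.x.ai/v1"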
lm_deluge/models/groq.py
CHANGED

@@ -41,10 +41,10 @@ GROQ_MODELS = {
     },
     "kimi-k2-groq": {
         "id": "kimi-k2-groq",
-        "name": "moonshotai/kimi-k2-instruct",
+        "name": "moonshotai/kimi-k2-instruct-0905",
         "api_base": "https://api.groq.com/openai/v1",
         "api_key_env_var": "GROQ_API_KEY",
-        "supports_json":
+        "supports_json": True,
         "api_spec": "openai",
     },
     "gpt-oss-120b-groq": {
lm_deluge/models/meta.py
CHANGED

@@ -16,8 +16,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-4-maverick": {
@@ -30,8 +28,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-3.3-70b": {
@@ -44,8 +40,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
     "llama-3.3-8b": {
@@ -58,8 +52,6 @@ META_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.0,
         "output_cost": 0.0,
-        "requests_per_minute": 3_000,
-        "tokens_per_minute": 1_000_000,
         "reasoning_model": False,
     },
 }
lm_deluge/models/openai.py
CHANGED

@@ -77,8 +77,6 @@ OPENAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 3.0,
         "output_cost": 12.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "o3": {
@@ -93,8 +91,6 @@ OPENAI_MODELS = {
         "input_cost": 2.0,
         "cached_input_cost": 0.50,
         "output_cost": 8.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "o4-mini": {
@@ -109,8 +105,6 @@ OPENAI_MODELS = {
         "input_cost": 1.1,
         "cached_input_cost": 0.275,
         "output_cost": 4.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gpt-4.1": {
@@ -125,8 +119,6 @@ OPENAI_MODELS = {
         "input_cost": 2.0,
         "cached_input_cost": 0.50,
         "output_cost": 8.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gpt-4.1-mini": {
@@ -141,8 +133,6 @@ OPENAI_MODELS = {
         "input_cost": 0.4,
         "cached_input_cost": 0.10,
         "output_cost": 1.6,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gpt-4.1-nano": {
@@ -157,8 +147,6 @@ OPENAI_MODELS = {
         "input_cost": 0.1,
         "cached_input_cost": 0.025,
         "output_cost": 0.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "gpt-4.5": {
@@ -172,8 +160,6 @@ OPENAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 75.0,
         "output_cost": 150.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": False,
     },
     "o3-mini": {
@@ -188,8 +174,6 @@ OPENAI_MODELS = {
         "input_cost": 1.1,
         "cached_input_cost": 0.55,
         "output_cost": 4.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "o1": {
@@ -204,8 +188,6 @@ OPENAI_MODELS = {
         "input_cost": 15.0,
         "cached_input_cost": 7.50,
         "output_cost": 60.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "o1-preview": {
@@ -219,8 +201,6 @@ OPENAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 15.0,
         "output_cost": 60.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "o1-mini": {
@@ -235,8 +215,6 @@ OPENAI_MODELS = {
         "input_cost": 1.1,
         "cached_input_cost": 0.55,
         "output_cost": 4.4,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
     "gpt-4o": {
@@ -251,8 +229,6 @@ OPENAI_MODELS = {
         "input_cost": 2.50,
         "cached_input_cost": 1.25,
         "output_cost": 10.0,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": 30_000_000,
     },
     "gpt-4o-mini": {
         "id": "gpt-4o-mini",
@@ -266,8 +242,6 @@ OPENAI_MODELS = {
         "input_cost": 0.15,
         "cached_input_cost": 0.075,
         "output_cost": 0.6,
-        "requests_per_minute": 60_000,
-        "tokens_per_minute": 250_000_000,
     },
     "gpt-3.5-turbo": {
         "id": "gpt-3.5-turbo",
@@ -280,8 +254,6 @@ OPENAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.5,
         "output_cost": 1.5,
-        "requests_per_minute": 40_000,
-        "tokens_per_minute": 75_000_000,
     },
     "gpt-4-turbo": {
         "id": "gpt-4-turbo",
@@ -294,8 +266,6 @@ OPENAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 10.0,
         "output_cost": 30.0,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": 1_500_000,
     },
     "gpt-4": {
         "id": "gpt-4",
@@ -308,8 +278,6 @@ OPENAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 30.0,
         "output_cost": 60.0,
-        "requests_per_minute": 10_000,
-        "tokens_per_minute": 300_000,
     },
     "gpt-4-32k": {
         "id": "gpt-4-32k",
@@ -322,7 +290,5 @@ OPENAI_MODELS = {
         "api_spec": "openai",
         "input_cost": 60.0,
         "output_cost": 120.0,
-        "requests_per_minute": 1_000,
-        "tokens_per_minute": 150_000,
     },
 }
lm_deluge/models/openrouter.py
CHANGED

@@ -1 +1,64 @@
-OPENROUTER_MODELS = {
+OPENROUTER_MODELS = {
+    "glm-4.6-openrouter": {
+        "id": "glm-4.6-openrouter",
+        "name": "z-ai/glm-4.6",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.6,
+        "cached_input_cost": 0.11,
+        "cache_write_cost": 0.6,
+        "output_cost": 2.20,
+    },
+    "deepseek-r1-openrouter": {
+        "id": "deepseek-r1-openrouter",
+        "name": "deepseek/deepseek-r1-0528",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.40,
+        "cached_input_cost": 0.40,
+        "cache_write_cost": 0.40,
+        "output_cost": 1.75,
+    },
+    "deepseek-3.1-openrouter": {
+        "id": "deepseek-3.1-openrouter",
+        "name": "deepseek/deepseek-v3.1-terminus",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.23,
+        "cached_input_cost": 0.23,
+        "cache_write_cost": 0.23,
+        "output_cost": 0.9,
+    },
+    "deepseek-3.2-openrouter": {
+        "id": "deepseek-3.2-openrouter",
+        "name": "deepseek/deepseek-v3.2-exp",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.27,
+        "cached_input_cost": 0.27,
+        "cache_write_cost": 0.27,
+        "output_cost": 0.4,
+    },
+    # "gpt-oss-20b-openrouter": {},
+    # "gpt-oss-120b-openrouter": {},
+    "kimi-k2-openrouter": {
+        "id": "kimi-k2-openrouter",
+        "name": "z-ai/glm-4.6",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.6,
+        "cached_input_cost": 0.11,
+        "cache_write_cost": 0.6,
+        "output_cost": 2.20,
+    },
+}
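Every OpenRouter entry uses the OpenAI-compatible spec, so resolving one into a request amounts to combining the entry's api_base, key env var, and wire name. A hedged sketch (a plain HTTP request shape for illustration, not lm_deluge's internal request builder):

import os

entry = {  # copied from the glm-4.6 entry above
    "name": "z-ai/glm-4.6",
    "api_base": "https://openrouter.ai/api/v1",
    "api_key_env_var": "OPENROUTER_API_KEY",
}
url = f"{entry['api_base']}/chat/completions"
headers = {"Authorization": f"Bearer {os.environ.get(entry['api_key_env_var'], '')}"}
payload = {"model": entry["name"], "messages": [{"role": "user", "content": "hi"}]}
print(url, payload["model"])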
lm_deluge/models/together.py
CHANGED

@@ -20,8 +20,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 3.0,
         "output_cost": 7.0,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
     },
     "deepseek-v3-together": {
         "id": "deepseek-v3-together",
@@ -32,8 +30,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 1.25,
         "output_cost": 1.25,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
     },
     "qwen-3-235b-together": {
         "id": "qwen-3-235b-together",
@@ -44,8 +40,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.2,
         "output_cost": 0.6,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
     },
     "qwen-2.5-vl-together": {
         "id": "qwen-2.5-vl-together",
@@ -56,8 +50,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 1.95,
         "output_cost": 8.0,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
     },
     "llama-4-maverick-together": {
         "id": "llama-4-maverick-together",
@@ -68,8 +60,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.27,
         "output_cost": 0.85,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
     },
     "llama-4-scout-together": {
         "id": "llama-4-scout-together",
@@ -80,8 +70,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.18,
         "output_cost": 0.59,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
     },
     "gpt-oss-120b-together": {
         "id": "gpt-oss-120b-together",
@@ -92,8 +80,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.18,
         "output_cost": 0.59,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
         "reasoning_model": True,
     },
     "gpt-oss-20b-together": {
@@ -105,8 +91,6 @@ TOGETHER_MODELS = {
         "api_spec": "openai",
         "input_cost": 0.18,
         "output_cost": 0.59,
-        "requests_per_minute": None,
-        "tokens_per_minute": None,
         "reasoning_model": True,
     },
 }
{lm_deluge-0.0.57.dist-info → lm_deluge-0.0.58.dist-info}/RECORD
CHANGED

@@ -2,7 +2,7 @@ lm_deluge/__init__.py,sha256=LKKIcqQoQyDpTck6fnB7iAs75BnfNNa3Bj5Nz7KU4Hk,376
 lm_deluge/batches.py,sha256=rQocJLyIs3Ko_nRdAE9jT__5cKWYxiIRAH_Lw3L0E1k,24653
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
-lm_deluge/client.py,sha256=
+lm_deluge/client.py,sha256=1ZxQAWkmtz-zhW4E8rfU2V4BfzvqGsKhvqz_CB63-lc,32894
 lm_deluge/config.py,sha256=H1tQyJDNHGFuwxqQNL5Z-CjWAC0luHSBA3iY_pxmACM,932
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
@@ -15,7 +15,7 @@ lm_deluge/tool.py,sha256=eZpzgkSIlGD7KdZQwzLF-UdyRJpRnNNXpceGJrNhRrE,26421
 lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
 lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
 lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-lm_deluge/api_requests/anthropic.py,sha256=
+lm_deluge/api_requests/anthropic.py,sha256=7tTb_NMPodDHrCzakrLd9LyXuLqeTQyAGU-FvMoV3gI,8437
 lm_deluge/api_requests/base.py,sha256=1et-5SdRqfnvXZT3b9fBEx0vvbCwbVunHBWtQr7Wurg,5878
 lm_deluge/api_requests/bedrock.py,sha256=GmVxXz3ERAeQ7e52Nlztt81O4H9eJOQeOnS6b65vjm4,15453
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
@@ -41,21 +41,21 @@ lm_deluge/llm_tools/locate.py,sha256=lYNbKTmy9dTvj0lEQkOQ7yrxyqsgYzjD0C_byJKI_4w
 lm_deluge/llm_tools/ocr.py,sha256=7fDlvs6uUOvbxMasvGGNJx5Fj6biM6z3lijKZaGN26k,23
 lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
 lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
-lm_deluge/models/__init__.py,sha256=
-lm_deluge/models/anthropic.py,sha256=
-lm_deluge/models/bedrock.py,sha256=
+lm_deluge/models/__init__.py,sha256=7HNEnpxpEguZYjcudY_9oJ79hOOLo0oNUvG-kwkEpv4,4539
+lm_deluge/models/anthropic.py,sha256=5j75sB40yZzT1wwKC7Dh0f2Y2cXnp8yxHuXW63PCuns,6285
+lm_deluge/models/bedrock.py,sha256=g1PbfceSRH2lWST3ja0mUlF3oTq4e4T-si6RMe7qXgg,4888
 lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
-lm_deluge/models/cohere.py,sha256=
+lm_deluge/models/cohere.py,sha256=iXjYtM6jy_YL73Op8OfNsrMNopwae9y-Sw-4vF9cEBw,3406
 lm_deluge/models/deepseek.py,sha256=6_jDEprNNYis5I5MDQNloRes9h1P6pMYHXxOd2UZMgg,941
 lm_deluge/models/fireworks.py,sha256=yvt2Ggzye4aUqCqY74ta67Vu7FrQaLFjdFtN4P7D-dc,638
-lm_deluge/models/google.py,sha256=
-lm_deluge/models/grok.py,sha256=
-lm_deluge/models/groq.py,sha256=
-lm_deluge/models/meta.py,sha256=
+lm_deluge/models/google.py,sha256=Hr2MolQoaeY85pKCGO7k7OH_1nQJdrwMgrJbfz5bI8w,5387
+lm_deluge/models/grok.py,sha256=TDzr8yfTaHbdJhwMA-Du6L-efaKFJhjTQViuVElCCHI,2566
+lm_deluge/models/groq.py,sha256=Mi5WE1xOBGoZlymD0UN6kzhH_NOmfJYU4N2l-TO0Z8Q,2552
+lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
 lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
-lm_deluge/models/openai.py,sha256=
-lm_deluge/models/openrouter.py,sha256=
-lm_deluge/models/together.py,sha256=
+lm_deluge/models/openai.py,sha256=HC_oNLmKkmShkcfeUgyhesACtXGg__I2WiIIDrN-X84,10176
+lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
+lm_deluge/models/together.py,sha256=AjKhPsazqBgqyLwHkNQW07COM1n_oSrYQRp2BFVvn9o,4381
 lm_deluge/presets/cerebras.py,sha256=MDkqj15qQRrj8wxSCDNNe_Cs7h1WN1UjV6lTmSY1olQ,479
 lm_deluge/presets/meta.py,sha256=QrreLAVgYS6VIC_NQth1vgGAYuxY38jFQQZSe6ot7C8,364
 lm_deluge/util/harmony.py,sha256=XBfJck6q-5HbOqMhEjdfy1i17i0QtpHG8ruXV4EsHl0,2731
@@ -64,8 +64,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
+lm_deluge-0.0.58.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.58.dist-info/METADATA,sha256=jyhXeGVPAMMYBGm3omp6MKZfQGlRX-ow_9fI58ZZNGg,13443
+lm_deluge-0.0.58.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.58.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.58.dist-info/RECORD,,
File without changes
|
|
File without changes
|
|
File without changes
|