lm-deluge 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +0 -24
- lm_deluge/api_requests/anthropic.py +25 -5
- lm_deluge/api_requests/base.py +37 -0
- lm_deluge/api_requests/bedrock.py +23 -2
- lm_deluge/api_requests/gemini.py +36 -10
- lm_deluge/api_requests/openai.py +31 -4
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +27 -1
- lm_deluge/models/__init__.py +2 -0
- lm_deluge/models/anthropic.py +12 -12
- lm_deluge/models/google.py +13 -0
- lm_deluge/models/minimax.py +9 -1
- lm_deluge/models/openrouter.py +48 -0
- lm_deluge/models/zai.py +50 -1
- lm_deluge/pipelines/gepa/docs/samples.py +19 -10
- lm_deluge/prompt.py +333 -68
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/skills/anthropic.py +0 -0
- lm_deluge/skills/compat.py +0 -0
- lm_deluge/tool/__init__.py +13 -1
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +4 -3
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/RECORD +39 -24
- lm_deluge/mock_openai.py +0 -643
- lm_deluge/tool/prefab/sandbox.py +0 -1621
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py
CHANGED
@@ -3,19 +3,6 @@ from .file import File
 from .prompt import Conversation, Message
 from .tool import Tool
 
-try:
-    from .mock_openai import (  # noqa
-        APIError,
-        APITimeoutError,
-        BadRequestError,
-        MockAsyncOpenAI,
-        RateLimitError,
-    )
-
-    _has_openai = True
-except ImportError:
-    _has_openai = False
-
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
 __all__ = [
@@ -27,14 +14,3 @@ __all__ = [
     "Tool",
     "File",
 ]
-
-if _has_openai:
-    __all__.extend(
-        [
-            "MockAsyncOpenAI",
-            "APIError",
-            "APITimeoutError",
-            "BadRequestError",
-            "RateLimitError",
-        ]
-    )
lm_deluge/api_requests/anthropic.py
CHANGED
@@ -6,6 +6,7 @@ from aiohttp import ClientResponse
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
@@ -250,8 +251,28 @@ class AnthropicRequest(APIRequestBase):
             if item["type"] == "text":
                 parts.append(Text(item["text"]))
             elif item["type"] == "thinking":
-
-
+                thinking_content = item.get("thinking", "")
+                thinking = thinking_content
+                signature = item.get("signature")
+                parts.append(
+                    Thinking(
+                        thinking_content,
+                        raw_payload=item,
+                        thought_signature=ThoughtSignature(
+                            signature,
+                            provider="anthropic",
+                        )
+                        if signature is not None
+                        else None,
+                    )
+                )
+            elif item["type"] == "redacted_thinking":
+                parts.append(
+                    Thinking(
+                        item.get("data", ""),
+                        raw_payload=item,
+                    )
+                )
             elif item["type"] == "tool_use":
                 parts.append(
                     ToolCall(
@@ -265,9 +286,8 @@ class AnthropicRequest(APIRequestBase):
                 usage = Usage.from_anthropic_usage(data["usage"])
             except Exception as e:
                 is_error = True
-
-
-                )
+                response_text = await http_response.text()
+                error_message = f"Error calling .json() on response w/ status {status_code}: {e}. Response: {response_text[:500]}"
         elif mimetype and "json" in mimetype.lower():
             is_error = True  # expected status is 200, otherwise it's an error
             data = await http_response.json()
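
The thinking-block handling above round-trips Anthropic's extended-thinking output into lm_deluge's own message parts. A minimal illustrative sketch (not part of the diff; the sample payload values are made up) of how a raw "thinking" content block maps onto the new Thinking/ThoughtSignature parts, mirroring the parsing logic added in this release:

from lm_deluge.prompt import Thinking, ThoughtSignature

# Example of an Anthropic "thinking" content block (values are illustrative).
item = {
    "type": "thinking",
    "thinking": "Compare the two tool results before answering...",
    "signature": "EqQBCgIYAh...",  # opaque signature returned by the API
}

signature = item.get("signature")
part = Thinking(
    item.get("thinking", ""),
    raw_payload=item,  # keep the original block so it can be replayed verbatim
    thought_signature=ThoughtSignature(signature, provider="anthropic")
    if signature is not None
    else None,
)
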
lm_deluge/api_requests/base.py
CHANGED
@@ -1,4 +1,6 @@
 import asyncio
+import json
+import os
 import time
 import traceback
 from abc import ABC, abstractmethod
@@ -73,6 +75,24 @@ class APIRequestBase(ABC):
 
         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
+        if "anthropic-beta" in merged and "anthropic-beta" in filtered_extra:
+            combined = []
+            seen = set()
+            for (
+                raw
+            ) in f"{merged['anthropic-beta']},{filtered_extra['anthropic-beta']}".split(
+                ","
+            ):
+                token = raw.strip()
+                if token and token not in seen:
+                    seen.add(token)
+                    combined.append(token)
+            merged["anthropic-beta"] = ",".join(combined)
+            filtered_extra = {
+                key: value
+                for key, value in filtered_extra.items()
+                if key != "anthropic-beta"
+            }
         merged.update(filtered_extra)
 
         # Filter out None values from final merged headers
@@ -189,6 +209,23 @@ class APIRequestBase(ABC):
         await self.build_request()
         assert self.context.status_tracker
 
+        if os.getenv("DELUGE_PROXY_LOG_PROVIDER_REQUESTS", "").strip().lower() in {
+            "1",
+            "true",
+            "yes",
+            "on",
+        }:
+            print("DELUGE_PROXY_PROVIDER_REQUEST")
+            print(f"URL: {self.url}")
+            print("Headers:")
+            print(self.request_header)
+            if self.request_json is not None:
+                print("JSON:")
+                try:
+                    print(json.dumps(self.request_json, indent=2))
+                except Exception:
+                    print(self.request_json)
+
         if (
             self.context.background
             and self.context.use_responses_api
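
Two behaviors land in base.py here: duplicate "anthropic-beta" values from the base and extra headers are now de-duplicated while preserving order, and setting DELUGE_PROXY_LOG_PROVIDER_REQUESTS to 1/true/yes/on dumps each outgoing provider request. A small sketch of both; the beta-flag strings below are illustrative, not taken from the package:

import os

# Opt in to request logging before issuing any lm_deluge calls; every provider
# request will then print its URL, headers, and JSON body.
os.environ["DELUGE_PROXY_LOG_PROVIDER_REQUESTS"] = "1"

# The same merge logic as the diff: split, strip, and keep first occurrences.
base = {"anthropic-beta": "prompt-caching-2024-07-31"}
extra = {"anthropic-beta": "prompt-caching-2024-07-31,computer-use-2025-01-24"}

seen, combined = set(), []
for raw in f"{base['anthropic-beta']},{extra['anthropic-beta']}".split(","):
    token = raw.strip()
    if token and token not in seen:
        seen.add(token)
        combined.append(token)

print(",".join(combined))
# prompt-caching-2024-07-31,computer-use-2025-01-24
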
lm_deluge/api_requests/bedrock.py
CHANGED
@@ -16,6 +16,7 @@ except ImportError:
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
@@ -363,8 +364,28 @@ class BedrockRequest(APIRequestBase):
             if item["type"] == "text":
                 parts.append(Text(item["text"]))
             elif item["type"] == "thinking":
-
-
+                thinking_content = item.get("thinking", "")
+                thinking = thinking_content
+                signature = item.get("signature")
+                parts.append(
+                    Thinking(
+                        thinking_content,
+                        raw_payload=item,
+                        thought_signature=ThoughtSignature(
+                            signature,
+                            provider="anthropic",
+                        )
+                        if signature is not None
+                        else None,
+                    )
+                )
+            elif item["type"] == "redacted_thinking":
+                parts.append(
+                    Thinking(
+                        item.get("data", ""),
+                        raw_payload=item,
+                    )
+                )
             elif item["type"] == "tool_use":
                 parts.append(
                     ToolCall(
lm_deluge/api_requests/gemini.py
CHANGED
@@ -9,7 +9,7 @@ from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
-from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
@@ -52,6 +52,7 @@ async def _build_gemini_request(
 
     # Handle reasoning models (thinking)
    is_gemini_3 = "gemini-3" in model.name.lower()
+    is_gemini_3_flash = "gemini-3-flash" in model.name.lower()
     if is_gemini_3:
         # gemini3 MUST think
         if not sampling_params.reasoning_effort:
@@ -62,13 +63,24 @@ async def _build_gemini_request(
         if effort_key == "xhigh":
             maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
             effort_key = "high"
-
-
-
-
-
-
-
+        if is_gemini_3_flash:
+            # Flash supports minimal, low, medium, high
+            level_map = {
+                "none": "low",
+                "minimal": "minimal",
+                "low": "low",
+                "medium": "medium",
+                "high": "high",
+            }
+        else:
+            # Pro only supports low, high
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",
+                "high": "high",
+            }
         effort = level_map[effort_key]
         thinking_config = {"thinkingLevel": effort}
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
@@ -248,10 +260,20 @@ class GeminiRequest(APIRequestBase):
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
                     # Extract thought signature if present
-
+                    raw_sig = part.get("thoughtSignature")
+                    thought_sig = (
+                        ThoughtSignature(raw_sig, provider="gemini")
+                        if raw_sig is not None
+                        else None
+                    )
 
                     if "text" in part:
-                        parts.append(
+                        parts.append(
+                            Text(
+                                part["text"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "thought" in part:
                         # Thought with optional signature
                         parts.append(
@@ -274,6 +296,10 @@ class GeminiRequest(APIRequestBase):
                                 thought_signature=thought_sig,
                             )
                         )
+                    elif thought_sig:
+                        parts.append(
+                            Text("", thought_signature=thought_sig)
+                        )
 
             content = Message("assistant", parts)
 
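
The effort mapping above differs between Gemini 3 Flash and Pro: Flash keeps "minimal" and "medium" as-is, while Pro collapses them to "low" and "high". A small self-contained sketch of the resulting thinkingLevel (model names are illustrative):

def thinking_level(model_name: str, effort_key: str) -> str:
    # Mirrors the level_map branches added in _build_gemini_request.
    if "gemini-3-flash" in model_name.lower():
        level_map = {"none": "low", "minimal": "minimal", "low": "low", "medium": "medium", "high": "high"}
    else:
        level_map = {"none": "low", "minimal": "low", "low": "low", "medium": "high", "high": "high"}
    return level_map[effort_key]

print(thinking_level("gemini-3-flash-preview", "medium"))  # medium
print(thinking_level("gemini-3-pro-preview", "medium"))    # high
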
lm_deluge/api_requests/openai.py
CHANGED
@@ -22,6 +22,24 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
 
+def _message_contents_to_string(messages: list[dict]):
+    messages = messages.copy()
+
+    for msg in messages:
+        content = msg.get("content")
+        assert content
+        if isinstance(content, list):
+            new_content = ""
+            for part in content:
+                assert "text" in part, "Invalid text part: " + str(part)
+                new_content += part["text"]
+                new_content += "\n"
+
+            msg["content"] = new_content.strip()
+
+    return messages
+
+
 async def _build_oa_chat_request(
     model: APIModel,
     context: RequestContext,
@@ -55,6 +73,10 @@ async def _build_oa_chat_request(
             request_json["service_tier"] = context.service_tier
         else:
             request_json["service_tier"] = context.service_tier
+    # if tinker, for now hack to mush into 1 string
+    if "tinker" in model.name:
+        request_json["messages"] = _message_contents_to_string(request_json["messages"])
+
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -217,7 +239,7 @@ class OpenAIRequest(APIRequestBase):
             parts.append(Text(message["content"]))
 
         # Add tool calls if present
-        if "tool_calls" in message:
+        if "tool_calls" in message and message["tool_calls"] is not None:
            for tool_call in message["tool_calls"]:
                 parts.append(
                     ToolCall(
@@ -238,9 +260,9 @@ class OpenAIRequest(APIRequestBase):
                     and "logprobs" in data["choices"][0]
                 ):
                     logprobs = data["choices"][0]["logprobs"]["content"]
-            except Exception:
+            except Exception as e:
                 is_error = True
-                error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+                error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
         elif mimetype and "json" in mimetype.lower():
             is_error = True  # expected status is 200, otherwise it's an error
             data = await http_response.json()
@@ -655,7 +677,12 @@ async def stream_chat(
     request_header.update(filtered_extra)
 
     context = SimpleNamespace(
-        prompt=prompt,
+        prompt=prompt,
+        tools=tools,
+        sampling_params=sampling_params,
+        service_tier=None,
+        output_schema=None,
+        model_name=model_name,
     )
 
     request_json = await _build_oa_chat_request(model, context)  # type: ignore
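
Since the diff adds _message_contents_to_string as a module-level helper, here is a before/after sketch of what it does to list-style message content; the sample messages are made up and the helper is a private function of the module shown above:

from lm_deluge.api_requests.openai import _message_contents_to_string

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Summarize the log."},
            {"type": "text", "text": "Then list action items."},
        ],
    }
]

flattened = _message_contents_to_string(messages)
# Each list-style content field is collapsed into one newline-joined string:
# [{"role": "user", "content": "Summarize the log.\nThen list action items."}]
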
lm_deluge/batches.py
CHANGED
@@ -141,31 +141,22 @@ async def submit_batch_oa(file_path: str):
     return batch_id
 
 
-async def _submit_anthropic_batch(
-    """
+async def _submit_anthropic_batch(requests: list[dict], headers: dict, model: str):
+    """Submit batch requests to Anthropic's Message Batches API."""
 
     async with aiohttp.ClientSession() as session:
         url = f"{registry[model].api_base}/messages/batches"
-
-        with open(file_path, "rb") as f:
-            data.add_field(
-                "file",
-                f,
-                filename=os.path.basename(file_path),
-                content_type="application/json",
-            )
-
-        async with session.post(url, data=data, headers=headers) as response:
-            if response.status != 200:
-                text = await response.text()
-                raise ValueError(f"Error creating batch: {text}")
+        payload = {"requests": requests}
 
-
-
-
+        async with session.post(url, json=payload, headers=headers) as response:
+            if response.status != 200:
+                text = await response.text()
+                raise ValueError(f"Error creating batch: {text}")
 
-
-
+            batch_data = await response.json()
+            batch_id = batch_data["id"]
+            print(f"Anthropic batch job started successfully: id = {batch_id}")
+            return batch_id
 
 
 async def create_batch_files_oa(
@@ -409,20 +400,10 @@ async def submit_batches_anthropic(
 
         if current_batch and (would_exceed_size or would_exceed_items):
             # Submit current batch
-
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-            file_path = await asyncio.to_thread(write_batch_file)
+            print("wrote", len(current_batch), "items")
             batch_tasks.append(
                 asyncio.create_task(
-                    _submit_anthropic_batch(
+                    _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
                 )
             )
 
@@ -436,21 +417,10 @@ async def submit_batches_anthropic(
 
     # Submit final batch if it has items
     if current_batch:
-
-        def write_final_batch_file():
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-        file_path = await asyncio.to_thread(write_final_batch_file)
+        print("wrote", len(current_batch), "items")
         batch_tasks.append(
             asyncio.create_task(
-                _submit_anthropic_batch(
+                _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
             )
         )
 
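
The rewritten _submit_anthropic_batch no longer writes a temporary JSONL file; it posts the accumulated requests directly as JSON. A hedged sketch of the payload shape (field layout follows Anthropic's documented Message Batches API; the concrete values below are illustrative, not taken from the package):

payload = {
    "requests": [
        {
            "custom_id": "item-0",
            "params": {
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello"}],
            },
        }
    ]
}
# POSTed to {api_base}/messages/batches with the usual Anthropic auth headers;
# the returned batch id is printed and handed back to the caller.
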
lm_deluge/client.py
CHANGED
@@ -289,6 +289,28 @@ class _LLMClient(BaseModel):
     def models(self):
         return self.model_names  # why? idk
 
+    @staticmethod
+    def _preprocess_tinker_model(model_name: str) -> str:
+        if model_name.startswith("tinker://"):
+            model_id = model_name
+            if model_id not in registry:
+                register_model(
+                    id=model_name,
+                    name=model_name,
+                    api_base="https://tinker.thinkingmachines.dev/services/tinker-prod/oai/api/v1",
+                    api_key_env_var="TINKER_API_KEY",
+                    api_spec="openai",
+                    supports_json=True,
+                    supports_logprobs=False,
+                    supports_responses=False,
+                    input_cost=0,  # Unknown costs for arbitrary tinker models
+                    cached_input_cost=0,
+                    cache_write_cost=0,
+                    output_cost=0,
+                )
+
+        return model_name
+
     @staticmethod
     def _preprocess_openrouter_model(model_name: str) -> str:
         """Process openrouter: prefix and register model if needed."""
@@ -315,7 +337,8 @@ class _LLMClient(BaseModel):
             )
 
             return model_id
-
+        else:
+            return model_name
 
     @model_validator(mode="before")
     @classmethod
@@ -328,6 +351,9 @@ class _LLMClient(BaseModel):
         # First, handle OpenRouter prefix
         model_name = cls._preprocess_openrouter_model(model_names)
 
+        # next handle tinker prefix
+        model_name = cls._preprocess_tinker_model(model_name)
+
         # Then handle reasoning effort suffix (e.g., "gpt-5-high")
         model_name, effort = cls._strip_reasoning_suffix_if_registered(model_name)
         if effort and data.get("reasoning_effort") is None:
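
For context on how the new prefix is meant to be used: any model name starting with tinker:// is registered on first sight against Tinker's OpenAI-compatible endpoint and authenticated via TINKER_API_KEY. A hedged usage sketch (the model path is invented, and passing a single model-name string to the client is assumed from the validator above):

from lm_deluge import LLMClient

client = LLMClient("tinker://my-org/my-finetuned-model")
# The validator calls _preprocess_tinker_model(), which registers the model with
# api_spec="openai" and zero placeholder costs before any requests are sent.
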
lm_deluge/models/__init__.py
CHANGED
@@ -23,6 +23,7 @@ from .mistral import MISTRAL_MODELS
 from .openai import OPENAI_MODELS
 from .openrouter import OPENROUTER_MODELS
 from .together import TOGETHER_MODELS
+from .zai import ZAI_MODELS
 
 
 @dataclass
@@ -134,6 +135,7 @@ def register_model(
 # Register all models from all providers
 for model_dict in [
     ANTHROPIC_MODELS,
+    ZAI_MODELS,
     ARCEE_MODELS,
     BEDROCK_MODELS,
     COHERE_MODELS,
lm_deluge/models/anthropic.py
CHANGED
@@ -112,18 +112,18 @@ ANTHROPIC_MODELS = {
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
     },
-    "claude-3.5-sonnet": {
-
-
-
-
-
-
-
-
-
-    },
+    # "claude-3.5-sonnet": {
+    #     "id": "claude-3.5-sonnet",
+    #     "name": "claude-3-5-sonnet-20240620",
+    #     "api_base": "https://api.anthropic.com/v1",
+    #     "api_key_env_var": "ANTHROPIC_API_KEY",
+    #     "supports_json": False,
+    #     "api_spec": "anthropic",
+    #     "input_cost": 3.0,
+    #     "cached_input_cost": 0.30,
+    #     "cache_write_cost": 3.75,
+    #     "output_cost": 15.0,
+    # },
     "claude-3-opus": {
         "id": "claude-3-opus",
         "name": "claude-3-opus-20240229",
lm_deluge/models/google.py
CHANGED
@@ -153,6 +153,19 @@ GOOGLE_MODELS = {
         # Note: >200k tokens pricing is $4/$18 per million
         "reasoning_model": True,
     },
+    "gemini-3-flash-preview": {
+        "id": "gemini-3-flash-preview",
+        "name": "gemini-3-flash-preview",
+        "api_base": "https://generativelanguage.googleapis.com/v1alpha",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.5,
+        "cached_input_cost": 0.125,  # estimated
+        "output_cost": 3.0,
+        "reasoning_model": True,
+    },
     # Gemini 2.5 Computer Use model
     "gemini-2.5-computer-use": {
         "id": "gemini-2.5-computer-use",
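
Together with the flash-specific effort mapping added to gemini.py above, this registry entry lets a "minimal" reasoning effort pass through unchanged for the flash preview (Pro-class Gemini 3 models coerce it to "low"). A hedged sketch of selecting it; the reasoning_effort keyword comes from the client validator earlier in this diff, while the call style itself is assumed:

from lm_deluge import LLMClient

client = LLMClient("gemini-3-flash-preview", reasoning_effort="minimal")
# Internally this becomes generationConfig.thinkingConfig = {"thinkingLevel": "minimal"}.
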
lm_deluge/models/minimax.py
CHANGED
@@ -1,4 +1,12 @@
 MINIMAX_MODELS = {
+    "minimax-m2.1": {
+        "id": "minimax-m2.1",
+        "name": "MiniMax-M2.1",
+        "api_base": "https://api.minimax.io/anthropic/v1",
+        "api_key_env_var": "MINIMAX_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+    },
     "minimax-m2": {
         "id": "minimax-m2",
         "name": "MiniMax-M2",
@@ -6,5 +14,5 @@ MINIMAX_MODELS = {
         "api_key_env_var": "MINIMAX_API_KEY",
         "supports_json": False,
         "api_spec": "anthropic",
-    }
+    },
 }
lm_deluge/models/openrouter.py
CHANGED
@@ -83,6 +83,18 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.04,
         "output_cost": 0.18,
     },
+    "gpt-oss-20b-free-openrouter": {
+        "id": "gpt-oss-20b-openrouter",
+        "name": "openai/gpt-oss-20b:free",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.0,
+        "cached_input_cost": 0.0,
+        "cache_write_cost": 0.0,
+        "output_cost": 0.0,
+    },
     "gpt-oss-120b-openrouter": {
         "id": "gpt-oss-120b-openrouter",
         "name": "openai/gpt-oss-120b",
@@ -95,6 +107,18 @@ OPENROUTER_MODELS = {
         "cache_write_cost": 0.05,
         "output_cost": 0.45,
     },
+    "gpt-oss-120b-free-openrouter": {
+        "id": "gpt-oss-120b-free-openrouter",
+        "name": "openai/gpt-oss-120b:free",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.00,
+        "cached_input_cost": 0.00,
+        "cache_write_cost": 0.00,
+        "output_cost": 0.0,
+    },
     "kimi-k2-openrouter": {
         "id": "kimi-k2-openrouter",
         "name": "moonshotai/kimi-k2-0905:exacto",
@@ -139,4 +163,28 @@ OPENROUTER_MODELS = {
         "input_cost": 0.045,
         "output_cost": 0.15,
     },
+    "glm-4.7-openrouter": {
+        "id": "glm-4.7-openrouter",
+        "name": "z-ai/glm-4.7",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.6,
+        "cached_input_cost": 0.6,
+        "cache_write_cost": 0.6,
+        "output_cost": 2.20,
+    },
+    "minimax-m2.1-openrouter": {
+        "id": "minimax-m2.1-openrouter",
+        "name": "minimax/minimax-m2.1",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.3,
+        "cached_input_cost": 0.3,
+        "cache_write_cost": 0.3,
+        "output_cost": 1.20,
+    },
 }