lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registries; it is provided for informational purposes only.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
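Judging from the renames in the list above, the helpers that lived under `lm_deluge.llm_tools` now live under `lm_deluge.pipelines`, and the single-module `lm_deluge/tool.py` becomes the `lm_deluge/tool/` package. A hedged migration sketch for downstream code, based only on the moved paths (whether 0.0.90 keeps compatibility shims for the old import paths is not visible in this diff):

```python
# Hypothetical migration sketch; module names are taken from the renamed files above.

# 0.0.67: task helpers lived under lm_deluge.llm_tools
# from lm_deluge.llm_tools import classify, extract, ocr, translate

# 0.0.90: the same modules appear under lm_deluge.pipelines
from lm_deluge.pipelines import classify, extract, ocr, translate  # noqa: F401

# tool.py is replaced by a tool/ package (lm_deluge/tool/__init__.py), so
# `from lm_deluge.tool import ...` should resolve to the new package.
from lm_deluge.tool import MCPServer, Tool  # noqa: F401
```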
lm_deluge/api_requests/gemini.py
CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any

 from aiohttp import ClientResponse

@@ -10,7 +9,7 @@ from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel
-from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse

@@ -23,6 +22,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()

+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -37,20 +51,81 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}

     # Handle reasoning models (thinking)
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    is_gemini_3_flash = "gemini-3-flash" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
+        else:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            if is_gemini_3_flash:
+                # Flash supports minimal, low, medium, high
+                level_map = {
+                    "none": "low",
+                    "minimal": "minimal",
+                    "low": "low",
+                    "medium": "medium",
+                    "high": "high",
+                }
+            else:
+                # Pro only supports low, high
+                level_map = {
+                    "none": "low",
+                    "minimal": "low",
+                    "low": "low",
+                    "medium": "high",
+                    "high": "high",
+                }
+            effort = level_map[effort_key]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert effort_key in level_map
+            budget = level_map[effort_key]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
         else:
-            effort
-        ]
-        thinking_config["thinkingBudget"] = budget
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config

     else:
@@ -59,13 +134,60 @@ async def _build_gemini_request(

     # Add tools if provided
     if tools:
+        request_tools = []
+        function_declarations = []
+
+        for tool in tools:
+            if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                # Gemini computer use tool - add as separate tool entry
+                env_map = {
+                    "browser": "ENVIRONMENT_BROWSER",
+                    "android": "ENVIRONMENT_ANDROID",
+                }
+                env = env_map.get(
+                    tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                )
+                cu_tool: dict = {
+                    "computerUse": {
+                        "environment": env,
+                    }
+                }
+                excluded = tool.get("excluded_predefined_functions")
+                if excluded:
+                    cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                request_tools.append(cu_tool)
+            elif hasattr(tool, "dump_for"):
+                # Regular Tool object
+                function_declarations.append(tool.dump_for("google"))
+            elif isinstance(tool, dict):
+                # Raw dict tool - assume it's a function declaration
+                function_declarations.append(tool)
+
+        if function_declarations:
+            request_tools.append({"functionDeclarations": function_declarations})
+
+        if request_tools:
+            request_json["tools"] = request_tools

     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"

+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json


@@ -103,7 +225,7 @@ class GeminiRequest(APIRequestBase):
         self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
-            self.context.tools,
+            self.context.tools,  # type: ignore
             self.context.sampling_params,
         )

@@ -137,10 +259,29 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    raw_sig = part.get("thoughtSignature")
+                    thought_sig = (
+                        ThoughtSignature(raw_sig, provider="gemini")
+                        if raw_sig is not None
+                        else None
+                    )
+
                     if "text" in part:
-                        parts.append(
+                        parts.append(
+                            Text(
+                                part["text"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "thought" in part:
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,8 +293,13 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )
+                    elif thought_sig:
+                        parts.append(
+                            Text("", thought_signature=thought_sig)
+                        )

             content = Message("assistant", parts)

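To make the Gemini 3 handling above concrete, here is a rough sketch of the request body `_build_gemini_request` now produces: `generationConfig.thinkingConfig` carries a `thinkingLevel` instead of a token budget, function-call parts that lack a signature get the dummy `thoughtSignature`, and `mediaResolution` is attached only when set on a Gemini 3 model. The field names come from the diff above; the conversation content and the media-resolution value are illustrative.

```python
# Illustrative request body for a gemini-3 model with reasoning_effort="medium"
# (on Pro the level_map above collapses "medium" to "high").
request_json = {
    "contents": [
        {"role": "user", "parts": [{"text": "What's 2 + 2?"}]},
        {
            "role": "model",
            "parts": [
                {
                    "functionCall": {"name": "calculator", "args": {"expr": "2 + 2"}},
                    # injected by the loop above when the signature is missing
                    "thoughtSignature": "context_engineering_is_the_way_to_go",
                }
            ],
        },
    ],
    "generationConfig": {
        "thinkingConfig": {"thinkingLevel": "high"},
        # only present when sampling_params.media_resolution is set;
        # the value here is a placeholder for whatever that field holds
        "mediaResolution": {"level": "MEDIA_RESOLUTION_HIGH"},
    },
}
```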
lm_deluge/api_requests/openai.py
CHANGED
@@ -2,12 +2,17 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence

 import aiohttp
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_openai,
+)
 from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
@@ -17,6 +22,24 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse


+def _message_contents_to_string(messages: list[dict]):
+    messages = messages.copy()
+
+    for msg in messages:
+        content = msg.get("content")
+        assert content
+        if isinstance(content, list):
+            new_content = ""
+            for part in content:
+                assert "text" in part, "Invalid text part: " + str(part)
+                new_content += part["text"]
+                new_content += "\n"
+
+            msg["content"] = new_content.strip()
+
+    return messages
+
+
 async def _build_oa_chat_request(
     model: APIModel,
     context: RequestContext,
@@ -50,14 +73,16 @@ async def _build_oa_chat_request(
         request_json["service_tier"] = context.service_tier
     else:
         request_json["service_tier"] = context.service_tier
+    # if tinker, for now hack to mush into 1 string
+    if "tinker" in model.name:
+        request_json["messages"] = _message_contents_to_string(request_json["messages"])
+
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -67,11 +92,24 @@ async def _build_oa_chat_request(
                 effort = "minimal"
             else:
                 effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
@@ -81,17 +119,48 @@ async def _build_oa_chat_request(
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
             request_json["top_logprobs"] = sampling_params.top_logprobs
+
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                },
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
+
     if tools:
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for(
+                        "openai-completions", strict=sampling_params.strict_tools
+                    )
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [
+                    [
+                        t.dump_for(
+                            "openai-completions", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
                 )
         request_json["tools"] = request_tools
     return request_json
@@ -170,7 +239,7 @@ class OpenAIRequest(APIRequestBase):
             parts.append(Text(message["content"]))

         # Add tool calls if present
-        if "tool_calls" in message:
+        if "tool_calls" in message and message["tool_calls"] is not None:
             for tool_call in message["tool_calls"]:
                 parts.append(
                     ToolCall(
@@ -191,9 +260,9 @@ class OpenAIRequest(APIRequestBase):
                 and "logprobs" in data["choices"][0]
             ):
                 logprobs = data["choices"][0]["logprobs"]["content"]
-        except Exception:
+        except Exception as e:
             is_error = True
-            error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+            error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
         elif mimetype and "json" in mimetype.lower():
             is_error = True  # expected status is 200, otherwise it's an error
             data = await http_response.json()
@@ -271,23 +340,60 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens

     if model.reasoning_model:
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
+                effort = "none"
             else:
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort":
+            "effort": effort,
             "summary": "auto",
         }
     else:
         if sampling_params.reasoning_effort:
             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                }
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}

     # Handle tools
@@ -295,11 +401,13 @@ async def _build_oa_responses_request(
         # Add regular function tools
         for tool in tools or []:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+                )
             elif isinstance(tool, dict):
                 # if computer use, make sure model supports it
                 if tool["type"] == "computer_use_preview":
-                    if model.name != "
+                    if model.name != "computer-use-preview":
                         raise ValueError(f"model {model.id} does not support computer use")
                     # have to use truncation
                     request_json["truncation"] = "auto"
@@ -307,7 +415,14 @@
             elif isinstance(tool, MCPServer):
                 if context.force_local_mcp:
                     as_tools = await tool.to_tools()
-                    request_tools.extend(
+                    request_tools.extend(
+                        [
+                            t.dump_for(
+                                "openai-responses", strict=sampling_params.strict_tools
+                            )
+                            for t in as_tools
+                        ]
+                    )
                 else:
                     request_tools.append(tool.for_openai_responses())

@@ -381,7 +496,7 @@ class OpenAIResponsesRequest(APIRequestBase):
             output = data.get("output", [])
             if not output:
                 is_error = True
-                error_message = "No output in response"
+                error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
             else:
                 # Process each output item
                 for item in output:
@@ -536,7 +651,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools:
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):
@@ -562,7 +677,12 @@
     request_header.update(filtered_extra)

     context = SimpleNamespace(
-        prompt=prompt,
+        prompt=prompt,
+        tools=tools,
+        sampling_params=sampling_params,
+        service_tier=None,
+        output_schema=None,
+        model_name=model_name,
     )

     request_json = await _build_oa_chat_request(model, context)  # type: ignore
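The new structured-output branch in both builders wraps the schema produced by `prepare_output_schema` and `transform_schema_for_openai` in API-specific envelopes. A minimal sketch, assuming those helpers return a plain JSON-schema dict (the example schema is invented; only the wrapper shapes come from the hunks above):

```python
# Example schema standing in for transform_schema_for_openai(...) output.
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
    "additionalProperties": False,
}

# Chat Completions (_build_oa_chat_request) nests it under "json_schema":
chat_response_format = {
    "type": "json_schema",
    "json_schema": {"name": "response", "schema": schema, "strict": True},
}

# The Responses API (_build_oa_responses_request) flattens it into text.format:
responses_text = {
    "format": {
        "type": "json_schema",
        "name": "response",
        "schema": schema,
        "strict": True,
    }
}
```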
lm_deluge/batches.py
CHANGED
@@ -141,31 +141,22 @@ async def submit_batch_oa(file_path: str):
     return batch_id


-async def _submit_anthropic_batch(
-    """
+async def _submit_anthropic_batch(requests: list[dict], headers: dict, model: str):
+    """Submit batch requests to Anthropic's Message Batches API."""

     async with aiohttp.ClientSession() as session:
         url = f"{registry[model].api_base}/messages/batches"
-        with open(file_path, "rb") as f:
-            data.add_field(
-                "file",
-                f,
-                filename=os.path.basename(file_path),
-                content_type="application/json",
-            )
-        async with session.post(url, data=data, headers=headers) as response:
-            if response.status != 200:
-                text = await response.text()
-                raise ValueError(f"Error creating batch: {text}")
+        payload = {"requests": requests}

+        async with session.post(url, json=payload, headers=headers) as response:
+            if response.status != 200:
+                text = await response.text()
+                raise ValueError(f"Error creating batch: {text}")

+            batch_data = await response.json()
+            batch_id = batch_data["id"]
+            print(f"Anthropic batch job started successfully: id = {batch_id}")
+            return batch_id


 async def create_batch_files_oa(
@@ -409,20 +400,10 @@ async def submit_batches_anthropic(

         if current_batch and (would_exceed_size or would_exceed_items):
             # Submit current batch
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-            file_path = await asyncio.to_thread(write_batch_file)
+            print("wrote", len(current_batch), "items")
             batch_tasks.append(
                 asyncio.create_task(
-                    _submit_anthropic_batch(
+                    _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
                 )
             )

@@ -436,21 +417,10 @@ async def submit_batches_anthropic(

     # Submit final batch if it has items
     if current_batch:
-        def write_final_batch_file():
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-        file_path = await asyncio.to_thread(write_final_batch_file)
+        print("wrote", len(current_batch), "items")
         batch_tasks.append(
             asyncio.create_task(
-                _submit_anthropic_batch(
+                _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
             )
         )

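Anthropic batch submission no longer writes a temporary .jsonl file and uploads it: the accumulated request dicts are posted directly as a JSON body to the Message Batches endpoint. A sketch of the payload shape, assuming each entry follows Anthropic's documented `custom_id` + `params` format (only the outer `{"requests": [...]}` wrapper and the `/messages/batches` URL are visible in the hunks above):

```python
# Assumed per-item shape for the list passed to _submit_anthropic_batch;
# the model id and message content are placeholders.
payload = {
    "requests": [
        {
            "custom_id": "row-0",
            "params": {
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello!"}],
            },
        },
    ]
}
# POSTed as JSON to f"{registry[model].api_base}/messages/batches" with the
# Anthropic API headers, instead of a multipart file upload.
```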