lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (108)
  1. lm_deluge/__init__.py +1 -2
  2. lm_deluge/api_requests/anthropic.py +117 -22
  3. lm_deluge/api_requests/base.py +84 -11
  4. lm_deluge/api_requests/bedrock.py +30 -6
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +166 -20
  7. lm_deluge/api_requests/openai.py +145 -25
  8. lm_deluge/batches.py +15 -45
  9. lm_deluge/client.py +309 -50
  10. lm_deluge/config.py +15 -3
  11. lm_deluge/models/__init__.py +14 -1
  12. lm_deluge/models/anthropic.py +29 -14
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +42 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +18 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +133 -7
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +50 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +705 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +537 -88
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/server/__init__.py +24 -0
  39. lm_deluge/server/__main__.py +144 -0
  40. lm_deluge/server/adapters.py +369 -0
  41. lm_deluge/server/app.py +388 -0
  42. lm_deluge/server/auth.py +71 -0
  43. lm_deluge/server/model_policy.py +215 -0
  44. lm_deluge/server/models_anthropic.py +172 -0
  45. lm_deluge/server/models_openai.py +175 -0
  46. lm_deluge/tool/__init__.py +1130 -0
  47. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  48. lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  49. lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  50. lm_deluge/tool/builtin/gemini.py +59 -0
  51. lm_deluge/tool/builtin/openai.py +74 -0
  52. lm_deluge/tool/cua/__init__.py +173 -0
  53. lm_deluge/tool/cua/actions.py +148 -0
  54. lm_deluge/tool/cua/base.py +27 -0
  55. lm_deluge/tool/cua/batch.py +215 -0
  56. lm_deluge/tool/cua/converters.py +466 -0
  57. lm_deluge/tool/cua/kernel.py +702 -0
  58. lm_deluge/tool/cua/trycua.py +989 -0
  59. lm_deluge/tool/prefab/__init__.py +45 -0
  60. lm_deluge/tool/prefab/batch_tool.py +156 -0
  61. lm_deluge/tool/prefab/docs.py +1119 -0
  62. lm_deluge/tool/prefab/email.py +294 -0
  63. lm_deluge/tool/prefab/filesystem.py +1711 -0
  64. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  65. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  66. lm_deluge/tool/prefab/memory.py +458 -0
  67. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  68. lm_deluge/tool/prefab/otc/executor.py +281 -0
  69. lm_deluge/tool/prefab/otc/parse.py +188 -0
  70. lm_deluge/tool/prefab/random.py +212 -0
  71. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  72. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  73. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  74. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  75. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  76. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  77. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  78. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  79. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  80. lm_deluge/tool/prefab/sheets.py +385 -0
  81. lm_deluge/tool/prefab/skills.py +0 -0
  82. lm_deluge/tool/prefab/subagents.py +233 -0
  83. lm_deluge/tool/prefab/todos.py +342 -0
  84. lm_deluge/tool/prefab/tool_search.py +169 -0
  85. lm_deluge/tool/prefab/web_search.py +199 -0
  86. lm_deluge/tracker.py +16 -13
  87. lm_deluge/util/schema.py +412 -0
  88. lm_deluge/warnings.py +8 -0
  89. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
  90. lm_deluge-0.0.90.dist-info/RECORD +132 -0
  91. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  92. lm_deluge/built_in_tools/openai.py +0 -28
  93. lm_deluge/presets/cerebras.py +0 -17
  94. lm_deluge/presets/meta.py +0 -13
  95. lm_deluge/tool.py +0 -849
  96. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  97. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  98. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  99. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  100. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  101. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  102. /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
  103. /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
  104. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  105. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  106. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  107. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  108. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,6 +1,5 @@
  import json
  import os
- from typing import Any

  from aiohttp import ClientResponse

@@ -10,7 +9,7 @@ from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
- from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+ from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
  from ..usage import Usage
  from .base import APIRequestBase, APIResponse

@@ -23,6 +22,21 @@ async def _build_gemini_request(
  ) -> dict:
      system_message, messages = prompt.to_gemini()

+     # For Gemini 3, inject dummy signatures when missing for function calls
+     is_gemini_3 = "gemini-3" in model.name.lower()
+     if is_gemini_3:
+         dummy_sig = "context_engineering_is_the_way_to_go"
+         for msg in messages:
+             if "parts" in msg:
+                 for part in msg["parts"]:
+                     # For function calls, inject dummy signature if missing
+                     if "functionCall" in part and "thoughtSignature" not in part:
+                         part["thoughtSignature"] = dummy_sig
+                         maybe_warn(
+                             "WARN_GEMINI3_MISSING_SIGNATURE",
+                             part_type="function call",
+                         )
+
      request_json = {
          "contents": messages,
          "generationConfig": {
@@ -37,20 +51,81 @@
          request_json["systemInstruction"] = {"parts": [{"text": system_message}]}

      # Handle reasoning models (thinking)
-     if model.reasoning_model:
-         thinking_config: dict[str, Any] | None = None
-         effort = sampling_params.reasoning_effort
-         if effort is None or effort == "none":
-             budget = 128 if "2.5-pro" in model.id else 0
-             # Explicitly disable thoughts when no effort is requested
-             thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+     is_gemini_3 = "gemini-3" in model.name.lower()
+     is_gemini_3_flash = "gemini-3-flash" in model.name.lower()
+     if is_gemini_3:
+         # gemini3 MUST think
+         if not sampling_params.reasoning_effort:
+             maybe_warn("WARN_GEMINI3_NO_REASONING")
+             effort = "low"
+         else:
+             effort_key = sampling_params.reasoning_effort
+             if effort_key == "xhigh":
+                 maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                 effort_key = "high"
+             if is_gemini_3_flash:
+                 # Flash supports minimal, low, medium, high
+                 level_map = {
+                     "none": "low",
+                     "minimal": "minimal",
+                     "low": "low",
+                     "medium": "medium",
+                     "high": "high",
+                 }
+             else:
+                 # Pro only supports low, high
+                 level_map = {
+                     "none": "low",
+                     "minimal": "low",
+                     "low": "low",
+                     "medium": "high",
+                     "high": "high",
+                 }
+             effort = level_map[effort_key]
+         thinking_config = {"thinkingLevel": effort}
+         request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+     elif model.reasoning_model:
+         if (
+             sampling_params.thinking_budget is not None
+             and sampling_params.reasoning_effort is not None
+         ):
+             maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+         if (
+             sampling_params.thinking_budget is not None
+             and sampling_params.thinking_budget > 0
+         ):
+             thinking_config = {
+                 "includeThoughts": True,
+                 "thinkingBudget": sampling_params.thinking_budget,
+             }
+         elif sampling_params.thinking_budget == -1:
+             # dynamic thinking
+             thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+         elif sampling_params.reasoning_effort not in [None, "none"]:
+             effort_key = sampling_params.reasoning_effort
+             if effort_key == "xhigh":
+                 maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                 effort_key = "high"
+             level_map = {
+                 "minimal": 256,
+                 "low": 1024,
+                 "medium": 4096,
+                 "high": 16384,
+             }
+             assert effort_key in level_map
+             budget = level_map[effort_key]
+             if "flash-lite" in model.id:
+                 budget = max(budget, 512)
+             thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+         elif "2.5-pro" in model.id:
+             # 2.5 pro must think.
+             thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
          else:
-             thinking_config = {"includeThoughts": True}
-             if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
-                 budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
-                     effort
-                 ]
-                 thinking_config["thinkingBudget"] = budget
+             # no thoughts head empty
+             thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
          request_json["generationConfig"]["thinkingConfig"] = thinking_config

      else:
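
Aside: the net effect of the thinking logic above is that Gemini 3 models always get a thinkingLevel while older reasoning models get a token thinkingBudget. A rough sketch of the resulting generationConfig payloads for a "medium" reasoning_effort, derived only from the mappings in this hunk (the model split and values are illustrative):

# Sketch only: what the thinkingConfig built above would look like.
gemini_3_pro_config = {
    "generationConfig": {
        # "medium" maps to "high" on Pro, which only supports low/high
        "thinkingConfig": {"thinkingLevel": "high"},
    }
}
gemini_25_config = {
    "generationConfig": {
        # "medium" maps to a 4096-token budget on pre-Gemini-3 reasoning models
        "thinkingConfig": {"includeThoughts": True, "thinkingBudget": 4096},
    }
}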
@@ -59,13 +134,60 @@

      # Add tools if provided
      if tools:
-         tool_declarations = [tool.dump_for("google") for tool in tools]
-         request_json["tools"] = [{"functionDeclarations": tool_declarations}]
+         request_tools = []
+         function_declarations = []
+
+         for tool in tools:
+             if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                 # Gemini computer use tool - add as separate tool entry
+                 env_map = {
+                     "browser": "ENVIRONMENT_BROWSER",
+                     "android": "ENVIRONMENT_ANDROID",
+                 }
+                 env = env_map.get(
+                     tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                 )
+                 cu_tool: dict = {
+                     "computerUse": {
+                         "environment": env,
+                     }
+                 }
+                 excluded = tool.get("excluded_predefined_functions")
+                 if excluded:
+                     cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                 request_tools.append(cu_tool)
+             elif hasattr(tool, "dump_for"):
+                 # Regular Tool object
+                 function_declarations.append(tool.dump_for("google"))
+             elif isinstance(tool, dict):
+                 # Raw dict tool - assume it's a function declaration
+                 function_declarations.append(tool)
+
+         if function_declarations:
+             request_tools.append({"functionDeclarations": function_declarations})
+
+         if request_tools:
+             request_json["tools"] = request_tools

      # Handle JSON mode
      if sampling_params.json_mode and model.supports_json:
          request_json["generationConfig"]["responseMimeType"] = "application/json"

+     # Handle media_resolution for Gemini 3 (requires v1alpha)
+     if sampling_params.media_resolution is not None:
+         is_gemini_3 = "gemini-3" in model.name.lower()
+         if is_gemini_3:
+             # Add global media resolution to generationConfig
+             request_json["generationConfig"]["mediaResolution"] = {
+                 "level": sampling_params.media_resolution
+             }
+         else:
+             # Warn if trying to use media_resolution on non-Gemini-3 models
+             maybe_warn(
+                 "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                 model_name=model.name,
+             )
+
      return request_json

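Aside: the tools loop above now accepts raw dicts tagged "gemini_computer_use" alongside regular Tool objects. A hedged sketch of such an input dict and the tool entry it produces (the excluded function name is just an example value):

# Illustrative input, matching the dict branch above
cu_tool_input = {
    "type": "gemini_computer_use",
    "environment": "browser",  # or "android"
    "excluded_predefined_functions": ["drag_and_drop"],  # example value
}
# Per the code above, this becomes the following entry in request_json["tools"]:
expected_entry = {
    "computerUse": {
        "environment": "ENVIRONMENT_BROWSER",
        "excludedPredefinedFunctions": ["drag_and_drop"],
    }
}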
@@ -103,7 +225,7 @@ class GeminiRequest(APIRequestBase):
          self.request_json = await _build_gemini_request(
              self.model,
              self.context.prompt,
-             self.context.tools,
+             self.context.tools,  # type: ignore
              self.context.sampling_params,
          )

@@ -137,10 +259,29 @@
              candidate = data["candidates"][0]
              if "content" in candidate and "parts" in candidate["content"]:
                  for part in candidate["content"]["parts"]:
+                     # Extract thought signature if present
+                     raw_sig = part.get("thoughtSignature")
+                     thought_sig = (
+                         ThoughtSignature(raw_sig, provider="gemini")
+                         if raw_sig is not None
+                         else None
+                     )
+
                      if "text" in part:
-                         parts.append(Text(part["text"]))
+                         parts.append(
+                             Text(
+                                 part["text"],
+                                 thought_signature=thought_sig,
+                             )
+                         )
                      elif "thought" in part:
-                         parts.append(Thinking(part["thought"]))
+                         # Thought with optional signature
+                         parts.append(
+                             Thinking(
+                                 content=part["thought"],
+                                 thought_signature=thought_sig,
+                             )
+                         )
                      elif "functionCall" in part:
                          func_call = part["functionCall"]
                          # Generate a unique ID since Gemini doesn't provide one
@@ -152,8 +293,13 @@
                                  id=tool_id,
                                  name=func_call["name"],
                                  arguments=func_call.get("args", {}),
+                                 thought_signature=thought_sig,
                              )
                          )
+                     elif thought_sig:
+                         parts.append(
+                             Text("", thought_signature=thought_sig)
+                         )

              content = Message("assistant", parts)

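Aside: taken together, the gemini.py changes round-trip Gemini thought signatures. A sketch, assuming the constructors keep the keyword arguments shown in this diff (the function name, arguments, and id below are made up):

# ThoughtSignature and ToolCall come from lm_deluge.prompt (imported at the top of this file).
# A response part as parsed by the code above:
part = {
    "functionCall": {"name": "get_weather", "args": {"city": "Paris"}},
    "thoughtSignature": "abc123",  # opaque value returned by the API
}
sig = ThoughtSignature(part["thoughtSignature"], provider="gemini")
call = ToolCall(
    id="call_0",  # Gemini doesn't provide one, so the client generates it
    name=part["functionCall"]["name"],
    arguments=part["functionCall"].get("args", {}),
    thought_signature=sig,
)
# On the next turn, _build_gemini_request injects the dummy signature into any
# Gemini 3 functionCall part that lost its thoughtSignature, emitting
# WARN_GEMINI3_MISSING_SIGNATURE.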
lm_deluge/api_requests/openai.py CHANGED
@@ -2,12 +2,17 @@ import json
  import os
  import traceback as tb
  from types import SimpleNamespace
+ from typing import Sequence

  import aiohttp
  from aiohttp import ClientResponse

  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
+ from lm_deluge.util.schema import (
+     prepare_output_schema,
+     transform_schema_for_openai,
+ )
  from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
@@ -17,6 +22,24 @@ from ..usage import Usage
  from .base import APIRequestBase, APIResponse


+ def _message_contents_to_string(messages: list[dict]):
+     messages = messages.copy()
+
+     for msg in messages:
+         content = msg.get("content")
+         assert content
+         if isinstance(content, list):
+             new_content = ""
+             for part in content:
+                 assert "text" in part, "Invalid text part: " + str(part)
+                 new_content += part["text"]
+                 new_content += "\n"
+
+             msg["content"] = new_content.strip()
+
+     return messages
+
+
  async def _build_oa_chat_request(
      model: APIModel,
      context: RequestContext,
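
Aside: _message_contents_to_string flattens list-style message content into a single newline-joined string (used below for "tinker" models). A quick sketch with an assumed OpenAI-style message; the helper asserts every part carries a "text" key:

messages = [
    {
        "role": "user",
        "content": [{"type": "text", "text": "Hello"}, {"type": "text", "text": "World"}],
    }
]
flattened = _message_contents_to_string(messages)
# flattened[0]["content"] == "Hello\nWorld"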
@@ -50,14 +73,16 @@
              request_json["service_tier"] = context.service_tier
          else:
              request_json["service_tier"] = context.service_tier
+     # if tinker, for now hack to mush into 1 string
+     if "tinker" in model.name:
+         request_json["messages"] = _message_contents_to_string(request_json["messages"])
+
      # set max_tokens or max_completion_tokens dep. on provider
      if "cohere" in model.api_base:
          request_json["max_tokens"] = sampling_params.max_new_tokens
      else:
          request_json["max_completion_tokens"] = sampling_params.max_new_tokens
      if model.reasoning_model:
-         request_json["temperature"] = 1.0
-         request_json["top_p"] = 1.0
          effort = sampling_params.reasoning_effort
          if effort in [None, "none"]:
              # Disable reasoning for Gemini models when no effort requested
@@ -67,11 +92,24 @@
                  effort = "minimal"
              else:
                  effort = "low"
-         if effort == "minimal" and "gpt-5" not in model.id:
-             print(
-                 "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
-             )
+         # GPT-5.1 models don't support 'minimal', they support 'none' instead
+         if effort == "minimal" and "gpt-5.1" in model.id:
+             maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+             effort = "none"
+         elif effort == "minimal" and "gpt-5" not in model.id:
+             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
              effort = "low"
+         # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+         if effort == "xhigh" and not model.supports_xhigh:
+             maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+             effort = "high"
+         # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+         if model.supports_xhigh and effort != "none":
+             del request_json["temperature"]
+             del request_json["top_p"]
+         else:
+             request_json["temperature"] = 1.0
+             request_json["top_p"] = 1.0
          request_json["reasoning_effort"] = effort
      else:
          if sampling_params.reasoning_effort:
@@ -81,17 +119,48 @@
          request_json["logprobs"] = True
          if sampling_params.top_logprobs is not None:
              request_json["top_logprobs"] = sampling_params.top_logprobs
-     if sampling_params.json_mode and model.supports_json:
+
+     # Handle structured outputs (output_schema takes precedence over json_mode)
+     if context.output_schema:
+         if model.supports_json:
+             base_schema = prepare_output_schema(context.output_schema)
+
+             # Apply OpenAI-specific transformations (currently passthrough with copy)
+             transformed_schema = transform_schema_for_openai(base_schema)
+
+             request_json["response_format"] = {
+                 "type": "json_schema",
+                 "json_schema": {
+                     "name": "response",
+                     "schema": transformed_schema,
+                     "strict": True,
+                 },
+             }
+         else:
+             print(
+                 f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+             )
+     elif sampling_params.json_mode and model.supports_json:
          request_json["response_format"] = {"type": "json_object"}
+
      if tools:
          request_tools = []
          for tool in tools:
              if isinstance(tool, Tool):
-                 request_tools.append(tool.dump_for("openai-completions"))
+                 request_tools.append(
+                     tool.dump_for(
+                         "openai-completions", strict=sampling_params.strict_tools
+                     )
+                 )
              elif isinstance(tool, MCPServer):
                  as_tools = await tool.to_tools()
                  request_tools.extend(
-                     [t.dump_for("openai-completions") for t in as_tools]
+                     [
+                         t.dump_for(
+                             "openai-completions", strict=sampling_params.strict_tools
+                         )
+                         for t in as_tools
+                     ]
                  )
          request_json["tools"] = request_tools
      return request_json
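
Aside: with the new output_schema path, a Chat Completions request gains a strict json_schema response format. A sketch, assuming a simple schema that prepare_output_schema and transform_schema_for_openai pass through essentially unchanged (both helpers may normalize it further):

output_schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
    "additionalProperties": False,
}
# Per the branch above, the request then carries:
request_json_fragment = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "response",
            "schema": output_schema,  # a transformed copy in practice
            "strict": True,
        },
    }
}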
@@ -170,7 +239,7 @@ class OpenAIRequest(APIRequestBase):
                  parts.append(Text(message["content"]))

              # Add tool calls if present
-             if "tool_calls" in message:
+             if "tool_calls" in message and message["tool_calls"] is not None:
                  for tool_call in message["tool_calls"]:
                      parts.append(
                          ToolCall(
@@ -191,9 +260,9 @@
                      and "logprobs" in data["choices"][0]
                  ):
                      logprobs = data["choices"][0]["logprobs"]["content"]
-             except Exception:
+             except Exception as e:
                  is_error = True
-                 error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+                 error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
          elif mimetype and "json" in mimetype.lower():
              is_error = True  # expected status is 200, otherwise it's an error
              data = await http_response.json()
@@ -271,23 +340,60 @@
      request_json["max_output_tokens"] = sampling_params.max_new_tokens

      if model.reasoning_model:
-         if sampling_params.reasoning_effort in [None, "none"]:
+         effort = sampling_params.reasoning_effort
+         if effort in [None, "none"]:
              # gemini models can switch reasoning off
              if "gemini" in model.id:
-                 sampling_params.reasoning_effort = "none"
+                 effort = "none"
              else:
-                 sampling_params.reasoning_effort = "low"
-         request_json["temperature"] = 1.0
-         request_json["top_p"] = 1.0
+                 effort = "low"
+         # GPT-5.1 models don't support 'minimal', they support 'none' instead
+         if effort == "minimal" and "gpt-5.1" in model.id:
+             maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+             effort = "none"
+         elif effort == "minimal" and "gpt-5" not in model.id:
+             maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+             effort = "low"
+         # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+         if effort == "xhigh" and not model.supports_xhigh:
+             maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+             effort = "high"
+         # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+         if model.supports_xhigh and effort != "none":
+             del request_json["temperature"]
+             del request_json["top_p"]
+         else:
+             request_json["temperature"] = 1.0
+             request_json["top_p"] = 1.0
          request_json["reasoning"] = {
-             "effort": sampling_params.reasoning_effort,
+             "effort": effort,
              "summary": "auto",
          }
      else:
          if sampling_params.reasoning_effort:
              maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

-     if sampling_params.json_mode and model.supports_json:
+     # Handle structured outputs (output_schema takes precedence over json_mode)
+     if context.output_schema:
+         if model.supports_json:
+             base_schema = prepare_output_schema(context.output_schema)
+
+             # Apply OpenAI-specific transformations (currently passthrough with copy)
+             transformed_schema = transform_schema_for_openai(base_schema)
+
+             request_json["text"] = {
+                 "format": {
+                     "type": "json_schema",
+                     "name": "response",
+                     "schema": transformed_schema,
+                     "strict": True,
+                 }
+             }
+         else:
+             print(
+                 f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+             )
+     elif sampling_params.json_mode and model.supports_json:
          request_json["text"] = {"format": {"type": "json_object"}}

      # Handle tools
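
Aside: the Responses API variant of the same change nests the schema fields under text.format instead of response_format. A sketch contrasting the two payload shapes for one assumed schema:

schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
# Chat Completions (_build_oa_chat_request, earlier hunk):
chat_fragment = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "response", "schema": schema, "strict": True},
    }
}
# Responses API (_build_oa_responses_request, this hunk):
responses_fragment = {
    "text": {
        "format": {
            "type": "json_schema",
            "name": "response",
            "schema": schema,
            "strict": True,
        }
    }
}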
@@ -295,11 +401,13 @@
      # Add regular function tools
      for tool in tools or []:
          if isinstance(tool, Tool):
-             request_tools.append(tool.dump_for("openai-responses"))
+             request_tools.append(
+                 tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+             )
          elif isinstance(tool, dict):
              # if computer use, make sure model supports it
              if tool["type"] == "computer_use_preview":
-                 if model.name != "openai-computer-use-preview":
+                 if model.name != "computer-use-preview":
                      raise ValueError(f"model {model.id} does not support computer use")
                  # have to use truncation
                  request_json["truncation"] = "auto"
@@ -307,7 +415,14 @@
          elif isinstance(tool, MCPServer):
              if context.force_local_mcp:
                  as_tools = await tool.to_tools()
-                 request_tools.extend([t.dump_for("openai-responses") for t in as_tools])
+                 request_tools.extend(
+                     [
+                         t.dump_for(
+                             "openai-responses", strict=sampling_params.strict_tools
+                         )
+                         for t in as_tools
+                     ]
+                 )
              else:
                  request_tools.append(tool.for_openai_responses())

@@ -381,7 +496,7 @@ class OpenAIResponsesRequest(APIRequestBase):
              output = data.get("output", [])
              if not output:
                  is_error = True
-                 error_message = "No output in response"
+                 error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
              else:
                  # Process each output item
                  for item in output:
@@ -536,7 +651,7 @@ async def stream_chat(
      model_name: str,  # must correspond to registry
      prompt: Conversation,
      sampling_params: SamplingParams = SamplingParams(),
-     tools: list | None = None,
+     tools: Sequence[Tool | dict | MCPServer] | None = None,
      cache: CachePattern | None = None,
      extra_headers: dict[str, str] | None = None,
  ):
@@ -562,7 +677,12 @@
          request_header.update(filtered_extra)

      context = SimpleNamespace(
-         prompt=prompt, tools=tools, sampling_params=sampling_params
+         prompt=prompt,
+         tools=tools,
+         sampling_params=sampling_params,
+         service_tier=None,
+         output_schema=None,
+         model_name=model_name,
      )

      request_json = await _build_oa_chat_request(model, context)  # type: ignore
lm_deluge/batches.py CHANGED
@@ -141,31 +141,22 @@ async def submit_batch_oa(file_path: str):
      return batch_id


- async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
-     """Upload a JSONL file and create one Anthropic batch."""
+ async def _submit_anthropic_batch(requests: list[dict], headers: dict, model: str):
+     """Submit batch requests to Anthropic's Message Batches API."""

      async with aiohttp.ClientSession() as session:
          url = f"{registry[model].api_base}/messages/batches"
-         data = aiohttp.FormData()
-         with open(file_path, "rb") as f:
-             data.add_field(
-                 "file",
-                 f,
-                 filename=os.path.basename(file_path),
-                 content_type="application/json",
-             )
-
-         async with session.post(url, data=data, headers=headers) as response:
-             if response.status != 200:
-                 text = await response.text()
-                 raise ValueError(f"Error creating batch: {text}")
+         payload = {"requests": requests}

-             batch_data = await response.json()
-             batch_id = batch_data["id"]
-             print(f"Anthropic batch job started successfully: id = {batch_id}")
+         async with session.post(url, json=payload, headers=headers) as response:
+             if response.status != 200:
+                 text = await response.text()
+                 raise ValueError(f"Error creating batch: {text}")

-             os.remove(file_path)
-             return batch_id
+             batch_data = await response.json()
+             batch_id = batch_data["id"]
+             print(f"Anthropic batch job started successfully: id = {batch_id}")
+             return batch_id


  async def create_batch_files_oa(
@@ -409,20 +400,10 @@

          if current_batch and (would_exceed_size or would_exceed_items):
              # Submit current batch
-             def write_batch_file():
-                 with tempfile.NamedTemporaryFile(
-                     mode="w+", suffix=".jsonl", delete=False
-                 ) as f:
-                     for batch_request in current_batch:
-                         json.dump(batch_request, f)
-                         f.write("\n")
-                     print("wrote", len(current_batch), "items")
-                     return f.name
-
-             file_path = await asyncio.to_thread(write_batch_file)
+             print("wrote", len(current_batch), "items")
              batch_tasks.append(
                  asyncio.create_task(
-                     _submit_anthropic_batch(file_path, request_headers, model)  # type: ignore
+                     _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
                  )
              )

@@ -436,21 +417,10 @@

      # Submit final batch if it has items
      if current_batch:
-
-         def write_final_batch_file():
-             with tempfile.NamedTemporaryFile(
-                 mode="w+", suffix=".jsonl", delete=False
-             ) as f:
-                 for batch_request in current_batch:
-                     json.dump(batch_request, f)
-                     f.write("\n")
-                 print("wrote", len(current_batch), "items")
-                 return f.name
-
-         file_path = await asyncio.to_thread(write_final_batch_file)
+         print("wrote", len(current_batch), "items")
          batch_tasks.append(
              asyncio.create_task(
-                 _submit_anthropic_batch(file_path, request_headers, model)  # type: ignore
+                 _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
              )
          )
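
Aside: batches.py now posts batch requests directly as JSON to /messages/batches instead of writing and uploading a temporary JSONL file. A sketch of the payload shape, assuming the per-request entries built elsewhere in batches.py follow the Message Batches API convention of custom_id plus params (the id and model below are illustrative):

payload = {
    "requests": [
        {
            "custom_id": "req-0",
            "params": {
                "model": "claude-sonnet-4-20250514",  # illustrative
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello"}],
            },
        }
    ]
}
# Sent with: session.post(url, json=payload, headers=headers)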