lm-deluge 0.0.72__tar.gz → 0.0.74__tar.gz
This diff compares the contents of two publicly released versions of the package. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- {lm_deluge-0.0.72/src/lm_deluge.egg-info → lm_deluge-0.0.74}/PKG-INFO +1 -1
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/pyproject.toml +1 -1
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/anthropic.py +26 -1
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/bedrock.py +9 -4
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/openai.py +74 -14
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/client.py +37 -6
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/config.py +2 -1
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/anthropic.py +2 -2
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/openai.py +42 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/prompt.py +12 -2
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/request_context.py +2 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/tool.py +60 -15
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/warnings.py +2 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/LICENSE +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/README.md +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/setup.cfg +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/gemini.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/__init__.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/google.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/kimi.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/openrouter.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_file_upload.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_mock_openai.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_native_mcp_server.py +0 -0
- {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_openrouter_generic.py +0 -0

src/lm_deluge/api_requests/anthropic.py (+26 -1)

@@ -84,12 +84,37 @@ def _build_anthropic_request(
     if "temperature" in request_json and "top_p" in request_json:
         request_json.pop("top_p")

+    # Handle structured outputs (output_format)
+    if context.output_schema:
+        if model.supports_json:
+            _add_beta(base_headers, "structured-outputs-2025-11-13")
+            request_json["output_format"] = {
+                "type": "json_schema",
+                "schema": context.output_schema,
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode:
+        # Anthropic doesn't support basic json_mode without a schema
+        print(
+            "WARNING: Anthropic does not support basic json_mode without a schema. "
+            "Use output_schema parameter for structured JSON outputs."
+        )
+
+    # Add beta header for strict tools when enabled
+    if tools and sampling_params.strict_tools and model.supports_json:
+        _add_beta(base_headers, "structured-outputs-2025-11-13")
+
     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-
+                # Only use strict mode if model supports structured outputs
+                use_strict = sampling_params.strict_tools and model.supports_json
+                tool_definitions.append(tool.dump_for("anthropic", strict=use_strict))
             elif isinstance(tool, dict) and "url" in tool:
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool)
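
Note: a minimal sketch of the request body the new output_format branch produces when output_schema is set and the model supports structured outputs. The schema, model, and messages below are invented for illustration; only the output_format shape and the beta header come from the change above.

# hypothetical example schema, not from the package
output_schema = {
    "type": "object",
    "properties": {"city": {"type": "string"}, "population": {"type": "integer"}},
    "required": ["city", "population"],
    "additionalProperties": False,
}
request_json = {
    "model": "claude-sonnet-4-5-20250929",
    "max_tokens": 1024,
    "messages": [{"role": "user", "content": "Largest city in France?"}],
    # added by _build_anthropic_request when context.output_schema is set
    "output_format": {"type": "json_schema", "schema": output_schema},
}
# plus the request header: anthropic-beta: structured-outputs-2025-11-13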

src/lm_deluge/api_requests/bedrock.py (+9 -4)

@@ -106,7 +106,8 @@ async def _build_anthropic_bedrock_request(
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-
+                # Bedrock doesn't have the strict-mode betas Anthropic exposes yet
+                tool_definitions.append(tool.dump_for("anthropic", strict=False))
             elif isinstance(tool, dict):
                 tool_definitions.append(tool)
         # add betas if needed

@@ -124,7 +125,9 @@ async def _build_anthropic_bedrock_request(
                 # Convert to individual tools locally (like OpenAI does)
                 individual_tools = await tool.to_tools()
                 for individual_tool in individual_tools:
-                    tool_definitions.append(
+                    tool_definitions.append(
+                        individual_tool.dump_for("anthropic", strict=False)
+                    )

         # Add cache control to last tool if tools_only caching is specified
         if cache_pattern == "tools_only" and tool_definitions:

@@ -194,11 +197,13 @@ async def _build_openai_bedrock_request(
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for("openai-completions", strict=False)
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [t.dump_for("openai-completions", strict=False) for t in as_tools]
                 )
         request_json["tools"] = request_tools


src/lm_deluge/api_requests/openai.py (+74 -14)

@@ -67,10 +67,12 @@ async def _build_oa_chat_request(
            effort = "minimal"
        else:
            effort = "low"
-
-
-
-
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
         request_json["reasoning_effort"] = effort
     else:

@@ -81,17 +83,43 @@ async def _build_oa_chat_request(
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
             request_json["top_logprobs"] = sampling_params.top_logprobs
-
+
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            request_json["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "response",
+                    "schema": context.output_schema,
+                    "strict": True,
+                },
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
+
     if tools:
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for(
+                        "openai-completions", strict=sampling_params.strict_tools
+                    )
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [
+                    [
+                        t.dump_for(
+                            "openai-completions", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
                 )
         request_json["tools"] = request_tools
     return request_json

@@ -271,23 +299,46 @@ async def _build_oa_responses_request(
     request_json["max_output_tokens"] = sampling_params.max_new_tokens

     if model.reasoning_model:
-
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
            # gemini models can switch reasoning off
            if "gemini" in model.id:
-
+                effort = "none"
            else:
-
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
         request_json["temperature"] = 1.0
         request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort":
+            "effort": effort,
             "summary": "auto",
         }
     else:
         if sampling_params.reasoning_effort:
             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

-
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            request_json["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": "response",
+                    "schema": context.output_schema,
+                    "strict": True,
+                }
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}

     # Handle tools

@@ -295,7 +346,9 @@ async def _build_oa_responses_request(
         # Add regular function tools
         for tool in tools or []:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+                )
             elif isinstance(tool, dict):
                 # if computer use, make sure model supports it
                 if tool["type"] == "computer_use_preview":

@@ -307,7 +360,14 @@ async def _build_oa_responses_request(
             elif isinstance(tool, MCPServer):
                 if context.force_local_mcp:
                     as_tools = await tool.to_tools()
-                    request_tools.extend(
+                    request_tools.extend(
+                        [
+                            t.dump_for(
+                                "openai-responses", strict=sampling_params.strict_tools
+                            )
+                            for t in as_tools
+                        ]
+                    )
                 else:
                     request_tools.append(tool.for_openai_responses())

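
Note: the same output_schema lands in two different places depending on the endpoint. A hedged sketch of the two payload shapes built above; the schema is an invented example.

schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
    "additionalProperties": False,
}

# Chat Completions: nested under response_format.json_schema
chat_body = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "response", "schema": schema, "strict": True},
    }
}

# Responses API: flattened under text.format
responses_body = {
    "text": {
        "format": {"type": "json_schema", "name": "response", "schema": schema, "strict": True}
    }
}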

src/lm_deluge/client.py (+37 -6)

@@ -44,10 +44,14 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """

-    _REASONING_SUFFIXES: ClassVar[
+    _REASONING_SUFFIXES: ClassVar[
+        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+    ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-minimal": "minimal",
+        "-none": "none",
     }

     model_names: str | list[str] = ["gpt-4.1-mini"]

@@ -149,9 +153,11 @@ class _LLMClient(BaseModel):

     def _normalize_model_names(
         self, models: list[str]
-    ) -> tuple[
+    ) -> tuple[
+        list[str], list[Literal["low", "medium", "high", "minimal", "none"] | None]
+    ]:
         normalized: list[str] = []
-        efforts: list[Literal["low", "medium", "high"] | None] = []
+        efforts: list[Literal["low", "medium", "high", "minimal", "none"] | None] = []

         for name in models:
             base_name = self._preprocess_openrouter_model(name)

@@ -164,7 +170,10 @@ class _LLMClient(BaseModel):
         return normalized, efforts

     def _align_sampling_params(
-        self,
+        self,
+        per_model_efforts: list[
+            Literal["low", "medium", "high", "minimal", "none"] | None
+        ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
             per_model_efforts = per_model_efforts + [None] * (

@@ -332,7 +341,7 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+    ) -> tuple[str, Literal["low", "medium", "high", "minimal", "none"] | None]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):

@@ -364,6 +373,15 @@ class _LLMClient(BaseModel):
            assert (
                self.use_responses_api
            ), "background mode only allowed for responses api"
+
+        # codex models require responses api
+        for model_name in self.model_names:
+            if "codex" in model_name.lower() and not self.use_responses_api:
+                raise ValueError(
+                    f"Model '{model_name}' requires use_responses_api=True. "
+                    "Codex models are only available via the Responses API."
+                )
+
         # Auto-generate name if not provided
         if self.name is None:
             if len(self.model_names) == 1:

@@ -543,6 +561,7 @@ class _LLMClient(BaseModel):
         return_completions_only: Literal[True],
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
+        output_schema: dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[str | None]: ...

@@ -555,6 +574,7 @@ class _LLMClient(BaseModel):
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
+        output_schema: dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[APIResponse]: ...

@@ -566,6 +586,7 @@ class _LLMClient(BaseModel):
         return_completions_only: bool = False,
         show_progress: bool = True,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> list[APIResponse] | list[str | None] | dict[str, int]:

@@ -594,6 +615,7 @@ class _LLMClient(BaseModel):
         task_id = self.start_nowait(
             prompt,
             tools=tools,
+            output_schema=output_schema,
             cache=cache,
             service_tier=service_tier,
         )

@@ -639,6 +661,7 @@ class _LLMClient(BaseModel):
         return_completions_only: bool = False,
         show_progress=True,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
     ):
         return asyncio.run(

@@ -647,6 +670,7 @@ class _LLMClient(BaseModel):
                 return_completions_only=return_completions_only,
                 show_progress=show_progress,
                 tools=tools,
+                output_schema=output_schema,
                 cache=cache,
             )
         )

@@ -670,6 +694,7 @@ class _LLMClient(BaseModel):
         prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> int:

@@ -688,6 +713,7 @@ class _LLMClient(BaseModel):
             request_timeout=self.request_timeout,
             status_tracker=tracker,
             tools=tools,
+            output_schema=output_schema,
             cache=cache,
             use_responses_api=self.use_responses_api,
             background=self.background,

@@ -705,11 +731,16 @@ class _LLMClient(BaseModel):
         prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> APIResponse:
         task_id = self.start_nowait(
-            prompt,
+            prompt,
+            tools=tools,
+            output_schema=output_schema,
+            cache=cache,
+            service_tier=service_tier,
         )
         return await self.wait_for(task_id)

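
Note: taken together, the client changes let callers pick reasoning effort with a model-name suffix and pass a JSON schema straight through to the request builders. A hedged usage sketch, assuming the public LLMClient exported by the package; the model name and schema are illustrative.

from lm_deluge import LLMClient

# "-none" / "-minimal" suffixes are now recognized alongside -low/-medium/-high
client = LLMClient(model_names=["gpt-5.1-none"])

schema = {  # invented example schema
    "type": "object",
    "properties": {"sentiment": {"type": "string", "enum": ["positive", "negative"]}},
    "required": ["sentiment"],
    "additionalProperties": False,
}

completions = client.process_prompts_sync(
    ["I loved this movie."],
    output_schema=schema,  # new parameter, forwarded into RequestContext
    return_completions_only=True,
)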

src/lm_deluge/config.py (+2 -1)

@@ -7,10 +7,11 @@ class SamplingParams(BaseModel):
     temperature: float = 0.0
     top_p: float = 1.0
     json_mode: bool = False
-    max_new_tokens: int =
+    max_new_tokens: int = 2_048
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
     logprobs: bool = False
     top_logprobs: int | None = None
+    strict_tools: bool = True

     def to_vllm(self):
         try:
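
Note: a small hedged sketch of the new SamplingParams knobs; the values are arbitrary.

from lm_deluge.config import SamplingParams

# "none" is now a valid reasoning_effort literal, and strict_tools (default True)
# controls whether tool schemas are dumped in strict mode.
params = SamplingParams(max_new_tokens=512, reasoning_effort="none", strict_tools=False)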

src/lm_deluge/models/anthropic.py (+2 -2)

@@ -27,7 +27,7 @@ ANTHROPIC_MODELS = {
         "name": "claude-sonnet-4-5-20250929",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
-        "supports_json":
+        "supports_json": True,
         "api_spec": "anthropic",
         "input_cost": 3.0,
         "cached_input_cost": 0.30,

@@ -39,7 +39,7 @@ ANTHROPIC_MODELS = {
         "name": "claude-opus-4-1-20250805",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
-        "supports_json":
+        "supports_json": True,
         "api_spec": "anthropic",
         "input_cost": 15.0,
         "cached_input_cost": 1.50,

src/lm_deluge/models/openai.py (+42 -0)

@@ -10,6 +10,48 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.1": {
+        "id": "gpt-5.1",
+        "name": "gpt-5.1",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex": {
+        "id": "gpt-5.1-codex",
+        "name": "gpt-5.1-codex",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex-mini": {
+        "id": "gpt-5.1-codex-mini",
+        "name": "gpt-5.1-codex-mini",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 0.25,
+        "cached_input_cost": 0.025,
+        "output_cost": 2.0,
+        "reasoning_model": True,
+    },
     "gpt-5-codex": {
         "id": "gpt-5-codex",
         "name": "gpt-5-codex",
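
Note: the new codex entries pair with the validator added in client.py above, which rejects codex models unless the Responses API is enabled. A hedged sketch, assuming the public LLMClient wrapper and that use_responses_api is a constructor field:

from lm_deluge import LLMClient

# raises ValueError: codex models are only available via the Responses API
# LLMClient(model_names=["gpt-5.1-codex"])

# works: Responses API enabled; the "-high" suffix maps to reasoning_effort="high"
client = LLMClient(model_names=["gpt-5.1-codex-high"], use_responses_api=True)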

src/lm_deluge/prompt.py (+12 -2)

@@ -1195,14 +1195,24 @@ class Conversation:

     @classmethod
     def from_unknown(
-        cls, messages: list[dict], *, system: str | list[dict] | None = None
+        cls, messages: list[dict] | dict, *, system: str | list[dict] | None = None
     ) -> tuple["Conversation", str]:
         """Attempt to convert provider-formatted messages without knowing the provider.

         Returns the parsed conversation together with the provider label that succeeded
-        ("openai" or "
+        ("openai", "anthropic", or "log").
         """

+        # Check if input is in log format (output from to_log())
+        if isinstance(messages, dict) and "messages" in messages:
+            return cls.from_log(messages), "log"
+
+        # Ensure messages is a list for provider detection
+        if not isinstance(messages, list):
+            raise ValueError(
+                "messages must be a list of dicts or a dict with 'messages' key"
+            )
+
         def _detect_provider() -> str:
             has_openai_markers = False
             has_anthropic_markers = False
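
Note: a hedged sketch of the new dict branch in Conversation.from_unknown; the Conversation.user() helper is an assumption used here for brevity, while to_log()/from_log come from the change above.

from lm_deluge.prompt import Conversation

conv = Conversation.user("hi")         # assumed constructor helper
payload = conv.to_log()                # dict containing a "messages" key
restored, provider = Conversation.from_unknown(payload)
assert provider == "log"
# anything that is neither a list nor a dict with "messages" now raises ValueError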

src/lm_deluge/request_context.py (+2 -0)

@@ -32,6 +32,7 @@ class RequestContext:

     # Optional features
     tools: list | None = None
+    output_schema: dict | None = None
     cache: CachePattern | None = None
     use_responses_api: bool = False
     background: bool = False

@@ -66,6 +67,7 @@ class RequestContext:
             "results_arr": self.results_arr,
             "callback": self.callback,
             "tools": self.tools,
+            "output_schema": self.output_schema,
             "cache": self.cache,
             "use_responses_api": self.use_responses_api,
             "background": self.background,

src/lm_deluge/tool.py (+60 -15)

@@ -713,17 +713,40 @@ class Tool(BaseModel):
         """just an alias for the above"""
         return self.for_openai_completions(strict=strict, **kwargs)

-    def for_openai_responses(self, **kwargs) -> dict[str, Any]:
+    def for_openai_responses(self, *, strict: bool = True, **kwargs) -> dict[str, Any]:
         if self.is_built_in:
             return {"type": self.type, **self.built_in_args, **kwargs}
-        return {
-            "type": "function",
-            "name": self.name,
-            "description": self.description,
-            "parameters": self._json_schema(include_additional_properties=True),
-        }

-
+        # Check if schema is compatible with strict mode
+        if strict and not self._is_strict_mode_compatible():
+            strict = False
+
+        if strict:
+            # For strict mode, remove defaults and make all parameters required
+            schema = self._json_schema(
+                include_additional_properties=True, remove_defaults=True
+            )
+            schema["required"] = list(
+                (self.parameters or {}).keys()
+            )  # All parameters required in strict mode
+
+            return {
+                "type": "function",
+                "name": self.name,
+                "description": self.description,
+                "parameters": schema,
+                "strict": True,
+            }
+        else:
+            # For non-strict mode, use the original required list
+            return {
+                "type": "function",
+                "name": self.name,
+                "description": self.description,
+                "parameters": self._json_schema(include_additional_properties=True),
+            }
+
+    def for_anthropic(self, *, strict: bool = True, **kwargs) -> dict[str, Any]:
         # built-in tools have "name", "type", maybe metadata
         if self.is_built_in:
             return {

@@ -732,11 +755,33 @@ class Tool(BaseModel):
                 **self.built_in_args,
                 **kwargs,
             }
-
-
-
-
-
+
+        # Check if schema is compatible with strict mode
+        if strict and not self._is_strict_mode_compatible():
+            strict = False
+
+        if strict:
+            # For strict mode, remove defaults and make all parameters required
+            schema = self._json_schema(
+                include_additional_properties=True, remove_defaults=True
+            )
+            schema["required"] = list(
+                (self.parameters or {}).keys()
+            )  # All parameters required in strict mode
+
+            return {
+                "name": self.name,
+                "description": self.description,
+                "input_schema": schema,
+                "strict": True,
+            }
+        else:
+            # For non-strict mode, use the original required list
+            return {
+                "name": self.name,
+                "description": self.description,
+                "input_schema": self._json_schema(),
+            }

     def for_google(self) -> dict[str, Any]:
         """

@@ -759,11 +804,11 @@ class Tool(BaseModel):
         **kw,
     ) -> dict[str, Any]:
         if provider == "openai-responses":
-            return self.for_openai_responses()
+            return self.for_openai_responses(**kw)
         if provider == "openai-completions":
             return self.for_openai_completions(**kw)
         if provider == "anthropic":
-            return self.for_anthropic()
+            return self.for_anthropic(**kw)
         if provider == "google":
             return self.for_google()
         raise ValueError(provider)
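
Note: a hedged sketch of the two dump shapes for one tool. The Tool construction below is illustrative (the field names are taken from the methods above; the real constructor may require more), and the commented results only indicate shape.

from lm_deluge.tool import Tool

tool = Tool(  # illustrative construction; real required fields may differ
    name="get_weather",
    description="Look up current weather",
    parameters={"city": {"type": "string", "description": "City name"}},
)

tool.dump_for("openai-responses", strict=True)
# {"type": "function", "name": "get_weather", "description": ...,
#  "parameters": <schema with defaults removed, every parameter required>, "strict": True}

tool.dump_for("anthropic", strict=False)
# {"name": "get_weather", "description": ..., "input_schema": <schema as declared>}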

src/lm_deluge/warnings.py (+2 -0)

@@ -7,6 +7,8 @@ WARNINGS: dict[str, str] = {
     "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}.",
     "WARN_CACHING_UNSUPPORTED": "Cache parameter '{cache_param}' is not supported, ignoring for {model_name}.",
     "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
+    "WARN_MINIMAL_TO_LOW": "'minimal' reasoning effort only allowed for gpt-5 models. Setting to 'low' for {model_name}.",
+    "WARN_MINIMAL_TO_NONE": "GPT-5.1 models don't support 'minimal' reasoning effort. Converting to 'none' for {model_name}.",
 }
