lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/__init__.py +25 -2
- lm_deluge/api_requests/anthropic.py +92 -17
- lm_deluge/api_requests/base.py +47 -11
- lm_deluge/api_requests/bedrock.py +7 -4
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +138 -18
- lm_deluge/api_requests/openai.py +114 -21
- lm_deluge/client.py +282 -49
- lm_deluge/config.py +15 -3
- lm_deluge/mock_openai.py +643 -0
- lm_deluge/models/__init__.py +12 -1
- lm_deluge/models/anthropic.py +17 -2
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +29 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +10 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +86 -8
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +224 -40
- lm_deluge/request_context.py +7 -2
- lm_deluge/tool/__init__.py +1118 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +1621 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
- lm_deluge-0.0.88.dist-info/RECORD +117 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/openai.py
CHANGED
@@ -2,12 +2,17 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence

 import aiohttp
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_openai,
+)
 from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
@@ -56,8 +61,6 @@ async def _build_oa_chat_request(
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -67,11 +70,24 @@ async def _build_oa_chat_request(
                 effort = "minimal"
             else:
                 effort = "low"
-
-
-
-
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
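Read as plain logic, the effort handling added in this hunk (and mirrored in the Responses-API hunk further down) is a small normalization pass over the requested reasoning effort. The stand-alone sketch below paraphrases it for illustration only; normalize_effort, the bare model_id string, and the supports_xhigh flag are stand-ins for the library's ModelInfo fields, not its actual API.

def normalize_effort(effort: str, model_id: str, supports_xhigh: bool) -> str:
    # Illustrative mirror of the remapping in the hunk above.
    if effort == "minimal" and "gpt-5.1" in model_id:
        effort = "none"   # GPT-5.1 models accept "none" instead of "minimal"
    elif effort == "minimal" and "gpt-5" not in model_id:
        effort = "low"    # other models fall back to the closest supported value
    if effort == "xhigh" and not supports_xhigh:
        effort = "high"   # "xhigh" is limited to a few models (gpt-5.2, gpt-5.1-codex-max)
    return effort

print(normalize_effort("minimal", "gpt-5.1-chat", supports_xhigh=False))  # -> "none"
print(normalize_effort("minimal", "o4-mini", supports_xhigh=False))       # -> "low"
print(normalize_effort("xhigh", "gpt-5-mini", supports_xhigh=False))      # -> "high"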
@@ -81,17 +97,48 @@ async def _build_oa_chat_request(
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
             request_json["top_logprobs"] = sampling_params.top_logprobs
-
+
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                },
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
+
     if tools:
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for(
+                        "openai-completions", strict=sampling_params.strict_tools
+                    )
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [
+                    [
+                        t.dump_for(
+                            "openai-completions", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
                 )
         request_json["tools"] = request_tools
     return request_json
@@ -271,23 +318,60 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens

     if model.reasoning_model:
-
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
-
+                effort = "none"
             else:
-
-
-
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort":
+            "effort": effort,
             "summary": "auto",
         }
     else:
         if sampling_params.reasoning_effort:
             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

-
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                }
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}

     # Handle tools
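The two structured-output hunks wrap the same prepared schema differently because they target different endpoints: Chat Completions nests it under response_format.json_schema, while the Responses API uses text.format. A minimal sketch of the resulting request bodies follows, using a hand-written example schema; in the library the schema comes from prepare_output_schema / transform_schema_for_openai, whose internals are not part of this diff.

import json

# Illustrative schema only (not produced by the library in this sketch).
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
    "additionalProperties": False,
}

# Chat Completions request body, per the earlier hunk:
chat_body = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "response", "schema": schema, "strict": True},
    }
}

# Responses API request body, per the hunk above (flatter "text.format" shape):
responses_body = {
    "text": {
        "format": {
            "type": "json_schema",
            "name": "response",
            "schema": schema,
            "strict": True,
        }
    }
}

print(json.dumps(chat_body, indent=2))
print(json.dumps(responses_body, indent=2))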
@@ -295,11 +379,13 @@ async def _build_oa_responses_request(
     # Add regular function tools
     for tool in tools or []:
         if isinstance(tool, Tool):
-            request_tools.append(
+            request_tools.append(
+                tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+            )
         elif isinstance(tool, dict):
             # if computer use, make sure model supports it
             if tool["type"] == "computer_use_preview":
-                if model.name != "
+                if model.name != "computer-use-preview":
                     raise ValueError(f"model {model.id} does not support computer use")
                 # have to use truncation
                 request_json["truncation"] = "auto"
@@ -307,7 +393,14 @@ async def _build_oa_responses_request(
         elif isinstance(tool, MCPServer):
             if context.force_local_mcp:
                 as_tools = await tool.to_tools()
-                request_tools.extend(
+                request_tools.extend(
+                    [
+                        t.dump_for(
+                            "openai-responses", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
+                )
             else:
                 request_tools.append(tool.for_openai_responses())

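Both request builders now serialize tools through Tool.dump_for("openai-completions" / "openai-responses", strict=...). The exact payload dump_for produces is not shown in this diff, so the sketch below only approximates the conventional strict function-tool shapes for the two endpoints, using a hypothetical get_weather tool.

# Approximate target wire formats (assumptions, not the library's output).
# Chat Completions nests the function definition under "function":
chat_completions_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool
        "description": "Look up current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
            "additionalProperties": False,  # required when strict is enabled
        },
        "strict": True,
    },
}

# The Responses API uses a flat function-tool object:
responses_tool = {
    "type": "function",
    "name": "get_weather",
    "description": "Look up current weather for a city.",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
        "additionalProperties": False,
    },
    "strict": True,
}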
@@ -381,7 +474,7 @@ class OpenAIResponsesRequest(APIRequestBase):
         output = data.get("output", [])
         if not output:
             is_error = True
-            error_message = "No output in response"
+            error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
         else:
             # Process each output item
             for item in output:
@@ -536,7 +629,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools:
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):