lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge might be problematic.

Files changed (92)
  1. lm_deluge/__init__.py +25 -2
  2. lm_deluge/api_requests/anthropic.py +92 -17
  3. lm_deluge/api_requests/base.py +47 -11
  4. lm_deluge/api_requests/bedrock.py +7 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +138 -18
  7. lm_deluge/api_requests/openai.py +114 -21
  8. lm_deluge/client.py +282 -49
  9. lm_deluge/config.py +15 -3
  10. lm_deluge/mock_openai.py +643 -0
  11. lm_deluge/models/__init__.py +12 -1
  12. lm_deluge/models/anthropic.py +17 -2
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +29 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +10 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +86 -8
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +1 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +696 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +224 -40
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/tool/__init__.py +1118 -0
  39. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  40. lm_deluge/tool/builtin/gemini.py +59 -0
  41. lm_deluge/tool/builtin/openai.py +74 -0
  42. lm_deluge/tool/cua/__init__.py +173 -0
  43. lm_deluge/tool/cua/actions.py +148 -0
  44. lm_deluge/tool/cua/base.py +27 -0
  45. lm_deluge/tool/cua/batch.py +215 -0
  46. lm_deluge/tool/cua/converters.py +466 -0
  47. lm_deluge/tool/cua/kernel.py +702 -0
  48. lm_deluge/tool/cua/trycua.py +989 -0
  49. lm_deluge/tool/prefab/__init__.py +45 -0
  50. lm_deluge/tool/prefab/batch_tool.py +156 -0
  51. lm_deluge/tool/prefab/docs.py +1119 -0
  52. lm_deluge/tool/prefab/email.py +294 -0
  53. lm_deluge/tool/prefab/filesystem.py +1711 -0
  54. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  55. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  56. lm_deluge/tool/prefab/memory.py +458 -0
  57. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  58. lm_deluge/tool/prefab/otc/executor.py +281 -0
  59. lm_deluge/tool/prefab/otc/parse.py +188 -0
  60. lm_deluge/tool/prefab/random.py +212 -0
  61. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  62. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  63. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  64. lm_deluge/tool/prefab/sandbox.py +1621 -0
  65. lm_deluge/tool/prefab/sheets.py +385 -0
  66. lm_deluge/tool/prefab/subagents.py +233 -0
  67. lm_deluge/tool/prefab/todos.py +342 -0
  68. lm_deluge/tool/prefab/tool_search.py +169 -0
  69. lm_deluge/tool/prefab/web_search.py +199 -0
  70. lm_deluge/tracker.py +16 -13
  71. lm_deluge/util/schema.py +412 -0
  72. lm_deluge/warnings.py +8 -0
  73. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
  74. lm_deluge-0.0.88.dist-info/RECORD +117 -0
  75. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  76. lm_deluge/built_in_tools/openai.py +0 -28
  77. lm_deluge/presets/cerebras.py +0 -17
  78. lm_deluge/presets/meta.py +0 -13
  79. lm_deluge/tool.py +0 -849
  80. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  81. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  82. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  83. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  84. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  85. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  86. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
  87. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
  88. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  89. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  90. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
  91. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
  92. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/openai.py
@@ -2,12 +2,17 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence
 
 import aiohttp
 from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_openai,
+)
 from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
@@ -56,8 +61,6 @@ async def _build_oa_chat_request(
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -67,11 +70,24 @@ async def _build_oa_chat_request(
                 effort = "minimal"
             else:
                 effort = "low"
-        if effort == "minimal" and "gpt-5" not in model.id:
-            print(
-                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
-            )
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
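The two hunks above replace the hard-coded temperature/top_p override with an effort-normalization step: 'minimal' maps to 'none' on GPT-5.1 models, to 'low' on non-GPT-5 models, and 'xhigh' falls back to 'high' unless the model advertises support. A minimal standalone sketch of those rules, using a hypothetical `normalize_effort` helper (not part of lm-deluge) with `model_id`, `supports_xhigh`, and `warn` standing in for the model-registry attributes and `maybe_warn`:

```python
# Hypothetical restatement of the effort rules in the hunk above; not lm-deluge API.
def normalize_effort(
    effort: str,
    model_id: str,
    supports_xhigh: bool,
    warn=lambda *args, **kwargs: None,  # stand-in for maybe_warn
) -> str:
    # GPT-5.1 models don't support 'minimal'; they take 'none' instead.
    if effort == "minimal" and "gpt-5.1" in model_id:
        warn("WARN_MINIMAL_TO_NONE")
        effort = "none"
    # Non-GPT-5 models don't support 'minimal' at all; fall back to 'low'.
    elif effort == "minimal" and "gpt-5" not in model_id:
        warn("WARN_MINIMAL_TO_LOW")
        effort = "low"
    # 'xhigh' is only honored where the registry flags support; otherwise 'high'.
    if effort == "xhigh" and not supports_xhigh:
        warn("WARN_XHIGH_TO_HIGH")
        effort = "high"
    return effort


assert normalize_effort("minimal", "gpt-5.1-codex-max", True) == "none"
assert normalize_effort("minimal", "gpt-4.1", False) == "low"
assert normalize_effort("xhigh", "gpt-5.1", False) == "high"
```

Models flagged as supporting 'xhigh' also drop temperature/top_p from the request whenever reasoning is enabled; everything else keeps the fixed 1.0/1.0 values as before.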
@@ -81,17 +97,48 @@ async def _build_oa_chat_request(
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
             request_json["top_logprobs"] = sampling_params.top_logprobs
-    if sampling_params.json_mode and model.supports_json:
+
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                },
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
+
     if tools:
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(
+                    tool.dump_for(
+                        "openai-completions", strict=sampling_params.strict_tools
+                    )
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [
+                        t.dump_for(
+                            "openai-completions", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
                 )
         request_json["tools"] = request_tools
     return request_json
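With this change, passing `context.output_schema` on a model that supports JSON takes precedence over `json_mode` and produces a strict `json_schema` response format on the Chat Completions request. A rough illustration of the resulting fragment, assuming the schema-preparation helpers act as a passthrough (as the in-line comment suggests) and using an invented schema and model id:

```python
# Illustrative request fragment only; the schema and model id are made up.
request_json = {
    "model": "gpt-5",  # hypothetical
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "response",
            "schema": {
                "type": "object",
                "properties": {"answer": {"type": "string"}},
                "required": ["answer"],
                "additionalProperties": False,
            },
            "strict": True,
        },
    },
}
```

If the model does not support structured outputs, the schema is ignored with a printed warning, and `json_mode` still yields the plain `{"type": "json_object"}` format.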
@@ -271,23 +318,60 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
     if model.reasoning_model:
-        if sampling_params.reasoning_effort in [None, "none"]:
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
-                sampling_params.reasoning_effort = "none"
+                effort = "none"
             else:
-                sampling_params.reasoning_effort = "low"
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort": sampling_params.reasoning_effort,
+            "effort": effort,
             "summary": "auto",
         }
     else:
         if sampling_params.reasoning_effort:
             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
 
-    if sampling_params.json_mode and model.supports_json:
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                }
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}
 
     # Handle tools
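The Responses API builder gains the same structured-output path, but the wire shape is flatter: `name`, `schema`, and `strict` sit directly under `text.format` rather than inside a nested `json_schema` object. The same invented schema as above, routed through this builder, would look roughly like:

```python
# Illustrative request fragment only; values are invented.
request_json = {
    "model": "gpt-5",  # hypothetical
    "text": {
        "format": {
            "type": "json_schema",
            "name": "response",
            "schema": {
                "type": "object",
                "properties": {"answer": {"type": "string"}},
                "required": ["answer"],
                "additionalProperties": False,
            },
            "strict": True,
        }
    },
}
```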
@@ -295,11 +379,13 @@ async def _build_oa_responses_request(
     # Add regular function tools
     for tool in tools or []:
         if isinstance(tool, Tool):
-            request_tools.append(tool.dump_for("openai-responses"))
+            request_tools.append(
+                tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+            )
         elif isinstance(tool, dict):
             # if computer use, make sure model supports it
             if tool["type"] == "computer_use_preview":
-                if model.name != "openai-computer-use-preview":
+                if model.name != "computer-use-preview":
                     raise ValueError(f"model {model.id} does not support computer use")
                 # have to use truncation
                 request_json["truncation"] = "auto"
@@ -307,7 +393,14 @@ async def _build_oa_responses_request(
         elif isinstance(tool, MCPServer):
             if context.force_local_mcp:
                 as_tools = await tool.to_tools()
-                request_tools.extend([t.dump_for("openai-responses") for t in as_tools])
+                request_tools.extend(
+                    [
+                        t.dump_for(
+                            "openai-responses", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
+                )
             else:
                 request_tools.append(tool.for_openai_responses())
 
@@ -381,7 +474,7 @@ class OpenAIResponsesRequest(APIRequestBase):
             output = data.get("output", [])
             if not output:
                 is_error = True
-                error_message = "No output in response"
+                error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
             else:
                 # Process each output item
                 for item in output:
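The empty-output error path now folds the response status, error, and incomplete details into the message, which makes truncation cases easier to diagnose. A hypothetical example of what the message reads like; the payload below is invented, not a captured response:

```python
# Invented Responses API payload with no output items, for illustration only.
data = {
    "status": "incomplete",
    "error": None,
    "incomplete_details": {"reason": "max_output_tokens"},
    "output": [],
}
error_message = (
    f"No output in response. Status: {data.get('status')}, "
    f"error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
)
# -> No output in response. Status: incomplete, error: None,
#    incomplete details: {'reason': 'max_output_tokens'}
```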
@@ -536,7 +629,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools: list | None = None,
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):