lm-deluge 0.0.72__tar.gz → 0.0.74__tar.gz

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (86)
  1. {lm_deluge-0.0.72/src/lm_deluge.egg-info → lm_deluge-0.0.74}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/anthropic.py +26 -1
  4. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/bedrock.py +9 -4
  5. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/openai.py +74 -14
  6. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/client.py +37 -6
  7. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/config.py +2 -1
  8. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/anthropic.py +2 -2
  9. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/openai.py +42 -0
  10. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/prompt.py +12 -2
  11. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/request_context.py +2 -0
  12. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/tool.py +60 -15
  13. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/warnings.py +2 -0
  14. {lm_deluge-0.0.72 → lm_deluge-0.0.74/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  15. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/LICENSE +0 -0
  16. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/README.md +0 -0
  17. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/setup.cfg +0 -0
  18. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/__init__.py +0 -0
  19. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/__init__.py +0 -0
  20. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/base.py +0 -0
  21. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
  22. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/common.py +0 -0
  23. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  24. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  25. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  26. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  27. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  28. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/gemini.py +0 -0
  29. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/mistral.py +0 -0
  30. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/api_requests/response.py +0 -0
  31. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/batches.py +0 -0
  32. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  33. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  34. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  35. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  36. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/base.py +0 -0
  37. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/built_in_tools/openai.py +0 -0
  38. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/cache.py +0 -0
  39. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/cli.py +0 -0
  40. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/embed.py +0 -0
  41. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/errors.py +0 -0
  42. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/file.py +0 -0
  43. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/image.py +0 -0
  44. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/__init__.py +0 -0
  45. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/classify.py +0 -0
  46. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/extract.py +0 -0
  47. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/locate.py +0 -0
  48. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/ocr.py +0 -0
  49. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/score.py +0 -0
  50. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/llm_tools/translate.py +0 -0
  51. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/mock_openai.py +0 -0
  52. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/__init__.py +0 -0
  53. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/bedrock.py +0 -0
  54. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/cerebras.py +0 -0
  55. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/cohere.py +0 -0
  56. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/deepseek.py +0 -0
  57. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/fireworks.py +0 -0
  58. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/google.py +0 -0
  59. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/grok.py +0 -0
  60. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/groq.py +0 -0
  61. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/kimi.py +0 -0
  62. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/meta.py +0 -0
  63. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/minimax.py +0 -0
  64. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/mistral.py +0 -0
  65. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/openrouter.py +0 -0
  66. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/models/together.py +0 -0
  67. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/presets/cerebras.py +0 -0
  68. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/presets/meta.py +0 -0
  69. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/rerank.py +0 -0
  70. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/tracker.py +0 -0
  71. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/usage.py +0 -0
  72. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/harmony.py +0 -0
  73. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/json.py +0 -0
  74. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/logprobs.py +0 -0
  75. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/spatial.py +0 -0
  76. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/validation.py +0 -0
  77. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge/util/xml.py +0 -0
  78. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
  79. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  80. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/requires.txt +0 -0
  81. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/src/lm_deluge.egg-info/top_level.txt +0 -0
  82. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_builtin_tools.py +0 -0
  83. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_file_upload.py +0 -0
  84. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_mock_openai.py +0 -0
  85. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_native_mcp_server.py +0 -0
  86. {lm_deluge-0.0.72 → lm_deluge-0.0.74}/tests/test_openrouter_generic.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.72
+Version: 0.0.74
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.72"
+version = "0.0.74"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

src/lm_deluge/api_requests/anthropic.py

@@ -84,12 +84,37 @@ def _build_anthropic_request(
     if "temperature" in request_json and "top_p" in request_json:
         request_json.pop("top_p")
 
+    # Handle structured outputs (output_format)
+    if context.output_schema:
+        if model.supports_json:
+            _add_beta(base_headers, "structured-outputs-2025-11-13")
+            request_json["output_format"] = {
+                "type": "json_schema",
+                "schema": context.output_schema,
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode:
+        # Anthropic doesn't support basic json_mode without a schema
+        print(
+            "WARNING: Anthropic does not support basic json_mode without a schema. "
+            "Use output_schema parameter for structured JSON outputs."
+        )
+
+    # Add beta header for strict tools when enabled
+    if tools and sampling_params.strict_tools and model.supports_json:
+        _add_beta(base_headers, "structured-outputs-2025-11-13")
+
     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                tool_definitions.append(tool.dump_for("anthropic"))
+                # Only use strict mode if model supports structured outputs
+                use_strict = sampling_params.strict_tools and model.supports_json
+                tool_definitions.append(tool.dump_for("anthropic", strict=use_strict))
             elif isinstance(tool, dict) and "url" in tool:
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool)
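
For orientation, here is a minimal sketch of the request body shape the new branch produces. The schema and the surrounding request fields are illustrative examples, not taken from the package:

```python
# Illustrative sketch only: what the Anthropic request gains when
# context.output_schema is set and the model supports structured outputs.
# The schema and the other request fields here are made-up examples.
output_schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
}

request_json = {
    "model": "claude-sonnet-4-5-20250929",
    "max_tokens": 2048,
    "messages": [{"role": "user", "content": "Answer in JSON."}],
    # added by the new output_schema branch:
    "output_format": {
        "type": "json_schema",
        "schema": output_schema,
    },
}

# and the beta header attached via _add_beta:
headers = {"anthropic-beta": "structured-outputs-2025-11-13"}
```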

src/lm_deluge/api_requests/bedrock.py

@@ -106,7 +106,8 @@ async def _build_anthropic_bedrock_request(
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                tool_definitions.append(tool.dump_for("anthropic"))
+                # Bedrock doesn't have the strict-mode betas Anthropic exposes yet
+                tool_definitions.append(tool.dump_for("anthropic", strict=False))
             elif isinstance(tool, dict):
                 tool_definitions.append(tool)
         # add betas if needed
@@ -124,7 +125,9 @@ async def _build_anthropic_bedrock_request(
                 # Convert to individual tools locally (like OpenAI does)
                 individual_tools = await tool.to_tools()
                 for individual_tool in individual_tools:
-                    tool_definitions.append(individual_tool.dump_for("anthropic"))
+                    tool_definitions.append(
+                        individual_tool.dump_for("anthropic", strict=False)
+                    )
 
         # Add cache control to last tool if tools_only caching is specified
         if cache_pattern == "tools_only" and tool_definitions:
@@ -194,11 +197,13 @@ async def _build_openai_bedrock_request(
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(
+                    tool.dump_for("openai-completions", strict=False)
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [t.dump_for("openai-completions", strict=False) for t in as_tools]
                 )
         request_json["tools"] = request_tools
 
src/lm_deluge/api_requests/openai.py

@@ -67,10 +67,12 @@ async def _build_oa_chat_request(
                 effort = "minimal"
             else:
                 effort = "low"
-        if effort == "minimal" and "gpt-5" not in model.id:
-            print(
-                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
-            )
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
         request_json["reasoning_effort"] = effort
     else:
@@ -81,17 +83,43 @@ async def _build_oa_chat_request(
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
             request_json["top_logprobs"] = sampling_params.top_logprobs
-    if sampling_params.json_mode and model.supports_json:
+
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            request_json["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "response",
+                    "schema": context.output_schema,
+                    "strict": True,
+                },
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
+
     if tools:
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(
+                    tool.dump_for(
+                        "openai-completions", strict=sampling_params.strict_tools
+                    )
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [
+                        t.dump_for(
+                            "openai-completions", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
                 )
         request_json["tools"] = request_tools
     return request_json
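
The Chat Completions counterpart wraps the same caller-supplied schema one level deeper. A sketch of the resulting payload; the schema and the other fields are illustrative:

```python
# Illustrative sketch only: the response_format block emitted when
# context.output_schema is provided on the Chat Completions path.
output_schema = {
    "type": "object",
    "properties": {"sentiment": {"type": "string"}},
    "required": ["sentiment"],
    "additionalProperties": False,
}

request_json = {
    "model": "gpt-4.1-mini",
    "messages": [{"role": "user", "content": "Classify the sentiment of: 'great product'"}],
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "response",
            "schema": output_schema,
            "strict": True,
        },
    },
}
```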
@@ -271,23 +299,46 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
     if model.reasoning_model:
-        if sampling_params.reasoning_effort in [None, "none"]:
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
-                sampling_params.reasoning_effort = "none"
+                effort = "none"
             else:
-                sampling_params.reasoning_effort = "low"
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
         request_json["temperature"] = 1.0
         request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort": sampling_params.reasoning_effort,
+            "effort": effort,
             "summary": "auto",
         }
     else:
         if sampling_params.reasoning_effort:
             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
 
-    if sampling_params.json_mode and model.supports_json:
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            request_json["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": "response",
+                    "schema": context.output_schema,
+                    "strict": True,
+                }
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}
 
     # Handle tools
@@ -295,7 +346,9 @@ async def _build_oa_responses_request(
         # Add regular function tools
         for tool in tools or []:
             if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-responses"))
+                request_tools.append(
+                    tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+                )
             elif isinstance(tool, dict):
                 # if computer use, make sure model supports it
                 if tool["type"] == "computer_use_preview":
@@ -307,7 +360,14 @@ async def _build_oa_responses_request(
             elif isinstance(tool, MCPServer):
                 if context.force_local_mcp:
                     as_tools = await tool.to_tools()
-                    request_tools.extend([t.dump_for("openai-responses") for t in as_tools])
+                    request_tools.extend(
+                        [
+                            t.dump_for(
+                                "openai-responses", strict=sampling_params.strict_tools
+                            )
+                            for t in as_tools
+                        ]
+                    )
                 else:
                     request_tools.append(tool.for_openai_responses())
 

src/lm_deluge/client.py

@@ -44,10 +44,14 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """
 
-    _REASONING_SUFFIXES: ClassVar[dict[str, Literal["low", "medium", "high"]]] = {
+    _REASONING_SUFFIXES: ClassVar[
+        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+    ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-minimal": "minimal",
+        "-none": "none",
     }
 
     model_names: str | list[str] = ["gpt-4.1-mini"]
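
With the two new suffixes, reasoning effort can be selected straight from the model name, just as the existing "-low"/"-medium"/"-high" suffixes allow. A sketch, assuming the public LLMClient wrapper that accepts a bare model string as in the README:

```python
# Sketch, assuming the public LLMClient wrapper accepts a single model-name
# string. "-none" is stripped only because "gpt-5.1" is a registered model;
# the stripped suffix becomes that model's reasoning_effort.
from lm_deluge import LLMClient

client = LLMClient("gpt-5.1-none")  # resolves to model "gpt-5.1", effort "none"
```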
@@ -149,9 +153,11 @@ class _LLMClient(BaseModel):
 
     def _normalize_model_names(
         self, models: list[str]
-    ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+    ) -> tuple[
+        list[str], list[Literal["low", "medium", "high", "minimal", "none"] | None]
+    ]:
         normalized: list[str] = []
-        efforts: list[Literal["low", "medium", "high"] | None] = []
+        efforts: list[Literal["low", "medium", "high", "minimal", "none"] | None] = []
 
         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -164,7 +170,10 @@ class _LLMClient(BaseModel):
         return normalized, efforts
 
     def _align_sampling_params(
-        self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+        self,
+        per_model_efforts: list[
+            Literal["low", "medium", "high", "minimal", "none"] | None
+        ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
             per_model_efforts = per_model_efforts + [None] * (
@@ -332,7 +341,7 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+    ) -> tuple[str, Literal["low", "medium", "high", "minimal", "none"] | None]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
@@ -364,6 +373,15 @@ class _LLMClient(BaseModel):
         assert (
             self.use_responses_api
         ), "background mode only allowed for responses api"
+
+        # codex models require responses api
+        for model_name in self.model_names:
+            if "codex" in model_name.lower() and not self.use_responses_api:
+                raise ValueError(
+                    f"Model '{model_name}' requires use_responses_api=True. "
+                    "Codex models are only available via the Responses API."
+                )
+
         # Auto-generate name if not provided
         if self.name is None:
             if len(self.model_names) == 1:
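
A sketch of how the new check surfaces, assuming it runs during client validation, that the public LLMClient wrapper forwards these fields, and that use_responses_api defaults to off:

```python
# Sketch: codex models must go through the Responses API.
# Assumes the public LLMClient wrapper and a False default for use_responses_api.
from lm_deluge import LLMClient

LLMClient("gpt-5.1-codex", use_responses_api=True)  # fine

try:
    LLMClient("gpt-5.1-codex")  # would be routed to Chat Completions
except ValueError as err:
    print(err)  # "Model 'gpt-5.1-codex' requires use_responses_api=True. ..."
```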
@@ -543,6 +561,7 @@ class _LLMClient(BaseModel):
         return_completions_only: Literal[True],
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
+        output_schema: dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[str | None]: ...
@@ -555,6 +574,7 @@ class _LLMClient(BaseModel):
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
+        output_schema: dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[APIResponse]: ...
@@ -566,6 +586,7 @@ class _LLMClient(BaseModel):
         return_completions_only: bool = False,
         show_progress: bool = True,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> list[APIResponse] | list[str | None] | dict[str, int]:
@@ -594,6 +615,7 @@ class _LLMClient(BaseModel):
             task_id = self.start_nowait(
                 prompt,
                 tools=tools,
+                output_schema=output_schema,
                 cache=cache,
                 service_tier=service_tier,
             )
@@ -639,6 +661,7 @@ class _LLMClient(BaseModel):
         return_completions_only: bool = False,
         show_progress=True,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
     ):
         return asyncio.run(
@@ -647,6 +670,7 @@ class _LLMClient(BaseModel):
                 return_completions_only=return_completions_only,
                 show_progress=show_progress,
                 tools=tools,
+                output_schema=output_schema,
                 cache=cache,
             )
         )
@@ -670,6 +694,7 @@ class _LLMClient(BaseModel):
         prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> int:
@@ -688,6 +713,7 @@ class _LLMClient(BaseModel):
             request_timeout=self.request_timeout,
             status_tracker=tracker,
             tools=tools,
+            output_schema=output_schema,
             cache=cache,
             use_responses_api=self.use_responses_api,
             background=self.background,
@@ -705,11 +731,16 @@ class _LLMClient(BaseModel):
         prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
+        output_schema: dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> APIResponse:
         task_id = self.start_nowait(
-            prompt, tools=tools, cache=cache, service_tier=service_tier
+            prompt,
+            tools=tools,
+            output_schema=output_schema,
+            cache=cache,
+            service_tier=service_tier,
         )
         return await self.wait_for(task_id)
 
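End to end, the new keyword threads from the public entry points down into RequestContext. A usage sketch, assuming the public LLMClient wrapper exposes a process_prompts_sync call that mirrors the overloads above; the schema is an example:

```python
# Usage sketch; process_prompts_sync and its keywords are assumed to mirror
# the overloads shown in the diff above.
from lm_deluge import LLMClient

schema = {
    "type": "object",
    "properties": {"title": {"type": "string"}, "year": {"type": "integer"}},
    "required": ["title", "year"],
    "additionalProperties": False,
}

client = LLMClient("gpt-4.1-mini")
completions = client.process_prompts_sync(
    ["Name a classic sci-fi novel and its publication year."],
    output_schema=schema,
    return_completions_only=True,
)
print(completions[0])  # JSON text conforming to the schema
```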

src/lm_deluge/config.py

@@ -7,10 +7,11 @@ class SamplingParams(BaseModel):
     temperature: float = 0.0
     top_p: float = 1.0
     json_mode: bool = False
-    max_new_tokens: int = 512
+    max_new_tokens: int = 2_048
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
     logprobs: bool = False
     top_logprobs: int | None = None
+    strict_tools: bool = True
 
     def to_vllm(self):
         try:
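
Both new knobs are plain model fields, so they can be set per client; a short sketch:

```python
# Sketch: the new default token budget and the strict_tools switch.
from lm_deluge.config import SamplingParams

defaults = SamplingParams()  # max_new_tokens=2048, strict_tools=True
loose = SamplingParams(max_new_tokens=512, strict_tools=False)  # old budget, lenient tool schemas
```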

src/lm_deluge/models/anthropic.py

@@ -27,7 +27,7 @@ ANTHROPIC_MODELS = {
         "name": "claude-sonnet-4-5-20250929",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
-        "supports_json": False,
+        "supports_json": True,
         "api_spec": "anthropic",
         "input_cost": 3.0,
         "cached_input_cost": 0.30,
@@ -39,7 +39,7 @@ ANTHROPIC_MODELS = {
         "name": "claude-opus-4-1-20250805",
         "api_base": "https://api.anthropic.com/v1",
         "api_key_env_var": "ANTHROPIC_API_KEY",
-        "supports_json": False,
+        "supports_json": True,
         "api_spec": "anthropic",
         "input_cost": 15.0,
         "cached_input_cost": 1.50,

src/lm_deluge/models/openai.py

@@ -10,6 +10,48 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.1": {
+        "id": "gpt-5.1",
+        "name": "gpt-5.1",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex": {
+        "id": "gpt-5.1-codex",
+        "name": "gpt-5.1-codex",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex-mini": {
+        "id": "gpt-5.1-codex-mini",
+        "name": "gpt-5.1-codex-mini",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 0.25,
+        "cached_input_cost": 0.025,
+        "output_cost": 2.0,
+        "reasoning_model": True,
+    },
     "gpt-5-codex": {
         "id": "gpt-5-codex",
         "name": "gpt-5-codex",

src/lm_deluge/prompt.py

@@ -1195,14 +1195,24 @@ class Conversation:
 
     @classmethod
     def from_unknown(
-        cls, messages: list[dict], *, system: str | list[dict] | None = None
+        cls, messages: list[dict] | dict, *, system: str | list[dict] | None = None
    ) -> tuple["Conversation", str]:
         """Attempt to convert provider-formatted messages without knowing the provider.
 
         Returns the parsed conversation together with the provider label that succeeded
-        ("openai" or "anthropic").
+        ("openai", "anthropic", or "log").
         """
 
+        # Check if input is in log format (output from to_log())
+        if isinstance(messages, dict) and "messages" in messages:
+            return cls.from_log(messages), "log"
+
+        # Ensure messages is a list for provider detection
+        if not isinstance(messages, list):
+            raise ValueError(
+                "messages must be a list of dicts or a dict with 'messages' key"
+            )
+
         def _detect_provider() -> str:
             has_openai_markers = False
             has_anthropic_markers = False
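
A round-trip sketch of the new "log" path; Conversation.user, to_log, and from_log are assumed from the existing API (to_log and from_log are referenced in the added comment and branch above):

```python
# Sketch of the new "log" branch of Conversation.from_unknown.
# Assumes the existing Conversation.user(...) helper and to_log()/from_log().
from lm_deluge.prompt import Conversation

conv = Conversation.user("hello there")
logged = conv.to_log()  # a dict containing a "messages" key

restored, provider = Conversation.from_unknown(logged)
print(provider)  # "log"
```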

src/lm_deluge/request_context.py

@@ -32,6 +32,7 @@ class RequestContext:
 
     # Optional features
     tools: list | None = None
+    output_schema: dict | None = None
     cache: CachePattern | None = None
     use_responses_api: bool = False
     background: bool = False
@@ -66,6 +67,7 @@ class RequestContext:
             "results_arr": self.results_arr,
             "callback": self.callback,
             "tools": self.tools,
+            "output_schema": self.output_schema,
             "cache": self.cache,
             "use_responses_api": self.use_responses_api,
             "background": self.background,

src/lm_deluge/tool.py

@@ -713,17 +713,40 @@ class Tool(BaseModel):
         """just an alias for the above"""
         return self.for_openai_completions(strict=strict, **kwargs)
 
-    def for_openai_responses(self, **kwargs) -> dict[str, Any]:
+    def for_openai_responses(self, *, strict: bool = True, **kwargs) -> dict[str, Any]:
         if self.is_built_in:
             return {"type": self.type, **self.built_in_args, **kwargs}
-        return {
-            "type": "function",
-            "name": self.name,
-            "description": self.description,
-            "parameters": self._json_schema(include_additional_properties=True),
-        }
 
-    def for_anthropic(self, **kwargs) -> dict[str, Any]:
+        # Check if schema is compatible with strict mode
+        if strict and not self._is_strict_mode_compatible():
+            strict = False
+
+        if strict:
+            # For strict mode, remove defaults and make all parameters required
+            schema = self._json_schema(
+                include_additional_properties=True, remove_defaults=True
+            )
+            schema["required"] = list(
+                (self.parameters or {}).keys()
+            )  # All parameters required in strict mode
+
+            return {
+                "type": "function",
+                "name": self.name,
+                "description": self.description,
+                "parameters": schema,
+                "strict": True,
+            }
+        else:
+            # For non-strict mode, use the original required list
+            return {
+                "type": "function",
+                "name": self.name,
+                "description": self.description,
+                "parameters": self._json_schema(include_additional_properties=True),
+            }
+
+    def for_anthropic(self, *, strict: bool = True, **kwargs) -> dict[str, Any]:
         # built-in tools have "name", "type", maybe metadata
         if self.is_built_in:
             return {
@@ -732,11 +755,33 @@ class Tool(BaseModel):
                 **self.built_in_args,
                 **kwargs,
             }
-        return {
-            "name": self.name,
-            "description": self.description,
-            "input_schema": self._json_schema(),
-        }
+
+        # Check if schema is compatible with strict mode
+        if strict and not self._is_strict_mode_compatible():
+            strict = False
+
+        if strict:
+            # For strict mode, remove defaults and make all parameters required
+            schema = self._json_schema(
+                include_additional_properties=True, remove_defaults=True
+            )
+            schema["required"] = list(
+                (self.parameters or {}).keys()
+            )  # All parameters required in strict mode
+
+            return {
+                "name": self.name,
+                "description": self.description,
+                "input_schema": schema,
+                "strict": True,
+            }
+        else:
+            # For non-strict mode, use the original required list
+            return {
+                "name": self.name,
+                "description": self.description,
+                "input_schema": self._json_schema(),
+            }
 
     def for_google(self) -> dict[str, Any]:
         """
@@ -759,11 +804,11 @@ class Tool(BaseModel):
         **kw,
     ) -> dict[str, Any]:
         if provider == "openai-responses":
-            return self.for_openai_responses()
+            return self.for_openai_responses(**kw)
         if provider == "openai-completions":
             return self.for_openai_completions(**kw)
         if provider == "anthropic":
-            return self.for_anthropic()
+            return self.for_anthropic(**kw)
         if provider == "google":
             return self.for_google()
         raise ValueError(provider)
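
A sketch of the strict toggle on dump_for. The Tool construction below assumes name, description, and parameters can be passed directly and that no other fields are required; the real model may expect more than what this diff shows:

```python
# Sketch only: Tool construction assumes the fields referenced above
# (name, description, parameters) are settable directly; other required
# fields, if any, are not visible in this diff.
from lm_deluge.tool import Tool

tool = Tool(
    name="get_weather",
    description="Look up current weather for a city.",
    parameters={
        "city": {"type": "string"},
        "units": {"type": "string", "default": "metric"},
    },
)

strict_payload = tool.dump_for("anthropic", strict=True)
# -> includes "strict": True and lists every parameter in input_schema["required"]

loose_payload = tool.dump_for("anthropic", strict=False)
# -> original behavior: schema emitted as-is, no "strict" key
```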

src/lm_deluge/warnings.py

@@ -7,6 +7,8 @@ WARNINGS: dict[str, str] = {
     "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}.",
     "WARN_CACHING_UNSUPPORTED": "Cache parameter '{cache_param}' is not supported, ignoring for {model_name}.",
     "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
+    "WARN_MINIMAL_TO_LOW": "'minimal' reasoning effort only allowed for gpt-5 models. Setting to 'low' for {model_name}.",
+    "WARN_MINIMAL_TO_NONE": "GPT-5.1 models don't support 'minimal' reasoning effort. Converting to 'none' for {model_name}.",
 }
 
 

src/lm_deluge.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.72
+Version: 0.0.74
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10