lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/__init__.py +25 -2
- lm_deluge/api_requests/anthropic.py +92 -17
- lm_deluge/api_requests/base.py +47 -11
- lm_deluge/api_requests/bedrock.py +7 -4
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +138 -18
- lm_deluge/api_requests/openai.py +114 -21
- lm_deluge/client.py +282 -49
- lm_deluge/config.py +15 -3
- lm_deluge/mock_openai.py +643 -0
- lm_deluge/models/__init__.py +12 -1
- lm_deluge/models/anthropic.py +17 -2
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +29 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +10 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +86 -8
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +224 -40
- lm_deluge/request_context.py +7 -2
- lm_deluge/tool/__init__.py +1118 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +1621 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
- lm_deluge-0.0.88.dist-info/RECORD +117 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/openai.py
CHANGED
@@ -2,12 +2,17 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence

 import aiohttp
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_openai,
+)
 from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
@@ -56,8 +61,6 @@ async def _build_oa_chat_request(
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -67,11 +70,24 @@ async def _build_oa_chat_request(
                 effort = "minimal"
             else:
                 effort = "low"
-
-
-
-
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
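Read as plain logic, the effort handling added in this hunk (and mirrored in the Responses-API hunk further down) is a small normalization pass over the requested reasoning effort. The stand-alone sketch below paraphrases it for illustration only; normalize_effort, the bare model_id string, and the supports_xhigh flag are stand-ins for the library's ModelInfo fields, not its actual API.

def normalize_effort(effort: str, model_id: str, supports_xhigh: bool) -> str:
    # Illustrative mirror of the remapping in the hunk above.
    if effort == "minimal" and "gpt-5.1" in model_id:
        effort = "none"   # GPT-5.1 models accept "none" instead of "minimal"
    elif effort == "minimal" and "gpt-5" not in model_id:
        effort = "low"    # other models fall back to the closest supported value
    if effort == "xhigh" and not supports_xhigh:
        effort = "high"   # "xhigh" is limited to a few models (gpt-5.2, gpt-5.1-codex-max)
    return effort

print(normalize_effort("minimal", "gpt-5.1-chat", supports_xhigh=False))  # -> "none"
print(normalize_effort("minimal", "o4-mini", supports_xhigh=False))       # -> "low"
print(normalize_effort("xhigh", "gpt-5-mini", supports_xhigh=False))      # -> "high"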
@@ -81,17 +97,48 @@ async def _build_oa_chat_request(
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
             request_json["top_logprobs"] = sampling_params.top_logprobs
-
+
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                },
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
+
     if tools:
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for(
+                        "openai-completions", strict=sampling_params.strict_tools
+                    )
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [
+                    [
+                        t.dump_for(
+                            "openai-completions", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
                 )
         request_json["tools"] = request_tools
     return request_json
@@ -271,23 +318,60 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens

     if model.reasoning_model:
-
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
-
+                effort = "none"
             else:
-
-
-
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort":
+            "effort": effort,
             "summary": "auto",
         }
     else:
         if sampling_params.reasoning_effort:
             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

-
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                }
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}

     # Handle tools
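The two structured-output hunks wrap the same prepared schema differently because they target different endpoints: Chat Completions nests it under response_format.json_schema, while the Responses API uses text.format. A minimal sketch of the resulting request bodies follows, using a hand-written example schema; in the library the schema comes from prepare_output_schema / transform_schema_for_openai, whose internals are not part of this diff.

import json

# Illustrative schema only (not produced by the library in this sketch).
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
    "additionalProperties": False,
}

# Chat Completions request body, per the earlier hunk:
chat_body = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "response", "schema": schema, "strict": True},
    }
}

# Responses API request body, per the hunk above (flatter "text.format" shape):
responses_body = {
    "text": {
        "format": {
            "type": "json_schema",
            "name": "response",
            "schema": schema,
            "strict": True,
        }
    }
}

print(json.dumps(chat_body, indent=2))
print(json.dumps(responses_body, indent=2))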
@@ -295,11 +379,13 @@ async def _build_oa_responses_request(
     # Add regular function tools
     for tool in tools or []:
         if isinstance(tool, Tool):
-            request_tools.append(
+            request_tools.append(
+                tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+            )
         elif isinstance(tool, dict):
             # if computer use, make sure model supports it
             if tool["type"] == "computer_use_preview":
-                if model.name != "
+                if model.name != "computer-use-preview":
                     raise ValueError(f"model {model.id} does not support computer use")
                 # have to use truncation
                 request_json["truncation"] = "auto"
@@ -307,7 +393,14 @@ async def _build_oa_responses_request(
         elif isinstance(tool, MCPServer):
             if context.force_local_mcp:
                 as_tools = await tool.to_tools()
-                request_tools.extend(
+                request_tools.extend(
+                    [
+                        t.dump_for(
+                            "openai-responses", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
+                )
             else:
                 request_tools.append(tool.for_openai_responses())

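Both request builders now serialize tools through Tool.dump_for("openai-completions" / "openai-responses", strict=...). The exact payload dump_for produces is not shown in this diff, so the sketch below only approximates the conventional strict function-tool shapes for the two endpoints, using a hypothetical get_weather tool.

# Approximate target wire formats (assumptions, not the library's output).
# Chat Completions nests the function definition under "function":
chat_completions_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool
        "description": "Look up current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
            "additionalProperties": False,  # required when strict is enabled
        },
        "strict": True,
    },
}

# The Responses API uses a flat function-tool object:
responses_tool = {
    "type": "function",
    "name": "get_weather",
    "description": "Look up current weather for a city.",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
        "additionalProperties": False,
    },
    "strict": True,
}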
@@ -381,7 +474,7 @@ class OpenAIResponsesRequest(APIRequestBase):
         output = data.get("output", [])
         if not output:
             is_error = True
-            error_message = "No output in response"
+            error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
         else:
             # Process each output item
             for item in output:
@@ -536,7 +629,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools:
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):