lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registries; it is provided for informational purposes only.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
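Judging from the renames in the list above, the helpers that lived under `lm_deluge.llm_tools` now live under `lm_deluge.pipelines`, and the single-module `lm_deluge/tool.py` becomes the `lm_deluge/tool/` package. A hedged migration sketch for downstream code, based only on the moved paths (whether 0.0.90 keeps compatibility shims for the old import paths is not visible in this diff):

```python
# Hypothetical migration sketch; module names are taken from the renamed files above.

# 0.0.67: task helpers lived under lm_deluge.llm_tools
# from lm_deluge.llm_tools import classify, extract, ocr, translate

# 0.0.90: the same modules appear under lm_deluge.pipelines
from lm_deluge.pipelines import classify, extract, ocr, translate  # noqa: F401

# tool.py is replaced by a tool/ package (lm_deluge/tool/__init__.py), so
# `from lm_deluge.tool import ...` should resolve to the new package.
from lm_deluge.tool import MCPServer, Tool  # noqa: F401
```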
lm_deluge/api_requests/gemini.py
CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any

 from aiohttp import ClientResponse

@@ -10,7 +9,7 @@ from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel
-from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse

@@ -23,6 +22,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()

+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -37,20 +51,81 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}

     # Handle reasoning models (thinking)
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    is_gemini_3_flash = "gemini-3-flash" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
+        else:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            if is_gemini_3_flash:
+                # Flash supports minimal, low, medium, high
+                level_map = {
+                    "none": "low",
+                    "minimal": "minimal",
+                    "low": "low",
+                    "medium": "medium",
+                    "high": "high",
+                }
+            else:
+                # Pro only supports low, high
+                level_map = {
+                    "none": "low",
+                    "minimal": "low",
+                    "low": "low",
+                    "medium": "high",
+                    "high": "high",
+                }
+            effort = level_map[effort_key]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert effort_key in level_map
+            budget = level_map[effort_key]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
         else:
-            effort
-        ]
-        thinking_config["thinkingBudget"] = budget
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config

     else:
@@ -59,13 +134,60 @@ async def _build_gemini_request(

     # Add tools if provided
     if tools:
+        request_tools = []
+        function_declarations = []
+
+        for tool in tools:
+            if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                # Gemini computer use tool - add as separate tool entry
+                env_map = {
+                    "browser": "ENVIRONMENT_BROWSER",
+                    "android": "ENVIRONMENT_ANDROID",
+                }
+                env = env_map.get(
+                    tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                )
+                cu_tool: dict = {
+                    "computerUse": {
+                        "environment": env,
+                    }
+                }
+                excluded = tool.get("excluded_predefined_functions")
+                if excluded:
+                    cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                request_tools.append(cu_tool)
+            elif hasattr(tool, "dump_for"):
+                # Regular Tool object
+                function_declarations.append(tool.dump_for("google"))
+            elif isinstance(tool, dict):
+                # Raw dict tool - assume it's a function declaration
+                function_declarations.append(tool)
+
+        if function_declarations:
+            request_tools.append({"functionDeclarations": function_declarations})
+
+        if request_tools:
+            request_json["tools"] = request_tools

     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"

+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json


@@ -103,7 +225,7 @@ class GeminiRequest(APIRequestBase):
         self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
-            self.context.tools,
+            self.context.tools,  # type: ignore
             self.context.sampling_params,
         )

@@ -137,10 +259,29 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    raw_sig = part.get("thoughtSignature")
+                    thought_sig = (
+                        ThoughtSignature(raw_sig, provider="gemini")
+                        if raw_sig is not None
+                        else None
+                    )
+
                     if "text" in part:
-                        parts.append(
+                        parts.append(
+                            Text(
+                                part["text"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "thought" in part:
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,8 +293,13 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )
+                    elif thought_sig:
+                        parts.append(
+                            Text("", thought_signature=thought_sig)
+                        )

             content = Message("assistant", parts)

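To make the Gemini 3 handling above concrete, here is a rough sketch of the request body `_build_gemini_request` now produces: `generationConfig.thinkingConfig` carries a `thinkingLevel` instead of a token budget, function-call parts that lack a signature get the dummy `thoughtSignature`, and `mediaResolution` is attached only when set on a Gemini 3 model. The field names come from the diff above; the conversation content and the media-resolution value are illustrative.

```python
# Illustrative request body for a gemini-3 model with reasoning_effort="medium"
# (on Pro the level_map above collapses "medium" to "high").
request_json = {
    "contents": [
        {"role": "user", "parts": [{"text": "What's 2 + 2?"}]},
        {
            "role": "model",
            "parts": [
                {
                    "functionCall": {"name": "calculator", "args": {"expr": "2 + 2"}},
                    # injected by the loop above when the signature is missing
                    "thoughtSignature": "context_engineering_is_the_way_to_go",
                }
            ],
        },
    ],
    "generationConfig": {
        "thinkingConfig": {"thinkingLevel": "high"},
        # only present when sampling_params.media_resolution is set;
        # the value here is a placeholder for whatever that field holds
        "mediaResolution": {"level": "MEDIA_RESOLUTION_HIGH"},
    },
}
```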
lm_deluge/api_requests/openai.py
CHANGED
@@ -2,12 +2,17 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence

 import aiohttp
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_openai,
+)
 from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
@@ -17,6 +22,24 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse


+def _message_contents_to_string(messages: list[dict]):
+    messages = messages.copy()
+
+    for msg in messages:
+        content = msg.get("content")
+        assert content
+        if isinstance(content, list):
+            new_content = ""
+            for part in content:
+                assert "text" in part, "Invalid text part: " + str(part)
+                new_content += part["text"]
+                new_content += "\n"
+
+            msg["content"] = new_content.strip()
+
+    return messages
+
+
 async def _build_oa_chat_request(
     model: APIModel,
     context: RequestContext,
@@ -50,14 +73,16 @@ async def _build_oa_chat_request(
         request_json["service_tier"] = context.service_tier
     else:
         request_json["service_tier"] = context.service_tier
+    # if tinker, for now hack to mush into 1 string
+    if "tinker" in model.name:
+        request_json["messages"] = _message_contents_to_string(request_json["messages"])
+
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
     else:
         request_json["max_completion_tokens"] = sampling_params.max_new_tokens
     if model.reasoning_model:
-        request_json["temperature"] = 1.0
-        request_json["top_p"] = 1.0
         effort = sampling_params.reasoning_effort
         if effort in [None, "none"]:
             # Disable reasoning for Gemini models when no effort requested
@@ -67,11 +92,24 @@ async def _build_oa_chat_request(
                 effort = "minimal"
             else:
                 effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
@@ -81,17 +119,48 @@ async def _build_oa_chat_request(
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
             request_json["top_logprobs"] = sampling_params.top_logprobs
+
+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                },
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
+
     if tools:
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for(
+                        "openai-completions", strict=sampling_params.strict_tools
+                    )
+                )
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [
+                    [
+                        t.dump_for(
+                            "openai-completions", strict=sampling_params.strict_tools
+                        )
+                        for t in as_tools
+                    ]
                 )
         request_json["tools"] = request_tools
     return request_json
@@ -170,7 +239,7 @@ class OpenAIRequest(APIRequestBase):
             parts.append(Text(message["content"]))

         # Add tool calls if present
-        if "tool_calls" in message:
+        if "tool_calls" in message and message["tool_calls"] is not None:
             for tool_call in message["tool_calls"]:
                 parts.append(
                     ToolCall(
@@ -191,9 +260,9 @@ class OpenAIRequest(APIRequestBase):
                 and "logprobs" in data["choices"][0]
             ):
                 logprobs = data["choices"][0]["logprobs"]["content"]
-        except Exception:
+        except Exception as e:
             is_error = True
-            error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+            error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
         elif mimetype and "json" in mimetype.lower():
             is_error = True  # expected status is 200, otherwise it's an error
             data = await http_response.json()
@@ -271,23 +340,60 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens

     if model.reasoning_model:
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
+                effort = "none"
             else:
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
+        # xhigh only supported for specific models (gpt-5.2, gpt-5.1-codex-max)
+        if effort == "xhigh" and not model.supports_xhigh:
+            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+            effort = "high"
+        # GPT-5.2 and gpt-5.1-codex-max don't support temperature/top_p when reasoning is enabled
+        if model.supports_xhigh and effort != "none":
+            del request_json["temperature"]
+            del request_json["top_p"]
+        else:
+            request_json["temperature"] = 1.0
+            request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort":
+            "effort": effort,
             "summary": "auto",
         }
     else:
         if sampling_params.reasoning_effort:
             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

+    # Handle structured outputs (output_schema takes precedence over json_mode)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply OpenAI-specific transformations (currently passthrough with copy)
+            transformed_schema = transform_schema_for_openai(base_schema)
+
+            request_json["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": "response",
+                    "schema": transformed_schema,
+                    "strict": True,
+                }
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}

     # Handle tools
@@ -295,11 +401,13 @@ async def _build_oa_responses_request(
         # Add regular function tools
         for tool in tools or []:
             if isinstance(tool, Tool):
-                request_tools.append(
+                request_tools.append(
+                    tool.dump_for("openai-responses", strict=sampling_params.strict_tools)
+                )
             elif isinstance(tool, dict):
                 # if computer use, make sure model supports it
                 if tool["type"] == "computer_use_preview":
-                    if model.name != "
+                    if model.name != "computer-use-preview":
                         raise ValueError(f"model {model.id} does not support computer use")
                     # have to use truncation
                     request_json["truncation"] = "auto"
@@ -307,7 +415,14 @@
             elif isinstance(tool, MCPServer):
                 if context.force_local_mcp:
                     as_tools = await tool.to_tools()
-                    request_tools.extend(
+                    request_tools.extend(
+                        [
+                            t.dump_for(
+                                "openai-responses", strict=sampling_params.strict_tools
+                            )
+                            for t in as_tools
+                        ]
+                    )
                 else:
                     request_tools.append(tool.for_openai_responses())

@@ -381,7 +496,7 @@ class OpenAIResponsesRequest(APIRequestBase):
             output = data.get("output", [])
             if not output:
                 is_error = True
-                error_message = "No output in response"
+                error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
             else:
                 # Process each output item
                 for item in output:
@@ -536,7 +651,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools:
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):
@@ -562,7 +677,12 @@
     request_header.update(filtered_extra)

     context = SimpleNamespace(
-        prompt=prompt,
+        prompt=prompt,
+        tools=tools,
+        sampling_params=sampling_params,
+        service_tier=None,
+        output_schema=None,
+        model_name=model_name,
     )

     request_json = await _build_oa_chat_request(model, context)  # type: ignore
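The new structured-output branch in both builders wraps the schema produced by `prepare_output_schema` and `transform_schema_for_openai` in API-specific envelopes. A minimal sketch, assuming those helpers return a plain JSON-schema dict (the example schema is invented; only the wrapper shapes come from the hunks above):

```python
# Example schema standing in for transform_schema_for_openai(...) output.
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
    "additionalProperties": False,
}

# Chat Completions (_build_oa_chat_request) nests it under "json_schema":
chat_response_format = {
    "type": "json_schema",
    "json_schema": {"name": "response", "schema": schema, "strict": True},
}

# The Responses API (_build_oa_responses_request) flattens it into text.format:
responses_text = {
    "format": {
        "type": "json_schema",
        "name": "response",
        "schema": schema,
        "strict": True,
    }
}
```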
lm_deluge/batches.py
CHANGED
@@ -141,31 +141,22 @@ async def submit_batch_oa(file_path: str):
     return batch_id


-async def _submit_anthropic_batch(
-    """
+async def _submit_anthropic_batch(requests: list[dict], headers: dict, model: str):
+    """Submit batch requests to Anthropic's Message Batches API."""

     async with aiohttp.ClientSession() as session:
         url = f"{registry[model].api_base}/messages/batches"
-        with open(file_path, "rb") as f:
-            data.add_field(
-                "file",
-                f,
-                filename=os.path.basename(file_path),
-                content_type="application/json",
-            )
-        async with session.post(url, data=data, headers=headers) as response:
-            if response.status != 200:
-                text = await response.text()
-                raise ValueError(f"Error creating batch: {text}")
+        payload = {"requests": requests}

+        async with session.post(url, json=payload, headers=headers) as response:
+            if response.status != 200:
+                text = await response.text()
+                raise ValueError(f"Error creating batch: {text}")

+            batch_data = await response.json()
+            batch_id = batch_data["id"]
+            print(f"Anthropic batch job started successfully: id = {batch_id}")
+            return batch_id


 async def create_batch_files_oa(
@@ -409,20 +400,10 @@ async def submit_batches_anthropic(

         if current_batch and (would_exceed_size or would_exceed_items):
             # Submit current batch
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-            file_path = await asyncio.to_thread(write_batch_file)
+            print("wrote", len(current_batch), "items")
             batch_tasks.append(
                 asyncio.create_task(
-                    _submit_anthropic_batch(
+                    _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
                 )
             )

@@ -436,21 +417,10 @@ async def submit_batches_anthropic(

     # Submit final batch if it has items
     if current_batch:
-        def write_final_batch_file():
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-        file_path = await asyncio.to_thread(write_final_batch_file)
+        print("wrote", len(current_batch), "items")
         batch_tasks.append(
             asyncio.create_task(
-                _submit_anthropic_batch(
+                _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
             )
         )

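Anthropic batch submission no longer writes a temporary .jsonl file and uploads it: the accumulated request dicts are posted directly as a JSON body to the Message Batches endpoint. A sketch of the payload shape, assuming each entry follows Anthropic's documented `custom_id` + `params` format (only the outer `{"requests": [...]}` wrapper and the `/messages/batches` URL are visible in the hunks above):

```python
# Assumed per-item shape for the list passed to _submit_anthropic_batch;
# the model id and message content are placeholders.
payload = {
    "requests": [
        {
            "custom_id": "row-0",
            "params": {
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello!"}],
            },
        },
    ]
}
# POSTed as JSON to f"{registry[model].api_base}/messages/batches" with the
# Anthropic API headers, instead of a multipart file upload.
```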