lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py
CHANGED
@@ -1,7 +1,7 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool

 # dotenv.load_dotenv() - don't do this, fucks with other packages

@@ -12,6 +12,5 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
     "File",
 ]
lm_deluge/api_requests/anthropic.py
CHANGED
@@ -6,12 +6,18 @@ from aiohttp import ClientResponse
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_anthropic,
+)
+from lm_deluge.warnings import maybe_warn

 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -58,38 +64,102 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }

+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model
-
-
-        sampling_params.reasoning_effort
-    )
-
-
-
-
-
-
-
-
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            effort = sampling_params.reasoning_effort
+            if effort == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+                effort = "high"
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+            if "kimi" in model.id and "thinking" in model.id:
+                maybe_warn("WARN_KIMI_THINKING_NO_REASONING")
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
             print("ignoring reasoning_effort for non-reasoning model")
+
     if system_message is not None:
         request_json["system"] = system_message

-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-
-
+        request_json.pop("top_p")
+
+    # print(request_json)
+    # Handle structured outputs (output_format)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply Anthropic-specific transformations (move unsupported constraints to description)
+            transformed_schema = transform_schema_for_anthropic(base_schema)
+
+            _add_beta(base_headers, "structured-outputs-2025-11-13")
+            request_json["output_format"] = {
+                "type": "json_schema",
+                "schema": transformed_schema,
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode:
+        # Anthropic doesn't support basic json_mode without a schema
+        print(
+            "WARNING: Anthropic does not support basic json_mode without a schema. "
+            "Use output_schema parameter for structured JSON outputs."
+        )
+
+    # Add beta header for strict tools when enabled
+    if tools and sampling_params.strict_tools and model.supports_json:
+        _add_beta(base_headers, "structured-outputs-2025-11-13")

     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-
+                # Only use strict mode if model supports structured outputs
+                use_strict = sampling_params.strict_tools and model.supports_json
+                tool_definitions.append(tool.dump_for("anthropic", strict=use_strict))
             elif isinstance(tool, dict) and "url" in tool:
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool)
@@ -102,6 +172,9 @@ def _build_anthropic_request(
                     "bash_20241022",
                 ]:
                     _add_beta(base_headers, "computer-use-2024-10-22")
+                elif tool["type"] == "computer_20251124":
+                    # Claude Opus 4.5 - newest computer use with zoom support
+                    _add_beta(base_headers, "computer-use-2025-11-24")
                 elif tool["type"] == "computer_20250124":
                     _add_beta(base_headers, "computer-use-2025-01-24")
                 elif tool["type"] == "code_execution_20250522":
@@ -169,14 +242,37 @@ class AnthropicRequest(APIRequestBase):
                data = await http_response.json()
                response_content = data["content"]

+                # print("=== CONTENT ===")
+                # print(response_content)
+
                # Parse response into Message with parts
                parts = []
                for item in response_content:
                    if item["type"] == "text":
                        parts.append(Text(item["text"]))
                    elif item["type"] == "thinking":
-
-
+                        thinking_content = item.get("thinking", "")
+                        thinking = thinking_content
+                        signature = item.get("signature")
+                        parts.append(
+                            Thinking(
+                                thinking_content,
+                                raw_payload=item,
+                                thought_signature=ThoughtSignature(
+                                    signature,
+                                    provider="anthropic",
+                                )
+                                if signature is not None
+                                else None,
+                            )
+                        )
+                    elif item["type"] == "redacted_thinking":
+                        parts.append(
+                            Thinking(
+                                item.get("data", ""),
+                                raw_payload=item,
+                            )
+                        )
                    elif item["type"] == "tool_use":
                        parts.append(
                            ToolCall(
@@ -190,9 +286,8 @@ class AnthropicRequest(APIRequestBase):
                usage = Usage.from_anthropic_usage(data["usage"])
            except Exception as e:
                is_error = True
-
-
-                )
+                response_text = await http_response.text()
+                error_message = f"Error calling .json() on response w/ status {status_code}: {e}. Response: {response_text[:500]}"
        elif mimetype and "json" in mimetype.lower():
            is_error = True  # expected status is 200, otherwise it's an error
            data = await http_response.json()
lm_deluge/api_requests/base.py
CHANGED
@@ -1,4 +1,6 @@
 import asyncio
+import json
+import os
 import time
 import traceback
 from abc import ABC, abstractmethod
@@ -73,6 +75,24 @@ class APIRequestBase(ABC):

         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
+        if "anthropic-beta" in merged and "anthropic-beta" in filtered_extra:
+            combined = []
+            seen = set()
+            for (
+                raw
+            ) in f"{merged['anthropic-beta']},{filtered_extra['anthropic-beta']}".split(
+                ","
+            ):
+                token = raw.strip()
+                if token and token not in seen:
+                    seen.add(token)
+                    combined.append(token)
+            merged["anthropic-beta"] = ",".join(combined)
+            filtered_extra = {
+                key: value
+                for key, value in filtered_extra.items()
+                if key != "anthropic-beta"
+            }
         merged.update(filtered_extra)

         # Filter out None values from final merged headers
@@ -90,9 +110,32 @@ class APIRequestBase(ABC):
         start -> poll -> result style of request.
         """
         assert self.context.status_tracker, "no status tracker"
-
+        poll_interval = 5.0
+        attempt_start = time.monotonic()
+        deadline = attempt_start + self.context.request_timeout
+        response_id: str | None = None
+        last_status: str | None = None
+
         async with aiohttp.ClientSession() as session:
-
+
+            async def cancel_response(reason: str) -> None:
+                nonlocal response_id
+                if not response_id:
+                    return
+                cancel_url = f"{self.url}/{response_id}/cancel"
+                try:
+                    async with session.post(
+                        url=cancel_url,
+                        headers=self.request_header,
+                    ) as cancel_response:
+                        cancel_response.raise_for_status()
+                        print(f"Background req {response_id} cancelled: {reason}")
+                except (
+                    Exception
+                ) as cancel_err:  # pragma: no cover - best effort logging
+                    print(
+                        f"Failed to cancel background req {response_id}: {cancel_err}"
+                    )

             try:
                 self.context.status_tracker.total_requests += 1
@@ -109,14 +152,11 @@ class APIRequestBase(ABC):
                     last_status = data["status"]

                 while True:
-
-
-
-
-
-                    ) as http_response:
-                        http_response.raise_for_status()
-
+                    now = time.monotonic()
+                    remaining = deadline - now
+                    if remaining <= 0:
+                        elapsed = now - attempt_start
+                        await cancel_response(f"timed out after {elapsed:.1f}s")
                         return APIResponse(
                             id=self.context.task_id,
                             model_internal=self.context.model_name,
@@ -128,8 +168,9 @@ class APIRequestBase(ABC):
                             content=None,
                             usage=None,
                         )
+
                     # poll for the response
-                    await asyncio.sleep(
+                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                     async with session.get(
                         url=f"{self.url}/{response_id}",
                         headers=self.request_header,
@@ -146,6 +187,8 @@ class APIRequestBase(ABC):
                        return await self.handle_response(http_response)

            except Exception as e:
+                if response_id:
+                    await cancel_response(f"errored: {type(e).__name__}")
                raise_if_modal_exception(e)
                tb = traceback.format_exc()
                print(tb)
@@ -166,6 +209,23 @@ class APIRequestBase(ABC):
        await self.build_request()
        assert self.context.status_tracker

+        if os.getenv("DELUGE_PROXY_LOG_PROVIDER_REQUESTS", "").strip().lower() in {
+            "1",
+            "true",
+            "yes",
+            "on",
+        }:
+            print("DELUGE_PROXY_PROVIDER_REQUEST")
+            print(f"URL: {self.url}")
+            print("Headers:")
+            print(self.request_header)
+            if self.request_json is not None:
+                print("JSON:")
+                try:
+                    print(json.dumps(self.request_json, indent=2))
+                except Exception:
+                    print(self.request_json)
+
        if (
            self.context.background
            and self.context.use_responses_api
@@ -199,6 +259,19 @@ class APIRequestBase(ABC):
                usage=None,
            )

+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
        except Exception as e:
            raise_if_modal_exception(e)
            tb = traceback.format_exc()
lm_deluge/api_requests/bedrock.py
CHANGED
@@ -16,6 +16,7 @@ except ImportError:
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
@@ -106,7 +107,8 @@ async def _build_anthropic_bedrock_request(
        tool_definitions = []
        for tool in tools:
            if isinstance(tool, Tool):
-
+                # Bedrock doesn't have the strict-mode betas Anthropic exposes yet
+                tool_definitions.append(tool.dump_for("anthropic", strict=False))
            elif isinstance(tool, dict):
                tool_definitions.append(tool)
        # add betas if needed
@@ -124,7 +126,9 @@ async def _build_anthropic_bedrock_request(
                # Convert to individual tools locally (like OpenAI does)
                individual_tools = await tool.to_tools()
                for individual_tool in individual_tools:
-                    tool_definitions.append(
+                    tool_definitions.append(
+                        individual_tool.dump_for("anthropic", strict=False)
+                    )

    # Add cache control to last tool if tools_only caching is specified
    if cache_pattern == "tools_only" and tool_definitions:
@@ -194,11 +198,11 @@ async def _build_openai_bedrock_request(
        request_tools = []
        for tool in tools:
            if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(tool.dump_for("openai-completions", strict=False))
            elif isinstance(tool, MCPServer):
                as_tools = await tool.to_tools()
                request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [t.dump_for("openai-completions", strict=False) for t in as_tools]
                )
        request_json["tools"] = request_tools

@@ -360,8 +364,28 @@ class BedrockRequest(APIRequestBase):
                if item["type"] == "text":
                    parts.append(Text(item["text"]))
                elif item["type"] == "thinking":
-
-
+                    thinking_content = item.get("thinking", "")
+                    thinking = thinking_content
+                    signature = item.get("signature")
+                    parts.append(
+                        Thinking(
+                            thinking_content,
+                            raw_payload=item,
+                            thought_signature=ThoughtSignature(
+                                signature,
+                                provider="anthropic",
+                            )
+                            if signature is not None
+                            else None,
+                        )
+                    )
+                elif item["type"] == "redacted_thinking":
+                    parts.append(
+                        Thinking(
+                            item.get("data", ""),
+                            raw_payload=item,
+                        )
+                    )
                elif item["type"] == "tool_use":
                    parts.append(
                        ToolCall(
|