lm-deluge 0.0.56__py3-none-any.whl → 0.0.69__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lm_deluge/__init__.py +12 -1
  2. lm_deluge/api_requests/anthropic.py +12 -1
  3. lm_deluge/api_requests/base.py +87 -5
  4. lm_deluge/api_requests/bedrock.py +3 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +7 -6
  7. lm_deluge/api_requests/mistral.py +8 -9
  8. lm_deluge/api_requests/openai.py +179 -124
  9. lm_deluge/batches.py +25 -9
  10. lm_deluge/client.py +280 -67
  11. lm_deluge/config.py +1 -1
  12. lm_deluge/file.py +382 -13
  13. lm_deluge/mock_openai.py +482 -0
  14. lm_deluge/models/__init__.py +12 -8
  15. lm_deluge/models/anthropic.py +12 -20
  16. lm_deluge/models/bedrock.py +0 -14
  17. lm_deluge/models/cohere.py +0 -16
  18. lm_deluge/models/google.py +0 -20
  19. lm_deluge/models/grok.py +48 -4
  20. lm_deluge/models/groq.py +2 -2
  21. lm_deluge/models/kimi.py +34 -0
  22. lm_deluge/models/meta.py +0 -8
  23. lm_deluge/models/minimax.py +10 -0
  24. lm_deluge/models/openai.py +28 -34
  25. lm_deluge/models/openrouter.py +64 -1
  26. lm_deluge/models/together.py +0 -16
  27. lm_deluge/prompt.py +138 -29
  28. lm_deluge/request_context.py +9 -11
  29. lm_deluge/tool.py +395 -19
  30. lm_deluge/tracker.py +11 -5
  31. lm_deluge/warnings.py +46 -0
  32. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/METADATA +3 -1
  33. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/RECORD +36 -33
  34. lm_deluge/agent.py +0 -0
  35. lm_deluge/gemini_limits.py +0 -65
  36. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/WHEEL +0 -0
  37. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/licenses/LICENSE +0 -0
  38. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,7 +1,14 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool, ToolParams
+
+try:
+    from .mock_openai import MockAsyncOpenAI  # noqa
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
 
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -12,5 +19,9 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
+    "ToolParams",
     "File",
 ]
+
+if _has_openai:
+    __all__.append("MockAsyncOpenAI")
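A small consumer-side sketch of the new exports, assuming only what the `__init__.py` above guarantees: `ToolParams` is always importable, while `MockAsyncOpenAI` appears in `__all__` only when the optional `openai` dependency is installed.

# Sketch only; relies on the conditional export shown above.
import lm_deluge
from lm_deluge import Tool, ToolParams  # always available

if "MockAsyncOpenAI" in lm_deluge.__all__:
    from lm_deluge import MockAsyncOpenAI  # requires the `openai` package
else:
    MockAsyncOpenAI = None  # optional dependency not installed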
lm_deluge/api_requests/anthropic.py CHANGED
@@ -42,6 +42,14 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }
 
+    # Check if any messages contain uploaded files (file_id)
+    # If so, add the files-api beta header
+    for msg in prompt.messages:
+        for file in msg.files:
+            if file.is_remote and file.remote_provider == "anthropic":
+                _add_beta(base_headers, "files-api-2025-04-14")
+                break
+
     request_json = {
         "model": model.name,
         "messages": messages,
@@ -72,7 +80,7 @@ def _build_anthropic_request(
         request_json["system"] = system_message
 
     # handle temp + top_p for opus 4.1/sonnet 4.5
-    if model.name in ["claude-sonnet-4-5-20250929", "claude-opus-4-1-20250805"]:
+    if "4-1" in model.name or "4-5" in model.name:
         if "temperature" in request_json and "top_p" in request_json:
             request_json.pop("top_p")
 
@@ -82,6 +90,9 @@ def _build_anthropic_request(
     for tool in tools:
         if isinstance(tool, Tool):
            tool_definitions.append(tool.dump_for("anthropic"))
+        elif isinstance(tool, dict) and "url" in tool:
+            _add_beta(base_headers, "mcp-client-2025-04-04")
+            mcp_servers.append(tool)
        elif isinstance(tool, dict):
            tool_definitions.append(tool)
    # add betas if needed
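The `_add_beta` helper is called above but its definition sits outside this diff. A minimal sketch of what such a helper presumably does, assuming it only appends to the comma-separated `anthropic-beta` header without duplicating entries:

# Hypothetical helper; the real _add_beta in this module may differ.
def _add_beta(headers: dict[str, str], beta: str) -> None:
    # Anthropic takes beta flags as a comma-separated "anthropic-beta"
    # header; append the flag only if it is not already present.
    existing = [b for b in headers.get("anthropic-beta", "").split(",") if b]
    if beta not in existing:
        existing.append(beta)
    headers["anthropic-beta"] = ",".join(existing)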
lm_deluge/api_requests/base.py CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod
 
@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse
 
@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)
 
+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                        last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-        # try:
-        #     dumped = json.dumps(self.request_json)
-        # except Exception:
-        #     print("couldn't serialize request json")
-        #     print(self.request_json)
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
lm_deluge/api_requests/bedrock.py CHANGED
@@ -1,10 +1,11 @@
 import asyncio
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 try:
     from requests_aws4auth import AWS4Auth
 except ImportError:
@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
     # Note: GPT-OSS on Bedrock doesn't support response_format parameter
     # Even though the model supports JSON, we can't use the response_format parameter
     if sampling_params.json_mode and model.supports_json:
-        warnings.warn(
-            f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-        )
+        maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)
 
     if tools:
         request_tools = []
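These call sites rely on the new `lm_deluge/warnings.py` module (listed above with +46 lines but not shown in this diff). A rough sketch of the shape a keyed, warn-once `maybe_warn` helper could take; the message catalog and de-duplication logic here are assumptions, not the shipped code:

# Hypothetical sketch of lm_deluge/warnings.py; the shipped module may differ.
import warnings

_MESSAGES = {
    "WARN_JSON_MODE_UNSUPPORTED": (
        "JSON mode requested for {model_name} but response_format is not supported here"
    ),
    "WARN_REASONING_UNSUPPORTED": (
        "Ignoring reasoning_effort param for non-reasoning model: {model_name}"
    ),
    "WARN_LOGPROBS_UNSUPPORTED": (
        "Ignoring logprobs param for non-logprobs model: {model_name}"
    ),
    "WARN_CACHING_UNSUPPORTED": (
        "Cache parameter '{cache_param}' is not supported for {model_name}, ignoring"
    ),
}
_already_warned: set[str] = set()


def maybe_warn(code: str, **kwargs) -> None:
    # Emit each warning code at most once per process to avoid log spam.
    if code in _already_warned:
        return
    _already_warned.add(code)
    warnings.warn(_MESSAGES[code].format(**kwargs))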
lm_deluge/api_requests/chat_reasoning.py CHANGED
@@ -0,0 +1,4 @@
+# this request type is for models that add "reasoning_content"
+# on top of the openai chat completions. it's important to be separate
+# for providers that expect you to provide back the reasoning content to
+# preserve best performance.
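A hedged illustration of the message shape this comment describes: some chat-completions-style providers attach a `reasoning_content` field to assistant messages and expect it to be replayed with the conversation history. The field names below follow that convention, not any `lm_deluge` internals.

# Illustration only: a provider that extends chat completions with
# "reasoning_content" returns it on the assistant message and expects the
# full history (including that field) on the next request.
history = [
    {"role": "user", "content": "What is 17 * 24?"},
    {
        "role": "assistant",
        "content": "17 * 24 = 408.",
        "reasoning_content": "17 * 20 = 340, 17 * 4 = 68, 340 + 68 = 408.",
    },
    {"role": "user", "content": "Now divide that by 4."},
]
# Sending `history` as-is preserves the earlier reasoning for the provider.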
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,11 +1,12 @@
 import json
 import os
-import warnings
 from typing import Any
+
 from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import Tool
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -54,9 +55,7 @@ async def _build_gemini_request(
 
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)
 
     # Add tools if provided
     if tools:
@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):
 
         # Warn if cache is specified for Gemini model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is not supported for Gemini models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
 
         self.model = APIModel.from_registry(self.context.model_name)
lm_deluge/api_requests/mistral.py CHANGED
@@ -1,9 +1,10 @@
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 from ..models import APIModel
 from ..prompt import Message
 from ..request_context import RequestContext
@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):
 
         # Warn if cache is specified for non-Anthropic model
        if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
            )
        self.model = APIModel.from_registry(self.context.model_name)
 
@@ -38,13 +41,9 @@ class MistralRequest(APIRequestBase):
             "max_tokens": self.context.sampling_params.max_new_tokens,
         }
         if self.context.sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.logprobs:
-            warnings.warn(
-                f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.json_mode and self.model.supports_json:
             self.request_json["response_format"] = {"type": "json_object"}