lm-deluge 0.0.59__tar.gz → 0.0.61__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {lm_deluge-0.0.59/src/lm_deluge.egg-info → lm_deluge-0.0.61}/PKG-INFO +1 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/pyproject.toml +1 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/anthropic.py +8 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/bedrock.py +3 -4
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/gemini.py +7 -6
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/mistral.py +8 -9
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/openai.py +16 -13
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/client.py +176 -5
- lm_deluge-0.0.61/src/lm_deluge/file.py +527 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/openai.py +28 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/prompt.py +70 -14
- lm_deluge-0.0.61/src/lm_deluge/warnings.py +46 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/SOURCES.txt +4 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/tests/test_builtin_tools.py +2 -2
- lm_deluge-0.0.61/tests/test_file_upload.py +627 -0
- lm_deluge-0.0.61/tests/test_openrouter_generic.py +238 -0
- lm_deluge-0.0.59/src/lm_deluge/file.py +0 -158
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/LICENSE +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/README.md +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/setup.cfg +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/anthropic.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/google.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/openrouter.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/tests/test_native_mcp_server.py +0 -0
src/lm_deluge/api_requests/anthropic.py

@@ -42,6 +42,14 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }

+    # Check if any messages contain uploaded files (file_id)
+    # If so, add the files-api beta header
+    for msg in prompt.messages:
+        for file in msg.files:
+            if file.is_remote and file.remote_provider == "anthropic":
+                _add_beta(base_headers, "files-api-2025-04-14")
+                break
+
     request_json = {
         "model": model.name,
         "messages": messages,
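
The hunk above calls an _add_beta helper that is not part of this diff; it already exists elsewhere in anthropic.py. As a rough sketch only (the real helper may differ), it presumably merges a flag into the comma-separated anthropic-beta header without duplicating it:

def _add_beta(headers: dict[str, str], beta: str) -> None:
    # Sketch: append a beta flag to the "anthropic-beta" header exactly once.
    existing = headers.get("anthropic-beta")
    if not existing:
        headers["anthropic-beta"] = beta
    elif beta not in [b.strip() for b in existing.split(",")]:
        headers["anthropic-beta"] = f"{existing},{beta}"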
src/lm_deluge/api_requests/bedrock.py

@@ -1,10 +1,11 @@
 import asyncio
 import json
 import os
-import warnings

 from aiohttp import ClientResponse

+from lm_deluge.warnings import maybe_warn
+
 try:
     from requests_aws4auth import AWS4Auth
 except ImportError:

@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
     # Note: GPT-OSS on Bedrock doesn't support response_format parameter
     # Even though the model supports JSON, we can't use the response_format parameter
     if sampling_params.json_mode and model.supports_json:
-        warnings.warn(
-            f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-        )
+        maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)

     if tools:
         request_tools = []
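
The new src/lm_deluge/warnings.py (+46 lines) is not included in this diff. Judging only from the call sites, maybe_warn takes a warning code plus keyword arguments. A minimal sketch of such a helper follows; the template wording and the emit-once policy are assumptions, not the package's actual implementation:

import warnings

# Assumed message templates keyed by warning code; wording is illustrative only.
_TEMPLATES: dict[str, str] = {
    "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort for non-reasoning model: {model_name}",
    "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs for model without logprob support: {model_name}",
    "WARN_JSON_MODE_UNSUPPORTED": "JSON mode requested for {model_name} but response_format is not supported",
    "WARN_CACHING_UNSUPPORTED": "cache={cache_param!r} ignored for {model_name}; prompt caching is not supported here",
}

_emitted: set[str] = set()

def maybe_warn(code: str, **kwargs) -> None:
    # Emit each warning code at most once so large batches don't flood stderr.
    if code in _emitted:
        return
    _emitted.add(code)
    warnings.warn(_TEMPLATES.get(code, code).format(**kwargs))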
src/lm_deluge/api_requests/gemini.py

@@ -1,11 +1,12 @@
 import json
 import os
-import warnings
 from typing import Any
+
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import Tool
+from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel

@@ -54,9 +55,7 @@ async def _build_gemini_request(

     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)

     # Add tools if provided
     if tools:

@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):

         # Warn if cache is specified for Gemini model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )

         self.model = APIModel.from_registry(self.context.model_name)
src/lm_deluge/api_requests/mistral.py

@@ -1,9 +1,10 @@
 import json
 import os
-import warnings

 from aiohttp import ClientResponse

+from lm_deluge.warnings import maybe_warn
+
 from ..models import APIModel
 from ..prompt import Message
 from ..request_context import RequestContext

@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):

         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)

@@ -38,13 +41,9 @@ class MistralRequest(APIRequestBase):
             "max_tokens": self.context.sampling_params.max_new_tokens,
         }
         if self.context.sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.logprobs:
-            warnings.warn(
-                f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.json_mode and self.model.supports_json:
             self.request_json["response_format"] = {"type": "json_object"}
src/lm_deluge/api_requests/openai.py

@@ -1,7 +1,6 @@
 import json
 import os
 import traceback as tb
-import warnings
 from types import SimpleNamespace

 import aiohttp

@@ -9,6 +8,7 @@ from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel

@@ -75,9 +75,8 @@ async def _build_oa_chat_request(
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
+
     if sampling_params.logprobs:
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:

@@ -105,8 +104,10 @@ class OpenAIRequest(APIRequestBase):

         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)

@@ -283,9 +284,7 @@ async def _build_oa_responses_request(
         }
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

     if sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}

@@ -322,8 +321,10 @@ class OpenAIResponsesRequest(APIRequestBase):
         super().__init__(context)
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)

@@ -526,8 +527,10 @@ async def stream_chat(
     extra_headers: dict[str, str] | None = None,
 ):
     if cache is not None:
-        warnings.warn(
-
+        maybe_warn(
+            "WARN_CACHING_UNSUPPORTED",
+            model_name=model_name,
+            cache_param=cache,
         )

     model = APIModel.from_registry(model_name)
src/lm_deluge/client.py

@@ -3,6 +3,7 @@ from typing import (
     Any,
     AsyncGenerator,
     Callable,
+    ClassVar,
     Literal,
     Self,
     Sequence,

@@ -31,7 +32,7 @@ from lm_deluge.tool import MCPServer, Tool

 from .api_requests.base import APIResponse
 from .config import SamplingParams
-from .models import APIModel, registry
+from .models import APIModel, register_model, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker

@@ -43,6 +44,12 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """

+    _REASONING_SUFFIXES: ClassVar[dict[str, Literal["low", "medium", "high"]]] = {
+        "-low": "low",
+        "-medium": "medium",
+        "-high": "high",
+    }
+
     model_names: str | list[str] = ["gpt-4.1-mini"]
     name: str | None = None
     max_requests_per_minute: int = 1_000

@@ -117,13 +124,112 @@ class _LLMClient(BaseModel):

     # NEW! Builder methods
     def with_model(self, model: str):
-        self.
+        self._update_models([model])
         return self

     def with_models(self, models: list[str]):
-        self.
+        self._update_models(models)
         return self

+    def _update_models(self, models: list[str]) -> None:
+        normalized, per_model_efforts = self._normalize_model_names(models)
+        if self.reasoning_effort is None:
+            unique_efforts = {eff for eff in per_model_efforts if eff is not None}
+            if len(normalized) == 1 and per_model_efforts[0] is not None:
+                self.reasoning_effort = per_model_efforts[0]
+            elif (
+                len(unique_efforts) == 1
+                and len(unique_efforts) != 0
+                and None not in per_model_efforts
+            ):
+                self.reasoning_effort = next(iter(unique_efforts))  # type: ignore
+        self.model_names = normalized
+        self._align_sampling_params(per_model_efforts)
+        self._reset_model_weights()
+
+    def _normalize_model_names(
+        self, models: list[str]
+    ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+        normalized: list[str] = []
+        efforts: list[Literal["low", "medium", "high"] | None] = []
+
+        for name in models:
+            base_name = self._preprocess_openrouter_model(name)
+            trimmed_name, effort = self.__class__._strip_reasoning_suffix_if_registered(
+                base_name
+            )
+            normalized.append(trimmed_name)
+            efforts.append(effort)
+
+        return normalized, efforts
+
+    def _align_sampling_params(
+        self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+    ) -> None:
+        if len(per_model_efforts) < len(self.model_names):
+            per_model_efforts = per_model_efforts + [None] * (
+                len(self.model_names) - len(per_model_efforts)
+            )
+
+        if not self.model_names:
+            self.sampling_params = []
+            return
+
+        if not self.sampling_params:
+            self.sampling_params = []
+
+        if len(self.sampling_params) == 0:
+            for _ in self.model_names:
+                self.sampling_params.append(
+                    SamplingParams(
+                        temperature=self.temperature,
+                        top_p=self.top_p,
+                        json_mode=self.json_mode,
+                        max_new_tokens=self.max_new_tokens,
+                        reasoning_effort=self.reasoning_effort,
+                        logprobs=self.logprobs,
+                        top_logprobs=self.top_logprobs,
+                    )
+                )
+        elif len(self.sampling_params) == 1 and len(self.model_names) > 1:
+            base_param = self.sampling_params[0]
+            self.sampling_params = [
+                base_param.model_copy(deep=True) for _ in self.model_names
+            ]
+        elif len(self.sampling_params) != len(self.model_names):
+            base_param = self.sampling_params[0]
+            self.sampling_params = [
+                base_param.model_copy(deep=True) for _ in self.model_names
+            ]
+
+        if self.reasoning_effort is not None:
+            for sp in self.sampling_params:
+                sp.reasoning_effort = self.reasoning_effort
+        else:
+            for sp, effort in zip(self.sampling_params, per_model_efforts):
+                if effort is not None:
+                    sp.reasoning_effort = effort
+
+    def _reset_model_weights(self) -> None:
+        if not self.model_names:
+            self.model_weights = []
+            return
+
+        if isinstance(self.model_weights, list):
+            if len(self.model_weights) == len(self.model_names) and any(
+                self.model_weights
+            ):
+                total = sum(self.model_weights)
+                if total == 0:
+                    self.model_weights = [
+                        1 / len(self.model_names) for _ in self.model_names
+                    ]
+                else:
+                    self.model_weights = [w / total for w in self.model_weights]
+                return
+        # Fallback to uniform distribution
+        self.model_weights = [1 / len(self.model_names) for _ in self.model_names]
+
     def with_limits(
         self,
         max_requests_per_minute: int | None = None,
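
With the builder methods now delegating to _update_models, swapping models re-normalizes the names, re-aligns sampling_params to one entry per model, and resets model_weights. A hedged usage sketch, assuming the public class is LLMClient and that the model ids used are registered:

from lm_deluge import LLMClient  # public entry point assumed

client = LLMClient(model_names="gpt-4.1-mini")
# Replacing the model list keeps sampling params and weights consistent:
client = client.with_models(["gpt-4.1-mini", "gpt-4.1"])  # ids assumed to be registered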
@@ -147,11 +253,64 @@ class _LLMClient(BaseModel):
     def models(self):
         return self.model_names  # why? idk

+    @staticmethod
+    def _preprocess_openrouter_model(model_name: str) -> str:
+        """Process openrouter: prefix and register model if needed."""
+        if model_name.startswith("openrouter:"):
+            slug = model_name.split(":", 1)[1]  # Everything after "openrouter:"
+            # Create a unique id by replacing slashes with hyphens
+            model_id = f"openrouter-{slug.replace('/', '-')}"
+
+            # Register the model if not already in registry
+            if model_id not in registry:
+                register_model(
+                    id=model_id,
+                    name=slug,  # The full slug sent to OpenRouter API (e.g., "openrouter/andromeda-alpha")
+                    api_base="https://openrouter.ai/api/v1",
+                    api_key_env_var="OPENROUTER_API_KEY",
+                    api_spec="openai",
+                    supports_json=True,
+                    supports_logprobs=False,
+                    supports_responses=False,
+                    input_cost=0,  # Unknown costs for generic models
+                    cached_input_cost=0,
+                    cache_write_cost=0,
+                    output_cost=0,
+                )
+
+            return model_id
+        return model_name
+
     @model_validator(mode="before")
     @classmethod
     def fix_lists(cls, data) -> "_LLMClient":
-
-
+        # Process model_names - handle both strings and lists
+        model_names = data.get("model_names")
+
+        if isinstance(model_names, str):
+            # Single model as string
+            # First, handle OpenRouter prefix
+            model_name = cls._preprocess_openrouter_model(model_names)
+
+            # Then handle reasoning effort suffix (e.g., "gpt-5-high")
+            model_name, effort = cls._strip_reasoning_suffix_if_registered(model_name)
+            if effort and data.get("reasoning_effort") is None:
+                data["reasoning_effort"] = effort
+
+            data["model_names"] = [model_name]
+
+        elif isinstance(model_names, list):
+            # List of models - process each one
+            processed_models = []
+            for model_name in model_names:
+                # Handle OpenRouter prefix for each model
+                processed_model = cls._preprocess_openrouter_model(model_name)
+                processed_model, _ = cls._strip_reasoning_suffix_if_registered(
+                    processed_model
+                )
+                processed_models.append(processed_model)
+            data["model_names"] = processed_models
+
         if not isinstance(data.get("sampling_params", []), list):
             data["sampling_params"] = [data["sampling_params"]]
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
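
A hedged example of the new openrouter: prefix handling, continuing the LLMClient assumption above and reusing the slug mentioned in the code comment; it assumes OPENROUTER_API_KEY is set in the environment:

client = LLMClient(model_names="openrouter:openrouter/andromeda-alpha")
# The model is registered on the fly under the id "openrouter-openrouter-andromeda-alpha",
# pointed at https://openrouter.ai/api/v1 with the OpenAI-compatible spec,
# and the original slug "openrouter/andromeda-alpha" is sent as the model name.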
@@ -170,6 +329,18 @@ class _LLMClient(BaseModel):
         data["sampling_params"] = data["sampling_params"] * len(data["model_names"])
         return data

+    @classmethod
+    def _strip_reasoning_suffix_if_registered(
+        cls, model_name: str
+    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+        """Remove reasoning suffix only when the trimmed model already exists."""
+        for suffix, effort in cls._REASONING_SUFFIXES.items():
+            if model_name.endswith(suffix) and len(model_name) > len(suffix):
+                candidate = model_name[: -len(suffix)]
+                if candidate in registry:
+                    return candidate, effort
+        return model_name, None
+
     @model_validator(mode="after")
     def validate_client(self) -> Self:
         if isinstance(self.model_names, str):