lm-deluge 0.0.33__tar.gz → 0.0.34__tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of lm-deluge might be problematic.
- {lm_deluge-0.0.33/src/lm_deluge.egg-info → lm_deluge-0.0.34}/PKG-INFO +1 -1
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/pyproject.toml +1 -1
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/gemini.py +4 -3
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/client.py +50 -6
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/models.py +2 -2
- {lm_deluge-0.0.33 → lm_deluge-0.0.34/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/LICENSE +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/README.md +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/setup.cfg +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/anthropic.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/prompt.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.33 → lm_deluge-0.0.34}/tests/test_native_mcp_server.py +0 -0
src/lm_deluge/api_requests/gemini.py

@@ -1,7 +1,7 @@
 import json
 import os
 import warnings
-
+from typing import Any
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
@@ -37,11 +37,12 @@ async def _build_gemini_request(

     # Handle reasoning models (thinking)
     if model.reasoning_model:
-        thinking_config = None
+        thinking_config: dict[str, Any] | None = None
         effort = sampling_params.reasoning_effort
         if effort is None or effort == "none":
+            budget = 128 if "2.5-pro" in model.id else 0
             # Explicitly disable thoughts when no effort is requested
-            thinking_config = {"includeThoughts": False, "thinkingBudget":
+            thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
         else:
             thinking_config = {"includeThoughts": True}
             if effort in {"low", "medium", "high"} and "flash" in model.id:
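In effect, the new branch picks a Gemini `thinkingConfig` from the requested `reasoning_effort`, falling back to a 128-token budget instead of 0 for 2.5 Pro (which does not accept a zero thinking budget). A minimal standalone sketch of that selection, where `model_id` and `effort` stand in for the library's `model.id` and `sampling_params.reasoning_effort`:

```python
from typing import Any


def pick_thinking_config(model_id: str, effort: str | None) -> dict[str, Any]:
    """Sketch of the 0.0.34 thinkingConfig selection (simplified; flash budget mapping omitted)."""
    if effort is None or effort == "none":
        # Gemini 2.5 Pro cannot fully disable thinking, so fall back to its
        # minimum budget of 128 rather than 0 when no effort is requested.
        budget = 128 if "2.5-pro" in model_id else 0
        return {"includeThoughts": False, "thinkingBudget": budget}
    # Any explicit effort keeps thoughts enabled; the real code additionally maps
    # low/medium/high to a concrete thinkingBudget for flash models (not shown here).
    return {"includeThoughts": True}


print(pick_thinking_config("gemini-2.5-pro", None))      # {'includeThoughts': False, 'thinkingBudget': 128}
print(pick_thinking_config("gemini-2.5-flash", "none"))  # {'includeThoughts': False, 'thinkingBudget': 0}
```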
src/lm_deluge/client.py

@@ -773,10 +773,54 @@ class _LLMClient(BaseModel):

 # Clean factory function with perfect IDE support
 @overload
-def LLMClient(
+def LLMClient(
+    model_names: str,
+    *,
+    max_requests_per_minute: int = 1_000,
+    max_tokens_per_minute: int = 100_000,
+    max_concurrent_requests: int = 225,
+    sampling_params: list[SamplingParams] | None = None,
+    model_weights: list[float] | Literal["uniform", "dynamic"] = "uniform",
+    max_attempts: int = 5,
+    request_timeout: int = 30,
+    cache: Any = None,
+    extra_headers: dict[str, str] | None = None,
+    temperature: float = 0.75,
+    top_p: float = 1.0,
+    json_mode: bool = False,
+    max_new_tokens: int = 512,
+    reasoning_effort: Literal["low", "medium", "high", None] = None,
+    logprobs: bool = False,
+    top_logprobs: int | None = None,
+    force_local_mcp: bool = False,
+    progress: Literal["rich", "tqdm", "manual"] = "rich",
+) -> _LLMClient: ...
+
+
+@overload
+def LLMClient(
+    model_names: list[str],
+    *,
+    max_requests_per_minute: int = 1_000,
+    max_tokens_per_minute: int = 100_000,
+    max_concurrent_requests: int = 225,
+    sampling_params: list[SamplingParams] | None = None,
+    model_weights: list[float] | Literal["uniform", "dynamic"] = "uniform",
+    max_attempts: int = 5,
+    request_timeout: int = 30,
+    cache: Any = None,
+    extra_headers: dict[str, str] | None = None,
+    temperature: float = 0.75,
+    top_p: float = 1.0,
+    json_mode: bool = False,
+    max_new_tokens: int = 512,
+    reasoning_effort: Literal["low", "medium", "high", None] = None,
+    logprobs: bool = False,
+    top_logprobs: int | None = None,
+    force_local_mcp: bool = False,
+    progress: Literal["rich", "tqdm", "manual"] = "rich",
+) -> _LLMClient: ...

-@overload
-def LLMClient(model_names: list[str], **kwargs) -> _LLMClient: ...

 def LLMClient(
     model_names: str | list[str] = "gpt-4.1-mini",
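The practical upshot of replacing the `**kwargs` overloads with fully spelled-out signatures is that both call shapes are type-checked and autocompleted. A hedged usage sketch (assuming `LLMClient` is re-exported from the package root; otherwise it lives in `lm_deluge.client` — model names and option values below are illustrative, and all options come from the signature in this hunk):

```python
from lm_deluge import LLMClient  # assumed re-export; defined in lm_deluge.client

# A single model name resolves to the first overload (model_names: str).
client = LLMClient(
    "gpt-4.1-mini",
    max_new_tokens=256,
    temperature=0.2,
)

# A list of names resolves to the second overload (model_names: list[str]);
# every other option is keyword-only in both overloads.
multi = LLMClient(
    ["gpt-4.1-mini", "gpt-4.1"],  # illustrative model names
    model_weights="uniform",
    max_requests_per_minute=500,
)
```

Both overloads return the same `_LLMClient` Pydantic model, so the factory stays a thin, typed veneer over the constructor.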
The second client.py hunk is whitespace-only: the removed and added blank lines appear identical and seem to differ only in trailing whitespace.

@@ -802,18 +846,18 @@ def LLMClient(
 ) -> _LLMClient:
     """
     Create an LLMClient with model_names as a positional argument.
-
+
     Args:
         model_names: Model name(s) to use - can be a single string or list of strings
         **kwargs: All other LLMClient configuration options (keyword-only)
-
+
     Returns:
         Configured LLMClient instance
     """
     # Handle default for mutable argument
     if sampling_params is None:
         sampling_params = []
-
+
     # Simply pass everything to the Pydantic constructor
     return _LLMClient(
         model_names=model_names,
src/lm_deluge/models.py

@@ -1275,7 +1275,7 @@ def register_model(
     reasoning_model: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
     tokens_per_minute: int | None = None,
-    requests_per_minute: int | None = None
+    requests_per_minute: int | None = None,
 ) -> APIModel:
     """Register a model configuration and return the created APIModel."""
     model = APIModel(
@@ -1292,7 +1292,7 @@ def register_model(
         reasoning_model=reasoning_model,
         regions=regions,
         tokens_per_minute=tokens_per_minute,
-        requests_per_minute=requests_per_minute
+        requests_per_minute=requests_per_minute,
     )
     registry[model.id] = model
     return model
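The trailing-comma edits here are purely stylistic, but the hunks do show part of the public surface of `register_model`: it returns the `APIModel` it builds and also stores it in the module-level `registry`. A rough sketch of a call — the identifying fields are not visible in this diff and are deliberately omitted, so only the keyword arguments shown above appear:

```python
from lm_deluge.models import register_model, registry

custom = register_model(
    # ...identifying fields omitted: they are not part of this diff...
    reasoning_model=False,
    regions=["us-east-1"],        # illustrative
    tokens_per_minute=200_000,    # illustrative rate limits
    requests_per_minute=1_000,
)

# register_model both returns the new APIModel and records it for later lookup.
assert registry[custom.id] is custom
```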