lm-deluge 0.0.78__tar.gz → 0.0.80__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.78/src/lm_deluge.egg-info → lm_deluge-0.0.80}/PKG-INFO +8 -8
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/README.md +7 -7
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/pyproject.toml +1 -1
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/anthropic.py +43 -16
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/gemini.py +95 -15
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/client.py +6 -5
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/config.py +10 -1
- lm_deluge-0.0.80/src/lm_deluge/llm_tools/sandbox.py +523 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/anthropic.py +15 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/google.py +15 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/openrouter.py +10 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/prompt.py +62 -24
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/warnings.py +4 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80/src/lm_deluge.egg-info}/PKG-INFO +8 -8
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/SOURCES.txt +1 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_filesystem_live.py +1 -1
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/LICENSE +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/setup.cfg +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/filesystem.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/subagents.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/todos.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/__init__.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/kimi.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/openai.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/schema.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_file_upload.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_filesystem.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_mock_openai.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_native_mcp_server.py +0 -0
- {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_openrouter_generic.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.78
+Version: 0.0.80
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -52,7 +52,7 @@ Dynamic: license-file
 pip install lm-deluge
 ```

-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GEMINI_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.

 ## Quickstart

@@ -61,9 +61,9 @@ The package relies on environment variables for API keys. Typical variables incl
 ```python
 from lm_deluge import LLMClient

-client = LLMClient("gpt-
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
-print(
+print(resps[0].completion)
 ```

 ## Spraying Across Models
@@ -74,13 +74,13 @@ To distribute your requests across models, just provide a list of more than one
 from lm_deluge import LLMClient

 client = LLMClient(
-    ["gpt-
+    ["gpt-4.1-mini", "claude-4.5-haiku"],
     max_requests_per_minute=10_000
 )
 resps = client.process_prompts_sync(
     ["Hello, ChatGPT!", "Hello, Claude!"]
 )
-print(
+print(resps[0].completion)
 ```

 ## Configuration
@@ -181,7 +181,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"

 tool = Tool.from_function(get_weather)
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -255,7 +255,7 @@ conv = (
 )

 # Use prompt caching to cache system message and tools
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools" # Cache system message and any tools
README.md

@@ -23,7 +23,7 @@
 pip install lm-deluge
 ```

-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GEMINI_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.

 ## Quickstart

@@ -32,9 +32,9 @@ The package relies on environment variables for API keys. Typical variables incl
 ```python
 from lm_deluge import LLMClient

-client = LLMClient("gpt-
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
-print(
+print(resps[0].completion)
 ```

 ## Spraying Across Models
@@ -45,13 +45,13 @@ To distribute your requests across models, just provide a list of more than one
 from lm_deluge import LLMClient

 client = LLMClient(
-    ["gpt-
+    ["gpt-4.1-mini", "claude-4.5-haiku"],
     max_requests_per_minute=10_000
 )
 resps = client.process_prompts_sync(
     ["Hello, ChatGPT!", "Hello, Claude!"]
 )
-print(
+print(resps[0].completion)
 ```

 ## Configuration
@@ -152,7 +152,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"

 tool = Tool.from_function(get_weather)
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -226,7 +226,7 @@ conv = (
 )

 # Use prompt caching to cache system message and tools
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools" # Cache system message and any tools
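The README paragraph changed above now lists `GEMINI_API_KEY` and notes that `LLMClient` loads `.env` automatically on import. As a minimal, illustrative pre-flight check (not part of lm-deluge), you could verify the variables it names before constructing a client:

```python
import os

# Hypothetical helper: the variable names come from the README paragraph above.
REQUIRED_KEYS = ["OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GEMINI_API_KEY"]

missing = [key for key in REQUIRED_KEYS if not os.environ.get(key)]
if missing:
    print(f"Missing API keys (set them in .env or the shell): {missing}")
```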
src/lm_deluge/api_requests/anthropic.py

@@ -16,6 +16,7 @@ from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_anthropic,
 )
+from lm_deluge.warnings import maybe_warn

 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -62,20 +63,45 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }

+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["effort"] = sampling_params.global_effort
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model
-    …
-        sampling_params.reasoning_effort
-    )
-    …
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(sampling_params.reasoning_effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
@@ -83,10 +109,11 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message

-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-    …
-        request_json.pop("top_p")
+        request_json.pop("top_p")

     # Handle structured outputs (output_format)
     if context.output_schema:
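The Anthropic builder above now accepts either an explicit `thinking_budget` or a `reasoning_effort` level and turns both into a token budget (warning when both are set). A standalone sketch of that translation, using an illustrative helper name rather than anything exported by lm-deluge:

```python
# Mirrors the effort-to-budget table in the hunk above; `effort_to_budget` is
# an illustrative helper, not an lm-deluge API.
EFFORT_TO_BUDGET = {"none": 0, "minimal": 256, "low": 1024, "medium": 4096, "high": 16384}

def effort_to_budget(reasoning_effort: str | None, thinking_budget: int | None) -> int:
    if thinking_budget is not None:
        return thinking_budget  # an explicit budget wins
    if reasoning_effort is not None:
        return EFFORT_TO_BUDGET[reasoning_effort]
    return 0  # no thinking requested

assert effort_to_budget("medium", None) == 4096
assert effort_to_budget("high", 2048) == 2048
assert effort_to_budget(None, None) == 0
```

When the resulting budget is positive, the builder enables thinking, forces `temperature` to 1.0, clamps `top_p` to at least 0.95, and adds the budget on top of `max_tokens`.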
src/lm_deluge/api_requests/gemini.py

@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any

 from aiohttp import ClientResponse

@@ -23,6 +22,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()

+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -37,20 +51,61 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}

     # Handle reasoning models (thinking)
-    …
-    if
-    …
-        thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
         else:
-    …
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high", # change when supported
+                "high": "high",
+            }
+            effort = level_map[sampling_params.reasoning_effort]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert sampling_params.reasoning_effort in level_map
+            budget = level_map[sampling_params.reasoning_effort]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
+        else:
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config

     else:
@@ -66,6 +121,21 @@ async def _build_gemini_request(
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"

+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json


@@ -137,10 +207,19 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    thought_sig = part.get("thoughtSignature")
+
                     if "text" in part:
                         parts.append(Text(part["text"]))
                     elif "thought" in part:
-                        …
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +231,7 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )

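For Gemini 3 models the builder above switches from token budgets to a `thinkingLevel`, injects dummy `thoughtSignature` values on function calls that lack one, and attaches `mediaResolution` when requested. A hand-assembled illustration of the resulting request fragment (payload shape inferred from the hunks above, not captured from a live request):

```python
# Illustrative request fragment for a Gemini 3 model with reasoning_effort="high"
# and media_resolution="media_resolution_high"; values follow the mappings above.
request_json = {
    "contents": [],  # conversation turns from prompt.to_gemini()
    "generationConfig": {
        "thinkingConfig": {"thinkingLevel": "high"},  # Gemini 3 uses levels, not budgets
        "mediaResolution": {"level": "media_resolution_high"},  # Gemini 3 only
    },
}
```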
src/lm_deluge/client.py

@@ -79,7 +79,7 @@ class _LLMClient(BaseModel):
     background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
-    temperature: float = 0
+    temperature: float = 1.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
@@ -262,6 +262,7 @@ class _LLMClient(BaseModel):
             self.max_tokens_per_minute = max_tokens_per_minute
         if max_concurrent_requests:
             self.max_concurrent_requests = max_concurrent_requests
+        return self

     def _get_tracker(self) -> StatusTracker:
         if self._tracker is None:
@@ -336,7 +337,7 @@ class _LLMClient(BaseModel):
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
                 SamplingParams(
-                    temperature=data.get("temperature", 0
+                    temperature=data.get("temperature", 1.0),
                     top_p=data.get("top_p", 1.0),
                     json_mode=data.get("json_mode", False),
                     max_new_tokens=data.get("max_new_tokens", 512),
@@ -1066,7 +1067,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1095,7 +1096,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1123,7 +1124,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
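Note that the default `temperature` moves from 0 to 1.0 throughout the client. A minimal sketch, based on the README examples above, of pinning the old behavior explicitly:

```python
from lm_deluge import LLMClient

# The default temperature is now 1.0; pass 0.0 explicitly to keep the old default.
client = LLMClient("gpt-4.1-mini", temperature=0.0)
resps = client.process_prompts_sync(["Hello, world!"])
print(resps[0].completion)
```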
src/lm_deluge/config.py

@@ -4,14 +4,23 @@ from pydantic import BaseModel


 class SamplingParams(BaseModel):
-    temperature: float =
+    temperature: float = 1.0 # more typical for new models
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 2_048
+    global_effort: Literal["low", "medium", "high"] = "high" # for opus-4.5
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
+    # Gemini 3 only - controls multimodal vision processing fidelity
+    media_resolution: (
+        Literal[
+            "media_resolution_low", "media_resolution_medium", "media_resolution_high"
+        ]
+        | None
+    ) = None

     def to_vllm(self):
         try: