lm-deluge 0.0.79__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
lm_deluge/api_requests/anthropic.py CHANGED
@@ -16,6 +16,7 @@ from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_anthropic,
 )
+from lm_deluge.warnings import maybe_warn
 
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -62,20 +63,45 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }
 
+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["effort"] = sampling_params.global_effort
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model and sampling_params.reasoning_effort:
-        # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
-            sampling_params.reasoning_effort
-        )
-        request_json["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": budget,
-        }
-        if "top_p" in request_json:
-            request_json["top_p"] = max(request_json["top_p"], 0.95)
-        request_json["temperature"] = 1.0
-        request_json["max_tokens"] += budget
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(sampling_params.reasoning_effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
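
Taken together, the new Anthropic logic gives an explicit thinking_budget priority over reasoning_effort and only enables thinking for a positive budget. A standalone sketch of that precedence (not the package's actual helper, which mutates request_json in place):

    # Sketch: how 0.0.80 derives the Anthropic "thinking" block.
    EFFORT_TO_BUDGET = {"none": 0, "minimal": 256, "low": 1024, "medium": 4096, "high": 16384}

    def resolve_thinking(thinking_budget: int | None, reasoning_effort: str | None) -> dict:
        if thinking_budget is not None:
            budget = thinking_budget          # explicit budget wins
        elif reasoning_effort is not None:
            budget = EFFORT_TO_BUDGET[reasoning_effort]
        else:
            budget = 0
        if budget > 0:
            # the real builder also raises top_p to >= 0.95, forces temperature=1.0,
            # and adds the budget to max_tokens
            return {"type": "enabled", "budget_tokens": budget}
        return {"type": "disabled"}

    assert resolve_thinking(2048, "high") == {"type": "enabled", "budget_tokens": 2048}
    assert resolve_thinking(None, "none") == {"type": "disabled"}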
@@ -83,10 +109,11 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message
 
-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        if "temperature" in request_json and "top_p" in request_json:
-            request_json.pop("top_p")
+        request_json.pop("top_p")
 
     # Handle structured outputs (output_format)
     if context.output_schema:
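
The top_p hunk is a behavior change, not just cleanup: 0.0.79 dropped top_p only when temperature was also set, while 0.0.80 pops it unconditionally for 4-1/4-5 models. Since dict.pop without a default raises KeyError, this relies on top_p always being present on request_json by that point; a short illustration of the difference:

    params = {"temperature": 1.0, "top_p": 1.0}
    params.pop("top_p")        # 0.0.80 behavior: fine while the key is present
    params.pop("top_p", None)  # the defensive variant, if presence were not guaranteed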
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any
 
 from aiohttp import ClientResponse
 
@@ -52,47 +51,61 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
 
     # Handle reasoning models (thinking)
-    if model.reasoning_model:
-        thinking_config: dict[str, Any] | None = None
-        effort = sampling_params.reasoning_effort
-        is_gemini_3 = "gemini-3" in model.name.lower()
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
+        else:
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[sampling_params.reasoning_effort]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
-        if is_gemini_3:
-            # Gemini 3 uses thinkingLevel instead of thinkingBudget
-            if effort in {"none", "minimal"}:
-                thinking_config = {"thinkingLevel": "low"}
-            elif effort is None:
-                # Default to high when reasoning is enabled but no preference was provided
-                thinking_config = {"thinkingLevel": "high"}
-            else:
-                # Map reasoning_effort to thinkingLevel
-                level_map = {
-                    "minimal": "low",
-                    "low": "low",
-                    "medium": "medium",  # Will work when supported
-                    "high": "high",
-                }
-                thinking_level = level_map.get(effort, "high")
-                thinking_config = {"thinkingLevel": thinking_level}
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert sampling_params.reasoning_effort in level_map
+            budget = level_map[sampling_params.reasoning_effort]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
         else:
-            # Gemini 2.5 uses thinkingBudget (legacy)
-            if effort is None or effort == "none":
-                budget = 128 if "2.5-pro" in model.id else 0
-                # Explicitly disable thoughts when no effort is requested
-                thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
-            else:
-                thinking_config = {"includeThoughts": True}
-                if (
-                    effort in {"minimal", "low", "medium", "high"}
-                    and "flash" in model.id
-                ):
-                    budget = {
-                        "minimal": 256,
-                        "low": 1024,
-                        "medium": 4096,
-                        "high": 16384,
-                    }[effort]
-                    thinking_config["thinkingBudget"] = budget
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
 
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
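
The restructured Gemini branch splits on model generation first: Gemini 3 always thinks and takes a discrete thinkingLevel, while earlier reasoning models take a numeric thinkingBudget with the same thinking_budget-over-reasoning_effort precedence as the Anthropic builder. A simplified standalone sketch (it folds the warnings away and assumes a reasoning-capable model):

    def resolve_gemini_thinking(model_name, model_id, reasoning_effort, thinking_budget):
        if "gemini-3" in model_name.lower():
            # Gemini 3 must think; a missing effort falls back to "low" (with a warning)
            levels = {"none": "low", "minimal": "low", "low": "low",
                      "medium": "high", "high": "high"}
            return {"thinkingLevel": levels[reasoning_effort or "low"]}
        if thinking_budget is not None and thinking_budget > 0:
            return {"includeThoughts": True, "thinkingBudget": thinking_budget}
        if thinking_budget == -1:  # dynamic thinking
            return {"includeThoughts": True, "thinkingBudget": -1}
        if reasoning_effort not in (None, "none"):
            budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[reasoning_effort]
            if "flash-lite" in model_id:
                budget = max(budget, 512)  # flash-lite enforces a 512-token floor
            return {"includeThoughts": True, "thinkingBudget": budget}
        if "2.5-pro" in model_id:
            return {"includeThoughts": True, "thinkingBudget": 128}  # 2.5 Pro must think
        return {"includeThoughts": False, "thinkingBudget": 0}

    assert resolve_gemini_thinking("gemini-3-pro", "gemini-3-pro", "medium", None) == {"thinkingLevel": "high"}
    assert resolve_gemini_thinking("gemini-2.5-flash", "gemini-2.5-flash", "low", None) == {
        "includeThoughts": True, "thinkingBudget": 1024
    }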
lm_deluge/client.py CHANGED
@@ -79,7 +79,7 @@ class _LLMClient(BaseModel):
     background: bool = False
     # sampling params - if provided, and sampling_params is not,
    # these override the defaults
-    temperature: float = 0.75
+    temperature: float = 1.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
@@ -337,7 +337,7 @@ class _LLMClient(BaseModel):
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
                 SamplingParams(
-                    temperature=data.get("temperature", 0.75),
+                    temperature=data.get("temperature", 1.0),
                     top_p=data.get("top_p", 1.0),
                     json_mode=data.get("json_mode", False),
                     max_new_tokens=data.get("max_new_tokens", 512),
@@ -1067,7 +1067,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1096,7 +1096,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1124,7 +1124,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
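
For callers, the practical effect of these four hunks is that omitting temperature now yields 1.0 instead of 0.75. A usage sketch (the positional model argument shown here is illustrative; check the factory's actual signature):

    from lm_deluge.client import LLMClient

    # Pin the old 0.0.79 default explicitly if your prompts were tuned for it.
    client = LLMClient("claude-4.5-sonnet", temperature=0.75)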
lm_deluge/config.py CHANGED
@@ -4,11 +4,13 @@ from pydantic import BaseModel
 
 
 class SamplingParams(BaseModel):
-    temperature: float = 0.0
+    temperature: float = 1.0  # more typical for new models
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 2_048
+    global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
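
The two new SamplingParams fields wire up the builder behavior above; a quick usage sketch:

    from lm_deluge.config import SamplingParams

    # thinking_budget is the new explicit control; when both are set, the request
    # builders warn (WARN_THINKING_BUDGET_AND_REASONING_EFFORT) and the budget wins.
    sp = SamplingParams(reasoning_effort="high", thinking_budget=2048)
    assert sp.temperature == 1.0  # new default, up from 0.0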
lm_deluge/models/anthropic.py CHANGED
@@ -10,6 +10,19 @@ ANTHROPIC_MODELS = {
     # ░███
     # █████
     #
+    "claude-4.5-opus": {
+        "id": "claude-4.5-opus",
+        "name": "claude-opus-4-5-20251101",
+        "api_base": "https://api.anthropic.com/v1",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+        "input_cost": 5.0,
+        "cached_input_cost": 0.50,
+        "cache_write_cost": 6.25,
+        "output_cost": 25.0,
+        "reasoning_model": True,
+    },
     "claude-4.5-haiku": {
         "id": "claude-4.5-haiku",
         "name": "claude-haiku-4-5-20251001",
@@ -21,6 +34,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.10,
         "cache_write_cost": 1.25,
         "output_cost": 3.0,
+        "reasoning_model": True,
     },
     "claude-4.5-sonnet": {
         "id": "claude-4.5-sonnet",
@@ -33,6 +47,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
+        "reasoning_model": True,
     },
     "claude-4.1-opus": {
         "id": "claude-4.1-opus",
lm_deluge/warnings.py CHANGED
@@ -11,6 +11,8 @@ WARNINGS: dict[str, str] = {
     "WARN_MINIMAL_TO_NONE": "GPT-5.1 models don't support 'minimal' reasoning effort. Converting to 'none' for {model_name}.",
     "WARN_MEDIA_RESOLUTION_UNSUPPORTED": "media_resolution parameter is only supported for Gemini 3 models, ignoring for {model_name}.",
     "WARN_GEMINI3_MISSING_SIGNATURE": "Gemini 3 thought signature missing in {part_type}, injecting dummy signature 'context_engineering_is_the_way_to_go' to avoid API error.",
+    "WARN_GEMINI3_NO_REASONING": "Gemini 3 requires reasoning (thinkingConfig). Setting thinkingConfig to low.",
+    "WARN_THINKING_BUDGET_AND_REASONING_EFFORT": "`reasoning_effort` and `thinking_budget` both provided. `thinking_budget` will take priority.",
 }
 
 
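Both builders call these by key, e.g. maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT"). A minimal sketch of how a registry-backed, fire-once helper like this could work (the real implementation in lm_deluge/warnings.py may differ):

    import warnings

    WARNINGS = {"WARN_GEMINI3_NO_REASONING":
                "Gemini 3 requires reasoning (thinkingConfig). Setting thinkingConfig to low."}
    _seen: set[str] = set()

    def maybe_warn(key: str, **fmt) -> None:
        # look up the message by key, format any {placeholders}, warn once per process
        if key not in _seen:
            _seen.add(key)
            warnings.warn(WARNINGS[key].format(**fmt))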
lm_deluge-0.0.80.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.79
+Version: 0.0.80
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
lm_deluge-0.0.80.dist-info/RECORD CHANGED
@@ -2,8 +2,8 @@ lm_deluge/__init__.py,sha256=zF5lAitfgJ8A28IXJ5BE9OUCqGOqSnGOWn3ZIlizNyY,822
 lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
-lm_deluge/client.py,sha256=ZwDD4qkPFJsPxDMCijD6lz2s5ULL-hW58tGFN00BmSI,44796
-lm_deluge/config.py,sha256=7pTfqlg4qHf68qpckr21deVtCuao9b0ypiXT2k-nHUE,1210
+lm_deluge/client.py,sha256=VqCuFXM_ylO4v-lev85HMPFRHeU69tZo70favz-I2Uk,44791
+lm_deluge/config.py,sha256=C-_rVwAFL5sivLfKSkaa2ANMqqxKbyDCW86KfQB_Lck,1357
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
 lm_deluge/file.py,sha256=PTmlJQ-IaYcYUFun9V0bJ1NPVP84edJrR0hvCMWFylY,19697
@@ -15,14 +15,14 @@ lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
 lm_deluge/tool.py,sha256=ipgNy4OpfH3CA9OPQq5zfn1xO8H08GMvDynB8ZPQ5mA,30617
 lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
 lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
-lm_deluge/warnings.py,sha256=bAG9UXPnppk_oWGIsWpY3k5lWin4tganYFw0U7OEvJQ,2062
+lm_deluge/warnings.py,sha256=12RseSa9mYAFkbY783FQTP0x9RapRBErIQt4o7hzVnM,2321
 lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-lm_deluge/api_requests/anthropic.py,sha256=OvkciXTHyrG1cFyC1vv6nYyCFTqtMgt1r15Q-pbHiUQ,10411
+lm_deluge/api_requests/anthropic.py,sha256=ytNeADgGeflmlm5gVQ0cJ5bgchJ_EZvKJIIt7Imxf2A,11338
 lm_deluge/api_requests/base.py,sha256=mXEM85mcU_5LD-ugELpCl28tv-tpHKcaxerTIVLQZVo,10436
 lm_deluge/api_requests/bedrock.py,sha256=mY1xTvgfCLyqLlfFFmu_baKgkVq1Df1_MJXeN_G1jWQ,15597
 lm_deluge/api_requests/chat_reasoning.py,sha256=sJvstvKFqsSBUjYcwxzGt2_FH4cEp3Z6gKcBPyPjGwk,236
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
-lm_deluge/api_requests/gemini.py,sha256=gHmIfEY48B-MYlJYxYc8hT8ojmK16XSETcvfljRKAH0,10813
+lm_deluge/api_requests/gemini.py,sha256=FjYKisAjD6rW2fA6WyXnnRn3oqJBXMod1_8HtGWIyEU,11099
 lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
 lm_deluge/api_requests/openai.py,sha256=E0oakhcb2T5Swfn6ATMjRZKuLyRrx4Zj5SREo1JILfc,28841
 lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
@@ -49,7 +49,7 @@ lm_deluge/llm_tools/subagents.py,sha256=srJ7On7YR0Y8WuNvf5TJl_7IUfEtG3zlxZeLgmn_
 lm_deluge/llm_tools/todos.py,sha256=doKJZWLZlh4J_k6HkdwonWHfZTZaxEI9_XHAoNFnfQo,14906
 lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
 lm_deluge/models/__init__.py,sha256=54H24K_eADbfdEH9aNORrNEXvDLZCQ4TEekeLiWljSE,4619
-lm_deluge/models/anthropic.py,sha256=sFkS-g0OWgRnVoFMKxWkSUt0qy2LVrcO5KtbYAG26iY,6283
+lm_deluge/models/anthropic.py,sha256=X92EYIapos-8LXnIYiypPJcFhI0tqmXja_w8e9H4CF8,6781
 lm_deluge/models/bedrock.py,sha256=g1PbfceSRH2lWST3ja0mUlF3oTq4e4T-si6RMe7qXgg,4888
 lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
 lm_deluge/models/cohere.py,sha256=iXjYtM6jy_YL73Op8OfNsrMNopwae9y-Sw-4vF9cEBw,3406
@@ -74,8 +74,8 @@ lm_deluge/util/schema.py,sha256=q6uwhA4s1lM2dHT1Kwc46E7OY1VecMOtTEI0PTFn6tA,1320
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.79.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.79.dist-info/METADATA,sha256=wqNdfbJ_BIJT-uZMOvwX9RWgqqzUFM4rZ_a4KblAFus,13705
-lm_deluge-0.0.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lm_deluge-0.0.79.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.79.dist-info/RECORD,,
+lm_deluge-0.0.80.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.80.dist-info/METADATA,sha256=LJ2nPTs9WzdiP3kU5KPKUdOy_SuuiHRJCz9PINHEvZk,13705
+lm_deluge-0.0.80.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.80.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.80.dist-info/RECORD,,