lm-deluge 0.0.79__tar.gz → 0.0.81__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.79/src/lm_deluge.egg-info → lm_deluge-0.0.81}/PKG-INFO +2 -3
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/README.md +1 -2
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/pyproject.toml +1 -1
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/__init__.py +1 -2
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/anthropic.py +44 -16
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/base.py +13 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/gemini.py +54 -41
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/openai.py +3 -2
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/client.py +16 -16
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/config.py +3 -1
- lm_deluge-0.0.81/src/lm_deluge/llm_tools/__init__.py +25 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/anthropic.py +15 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/__init__.py +0 -7
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/score.py +2 -2
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/translate.py +5 -3
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/prompt.py +105 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/request_context.py +2 -2
- lm_deluge-0.0.79/src/lm_deluge/tool.py → lm_deluge-0.0.81/src/lm_deluge/tool/__init__.py +531 -314
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/__init__.py +29 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/batch_tool.py +156 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/filesystem.py +1 -1
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/memory.py +190 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/otc/parse.py +188 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/sandbox.py +251 -61
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/todos.py +1 -1
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/tool_search.py +169 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/warnings.py +2 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81/src/lm_deluge.egg-info}/PKG-INFO +2 -3
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/SOURCES.txt +23 -14
- lm_deluge-0.0.81/tests/test_batch_tool.py +98 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_filesystem.py +8 -6
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_filesystem_live.py +1 -1
- lm_deluge-0.0.81/tests/test_otc.py +117 -0
- lm_deluge-0.0.81/tests/test_tool_search.py +86 -0
- lm_deluge-0.0.79/src/lm_deluge/presets/cerebras.py +0 -17
- lm_deluge-0.0.79/src/lm_deluge/presets/meta.py +0 -13
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/LICENSE +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/setup.cfg +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/__init__.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/google.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/kimi.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/openrouter.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/classify.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/extract.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/locate.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/ocr.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/subagents.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/schema.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_file_upload.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_mock_openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_native_mcp_server.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_openrouter_generic.py +0 -0
{lm_deluge-0.0.79/src/lm_deluge.egg-info → lm_deluge-0.0.81}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.79
+Version: 0.0.81
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -296,11 +296,10 @@ We support structured outputs via `json_mode` parameter provided to `SamplingParams`

 ## Built‑in tools

-The `lm_deluge.
+The `lm_deluge.pipelines` module exposes a few helper functions that combine LLMClient with prompt and output parsing to accomplish tasks:

 - `extract` – structure text or images into a Pydantic model based on a schema.
 - `translate` – translate a list of strings to English.
 - `score_llm` – simple yes/no style scoring with optional log probability output.
-- `FilesystemManager` – expose a sandboxed read/write filesystem tool (with optional regex search and `apply_patch` support) that agents can call without touching the host machine.

 Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/README.md
@@ -267,11 +267,10 @@ We support structured outputs via `json_mode` parameter provided to `SamplingParams`

 ## Built‑in tools

-The `lm_deluge.
+The `lm_deluge.pipelines` module exposes a few helper functions that combine LLMClient with prompt and output parsing to accomplish tasks:

 - `extract` – structure text or images into a Pydantic model based on a schema.
 - `translate` – translate a list of strings to English.
 - `score_llm` – simple yes/no style scoring with optional log probability output.
-- `FilesystemManager` – expose a sandboxed read/write filesystem tool (with optional regex search and `apply_patch` support) that agents can call without touching the host machine.

 Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.
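The same helper functions survive the move from `lm_deluge.llm_tools` to `lm_deluge.pipelines`. A minimal usage sketch follows; the `LLMClient("...")` construction and the prompt-template string are assumptions for illustration, while the `translate` and `score_llm` signatures come from this diff.

```python
# Illustrative sketch only: LLMClient("gpt-4.1-mini") and the template are assumptions.
from lm_deluge import LLMClient
from lm_deluge.pipelines import score_llm, translate  # previously lm_deluge.llm_tools

client = LLMClient("gpt-4.1-mini")  # assumed: a model name from the registry

# translate: returns the list with non-English entries translated to English
english = translate(["Bonjour le monde", "already English"], client)

# score_llm: yes/no scoring; return_probabilities=True would use logprobs instead
labels = score_llm(
    "Is the following text about food? {}",
    [("I love ramen",), ("Stocks fell today",)],
    scoring_model=client,
    return_probabilities=False,
)
print(english, labels)
```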

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/__init__.py
@@ -1,7 +1,7 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool, ToolParams
+from .tool import Tool

 try:
     from .mock_openai import (  # noqa
@@ -25,7 +25,6 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
    "File",
 ]

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/anthropic.py
@@ -16,6 +16,7 @@ from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_anthropic,
 )
+from lm_deluge.warnings import maybe_warn

 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -62,20 +63,45 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }

+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model
-        sampling_params.reasoning_effort
-    )
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(sampling_params.reasoning_effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
@@ -83,11 +109,13 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message

-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        request_json.pop("top_p")
+        request_json.pop("top_p")

+    # print(request_json)
     # Handle structured outputs (output_format)
     if context.output_schema:
         if model.supports_json:
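The new Anthropic path resolves an explicit `thinking_budget` first and otherwise maps `reasoning_effort` to a token budget; a positive budget enables extended thinking, forces `temperature` to 1.0, lifts `top_p` to at least 0.95, and adds the budget on top of `max_tokens`. Below is a standalone sketch of that selection logic, not the library's code:

```python
# Standalone sketch of the budget selection above: an explicit thinking_budget wins,
# otherwise reasoning_effort maps to a token budget.
EFFORT_TO_BUDGET = {"none": 0, "minimal": 256, "low": 1024, "medium": 4096, "high": 16384}


def pick_thinking_budget(thinking_budget: int | None, reasoning_effort: str | None) -> int:
    if thinking_budget is not None:
        return thinking_budget
    if reasoning_effort is not None:
        return EFFORT_TO_BUDGET[reasoning_effort]
    return 0


# A positive budget enables thinking; a zero budget leaves thinking disabled.
assert pick_thinking_budget(None, "medium") == 4096
assert pick_thinking_budget(2048, "high") == 2048  # explicit budget takes precedence
assert pick_thinking_budget(None, None) == 0
```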

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/base.py
@@ -222,6 +222,19 @@ class APIRequestBase(ABC):
                 usage=None,
             )

+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
         except Exception as e:
             raise_if_modal_exception(e)
             tb = traceback.format_exc()
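With this change a dropped connection surfaces as a normal error `APIResponse` instead of an unhandled `aiohttp.ServerDisconnectedError`. A hedged sketch of how calling code might filter such responses (only the `id`, `is_error`, and `error_message` fields from the constructor above are relied on; the rest is illustrative):

```python
# Hedged sketch: responses are APIResponse objects as constructed above.
def usable_completions(responses):
    good = []
    for resp in responses:
        if resp.is_error:
            # "Server disconnected." now arrives as an error response rather than
            # as an unhandled aiohttp.ServerDisconnectedError.
            print(f"request {resp.id} failed: {resp.error_message}")
            continue
        good.append(resp)
    return good
```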

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/gemini.py
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any

 from aiohttp import ClientResponse
@@ -52,47 +51,61 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}

     # Handle reasoning models (thinking)
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
+        else:
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[sampling_params.reasoning_effort]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config

+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert sampling_params.reasoning_effort in level_map
+            budget = level_map[sampling_params.reasoning_effort]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
         else:
-            #
-            # Explicitly disable thoughts when no effort is requested
-            thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
-        else:
-            thinking_config = {"includeThoughts": True}
-            if (
-                effort in {"minimal", "low", "medium", "high"}
-                and "flash" in model.id
-            ):
-                budget = {
-                    "minimal": 256,
-                    "low": 1024,
-                    "medium": 4096,
-                    "high": 16384,
-                }[effort]
-                thinking_config["thinkingBudget"] = budget
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config

     else:
@@ -160,7 +173,7 @@ class GeminiRequest(APIRequestBase):
         self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
-            self.context.tools,
+            self.context.tools,  # type: ignore
             self.context.sampling_params,
         )
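For Gemini, the builder now distinguishes Gemini 3 (which must think and takes a `thinkingLevel`) from older reasoning models (which take a `thinkingBudget`). The function below re-derives the `thinkingConfig` selection as a standalone sketch; it is illustrative, not the library's implementation, and it only covers the reasoning-model branch shown above.

```python
# Standalone re-derivation of the thinkingConfig choice for reasoning models.
def gemini_thinking_config(
    model_name: str,
    model_id: str,
    reasoning_effort: str | None,
    thinking_budget: int | None,
) -> dict:
    if "gemini-3" in model_name.lower():
        # Gemini 3 must think: effort collapses to a thinkingLevel of "low" or "high".
        level_map = {"none": "low", "minimal": "low", "low": "low", "medium": "high", "high": "high"}
        return {"thinkingLevel": level_map.get(reasoning_effort or "", "low")}
    if thinking_budget is not None and thinking_budget > 0:
        return {"includeThoughts": True, "thinkingBudget": thinking_budget}
    if thinking_budget == -1:  # dynamic thinking
        return {"includeThoughts": True, "thinkingBudget": -1}
    if reasoning_effort not in (None, "none"):
        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[reasoning_effort]
        if "flash-lite" in model_id:
            budget = max(budget, 512)
        return {"includeThoughts": True, "thinkingBudget": budget}
    if "2.5-pro" in model_id:
        return {"includeThoughts": True, "thinkingBudget": 128}  # 2.5 Pro must think
    return {"includeThoughts": False, "thinkingBudget": 0}


print(gemini_thinking_config("gemini-3-pro", "gemini-3-pro", None, None))           # {'thinkingLevel': 'low'}
print(gemini_thinking_config("gemini-2.5-flash", "gemini-2.5-flash", "low", None))  # thinkingBudget 1024
```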

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/openai.py
@@ -2,17 +2,18 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence

 import aiohttp
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
-from lm_deluge.warnings import maybe_warn
 from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_openai,
 )
+from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel
@@ -610,7 +611,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools:
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/client.py
@@ -79,7 +79,7 @@ class _LLMClient(BaseModel):
     background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
-    temperature: float = 0
+    temperature: float = 1.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
@@ -337,7 +337,7 @@ class _LLMClient(BaseModel):
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
                 SamplingParams(
-                    temperature=data.get("temperature", 0
+                    temperature=data.get("temperature", 1.0),
                     top_p=data.get("top_p", 1.0),
                     json_mode=data.get("json_mode", False),
                     max_new_tokens=data.get("max_new_tokens", 512),
@@ -572,7 +572,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: Literal[True],
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
         output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -585,7 +585,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
         output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -597,7 +597,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: bool = False,
         show_progress: bool = True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -672,7 +672,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: bool = False,
         show_progress=True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
     ):
@@ -705,7 +705,7 @@ class _LLMClient(BaseModel):
         self,
         prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -742,7 +742,7 @@ class _LLMClient(BaseModel):
         self,
         prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -835,7 +835,7 @@ class _LLMClient(BaseModel):
     async def stream(
         self,
         prompt: Prompt,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
     ):
         model, sampling_params = self._select_model()
         prompt = prompts_to_conversations([prompt])[0]
@@ -856,7 +856,7 @@ class _LLMClient(BaseModel):
         task_id: int,
         conversation: Conversation,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
     ) -> AgentLoopResponse:
         """Internal method to run agent loop and return wrapped result."""
@@ -920,7 +920,7 @@ class _LLMClient(BaseModel):
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
     ) -> int:
         """Start an agent loop without waiting for it to complete.
@@ -967,7 +967,7 @@ class _LLMClient(BaseModel):
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -986,7 +986,7 @@ class _LLMClient(BaseModel):
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -1067,7 +1067,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1096,7 +1096,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1124,7 +1124,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
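Across these client signatures, `tools` is now typed as `Sequence[Tool | dict | MCPServer] | None`, so one sequence may mix `Tool` objects, raw dict tool definitions, and MCP servers, and the default `temperature` moves from 0 to 1.0. A hedged sketch (the dict keys and any constructor arguments are placeholders, not confirmed by this diff):

```python
# Placeholder example: the raw dict format and Tool/MCPServer constructors are not shown
# in this diff; the point is that one sequence type now admits all three.
from typing import Sequence

from lm_deluge.tool import MCPServer, Tool

weather_tool: dict = {
    "name": "get_weather",
    "description": "Return current weather for a city.",
    "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}},
}

tools: Sequence[Tool | dict | MCPServer] = [weather_tool]
# Tool and MCPServer instances could be appended to the same sequence before passing
# it to a client call such as client.stream(prompt, tools=tools), as typed above.
```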

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/config.py
@@ -4,11 +4,13 @@ from pydantic import BaseModel


 class SamplingParams(BaseModel):
-    temperature: float =
+    temperature: float = 1.0  # more typical for new models
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 2_048
+    global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
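`SamplingParams` gains `global_effort` and `thinking_budget`, and its default `temperature` moves to 1.0. An illustrative construction using only fields shown above (assuming the class is importable from the package root, as in `__init__.py`):

```python
# Illustrative only; field names come from the SamplingParams definition above.
from lm_deluge import SamplingParams

# Explicit token budget for extended thinking (used by the Anthropic/Gemini builders).
thinking_params = SamplingParams(max_new_tokens=2048, thinking_budget=2048)

# Or an effort level; global_effort is only consulted for claude-4.5-opus.
effort_params = SamplingParams(reasoning_effort="medium", global_effort="high")

# Setting both thinking_budget and reasoning_effort triggers
# WARN_THINKING_BUDGET_AND_REASONING_EFFORT in the request builders.
```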

lm_deluge-0.0.81/src/lm_deluge/llm_tools/__init__.py
@@ -0,0 +1,25 @@
+# Backward compatibility - re-export from new locations
+# Pipelines (workflow functions)
+from ..pipelines import extract, extract_async, score_llm, translate, translate_async
+
+# Prefab tools (Tool managers)
+from ..tool.prefab import (
+    SubAgentManager,
+    TodoItem,
+    TodoManager,
+    TodoPriority,
+    TodoStatus,
+)
+
+__all__ = [
+    "extract",
+    "extract_async",
+    "TodoItem",
+    "TodoManager",
+    "TodoPriority",
+    "TodoStatus",
+    "translate",
+    "translate_async",
+    "score_llm",
+    "SubAgentManager",
+]
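The new `lm_deluge.llm_tools` package is a backward-compatibility shim, so old imports keep resolving to the same objects as the new paths. A quick sketch:

```python
# Old and new import paths resolve to the same objects via the shim above.
from lm_deluge.llm_tools import TodoManager, extract, score_llm  # old path, still works
from lm_deluge.pipelines import extract as extract_new            # new home for workflows
from lm_deluge.tool.prefab import TodoManager as TodoManagerNew   # new home for prefab tools

assert extract is extract_new
assert TodoManager is TodoManagerNew
```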

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/anthropic.py
@@ -10,6 +10,19 @@ ANTHROPIC_MODELS = {
     # ░███
     # █████
     #
+    "claude-4.5-opus": {
+        "id": "claude-4.5-opus",
+        "name": "claude-opus-4-5-20251101",
+        "api_base": "https://api.anthropic.com/v1",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+        "input_cost": 5.0,
+        "cached_input_cost": 0.50,
+        "cache_write_cost": 6.25,
+        "output_cost": 25.0,
+        "reasoning_model": True,
+    },
     "claude-4.5-haiku": {
         "id": "claude-4.5-haiku",
         "name": "claude-haiku-4-5-20251001",
@@ -21,6 +34,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.10,
         "cache_write_cost": 1.25,
         "output_cost": 3.0,
+        "reasoning_model": True,
     },
     "claude-4.5-sonnet": {
         "id": "claude-4.5-sonnet",
@@ -33,6 +47,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
+        "reasoning_model": True,
     },
     "claude-4.1-opus": {
         "id": "claude-4.1-opus",
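The registry gains a `claude-4.5-opus` entry (pointing at `claude-opus-4-5-20251101` and flagged as a reasoning model), and the 4.5 Haiku/Sonnet entries are now flagged `reasoning_model` as well. A hedged sketch reading the new entry straight from the registry module:

```python
# Sketch: inspecting the new entry in the registry dict shown above.
from lm_deluge.models.anthropic import ANTHROPIC_MODELS

opus = ANTHROPIC_MODELS["claude-4.5-opus"]
print(opus["name"])             # claude-opus-4-5-20251101
print(opus["reasoning_model"])  # True
print(opus["input_cost"], opus["output_cost"])  # 5.0 25.0
```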

{lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/__init__.py
RENAMED
@@ -1,18 +1,11 @@
 from .extract import extract, extract_async
 from .score import score_llm
-from .subagents import SubAgentManager
-from .todos import TodoItem, TodoManager, TodoPriority, TodoStatus
 from .translate import translate, translate_async

 __all__ = [
     "extract",
     "extract_async",
-    "TodoItem",
-    "TodoManager",
-    "TodoPriority",
-    "TodoStatus",
     "translate",
     "translate_async",
     "score_llm",
-    "SubAgentManager",
 ]

{lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/score.py
RENAMED
@@ -1,4 +1,4 @@
-from ..client import
+from ..client import _LLMClient, APIResponse
 from ..util.logprobs import extract_prob

 # def extract_prob_yes(logprobs: list[dict]):
@@ -24,7 +24,7 @@ from ..util.logprobs import extract_prob
 def score_llm(
     scoring_prompt_template: str,
     inputs: list[tuple | list | dict],  # to format the template
-    scoring_model:
+    scoring_model: _LLMClient,
     return_probabilities: bool,
     yes_token: str = "yes",
 ) -> list[bool | None] | list[float | None]:

{lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/translate.py
RENAMED
@@ -1,5 +1,5 @@
 import asyncio
-from ..client import
+from ..client import _LLMClient

 translation_prompt = (
     "Translate the following text (enclosed in ```) into English. "
@@ -20,7 +20,9 @@ def is_english(text: str, low_memory: bool = True):
     return True


-async def translate_async(
+async def translate_async(
+    texts: list[str], client: _LLMClient, low_memory: bool = True
+):
     to_translate_idxs = [
         i for i, text in enumerate(texts) if not is_english(text, low_memory=low_memory)
     ]
@@ -40,5 +42,5 @@ async def translate_async(texts: list[str], client: LLMClient, low_memory: bool
     return texts


-def translate(texts: list[str], client:
+def translate(texts: list[str], client: _LLMClient, low_memory: bool = True):
     return asyncio.run(translate_async(texts, client, low_memory))
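`translate_async` keeps its `(texts, client, low_memory)` shape but now expects the private `_LLMClient` type. An illustrative async usage sketch (the client construction is an assumption, not shown in this diff):

```python
# Illustrative sketch; the LLMClient construction is assumed.
import asyncio

from lm_deluge import LLMClient
from lm_deluge.pipelines import translate_async


async def main() -> None:
    client = LLMClient("gpt-4.1-mini")  # assumed registry model name
    texts = ["Hola mundo", "Hello world"]
    translated = await translate_async(texts, client)
    print(translated)  # non-English entries come back translated to English


asyncio.run(main())
```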