lm-deluge 0.0.80__py3-none-any.whl → 0.0.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. lm_deluge/__init__.py +1 -2
  2. lm_deluge/api_requests/anthropic.py +2 -1
  3. lm_deluge/api_requests/base.py +13 -0
  4. lm_deluge/api_requests/gemini.py +1 -1
  5. lm_deluge/api_requests/openai.py +3 -2
  6. lm_deluge/client.py +16 -11
  7. lm_deluge/llm_tools/__init__.py +12 -5
  8. lm_deluge/pipelines/__init__.py +11 -0
  9. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  10. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  11. lm_deluge/prompt.py +105 -0
  12. lm_deluge/request_context.py +2 -2
  13. lm_deluge/{tool.py → tool/__init__.py} +531 -314
  14. lm_deluge/tool/prefab/__init__.py +29 -0
  15. lm_deluge/tool/prefab/batch_tool.py +156 -0
  16. lm_deluge/{llm_tools → tool/prefab}/filesystem.py +1 -1
  17. lm_deluge/tool/prefab/memory.py +190 -0
  18. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  19. lm_deluge/tool/prefab/otc/executor.py +281 -0
  20. lm_deluge/tool/prefab/otc/parse.py +188 -0
  21. lm_deluge/{llm_tools → tool/prefab}/sandbox.py +251 -61
  22. lm_deluge/{llm_tools → tool/prefab}/todos.py +1 -1
  23. lm_deluge/tool/prefab/tool_search.py +169 -0
  24. lm_deluge/tracker.py +16 -13
  25. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/METADATA +2 -3
  26. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/RECORD +34 -28
  27. lm_deluge/presets/cerebras.py +0 -17
  28. lm_deluge/presets/meta.py +0 -13
  29. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  30. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  31. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  32. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  33. /lm_deluge/{llm_tools → tool/prefab}/subagents.py +0 -0
  34. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/WHEEL +0 -0
  35. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/licenses/LICENSE +0 -0
  36. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,7 +1,7 @@
  from .client import APIResponse, LLMClient, SamplingParams
  from .file import File
  from .prompt import Conversation, Message
- from .tool import Tool, ToolParams
+ from .tool import Tool

  try:
      from .mock_openai import ( # noqa
@@ -25,7 +25,6 @@ __all__ = [
      "Conversation",
      "Message",
      "Tool",
-     "ToolParams",
      "File",
  ]

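Note on the export change above: ToolParams is no longer re-exported from the package root in 0.0.82, while Tool still is. A minimal sketch of the import adjustment (whether ToolParams remains importable from elsewhere is not shown in this diff, so treat that as unknown):

    # 0.0.80:
    #   from lm_deluge import Tool, ToolParams
    # 0.0.82: ToolParams is gone from the top-level namespace
    from lm_deluge import Tool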
lm_deluge/api_requests/anthropic.py CHANGED
@@ -64,7 +64,7 @@ def _build_anthropic_request(
      }

      if model.id == "claude-4.5-opus" and sampling_params.global_effort:
-         request_json["effort"] = sampling_params.global_effort
+         request_json["output_config"] = {"effort": sampling_params.global_effort}
          _add_beta(base_headers, "effort-2025-11-24")

      # handle thinking
@@ -115,6 +115,7 @@ def _build_anthropic_request(
      if "4-1" in model.name or "4-5" in model.name:
          request_json.pop("top_p")

+     # print(request_json)
      # Handle structured outputs (output_format)
      if context.output_schema:
          if model.supports_json:
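For context, the first hunk moves the effort setting for claude-4.5-opus from a top-level "effort" field into an "output_config" object, while still attaching the "effort-2025-11-24" beta via _add_beta. A rough sketch of the resulting request body; the surrounding field values are illustrative, not taken from the package:

    request_json = {
        "model": "claude-4.5-opus",               # illustrative
        "max_tokens": 4096,                       # illustrative
        # 0.0.80 sent: "effort": "medium"
        "output_config": {"effort": "medium"},    # 0.0.82
    }
    # the matching beta header is still added, e.g. anthropic-beta: effort-2025-11-24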
lm_deluge/api_requests/base.py CHANGED
@@ -222,6 +222,19 @@ class APIRequestBase(ABC):
                  usage=None,
              )

+         except aiohttp.ServerDisconnectedError:
+             return APIResponse(
+                 id=self.context.task_id,
+                 model_internal=self.context.model_name,
+                 prompt=self.context.prompt,
+                 sampling_params=self.context.sampling_params,
+                 status_code=None,
+                 is_error=True,
+                 error_message="Server disconnected.",
+                 content=None,
+                 usage=None,
+             )
+
          except Exception as e:
              raise_if_modal_exception(e)
              tb = traceback.format_exc()
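The new branch means a dropped connection no longer escapes as aiohttp.ServerDisconnectedError; it is converted into a failed APIResponse like other handled errors. A hypothetical caller-side helper using only the fields shown in the hunk:

    from lm_deluge import APIResponse

    def is_server_disconnect(response: APIResponse) -> bool:
        # hypothetical check: disconnects now surface as an error response
        # with this message rather than as a raised aiohttp exception
        return bool(response.is_error and response.error_message == "Server disconnected.")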
lm_deluge/api_requests/gemini.py CHANGED
@@ -173,7 +173,7 @@ class GeminiRequest(APIRequestBase):
          self.request_json = await _build_gemini_request(
              self.model,
              self.context.prompt,
-             self.context.tools,
+             self.context.tools, # type: ignore
              self.context.sampling_params,
          )

lm_deluge/api_requests/openai.py CHANGED
@@ -2,17 +2,18 @@ import json
  import os
  import traceback as tb
  from types import SimpleNamespace
+ from typing import Sequence

  import aiohttp
  from aiohttp import ClientResponse

  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
- from lm_deluge.warnings import maybe_warn
  from lm_deluge.util.schema import (
      prepare_output_schema,
      transform_schema_for_openai,
  )
+ from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
@@ -610,7 +611,7 @@ async def stream_chat(
      model_name: str, # must correspond to registry
      prompt: Conversation,
      sampling_params: SamplingParams = SamplingParams(),
-     tools: list | None = None,
+     tools: Sequence[Tool | dict | MCPServer] | None = None,
      cache: CachePattern | None = None,
      extra_headers: dict[str, str] | None = None,
  ):
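stream_chat (like the client methods below) now accepts any Sequence of Tool, dict, or MCPServer instead of requiring a list, so tuples satisfy the annotation too. A small sketch; the dict tool shape is purely illustrative and not taken from the package:

    from lm_deluge.tool import MCPServer, Tool  # import path as in the hunk above

    weather_tool = {
        "name": "get_weather",
        "description": "Look up the current weather for a city",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    }
    tools: tuple[Tool | dict | MCPServer, ...] = (weather_tool,)  # a tuple now type-checks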
lm_deluge/client.py CHANGED
@@ -103,6 +103,11 @@ class _LLMClient(BaseModel):
      _tracker: StatusTracker | None = PrivateAttr(default=None)
      _capacity_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)

+     # usage
+     def print_usage(self):
+         if self._tracker:
+             self._tracker.log_usage()
+
      # Progress management for queueing API
      def open(self, total: int | None = None, show_progress: bool = True):
          self._tracker = StatusTracker(
@@ -572,7 +577,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: Literal[True],
          show_progress: bool = ...,
-         tools: list[Tool | dict | MCPServer] | None = ...,
+         tools: Sequence[Tool | dict | MCPServer] | None = ...,
          output_schema: type[BaseModel] | dict | None = ...,
          cache: CachePattern | None = ...,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -585,7 +590,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: Literal[False] = ...,
          show_progress: bool = ...,
-         tools: list[Tool | dict | MCPServer] | None = ...,
+         tools: Sequence[Tool | dict | MCPServer] | None = ...,
          output_schema: type[BaseModel] | dict | None = ...,
          cache: CachePattern | None = ...,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -597,7 +602,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: bool = False,
          show_progress: bool = True,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -672,7 +677,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: bool = False,
          show_progress=True,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
      ):
@@ -705,7 +710,7 @@ class _LLMClient(BaseModel):
          self,
          prompt: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -742,7 +747,7 @@ class _LLMClient(BaseModel):
          self,
          prompt: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -835,7 +840,7 @@ class _LLMClient(BaseModel):
      async def stream(
          self,
          prompt: Prompt,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
      ):
          model, sampling_params = self._select_model()
          prompt = prompts_to_conversations([prompt])[0]
@@ -856,7 +861,7 @@ class _LLMClient(BaseModel):
          task_id: int,
          conversation: Conversation,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
      ) -> AgentLoopResponse:
          """Internal method to run agent loop and return wrapped result."""
@@ -920,7 +925,7 @@ class _LLMClient(BaseModel):
          self,
          conversation: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
      ) -> int:
          """Start an agent loop without waiting for it to complete.
@@ -967,7 +972,7 @@ class _LLMClient(BaseModel):
          self,
          conversation: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
          show_progress: bool = False,
      ) -> tuple[Conversation, APIResponse]:
@@ -986,7 +991,7 @@ class _LLMClient(BaseModel):
          self,
          conversation: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
          show_progress: bool = False,
      ) -> tuple[Conversation, APIResponse]:
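Two user-visible changes in client.py: a new print_usage() helper that logs the StatusTracker's accumulated usage, and Sequence-typed tools parameters across the overloads, the sync/async entry points, stream, and the agent-loop methods. A hypothetical usage sketch; the constructor form and the process_prompts_sync name are assumptions, only print_usage comes from this diff:

    from lm_deluge import LLMClient

    client = LLMClient("gpt-4.1-mini")        # assumed constructor form
    results = client.process_prompts_sync(    # assumed entry-point name
        ["What is 2 + 2?"],
        show_progress=False,
    )
    client.print_usage()                      # new in 0.0.82: logs usage if a tracker exists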
lm_deluge/llm_tools/__init__.py CHANGED
@@ -1,8 +1,15 @@
- from .extract import extract, extract_async
- from .score import score_llm
- from .subagents import SubAgentManager
- from .todos import TodoItem, TodoManager, TodoPriority, TodoStatus
- from .translate import translate, translate_async
+ # Backward compatibility - re-export from new locations
+ # Pipelines (workflow functions)
+ from ..pipelines import extract, extract_async, score_llm, translate, translate_async
+
+ # Prefab tools (Tool managers)
+ from ..tool.prefab import (
+     SubAgentManager,
+     TodoItem,
+     TodoManager,
+     TodoPriority,
+     TodoStatus,
+ )

  __all__ = [
      "extract",
lm_deluge/pipelines/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .extract import extract, extract_async
+ from .score import score_llm
+ from .translate import translate, translate_async
+
+ __all__ = [
+     "extract",
+     "extract_async",
+     "translate",
+     "translate_async",
+     "score_llm",
+ ]
lm_deluge/{llm_tools → pipelines}/score.py RENAMED
@@ -1,4 +1,4 @@
- from ..client import LLMClient, APIResponse
+ from ..client import _LLMClient, APIResponse
  from ..util.logprobs import extract_prob

  # def extract_prob_yes(logprobs: list[dict]):
@@ -24,7 +24,7 @@ from ..util.logprobs import extract_prob
  def score_llm(
      scoring_prompt_template: str,
      inputs: list[tuple | list | dict], # to format the template
-     scoring_model: LLMClient,
+     scoring_model: _LLMClient,
      return_probabilities: bool,
      yes_token: str = "yes",
  ) -> list[bool | None] | list[float | None]:
lm_deluge/{llm_tools → pipelines}/translate.py RENAMED
@@ -1,5 +1,5 @@
  import asyncio
- from ..client import LLMClient
+ from ..client import _LLMClient

  translation_prompt = (
      "Translate the following text (enclosed in ```) into English. "
@@ -20,7 +20,9 @@ def is_english(text: str, low_memory: bool = True):
      return True


- async def translate_async(texts: list[str], client: LLMClient, low_memory: bool = True):
+ async def translate_async(
+     texts: list[str], client: _LLMClient, low_memory: bool = True
+ ):
      to_translate_idxs = [
          i for i, text in enumerate(texts) if not is_english(text, low_memory=low_memory)
      ]
@@ -40,5 +42,5 @@ async def translate_async(texts: list[str], client: LLMClient, low_memory: bool
      return texts


- def translate(texts: list[str], client: LLMClient, low_memory: bool = True):
+ def translate(texts: list[str], client: _LLMClient, low_memory: bool = True):
      return asyncio.run(translate_async(texts, client, low_memory))
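The translate helpers keep their behavior (only items that fail the is_english check are sent for translation); the annotation change from LLMClient to _LLMClient does not change how they are called. A hypothetical usage sketch; the client constructor form is an assumption:

    from lm_deluge import LLMClient
    from lm_deluge.pipelines import translate

    client = LLMClient("gpt-4.1-mini")                 # assumed constructor form
    texts = ["Hello there", "Bonjour tout le monde"]
    translated = translate(texts, client)              # only the non-English item is translated
    # async variant: await translate_async(texts, client, low_memory=True)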
lm_deluge/prompt.py CHANGED
@@ -1598,6 +1598,111 @@ class Conversation:

          return {"messages": serialized}

+     def print(self, max_text_length: int = 500, indent: int = 2) -> None:
+         """Pretty-print the conversation to stdout.
+
+         Args:
+             max_text_length: Truncate text content longer than this (default 500 chars)
+             indent: JSON indentation for tool calls/results (default 2)
+         """
+         ROLE_COLORS = {
+             "system": "\033[95m", # magenta
+             "user": "\033[94m", # blue
+             "assistant": "\033[92m", # green
+             "tool": "\033[93m", # yellow
+         }
+         RESET = "\033[0m"
+         DIM = "\033[2m"
+         BOLD = "\033[1m"
+
+         def truncate(text: str, max_len: int) -> str:
+             if len(text) <= max_len:
+                 return text
+             return (
+                 text[:max_len] + f"{DIM}... [{len(text) - max_len} more chars]{RESET}"
+             )
+
+         def format_json(obj: dict | list, ind: int) -> str:
+             return json.dumps(obj, indent=ind, ensure_ascii=False)
+
+         print(f"\n{BOLD}{'=' * 60}{RESET}")
+         print(f"{BOLD}Conversation ({len(self.messages)} messages){RESET}")
+         print(f"{BOLD}{'=' * 60}{RESET}\n")
+
+         for i, msg in enumerate(self.messages):
+             role_color = ROLE_COLORS.get(msg.role, "")
+             print(f"{role_color}{BOLD}[{msg.role.upper()}]{RESET}")
+
+             for part in msg.parts:
+                 if isinstance(part, Text):
+                     text = truncate(part.text, max_text_length)
+                     # Indent multiline text
+                     lines = text.split("\n")
+                     if len(lines) > 1:
+                         print(" " + "\n ".join(lines))
+                     else:
+                         print(f" {text}")
+
+                 elif isinstance(part, Image):
+                     w, h = part.size
+                     print(f" {DIM}<Image ({w}x{h})>{RESET}")
+
+                 elif isinstance(part, File):
+                     size = part.size
+                     filename = getattr(part, "filename", None)
+                     if filename:
+                         print(f" {DIM}<File: {filename} ({size} bytes)>{RESET}")
+                     else:
+                         print(f" {DIM}<File ({size} bytes)>{RESET}")
+
+                 elif isinstance(part, ToolCall):
+                     print(
+                         f" {DIM}Tool Call:{RESET} {BOLD}{part.name}{RESET} (id: {part.id})"
+                     )
+                     if part.arguments:
+                         args_json = format_json(part.arguments, indent)
+                         # Indent the JSON
+                         indented = "\n".join(
+                             " " + line for line in args_json.split("\n")
+                         )
+                         print(indented)
+
+                 elif isinstance(part, ToolResult):
+                     print(f" {DIM}Tool Result:{RESET} (call_id: {part.tool_call_id})")
+                     if isinstance(part.result, str):
+                         result_text = truncate(part.result, max_text_length)
+                         lines = result_text.split("\n")
+                         for line in lines:
+                             print(f" {line}")
+                     elif isinstance(part.result, dict):
+                         result_json = format_json(part.result, indent)
+                         indented = "\n".join(
+                             " " + line for line in result_json.split("\n")
+                         )
+                         print(indented)
+                     elif isinstance(part.result, list):
+                         print(f" {DIM}<{len(part.result)} content blocks>{RESET}")
+                         for block in part.result:
+                             if isinstance(block, Text):
+                                 block_text = truncate(block.text, max_text_length // 2)
+                                 print(f" [text] {block_text}")
+                             elif isinstance(block, Image):
+                                 bw, bh = block.size
+                                 print(f" {DIM}<Image ({bw}x{bh})>{RESET}")
+
+                 elif isinstance(part, Thinking):
+                     print(f" {DIM}Thinking:{RESET}")
+                     thought = truncate(part.content, max_text_length)
+                     lines = thought.split("\n")
+                     for line in lines:
+                         print(f" {DIM}{line}{RESET}")
+
+             # Separator between messages
+             if i < len(self.messages) - 1:
+                 print(f"\n{'-' * 40}\n")
+
+         print(f"\n{BOLD}{'=' * 60}{RESET}\n")
+
      @classmethod
      def from_log(cls, payload: dict) -> "Conversation":
          """Re-hydrate a Conversation previously produced by `to_log()`."""
lm_deluge/request_context.py CHANGED
@@ -1,6 +1,6 @@
  from dataclasses import dataclass, field
  from functools import cached_property
- from typing import Any, Callable, TYPE_CHECKING
+ from typing import Any, Callable, Sequence, TYPE_CHECKING

  from .config import SamplingParams
  from .prompt import CachePattern, Conversation
@@ -34,7 +34,7 @@ class RequestContext:
      callback: Callable | None = None

      # Optional features
-     tools: list | None = None
+     tools: Sequence[Any] | None = None
      output_schema: "type[BaseModel] | dict | None" = None
      cache: CachePattern | None = None
      use_responses_api: bool = False