lm-deluge 0.0.82__tar.gz → 0.0.84__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.82/src/lm_deluge.egg-info → lm_deluge-0.0.84}/PKG-INFO +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/pyproject.toml +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/anthropic.py +9 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/gemini.py +34 -2
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/openai.py +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/client.py +100 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/__init__.py +3 -1
- lm_deluge-0.0.84/src/lm_deluge/models/arcee.py +16 -0
- lm_deluge-0.0.84/src/lm_deluge/models/deepseek.py +59 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/google.py +14 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/kimi.py +2 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/openrouter.py +10 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/together.py +11 -0
- lm_deluge-0.0.84/src/lm_deluge/models/zai.py +1 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/prompt.py +39 -11
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/__init__.py +11 -4
- lm_deluge-0.0.84/src/lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/base.py +27 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/trycua.py +989 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/__init__.py +8 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/web_search.py +195 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/warnings.py +1 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/SOURCES.txt +22 -18
- lm_deluge-0.0.82/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge-0.0.82/src/lm_deluge/llm_tools/__init__.py +0 -25
- lm_deluge-0.0.82/src/lm_deluge/models/deepseek.py +0 -27
- lm_deluge-0.0.82/src/lm_deluge/tool/prefab/filesystem.py +0 -821
- lm_deluge-0.0.82/src/lm_deluge/tool/prefab/memory.py +0 -190
- lm_deluge-0.0.82/tests/test_batch_tool.py +0 -98
- lm_deluge-0.0.82/tests/test_builtin_tools.py +0 -58
- lm_deluge-0.0.82/tests/test_file_upload.py +0 -627
- lm_deluge-0.0.82/tests/test_filesystem.py +0 -121
- lm_deluge-0.0.82/tests/test_filesystem_live.py +0 -82
- lm_deluge-0.0.82/tests/test_mock_openai.py +0 -582
- lm_deluge-0.0.82/tests/test_native_mcp_server.py +0 -66
- lm_deluge-0.0.82/tests/test_openrouter_generic.py +0 -238
- lm_deluge-0.0.82/tests/test_otc.py +0 -117
- lm_deluge-0.0.82/tests/test_tool_search.py +0 -86
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/LICENSE +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/README.md +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/setup.cfg +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/anthropic.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/openai.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/classify.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/extract.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/locate.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/ocr.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/score.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/translate.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/anthropic/bash.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/anthropic/editor.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/openai.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/otc/executor.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/sandbox.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/subagents.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/todos.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/schema.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/top_level.txt +0 -0
src/lm_deluge/api_requests/anthropic.py

@@ -101,11 +101,14 @@ def _build_anthropic_request(
             request_json["max_tokens"] += budget
         else:
             request_json["thinking"] = {"type": "disabled"}
+            if "kimi" in model.id and "thinking" in model.id:
+                maybe_warn("WARN_KIMI_THINKING_NO_REASONING")
 
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
             print("ignoring reasoning_effort for non-reasoning model")
+
     if system_message is not None:
         request_json["system"] = system_message
 
@@ -164,6 +167,9 @@ def _build_anthropic_request(
                 "bash_20241022",
             ]:
                 _add_beta(base_headers, "computer-use-2024-10-22")
+            elif tool["type"] == "computer_20251124":
+                # Claude Opus 4.5 - newest computer use with zoom support
+                _add_beta(base_headers, "computer-use-2025-11-24")
            elif tool["type"] == "computer_20250124":
                 _add_beta(base_headers, "computer-use-2025-01-24")
             elif tool["type"] == "code_execution_20250522":
@@ -231,6 +237,9 @@ class AnthropicRequest(APIRequestBase):
         data = await http_response.json()
         response_content = data["content"]
 
+        # print("=== CONTENT ===")
+        # print(response_content)
+
         # Parse response into Message with parts
         parts = []
         for item in response_content:
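A hedged sketch of how the new tool type is passed so the request picks up the matching beta header. The fields other than "type" follow Anthropic's published computer-use tool shape and are assumptions here; the diff itself only wires the header mapping.

```python
# Sketch only: fields besides "type" are assumed from Anthropic's computer-use
# tool schema; this diff only adds the beta-header mapping for the new type.
computer_tool = {
    "type": "computer_20251124",   # Claude Opus 4.5 computer use (zoom support)
    "name": "computer",
    "display_width_px": 1280,
    "display_height_px": 800,
}

# When this dict appears in `tools`, _build_anthropic_request now adds the
# "computer-use-2025-11-24" beta header, alongside the existing
# computer_20250124 / computer_20241022 mappings.
```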

src/lm_deluge/api_requests/gemini.py

@@ -114,8 +114,40 @@ async def _build_gemini_request(
 
     # Add tools if provided
     if tools:
-
-
+        request_tools = []
+        function_declarations = []
+
+        for tool in tools:
+            if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                # Gemini computer use tool - add as separate tool entry
+                env_map = {
+                    "browser": "ENVIRONMENT_BROWSER",
+                    "android": "ENVIRONMENT_ANDROID",
+                }
+                env = env_map.get(
+                    tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                )
+                cu_tool: dict = {
+                    "computerUse": {
+                        "environment": env,
+                    }
+                }
+                excluded = tool.get("excluded_predefined_functions")
+                if excluded:
+                    cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                request_tools.append(cu_tool)
+            elif hasattr(tool, "dump_for"):
+                # Regular Tool object
+                function_declarations.append(tool.dump_for("google"))
+            elif isinstance(tool, dict):
+                # Raw dict tool - assume it's a function declaration
+                function_declarations.append(tool)
+
+        if function_declarations:
+            request_tools.append({"functionDeclarations": function_declarations})
+
+        if request_tools:
+            request_json["tools"] = request_tools
 
     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
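A minimal sketch of the tool list the new branch accepts and the Gemini tools payload it produces; the specific values are illustrative, not defined by the diff.

```python
# Sketch only: shows the dict shape the new gemini_computer_use branch consumes
# and the request_json["tools"] entry it builds; values are illustrative.
tools = [
    {
        "type": "gemini_computer_use",
        "environment": "browser",                        # or "android"
        "excluded_predefined_functions": ["drag_and_drop"],  # hypothetical exclusion
    },
    # Regular Tool objects and raw function-declaration dicts still work alongside it
    # and are collected into a separate {"functionDeclarations": [...]} entry.
]

# Resulting request_json["tools"]:
# [
#     {"computerUse": {"environment": "ENVIRONMENT_BROWSER",
#                      "excludedPredefinedFunctions": ["drag_and_drop"]}},
# ]
```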

src/lm_deluge/api_requests/openai.py

@@ -367,7 +367,7 @@ async def _build_oa_responses_request(
         elif isinstance(tool, dict):
             # if computer use, make sure model supports it
             if tool["type"] == "computer_use_preview":
-                if model.name != "
+                if model.name != "computer-use-preview":
                     raise ValueError(f"model {model.id} does not support computer use")
                 # have to use truncation
                 request_json["truncation"] = "auto"

src/lm_deluge/client.py

@@ -84,6 +84,8 @@ class _LLMClient(BaseModel):
     json_mode: bool = False
     max_new_tokens: int = 512
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    global_effort: Literal["low", "medium", "high"] | None = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     force_local_mcp: bool = False
@@ -212,6 +214,8 @@ class _LLMClient(BaseModel):
             json_mode=self.json_mode,
             max_new_tokens=self.max_new_tokens,
             reasoning_effort=self.reasoning_effort,
+            global_effort=self.global_effort or "high",
+            thinking_budget=self.thinking_budget,
             logprobs=self.logprobs,
             top_logprobs=self.top_logprobs,
         )
@@ -347,6 +351,8 @@ class _LLMClient(BaseModel):
             json_mode=data.get("json_mode", False),
             max_new_tokens=data.get("max_new_tokens", 512),
             reasoning_effort=data.get("reasoning_effort", None),
+            global_effort=data.get("global_effort") or "high",
+            thinking_budget=data.get("thinking_budget", None),
             logprobs=data.get("logprobs", False),
             top_logprobs=data.get("top_logprobs", None),
         )
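The two new sampling fields also flow through the public `LLMClient` factory (see the signature hunks further below), so they can be set at construction time. A minimal sketch, assuming the usual pattern of passing a registered model id to the factory; the id here is hypothetical.

```python
# Sketch only: the model id is illustrative, and how global_effort/thinking_budget
# are interpreted is provider-dependent.
from lm_deluge import LLMClient

client = LLMClient(
    "claude-4.5-sonnet",        # hypothetical registered model id
    max_new_tokens=2048,
    reasoning_effort="medium",  # per-request effort, as before
    global_effort="high",       # new field; defaults to "high" when unset
    thinking_budget=8192,       # new field; explicit thinking-token budget
)
responses = client.process_prompts_sync(["Summarize the changes in this release."])
```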

@@ -1006,6 +1012,92 @@ class _LLMClient(BaseModel):
             )
         )
 
+    async def process_agent_loops_async(
+        self,
+        prompts: Sequence[Prompt],
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+        max_concurrent_agents: int = 10,
+        show_progress: bool = True,
+    ) -> list[tuple[Conversation, APIResponse]]:
+        """Process multiple agent loops concurrently.
+
+        Each prompt becomes an independent agent loop that can make multiple LLM
+        calls and execute tools until completion. The agent loops run concurrently,
+        limited by ``max_concurrent_agents``, while the underlying LLM requests
+        are still governed by ``max_concurrent_requests``.
+
+        Args:
+            prompts: Sequence of prompts, each becoming a separate agent loop.
+            tools: Tools available to all agent loops.
+            max_rounds: Maximum rounds per agent loop (default 5).
+            max_concurrent_agents: Maximum number of agent loops running
+                concurrently (default 10). This is separate from the LLM request
+                concurrency limit.
+            show_progress: Whether to show progress bar for LLM requests.
+
+        Returns:
+            List of (Conversation, APIResponse) tuples in the same order as
+            the input prompts.
+        """
+        # Convert prompts to Conversations
+        conversations = prompts_to_conversations(list(prompts))
+
+        # Ensure tracker exists for underlying LLM requests
+        if self._tracker is None:
+            self.open(total=0, show_progress=show_progress)
+            tracker_preopened = False
+        else:
+            tracker_preopened = True
+
+        # Semaphore to limit concurrent agent loops
+        agent_semaphore = asyncio.Semaphore(max_concurrent_agents)
+
+        async def run_single_loop(
+            idx: int, conv: Conversation
+        ) -> tuple[int, Conversation, APIResponse]:
+            """Run a single agent loop with semaphore protection."""
+            async with agent_semaphore:
+                task_id = self._next_task_id
+                self._next_task_id += 1
+                result = await self._run_agent_loop_internal(
+                    task_id, conv, tools=tools, max_rounds=max_rounds
+                )
+                return idx, result.conversation, result.final_response
+
+        # Launch all agent loops concurrently (semaphore limits actual concurrency)
+        tasks = [run_single_loop(idx, conv) for idx, conv in enumerate(conversations)]
+        completed = await asyncio.gather(*tasks)
+
+        # Close tracker if we opened it
+        if not tracker_preopened:
+            self.close()
+
+        # Sort by original index and extract results
+        completed_sorted = sorted(completed, key=lambda x: x[0])
+        return [(conv, resp) for _, conv, resp in completed_sorted]
+
+    def process_agent_loops_sync(
+        self,
+        prompts: Sequence[Prompt],
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+        max_concurrent_agents: int = 10,
+        show_progress: bool = True,
+    ) -> list[tuple[Conversation, APIResponse]]:
+        """Synchronous wrapper for :meth:`process_agent_loops_async`."""
+        return asyncio.run(
+            self.process_agent_loops_async(
+                prompts,
+                tools=tools,
+                max_rounds=max_rounds,
+                max_concurrent_agents=max_concurrent_agents,
+                show_progress=show_progress,
+            )
+        )
+
     async def submit_batch_job(
         self,
         prompts: Prompt | Sequence[Prompt],
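A minimal sketch of driving the new agent-loop API. It assumes the client is constructed the usual way (by registered model id), that `Tool.from_function` wraps a plain Python callable, and that the response exposes a `completion` attribute as elsewhere in the library; the model id and the weather tool are made up for illustration.

```python
# Sketch only: the model id and get_weather tool are illustrative.
from lm_deluge import LLMClient
from lm_deluge.tool import Tool


def get_weather(city: str) -> str:
    """Return a canned weather report for a city."""
    return f"It is sunny in {city}."


client = LLMClient("gpt-4.1-mini", max_concurrent_requests=20)
results = client.process_agent_loops_sync(
    ["What's the weather in Paris?", "What's the weather in Tokyo?"],
    tools=[Tool.from_function(get_weather)],
    max_rounds=5,               # cap on LLM round-trips per loop
    max_concurrent_agents=10,   # loops in flight; requests still obey client limits
)
for conversation, final_response in results:
    print(final_response.completion if final_response else None)
```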

@@ -1077,6 +1169,8 @@ def LLMClient(
     json_mode: bool = False,
     max_new_tokens: int = 512,
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1106,6 +1200,8 @@ def LLMClient(
     json_mode: bool = False,
     max_new_tokens: int = 512,
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1134,6 +1230,8 @@ def LLMClient(
     json_mode: bool = False,
     max_new_tokens: int = 512,
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1174,6 +1272,8 @@ def LLMClient(
         json_mode=json_mode,
         max_new_tokens=max_new_tokens,
         reasoning_effort=reasoning_effort,
+        global_effort=global_effort,
+        thinking_budget=thinking_budget,
         logprobs=logprobs,
         top_logprobs=top_logprobs,
         force_local_mcp=force_local_mcp,

src/lm_deluge/models/__init__.py

@@ -4,9 +4,10 @@ import random
 from dataclasses import dataclass, field
 
 from ..request_context import RequestContext
+from .anthropic import ANTHROPIC_MODELS
 
 # Import and register all provider models
-from .
+from .arcee import ARCEE_MODELS
 from .bedrock import BEDROCK_MODELS
 from .cerebras import CEREBRAS_MODELS
 from .cohere import COHERE_MODELS
@@ -128,6 +129,7 @@ def register_model(
 # Register all models from all providers
 for model_dict in [
     ANTHROPIC_MODELS,
+    ARCEE_MODELS,
     BEDROCK_MODELS,
     COHERE_MODELS,
     DEEPSEEK_MODELS,
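With the new provider modules registered, the added ids resolve through the same registry as the existing models. A minimal sketch, assuming the usual pattern of passing a registered id to the client factory and that the relevant API keys are set.

```python
# Sketch only: assumes ARCEE_API_KEY / DEEPSEEK_API_KEY are set in the environment.
from lm_deluge import LLMClient

arcee = LLMClient("trinity-mini")          # new Arcee entry (OpenAI-spec endpoint)
deepseek = LLMClient("deepseek-reasoner")  # re-added DeepSeek reasoner entry

resps = deepseek.process_prompts_sync(["Why is the sky blue?"])
print(resps[0].completion)
```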

src/lm_deluge/models/arcee.py (new file)

@@ -0,0 +1,16 @@
+ARCEE_MODELS = {
+    "trinity-mini": {
+        "id": "trinity-mini",
+        "name": "trinity-mini",
+        "api_base": "https://api.arcee.ai/api/v1",
+        "api_key_env_var": "ARCEE_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": False,
+        "api_spec": "openai",
+        "input_cost": 0.045,
+        "cached_input_cost": 0.045,
+        "output_cost": 0.15,
+        "reasoning_model": True,
+    }
+}

src/lm_deluge/models/deepseek.py (new file)

@@ -0,0 +1,59 @@
+DEEPSEEK_MODELS = {
+    # ______ _
+    # (______) | |
+    # _ _ _____ _____ ____ ___ _____ _____| | _
+    # | | | | ___ | ___ | _ \ /___) ___ | ___ | |_/ )
+    # | |__/ /| ____| ____| |_| |___ | ____| ____| _ (
+    # |_____/ |_____)_____) __/(___/|_____)_____)_| \_)
+    # |_|
+    "deepseek-chat": {
+        "id": "deepseek-chat",
+        "name": "deepseek-chat",
+        "api_base": "https://api.deepseek.com/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-r1": {
+        "id": "deepseek-r1",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-reasoner": {
+        "id": "deepseek-reasoner",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-reasoner-anthropic-compat": {
+        "id": "deepseek-reasoner-anthropic-compat",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/anthropic",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "anthropic",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-speciale": {
+        "id": "deepseek-speciale",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/v3.2_speciale_expires_on_20251215/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+}

src/lm_deluge/models/google.py

@@ -153,4 +153,18 @@ GOOGLE_MODELS = {
         # Note: >200k tokens pricing is $4/$18 per million
         "reasoning_model": True,
     },
+    # Gemini 2.5 Computer Use model
+    "gemini-2.5-computer-use": {
+        "id": "gemini-2.5-computer-use",
+        "name": "gemini-2.5-computer-use-preview-10-2025",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 1.25,  # same as gemini-2.5-pro for now
+        "cached_input_cost": 0.31,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
 }

src/lm_deluge/models/kimi.py

@@ -22,6 +22,7 @@ KIMI_MODELS = {
         "api_key_env_var": "MOONSHOT_API_KEY",
         "supports_json": True,
         "api_spec": "anthropic",
+        "reasoning_model": True,
     },
     "kimi-k2-thinking-turbo": {
         "id": "kimi-k2-thinking-turbo",
@@ -30,5 +31,6 @@ KIMI_MODELS = {
         "api_key_env_var": "MOONSHOT_API_KEY",
         "supports_json": True,
         "api_spec": "anthropic",
+        "reasoning_model": True,
     },
 }

src/lm_deluge/models/openrouter.py

@@ -71,4 +71,14 @@ OPENROUTER_MODELS = {
         "input_cost": 0.2,
         "output_cost": 35,
     },
+    "trinity-mini-openrouter": {
+        "id": "trinity-mini-openrouter",
+        "name": "arcee-ai/trinity-mini:free",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.045,
+        "output_cost": 0.15,
+    },
 }

src/lm_deluge/models/together.py

@@ -93,4 +93,15 @@ TOGETHER_MODELS = {
         "output_cost": 0.59,
         "reasoning_model": True,
     },
+    "trinity-mini-together": {
+        "id": "trinity-mini-together",
+        "name": "arcee-ai/trinity-mini",
+        "api_base": "https://api.together.xyz/v1",
+        "api_key_env_var": "TOGETHER_API_KEY",
+        "supports_json": False,
+        "api_spec": "openai",
+        "input_cost": 0.18,
+        "output_cost": 0.59,
+        "reasoning_model": True,
+    },
 }

src/lm_deluge/models/zai.py (new file)

@@ -0,0 +1 @@
+ZAI_MODELS = {}

src/lm_deluge/prompt.py

@@ -203,6 +203,8 @@ class ToolResult:
             "call_id": self.tool_call_id,
         }
         if self.built_in_type == "computer_call":
+            # OpenAI expects "computer_call_output" for the result type
+            result["type"] = "computer_call_output"
             result["output"] = output_data.get("output", {})
             if "acknowledged_safety_checks" in output_data:
                 result["acknowledged_safety_checks"] = output_data[
@@ -235,15 +237,41 @@ class ToolResult:
         raise ValueError("unsupported self.result type")
 
     def gemini(self) -> dict:
-
-
-
-            "functionResponse": {
-                "name": self.tool_call_id,  # Gemini uses name field for ID
-                "response": {"result": self.result},
-            }
+        # Build the function response
+        func_response: dict = {
+            "name": self.tool_call_id,  # Gemini uses name field for ID
         }
 
+        # Handle different result types
+        if isinstance(self.result, str):
+            func_response["response"] = {"result": self.result}
+        elif isinstance(self.result, dict):
+            # Check for Gemini computer use format with inline screenshot
+            if self.built_in_type == "gemini_computer_use":
+                # Gemini CU expects response dict with optional inline_data parts
+                func_response["response"] = self.result.get("response", {})
+                # Include inline data (screenshot) if present
+                if "inline_data" in self.result:
+                    func_response["parts"] = [
+                        {
+                            "inlineData": {
+                                "mimeType": self.result["inline_data"].get(
+                                    "mime_type", "image/png"
+                                ),
+                                "data": self.result["inline_data"]["data"],
+                            }
+                        }
+                    ]
+            else:
+                func_response["response"] = self.result
+        elif isinstance(self.result, list):
+            # Handle content blocks (images, etc.) - not yet implemented
+            raise ValueError("can't handle content blocks for gemini yet")
+        else:
+            func_response["response"] = {"result": str(self.result)}
+
+        return {"functionResponse": func_response}
+
     def mistral(self) -> dict:
         return {
             "type": "tool_result",
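A small sketch of what the reworked ToolResult.gemini() produces for a Gemini computer-use result carrying an inline screenshot. The constructor keyword names are assumed from the fields the diff references (tool_call_id, result, built_in_type); the values are illustrative.

```python
# Sketch only: constructor kwargs are assumed from the attributes used above.
from lm_deluge.prompt import ToolResult

tr = ToolResult(
    tool_call_id="click_at",
    result={
        "response": {"url": "https://example.com"},
        "inline_data": {"mime_type": "image/png", "data": "<base64 screenshot>"},
    },
    built_in_type="gemini_computer_use",
)

print(tr.gemini())
# {'functionResponse': {'name': 'click_at',
#                       'response': {'url': 'https://example.com'},
#                       'parts': [{'inlineData': {'mimeType': 'image/png',
#                                                 'data': '<base64 screenshot>'}}]}}
```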

@@ -1367,14 +1395,14 @@ class Conversation:
             # For assistant messages, extract computer calls as separate items
             text_parts = []
             for p in m.parts:
-                if isinstance(p, ToolCall) and p.
+                if isinstance(p, ToolCall) and p.built_in_type == "computer_call":
                     # Computer calls become separate items in the input array
-
+                    # p.arguments already contains the full action dict with "type"
                     input_items.append(
                         {
                             "type": "computer_call",
                             "call_id": p.id,
-                            "action":
+                            "action": p.arguments,
                         }
                     )
                 elif isinstance(p, Text):
@@ -1752,7 +1780,7 @@ class Conversation:
 Prompt: TypeAlias = str | list[dict] | Message | Conversation
 
 
-def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[
+def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[Conversation]:
     converted = []
     for prompt in prompts:
         if isinstance(prompt, Conversation):

src/lm_deluge/tool/__init__.py

@@ -614,6 +614,7 @@ class Tool(BaseModel):
         cls,
         func: Callable,
         *,
+        name: str | None = None,
         include_output_schema_in_description: bool = False,
     ) -> "Tool":
         """
@@ -629,6 +630,8 @@ class Tool(BaseModel):
 
         Args:
             func: The function to create a tool from.
+            name: Optional name override for the tool. If not provided,
+                uses the function's __name__.
             include_output_schema_in_description: If True, append the return type
                 and any complex type definitions to the tool description. This can
                 help the model understand what the tool returns. Default is False.
@@ -646,6 +649,10 @@ class Tool(BaseModel):
             # tool.output_schema contains schema for list[dict]
             # tool.call(query="test", validate_output=True) validates return value
 
+            # With custom name:
+            tool = Tool.from_function(search, name="search_database")
+            # tool.name is "search_database"
+
             # With output schema in description:
             tool = Tool.from_function(search, include_output_schema_in_description=True)
             # Description becomes:
@@ -653,11 +660,11 @@ class Tool(BaseModel):
             #
             # Returns: list[dict]"
         """
-        # Get function name
-
+        # Get function name (use override if provided)
+        tool_name = name if name is not None else func.__name__
 
         # Get docstring for description
-        description = func.__doc__ or f"Call the {
+        description = func.__doc__ or f"Call the {tool_name} function"
         description = description.strip()
 
         # Use TypeAdapter for robust schema generation
@@ -705,7 +712,7 @@ class Tool(BaseModel):
             description = f"{description}\n\n{output_info}"
 
         tool = cls(
-            name=
+            name=tool_name,
             description=description,
             parameters=parameters,
             required=required,
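In practice the new name override looks like this; a minimal sketch in which the fetch_rows function is made up for illustration, while from_function(name=...) and tool.call(...) follow the docstring above.

```python
# Sketch only: fetch_rows is an illustrative function.
from lm_deluge.tool import Tool


def fetch_rows(table: str, limit: int = 10) -> list[dict]:
    """Fetch up to `limit` rows from `table`."""
    return [{"table": table, "row": i} for i in range(limit)]


tool = Tool.from_function(fetch_rows, name="query_table")
print(tool.name)                          # "query_table", not "fetch_rows"
print(tool.call(table="users", limit=2))  # invokes fetch_rows under the hood
```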