lm-deluge 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff shows the changes between package versions that have been publicly released to one of the supported registries, as they appear in those registries. It is provided for informational purposes only.

lm_deluge/tracker.py CHANGED
@@ -1,21 +1,109 @@
+import asyncio
 import time
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from rich.console import Console, Group
+from rich.live import Live
+from rich.progress import (
+    BarColumn,
+    MofNCompleteColumn,
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+)
+from rich.text import Text
+from tqdm import tqdm
+
+SECONDS_TO_PAUSE_AFTER_RATE_LIMIT_ERROR = 5
 
 
 @dataclass
 class StatusTracker:
+    max_requests_per_minute: int
+    max_tokens_per_minute: int
+    max_concurrent_requests: int
     num_tasks_started: int = 0
     num_tasks_in_progress: int = 0
     num_tasks_succeeded: int = 0
     num_tasks_failed: int = 0
     num_rate_limit_errors: int = 0
     time_of_last_rate_limit_error: int | float = 0
-    total_requests = 0
+    total_requests: int = 0
+    retry_queue: asyncio.Queue = field(default_factory=asyncio.Queue)
+
+    # Progress bar configuration
+    use_progress_bar: bool = True
+    progress_bar_total: int | None = None
+    progress_bar_disable: bool = False
+    _pbar: tqdm | None = None
+
+    # Rich display configuration
+    use_rich: bool = True
+    _rich_console: Console | None = None
+    _rich_live: object | None = None
+    _rich_progress: object | None = None
+    _rich_task_id: object | None = None
+    _rich_display_task: asyncio.Task | None = None
+    _rich_stop_event: asyncio.Event | None = None
+
+    def __post_init__(self):
+        self.available_request_capacity = self.max_requests_per_minute
+        self.available_token_capacity = self.max_tokens_per_minute
+        self.last_update_time = time.time() - 1
+        self.last_pbar_update_time = time.time() - 1
+        self.limiting_factor = None
 
     @property
     def time_since_rate_limit_error(self):
         return time.time() - self.time_of_last_rate_limit_error
 
+    @property
+    def seconds_to_pause(self):
+        return max(
+            0,
+            SECONDS_TO_PAUSE_AFTER_RATE_LIMIT_ERROR - self.time_since_rate_limit_error,
+        )
+
+    def set_limiting_factor(self, factor):
+        self.limiting_factor = factor
+
+    def check_capacity(self, num_tokens: int):
+        request_available = self.available_request_capacity >= 1
+        tokens_available = self.available_token_capacity >= num_tokens
+        concurrent_request_available = (
+            self.num_tasks_in_progress < self.max_concurrent_requests
+        )
+        if request_available and tokens_available and concurrent_request_available:
+            self.available_request_capacity -= 1
+            self.available_token_capacity -= num_tokens
+            self.num_tasks_started += 1
+            self.num_tasks_in_progress += 1
+            self.set_limiting_factor(None)
+            return True
+        else:
+            # update reason why
+            if not request_available:
+                self.set_limiting_factor("Requests")
+            elif not concurrent_request_available:
+                self.set_limiting_factor("Concurrent Requests")
+            elif not tokens_available:
+                self.set_limiting_factor("Tokens")
+
+    def update_capacity(self):
+        current_time = time.time()
+        seconds_since_update = current_time - self.last_update_time
+        self.available_request_capacity = min(
+            self.available_request_capacity
+            + self.max_requests_per_minute * seconds_since_update / 60.0,
+            self.max_requests_per_minute,
+        )
+        self.available_token_capacity = min(
+            self.available_token_capacity
+            + self.max_tokens_per_minute * seconds_since_update / 60.0,
+            self.max_tokens_per_minute,
+        )
+        self.last_update_time = current_time
+
     def start_task(self, task_id):
         self.num_tasks_started += 1
         self.num_tasks_in_progress += 1
@@ -27,12 +115,16 @@ class StatusTracker:
     def task_succeeded(self, task_id):
         self.num_tasks_in_progress -= 1
         self.num_tasks_succeeded += 1
+        self.increment_pbar()
 
     def task_failed(self, task_id):
         self.num_tasks_in_progress -= 1
         self.num_tasks_failed += 1
 
     def log_final_status(self):
+        # Close progress bar before printing final status
+        self.close_progress_bar()
+
         if self.num_tasks_failed > 0:
             print(
                 f"{self.num_tasks_failed} / {self.num_tasks_started} requests failed."
@@ -41,3 +133,121 @@ class StatusTracker:
             print(
                 f"{self.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate."
             )
+
+    @property
+    def pbar(self) -> tqdm | None:
+        """Backward compatibility property to access progress bar."""
+        return self._pbar
+
+    def init_progress_bar(self, total: int | None = None, disable: bool | None = None):
+        """Initialize progress bar if enabled."""
+        if not self.use_progress_bar:
+            return
+
+        if self.use_rich:
+            self._init_rich_display(total, disable)
+        else:
+            # Use provided values or fall back to instance defaults
+            pbar_total = total if total is not None else self.progress_bar_total
+            pbar_disable = disable if disable is not None else self.progress_bar_disable
+            self._pbar = tqdm(total=pbar_total, disable=pbar_disable)
+            self.update_pbar()
+
+    def close_progress_bar(self):
+        """Close progress bar if it exists."""
+        if self.use_rich and self._rich_stop_event:
+            self._close_rich_display()
+        elif self._pbar is not None:
+            self._pbar.close()
+            self._pbar = None
+
+    def _init_rich_display(self, total: int | None = None, disable: bool | None = None):
+        """Initialize Rich display components."""
+        if disable:
+            return
+
+        pbar_total = total if total is not None else self.progress_bar_total
+        if pbar_total is None:
+            pbar_total = 100  # Default fallback
+
+        self._rich_console = Console()
+        self._rich_stop_event = asyncio.Event()
+
+        # Start the display updater task
+        self._rich_display_task = asyncio.create_task(
+            self._rich_display_updater(pbar_total)
+        )
+
+    async def _rich_display_updater(self, total: int):
+        """Update Rich display independently."""
+        if not self._rich_console or self._rich_stop_event is None:
+            return
+
+        # Create progress bar without console so we can use it in Live
+        progress = Progress(
+            SpinnerColumn(),
+            TextColumn("Processing requests..."),
+            BarColumn(),
+            MofNCompleteColumn(),
+        )
+        main_task = progress.add_task("requests", total=total)
+
+        # Use Live to combine progress + text
+
+        with Live(console=self._rich_console, refresh_per_second=10) as live:
+            while not self._rich_stop_event.is_set():
+                completed = self.num_tasks_succeeded
+                progress.update(main_task, completed=completed)
+
+                # Create capacity info text
+                tokens_info = f"TPM Capacity: {self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k"
+                reqs_info = f"RPM Capacity: {int(self.available_request_capacity)}/{self.max_requests_per_minute}"
+                in_progress = f"In Progress: {int(self.num_tasks_in_progress)}"
+                capacity_text = Text(f"{in_progress} • {tokens_info} • {reqs_info}")
+
+                # Group progress bar and text
+                display = Group(progress, capacity_text)
+                live.update(display)
+
+                await asyncio.sleep(0.1)
+
+    def _close_rich_display(self):
+        """Clean up Rich display."""
+        if self._rich_stop_event:
+            self._rich_stop_event.set()
+        if self._rich_display_task and not self._rich_display_task.done():
+            self._rich_display_task.cancel()
+
+        self._rich_console = None
+        self._rich_live = None
+        self._rich_display_task = None
+        self._rich_stop_event = None
+
+    def update_pbar(self, n: int = 0):
+        """Update progress bar status and optionally increment.
+
+        Args:
+            n: Number of items to increment (0 means just update postfix)
+        """
+        current_time = time.time()
+        if self._pbar and (current_time - self.last_pbar_update_time > 1):
+            self.last_pbar_update_time = current_time
+            self._pbar.set_postfix(
+                {
+                    "Token Capacity": f"{self.available_token_capacity / 1_000:.1f}k",
+                    "Req. Capacity": f"{int(self.available_request_capacity)}",
+                    "Reqs. in Progress": self.num_tasks_in_progress,
+                    "Limiting Factor": self.limiting_factor,
+                }
+            )
+
+        if n > 0 and self._pbar:
+            self._pbar.update(n)
+
+    def increment_pbar(self):
+        """Increment progress bar by 1."""
+        if self.use_rich:
+            # Rich display is updated automatically by the display updater
+            pass
+        elif self._pbar:
+            self._pbar.update(1)
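
The new `StatusTracker` fields and methods implement a token-bucket rate limiter: `update_capacity` refills the request and token budgets in proportion to elapsed time, `check_capacity` reserves one request plus an estimated token count (recording the limiting factor when it can't), and the task callbacks feed the progress display. A minimal, hypothetical driver loop is sketched below; it is not taken from this package, and the token estimate and sleep interval are assumptions.

```python
import asyncio

from lm_deluge.tracker import StatusTracker


async def run_requests(prompts: list[str]) -> None:
    tracker = StatusTracker(
        max_requests_per_minute=1_000,
        max_tokens_per_minute=100_000,
        max_concurrent_requests=50,
    )
    tracker.init_progress_bar(total=len(prompts))

    pending = list(enumerate(prompts))
    while pending:
        tracker.update_capacity()  # replenish request/token budgets based on elapsed time
        task_id, prompt = pending[0]
        estimated_tokens = len(prompt) // 4  # rough token estimate (assumption)
        if tracker.check_capacity(estimated_tokens):
            pending.pop(0)
            # ...dispatch the actual API call here, then report the outcome via
            # tracker.task_succeeded(task_id) or tracker.task_failed(task_id).
            tracker.task_succeeded(task_id)
        else:
            # back off briefly; seconds_to_pause is nonzero after a rate limit error
            await asyncio.sleep(tracker.seconds_to_pause or 0.05)

    tracker.log_final_status()


asyncio.run(run_requests(["hello"] * 5))
```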
lm_deluge/usage.py ADDED
@@ -0,0 +1,114 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class Usage:
+    """
+    Unified usage tracking for all API providers.
+
+    Tracks token usage including cache hits and writes for providers that support it.
+    For providers that don't support caching, cache_read and cache_write will be None.
+    """
+
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_read_tokens: Optional[int] = None  # Tokens read from cache (Anthropic)
+    cache_write_tokens: Optional[int] = None  # Tokens written to cache (Anthropic)
+
+    @property
+    def total_input_tokens(self) -> int:
+        """Total input tokens including both fresh input, cache writes, and cache reads."""
+        result = self.input_tokens
+        if self.cache_read_tokens is not None:
+            result += self.cache_read_tokens
+        if self.cache_write_tokens is not None:
+            result += self.cache_write_tokens
+        return result
+
+    @property
+    def total_tokens(self) -> int:
+        """Total tokens processed (input + output)."""
+        return self.total_input_tokens + self.output_tokens
+
+    @property
+    def has_cache_hit(self) -> bool:
+        """Whether this request had any cache hits."""
+        return self.cache_read_tokens is not None and self.cache_read_tokens > 0
+
+    @property
+    def has_cache_write(self) -> bool:
+        """Whether this request wrote to cache."""
+        return self.cache_write_tokens is not None and self.cache_write_tokens > 0
+
+    @classmethod
+    def from_anthropic_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from Anthropic API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("input_tokens", 0),
+            output_tokens=usage_data.get("output_tokens", 0),
+            cache_read_tokens=usage_data.get("cache_read_input_tokens"),
+            cache_write_tokens=usage_data.get("cache_creation_input_tokens"),
+        )
+
+    @classmethod
+    def from_openai_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from OpenAI API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("prompt_tokens", 0),
+            output_tokens=usage_data.get("completion_tokens", 0),
+            cache_read_tokens=None,  # OpenAI doesn't support caching yet
+            cache_write_tokens=None,
+        )
+
+    @classmethod
+    def from_mistral_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from Mistral API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("prompt_tokens", 0),
+            output_tokens=usage_data.get("completion_tokens", 0),
+            cache_read_tokens=None,  # Mistral doesn't support caching
+            cache_write_tokens=None,
+        )
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for serialization."""
+        return {
+            "input_tokens": self.input_tokens,
+            "output_tokens": self.output_tokens,
+            "cache_read_tokens": self.cache_read_tokens,
+            "cache_write_tokens": self.cache_write_tokens,
+            "total_input_tokens": self.total_input_tokens,
+            "total_tokens": self.total_tokens,
+            "has_cache_hit": self.has_cache_hit,
+            "has_cache_write": self.has_cache_write,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "Usage":
+        """Create Usage from dictionary."""
+        return cls(
+            input_tokens=data.get("input_tokens", 0),
+            output_tokens=data.get("output_tokens", 0),
+            cache_read_tokens=data.get("cache_read_tokens"),
+            cache_write_tokens=data.get("cache_write_tokens"),
+        )
+
+    def __add__(self, other: "Usage") -> "Usage":
+        """Add two Usage objects together."""
+        return Usage(
+            input_tokens=self.input_tokens + other.input_tokens,
+            output_tokens=self.output_tokens + other.output_tokens,
+            cache_read_tokens=(
+                (self.cache_read_tokens or 0) + (other.cache_read_tokens or 0)
+                if self.cache_read_tokens is not None
+                or other.cache_read_tokens is not None
+                else None
+            ),
+            cache_write_tokens=(
+                (self.cache_write_tokens or 0) + (other.cache_write_tokens or 0)
+                if self.cache_write_tokens is not None
+                or other.cache_write_tokens is not None
+                else None
+            ),
+        )
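
The new `Usage` dataclass normalizes provider-specific usage payloads and supports aggregation with `+`. A small illustrative sketch follows; the raw usage dicts are made-up values in each provider's response shape.

```python
from lm_deluge.usage import Usage

# Parse provider-shaped usage payloads (values here are invented for illustration).
anthropic_usage = Usage.from_anthropic_usage(
    {"input_tokens": 12, "output_tokens": 40, "cache_read_input_tokens": 900}
)
openai_usage = Usage.from_openai_usage({"prompt_tokens": 25, "completion_tokens": 18})

# Aggregate across responses; cache fields stay None unless some provider reported them.
total = anthropic_usage + openai_usage
print(total.total_input_tokens)  # 12 + 900 + 25 = 937
print(total.total_tokens)        # 937 + 40 + 18 = 995
print(total.has_cache_hit)       # True, since cache_read_tokens > 0
```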
lm_deluge/util/json.py CHANGED
@@ -1,5 +1,6 @@
-import re
 import json
+import re
+
 import json5
 
 
@@ -166,3 +167,19 @@ def load_json(
         pass
 
     raise ValueError(f"Invalid JSON string: {json_string}")
+
+
+def try_load_json(
+    json_string: str | None,
+    allow_json5: bool = True,
+    allow_partial: bool = False,
+    allow_healing: bool = True,
+):
+    """
+    Like the above, except it returns None instead of raising an error.
+    """
+    try:
+        return load_json(json_string, allow_json5, allow_partial, allow_healing)
+    except Exception as e:
+        print(f"Failed to load json: {e}. Returning None.")
+        return None
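
The new `try_load_json` wraps `load_json` so callers can branch on `None` instead of catching exceptions. A brief illustrative sketch; the sample string is invented, and whether the json5/healing fallbacks recover it depends on `load_json`'s behavior.

```python
from lm_deluge.util.json import try_load_json

reply = '{"city": "Paris", "temp": 21,}'  # trailing comma: invalid as strict JSON
data = try_load_json(reply)  # json5/healing fallbacks may still recover it
if data is None:
    print("model did not return usable JSON")
else:
    print(data["city"])
```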
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.11
+Version: 0.0.13
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -22,8 +22,7 @@ Requires-Dist: lxml
 Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
-Requires-Dist: fasttext-wheel
-Requires-Dist: fasttext-langdetect
+Requires-Dist: rich
 Dynamic: license-file
 
 # lm-deluge
@@ -35,6 +34,7 @@ Dynamic: license-file
 - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
 - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
 - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
+- **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
 - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
 - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
 - **Sync and async APIs** – Use the client from sync or async code.
@@ -47,7 +47,7 @@ Dynamic: license-file
 pip install lm-deluge
 ```
 
-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GOOGLE_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables.
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GOOGLE_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
 
 ## Quickstart
 
@@ -63,7 +63,7 @@ print(resp[0].completion)
 
 ## Spraying Across Models
 
-To distribute your requests across models, just provide a list of more than one model to the constructor. The rate limits for the client apply to the client as a whole, not per-model, so you may want to increase them:
+To distribute your requests across models, just provide a list of more than one model to the constructor. See all available models in `models.py`. The rate limits for the client apply to the client as a whole, not per-model, so you may want to increase them:
 
 ```python
 from lm_deluge import LLMClient
@@ -84,7 +84,7 @@ API calls can be customized in a few ways.
 
 1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models. You can also pass many of these arguments directly to `LLMClient.basic` so you don't have to construct an entire `SamplingParams` object.
 2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and caching.
-3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object).
+3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.
 
 Putting it all together:
 
@@ -123,7 +123,9 @@ resps = client.process_prompts_sync([prompt])
 
 This just works. Images can be local images on disk, URLs, bytes, base64 data URLs... go wild. You can use `Conversation.to_openai` or `Conversation.to_anthropic` to format your messages for the OpenAI or Anthropic clients directly.
 
-## Basic Tool Use
+See a full multi-turn chat example in `examples/multiturn.md`.
+
+## Tool Use
 
 Define tools from Python functions and use them with any model:
 
@@ -135,27 +137,83 @@ def get_weather(city: str) -> str:
 
 tool = Tool.from_function(get_weather)
 client = LLMClient.basic("claude-3-haiku")
-resp = client.process_prompts_sync(["What's the weather in Paris?"], tools=[tool])
-```
+resps = client.process_prompts_sync(
+    ["What's the weather in Paris?"],
+    tools=[tool]
+)
 
-## MCP Integration
+# you can iterate over the tool calls in the response automatically
+for tool_call in resps[0].tool_calls:
+    print(tool_call.name, tool_call.arguments)
+```
 
-Connect to MCP servers to extend your models with external tools:
+You can also automatically instantiate tools from MCP servers. Under the hood, the constructor connects to the server, asks it what tools it has, and then creates a `Tool` from each of them, *with a built-in `call` and `acall` interface*.
 
 ```python
 from lm_deluge import LLMClient, Tool
 
-# Connect to a local MCP server
-mcp_tool = Tool.from_mcp("filesystem", command="npx -y @modelcontextprotocol/server-filesystem", args=["/path/to/directory"])
-client = LLMClient.basic("gpt-4o-mini", tools=[mcp_tool])
-resp = client.process_prompts_sync(["List the files in the current directory"])
+# Connect to a local MCP server and get all of its tools
+filesystem_tools = Tool.from_mcp(
+    "filesystem",
+    command="npx",
+    args=["-y", "@modelcontextprotocol/server-filesystem", "/path/to/directory"]
+)
+
+# or load ALL the tools from a Claude Desktop like config
+config = {
+    "mcpServers": {
+        "exa": {
+            "url": f"https://mcp.exa.ai/mcp?exaApiKey={os.getenv('EXA_API_KEY')}"
+        },
+        "zapier": {
+            "url": f"https://mcp.zapier.com/api/mcp/s/{os.getenv('ZAPIER_MCP_SECRET')}/mcp"
+        }
+    }
+}
+all_tools = Tool.from_mcp_config(config)
+
+# let the model use the tools
+client = LLMClient.basic("gpt-4o-mini")
+resps = client.process_prompts_sync(
+    ["List the files in the current directory"],
+    tools=all_tools
+)
+
+# call the tools
+for tool_call in resps[0].tool_calls:
+    # this is dumb sorry will make it better
+    tool_to_call = [x for x in all_tools if x.name == tool_call.name][0]
+    tool_to_call.call(**tool_call.arguments)  # in async code, use .acall()
 ```
 
-## Caching
+### Prompt Caching (Anthropic)
+
+For Anthropic models, you can use prompt caching to reduce costs and latency for repeated context. This uses Anthropic's server-side prompt caching. Other providers like OpenAI and Google do this automatically, but Anthropic requires you to manually set cache-control on messages. You can do this in lm-deluge with a simple "cache" argument to `process_prompts_sync` or `process_prompts_async`:
+
+```python
+from lm_deluge import LLMClient, Conversation, Message
+
+# Create a conversation with system message
+conv = (
+    Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+    .add(Message.user("How do I use asyncio.gather?"))
+)
+
+# Use prompt caching to cache system message and tools
+client = LLMClient.basic("claude-3-5-sonnet")
+resps = client.process_prompts_sync(
+    [conv],
+    cache="system_and_tools"  # Cache system message and any tools
+)
+```
+
+Available cache patterns: `"system_and_tools"`, `"tools_only"`, `"last_user_message"`, `"last_2_user_messages"`, `"last_3_user_messages"`.
+
+## Local Caching
 
-`lm_deluge.cache` includes LevelDB, SQLite and custom dictionary based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re‑sent across different `process_prompts_[...]` calls.
+Besides caching from model providers (which provides cache reads at a discount, but not for free) `lm_deluge.cache` includes LevelDB, SQLite and custom dictionary based caches to cache prompts locally. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re‑sent across different `process_prompts_[...]` calls.
 
-**IMPORTANT:** Caching does not currently work for prompts in the SAME batch. That is, if you call `process_prompts_sync` with the same prompt 100 times, there will be 0 cache hits. If you call `process_prompts_sync` a *second* time with those same 100 prompts, all 100 will be cache hits. The cache is intended to be persistent and help you save costs across many invocations, but it can't help with a single batch-inference session (yet!).
+**IMPORTANT:** Caching does not currently work for prompts in the SAME batch. That is, if you call `process_prompts_sync` with the same prompt 100 times, there will be 0 cache hits. If you call `process_prompts_sync` a *second* time with those same 100 prompts, all 100 will be cache hits. The local cache is intended to be persistent and help you save costs across many invocations, but it can't help with a single batch-inference session (yet!).
 
 ## Asynchronous Client
 Use this in asynchronous code, or in a Jupyter notebook. If you try to use the sync client in a Jupyter notebook, you'll have to use `nest-asyncio`, because internally the sync client uses async code. Don't do it! Just use the async client!
@@ -175,11 +233,11 @@ asyncio.run(main())
 
 ## Available Models
 
-We support all models in `src/lm_deluge/models.py`. An older version of this client supported Bedrock and Vertex. We plan to re-implement Bedrock support (our previous support was spotty and we need to figure out cross-region inference in order to support the newest Claude models). Vertex support is not currently planned, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
+We support all models in `src/lm_deluge/models.py`. Vertex support is not planned in the short term, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
 
 ## Feature Support
 
-We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We don't support tool use yet, but support is planned (keep an eye out for a unified tool definition spec that works for all models!). We support logprobs for OpenAI models that return them via the `logprobs` argument to the `LLMClient`.
+We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
 
 ## Built‑in tools
 
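
The README's new Computer Use bullet names a `computer_use` argument but does not show it in code. Below is a hypothetical sketch, assuming the argument is a boolean flag accepted by `process_prompts_sync` alongside the prompts (not verified against the package).

```python
from lm_deluge import LLMClient

client = LLMClient.basic("claude-3-5-sonnet")
resps = client.process_prompts_sync(
    ["Open a browser and check the weather in Paris."],
    computer_use=True,  # assumption: enables Anthropic's computer-use tools per the README bullet
)
for tool_call in resps[0].tool_calls:
    print(tool_call.name, tool_call.arguments)
```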
@@ -0,0 +1,42 @@
+lm_deluge/__init__.py,sha256=XR_EuBvJM4LggqfWdsrdQij1-UIGAFwyvHW9Rp8tnQA,280
+lm_deluge/agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lm_deluge/batches.py,sha256=dI5G9uvmoDU9hMohrkEhlIDyJPsmsVwZPwxx6qETxxk,17728
+lm_deluge/cache.py,sha256=VB1kv8rM2t5XWPR60uhszFcxLDnVKOe1oA5hYjVDjIo,4375
+lm_deluge/client.py,sha256=nkYO_wsGgUkFfqfb_8JrDzcU39RL9FfplKEK6zrncAo,20564
+lm_deluge/config.py,sha256=E47daVMvqMicoY2CDcgUnN5nVGDLAQejR358B-pRHZk,923
+lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
+lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
+lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
+lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
+lm_deluge/models.py,sha256=gW9ZhKYjwC-ZF-SzWqagFUE_7Mqerdtt_T5NxGo040E,46583
+lm_deluge/prompt.py,sha256=dKaV4gI9yLB0w0Ukdz14kGl34yMm5JNm6Sc-24WQPcg,32202
+lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
+lm_deluge/tool.py,sha256=C2zwU9-7fldfYT0TZDoVVGGSC6dN_It9GSxnfkN6Z_w,9822
+lm_deluge/tracker.py,sha256=Un2uthRNZk3dl2fODvvR6CCyFW3IKWfR0GjvpB_dxoM,9095
+lm_deluge/usage.py,sha256=oS-rmF3ZJ1RMtR7WI6BB2uVOAjJg0scvGF3zZRahWVg,4449
+lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
+lm_deluge/api_requests/anthropic.py,sha256=itKPu1cqCYcrr4fkLarlvSYr6tqLEAGVLGXEG05QXWM,8345
+lm_deluge/api_requests/base.py,sha256=ixI326EtRadoVCbmvIddzzzIp6E_zPfPOIfDEnucZrc,18060
+lm_deluge/api_requests/bedrock.py,sha256=yh4-zMrjlQfmxoBbrc2WYJ8gEqVkTP_-tMR7-XbTAtQ,11753
+lm_deluge/api_requests/common.py,sha256=pcOpODL4heoaNLjbA6_ogkrOAbUSKY3F37D2EyMLW10,359
+lm_deluge/api_requests/mistral.py,sha256=PkuoKbOJAB6DOK_NvzbxpWPAktfvonf69QjC0tVCYuE,5366
+lm_deluge/api_requests/openai.py,sha256=fj-ioXeK6-OGl9VIFpVy6XJRYOvf6TgMv7eu5mkC8RE,16482
+lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
+lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
+lm_deluge/api_requests/deprecated/deepseek.py,sha256=FEApI93VAWDwuaqTooIyKMgONYqRhdUmiAPBRme-IYs,4582
+lm_deluge/api_requests/deprecated/mistral.py,sha256=pOfOZUM4U35I3Plch84SnAFpDAzouHcSNNMtgxRvjy4,4709
+lm_deluge/api_requests/deprecated/vertex.py,sha256=ygXz2RjdXErPCSBbiHLEWbf5_sSTIi31WoX0UaoYzRI,15275
+lm_deluge/computer_use/anthropic_tools.py,sha256=p1CgHw1htX0PTdDW9Tni9N1azVMCoyA_ei-fMT6HHis,2478
+lm_deluge/llm_tools/__init__.py,sha256=TbZTETq9i_9yYskFWQKOG4pGh5ZiyE_D-h3RArfhGp4,231
+lm_deluge/llm_tools/extract.py,sha256=-GtyqJUxKvB567tk_NnCMklazz18xZBCPlAjYHTVUWg,3649
+lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
+lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
+lm_deluge/util/json.py,sha256=_4Oar2Cmz2L1DK3EtPLPDxD6rsYHxjROmV8ZpmMjQ-4,5822
+lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
+lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
+lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
+lm_deluge-0.0.13.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.13.dist-info/METADATA,sha256=GEkP9_w0VcPOGEKad9Yh24WOhiW4TQvC2pX4wK1x0jk,11549
+lm_deluge-0.0.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.13.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.13.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.8.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -1,38 +0,0 @@
-lm_deluge/__init__.py,sha256=rndOr4Rcfnpttz-onWU3vVEm-MM0WDFgz6KexKPAx0k,222
-lm_deluge/cache.py,sha256=VB1kv8rM2t5XWPR60uhszFcxLDnVKOe1oA5hYjVDjIo,4375
-lm_deluge/client.py,sha256=lGD4rqT7qHkTKddjRvKK_1bh7s8GNIzXzQ52GCZhfCg,28932
-lm_deluge/embed.py,sha256=m-X8UK4gV9KKD7Wv3yarAceMQaj7gR1JwzD_sB0MOQY,13183
-lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
-lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
-lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
-lm_deluge/models.py,sha256=oYrt0x0iVfTwoHjP-l1WWennzEDGwnZczj6ds6a6-xc,45406
-lm_deluge/prompt.py,sha256=_pJYwgjL39lDzMNmae8pPIBoORm_ekSM_9qU2iGGpOc,25445
-lm_deluge/rerank.py,sha256=tW1c3gQCAqaF8Ez-r-4qxYAcdKqxnLMxwHApKOUKwk4,11289
-lm_deluge/sampling_params.py,sha256=E2kewh1vz-1Qcy5xNBCzihfGgT_GcHYMfzaWb3FLiXs,739
-lm_deluge/tool.py,sha256=5nFbHchv12C1jkL8nkEh6v9WfxpC0O6rALP25z60WsI,9476
-lm_deluge/tracker.py,sha256=Dk99scN_NeDEO0gkLO5efXiZq11Ga-k6cerUHWN7IWY,1292
-lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
-lm_deluge/api_requests/anthropic.py,sha256=MMI_w9hVbevQpcqP3NVVindpTmLb2KHqjJQpIzCi5RM,7240
-lm_deluge/api_requests/base.py,sha256=w0MEOCIccxxy2c67Y2Y-QBox9rinIxQ7MLnp8953sjQ,15954
-lm_deluge/api_requests/bedrock.py,sha256=cvB85BFvL9HKTUsP9qFUCLQzJh83IQNAcLXuW6ReZK8,10520
-lm_deluge/api_requests/common.py,sha256=U0mX_wC3Tzg2-1u9nYUCTQqYzuYJqvLrICCNW_dbbJM,287
-lm_deluge/api_requests/mistral.py,sha256=lU9AOyb46uTzRjKw6Sd5iojEbBIMF432fRex7q6Xtwk,5423
-lm_deluge/api_requests/openai.py,sha256=BuMiM_2zJQXfnUjTT94JxJi3ZX5V-KQQueRG-R0SGuc,7361
-lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
-lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
-lm_deluge/api_requests/deprecated/deepseek.py,sha256=FEApI93VAWDwuaqTooIyKMgONYqRhdUmiAPBRme-IYs,4582
-lm_deluge/api_requests/deprecated/mistral.py,sha256=pOfOZUM4U35I3Plch84SnAFpDAzouHcSNNMtgxRvjy4,4709
-lm_deluge/api_requests/deprecated/vertex.py,sha256=ygXz2RjdXErPCSBbiHLEWbf5_sSTIi31WoX0UaoYzRI,15275
-lm_deluge/llm_tools/__init__.py,sha256=TbZTETq9i_9yYskFWQKOG4pGh5ZiyE_D-h3RArfhGp4,231
-lm_deluge/llm_tools/extract.py,sha256=-GtyqJUxKvB567tk_NnCMklazz18xZBCPlAjYHTVUWg,3649
-lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
-lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
-lm_deluge/util/json.py,sha256=dCeG9j1D17rXmQJbKJH79X0CGof4Wlqd55TDg4D6ky8,5388
-lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
-lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
-lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.11.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.11.dist-info/METADATA,sha256=jdPdmbo_F8ecKTAHtPTg2GeyCOFmmsJ6T4-4RUleU24,9210
-lm_deluge-0.0.11.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-lm_deluge-0.0.11.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.11.dist-info/RECORD,,