lm-deluge 0.0.54.tar.gz → 0.0.56.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of lm-deluge has been flagged as potentially problematic.

Files changed (80)
  1. {lm_deluge-0.0.54/src/lm_deluge.egg-info → lm_deluge-0.0.56}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/base.py +6 -0
  4. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/response.py +28 -1
  5. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/client.py +16 -0
  6. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/__init__.py +4 -1
  7. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/anthropic.py +20 -2
  8. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/google.py +20 -12
  9. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/openai.py +18 -8
  10. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/tracker.py +78 -10
  11. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/usage.py +30 -21
  12. {lm_deluge-0.0.54 → lm_deluge-0.0.56/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  13. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/LICENSE +0 -0
  14. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/README.md +0 -0
  15. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/setup.cfg +0 -0
  16. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/__init__.py +0 -0
  17. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/agent.py +0 -0
  18. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/__init__.py +0 -0
  19. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/anthropic.py +0 -0
  20. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/bedrock.py +0 -0
  21. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/common.py +0 -0
  22. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  23. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  24. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  25. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  26. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  27. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/gemini.py +0 -0
  28. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/mistral.py +0 -0
  29. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/openai.py +0 -0
  30. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/batches.py +0 -0
  31. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  32. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  33. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  34. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  35. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/base.py +0 -0
  36. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/openai.py +0 -0
  37. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/cache.py +0 -0
  38. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/cli.py +0 -0
  39. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/config.py +0 -0
  40. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/embed.py +0 -0
  41. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/errors.py +0 -0
  42. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/file.py +0 -0
  43. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/gemini_limits.py +0 -0
  44. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/image.py +0 -0
  45. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/__init__.py +0 -0
  46. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/classify.py +0 -0
  47. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/extract.py +0 -0
  48. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/locate.py +0 -0
  49. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/ocr.py +0 -0
  50. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/score.py +0 -0
  51. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/translate.py +0 -0
  52. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/bedrock.py +0 -0
  53. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/cerebras.py +0 -0
  54. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/cohere.py +0 -0
  55. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/deepseek.py +0 -0
  56. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/fireworks.py +0 -0
  57. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/grok.py +0 -0
  58. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/groq.py +0 -0
  59. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/meta.py +0 -0
  60. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/mistral.py +0 -0
  61. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/openrouter.py +0 -0
  62. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/together.py +0 -0
  63. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/presets/cerebras.py +0 -0
  64. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/presets/meta.py +0 -0
  65. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/prompt.py +0 -0
  66. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/request_context.py +0 -0
  67. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/rerank.py +0 -0
  68. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/tool.py +0 -0
  69. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/harmony.py +0 -0
  70. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/json.py +0 -0
  71. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/logprobs.py +0 -0
  72. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/spatial.py +0 -0
  73. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/validation.py +0 -0
  74. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/xml.py +0 -0
  75. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
  76. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  77. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/requires.txt +0 -0
  78. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/top_level.txt +0 -0
  79. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/tests/test_builtin_tools.py +0 -0
  80. {lm_deluge-0.0.54 → lm_deluge-0.0.56}/tests/test_native_mcp_server.py +0 -0
{lm_deluge-0.0.54/src/lm_deluge.egg-info → lm_deluge-0.0.56}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.54
+ Version: 0.0.56
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10

pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.54"
+ version = "0.0.56"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"

src/lm_deluge/api_requests/base.py
@@ -52,6 +52,9 @@ class APIRequestBase(ABC):
  self, base_headers: dict[str, str], exclude_patterns: list[str] | None = None
  ) -> dict[str, str]:
  """Merge extra_headers with base headers, giving priority to extra_headers."""
+ # Filter out None values from base headers (e.g., missing API keys)
+ base_headers = {k: v for k, v in base_headers.items() if v is not None}
+
  if not self.context.extra_headers:
  return base_headers

@@ -69,6 +72,9 @@ class APIRequestBase(ABC):
  # Start with base headers, then overlay filtered extra headers (extra takes precedence)
  merged = dict(base_headers)
  merged.update(filtered_extra)
+
+ # Filter out None values from final merged headers
+ merged = {k: v for k, v in merged.items() if v is not None}
  return merged

  def handle_success(self, data):
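
The net effect of these two hunks, as a standalone sketch (merge_headers and the sample headers below are illustrative names, not the package's internal API):

    def merge_headers(base: dict, extra: dict | None) -> dict:
        # Drop None values first (e.g. an auth header built from a missing API key env var)
        merged = {k: v for k, v in base.items() if v is not None}
        if extra:
            merged.update(extra)  # extra headers take precedence over base headers
        # Drop None values again in case any arrived via the extra headers
        return {k: v for k, v in merged.items() if v is not None}

    print(merge_headers({"x-api-key": None, "content-type": "application/json"}, {"x-trace-id": "abc"}))
    # {'content-type': 'application/json', 'x-trace-id': 'abc'}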
src/lm_deluge/api_requests/response.py
@@ -84,10 +84,37 @@ class APIResponse:
  and api_model.input_cost is not None
  and api_model.output_cost is not None
  ):
+ # Calculate input cost, accounting for cached vs non-cached tokens
+ # Different providers report tokens differently:
+ # - Anthropic/Bedrock: input_tokens is ONLY non-cached, cache_read_tokens is separate
+ # - OpenAI/Gemini: input_tokens INCLUDES cached, cache_read_tokens is a subset
+ cache_read_tokens = self.usage.cache_read_tokens or 0
+
+ if api_model.api_spec in ("anthropic", "bedrock"):
+ # For Anthropic: input_tokens already excludes cache, so use directly
+ non_cached_input_tokens = self.usage.input_tokens
+ else:
+ # For OpenAI/Gemini: input_tokens includes cache, so subtract it
+ non_cached_input_tokens = self.usage.input_tokens - cache_read_tokens
+
  self.cost = (
- self.usage.input_tokens * api_model.input_cost / 1e6
+ non_cached_input_tokens * api_model.input_cost / 1e6
  + self.usage.output_tokens * api_model.output_cost / 1e6
  )
+
+ # Add cost for cache read tokens (at reduced rate)
+ if cache_read_tokens > 0 and api_model.cached_input_cost is not None:
+ self.cost += cache_read_tokens * api_model.cached_input_cost / 1e6
+
+ # Add cost for cache write tokens (only for Anthropic)
+ if (
+ self.usage.cache_write_tokens
+ and self.usage.cache_write_tokens > 0
+ and api_model.cache_write_cost is not None
+ ):
+ self.cost += (
+ self.usage.cache_write_tokens * api_model.cache_write_cost / 1e6
+ )
  elif self.content is not None and self.completion is not None:
  pass
  # print(
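
A worked example of the new cost formula, using the Claude Sonnet rates that appear in the anthropic.py hunks below ($3/M input, $0.30/M cache reads, $3.75/M cache writes, $15/M output); the token counts are made up. For Anthropic/Bedrock, input_tokens already excludes the cached portion; for OpenAI/Gemini the cached tokens would first be subtracted from input_tokens:

    input_cost, cached_input_cost, cache_write_cost, output_cost = 3.0, 0.30, 3.75, 15.0

    # Anthropic-style usage: input_tokens excludes cache reads and writes
    input_tokens, cache_read, cache_write, output_tokens = 1_000, 10_000, 2_000, 500

    cost = (
        input_tokens * input_cost / 1e6          # $0.0030
        + output_tokens * output_cost / 1e6      # $0.0075
        + cache_read * cached_input_cost / 1e6   # $0.0030
        + cache_write * cache_write_cost / 1e6   # $0.0075
    )
    print(f"${cost:.4f}")  # $0.0210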
src/lm_deluge/client.py
@@ -30,6 +30,7 @@ class _LLMClient(BaseModel):
  """

  model_names: str | list[str] = ["gpt-4.1-mini"]
+ name: str | None = None
  max_requests_per_minute: int = 1_000
  max_tokens_per_minute: int = 100_000
  max_concurrent_requests: int = 225
@@ -69,6 +70,7 @@ class _LLMClient(BaseModel):
  max_requests_per_minute=self.max_requests_per_minute,
  max_tokens_per_minute=self.max_tokens_per_minute,
  max_concurrent_requests=self.max_concurrent_requests,
+ client_name=self.name or "LLMClient",
  progress_style=self.progress,
  use_progress_bar=show_progress,
  )
@@ -169,6 +171,13 @@ class _LLMClient(BaseModel):
  # normalize weights
  self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]

+ # Auto-generate name if not provided
+ if self.name is None:
+ if len(self.model_names) == 1:
+ self.name = self.model_names[0]
+ else:
+ self.name = "LLMClient"
+
  # Validate logprobs settings across all sampling params
  if self.logprobs or any(sp.logprobs for sp in self.sampling_params):
  print("Logprobs enabled.")
@@ -286,6 +295,7 @@ class _LLMClient(BaseModel):
  # Handle successful response
  if not response.is_error:
  context.status_tracker.task_succeeded(context.task_id)
+ context.status_tracker.track_usage(response)
  # Cache successful responses immediately
  if self.cache and response.completion:
  # print(f"DEBUG: Caching successful response")
@@ -324,6 +334,8 @@ class _LLMClient(BaseModel):

  # No retries left or no retry queue - final failure
  context.status_tracker.task_failed(context.task_id)
+ # Track usage even for failed requests if they made an API call
+ context.status_tracker.track_usage(response)
  context.maybe_callback(response, context.status_tracker)

  # Print final error message
@@ -725,6 +737,7 @@ class _LLMClient(BaseModel):
  def LLMClient(
  model_names: str,
  *,
+ name: str | None = None,
  max_requests_per_minute: int = 1_000,
  max_tokens_per_minute: int = 100_000,
  max_concurrent_requests: int = 225,
@@ -751,6 +764,7 @@ def LLMClient(
  def LLMClient(
  model_names: list[str],
  *,
+ name: str | None = None,
  max_requests_per_minute: int = 1_000,
  max_tokens_per_minute: int = 100_000,
  max_concurrent_requests: int = 225,
@@ -776,6 +790,7 @@ def LLMClient(
  def LLMClient(
  model_names: str | list[str] = "gpt-4.1-mini",
  *,
+ name: str | None = None,
  max_requests_per_minute: int = 1_000,
  max_tokens_per_minute: int = 100_000,
  max_concurrent_requests: int = 225,
@@ -813,6 +828,7 @@ def LLMClient(
  # Simply pass everything to the Pydantic constructor
  return _LLMClient(
  model_names=model_names,
+ name=name,
  max_requests_per_minute=max_requests_per_minute,
  max_tokens_per_minute=max_tokens_per_minute,
  max_concurrent_requests=max_concurrent_requests,
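
A short usage sketch of the new name parameter (assuming LLMClient is importable from the package root; the client name is illustrative):

    from lm_deluge import LLMClient  # assumed import path

    # With a single model and no explicit name, the client is named after the model.
    # Here the name is overridden, so progress output reads e.g.
    # "[summarizer] Completed 10/100 requests".
    client = LLMClient("gpt-4.1-mini", name="summarizer")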
src/lm_deluge/models/__init__.py
@@ -29,7 +29,8 @@ class APIModel:
  api_base: str
  api_key_env_var: str
  api_spec: str
- cached_input_cost: float | None = 0
+ cached_input_cost: float | None = 0 # $ per million cached/read input tokens
+ cache_write_cost: float | None = 0 # $ per million cache write tokens
  input_cost: float | None = 0 # $ per million input tokens
  output_cost: float | None = 0 # $ per million output tokens
  supports_json: bool = False
@@ -89,6 +90,7 @@ def register_model(
  api_spec: str = "openai",
  input_cost: float | None = 0, # $ per million input tokens
  cached_input_cost: float | None = 0,
+ cache_write_cost: float | None = 0, # $ per million cache write tokens
  output_cost: float | None = 0, # $ per million output tokens
  supports_json: bool = False,
  supports_logprobs: bool = False,
@@ -106,6 +108,7 @@ def register_model(
  api_key_env_var=api_key_env_var,
  api_spec=api_spec,
  cached_input_cost=cached_input_cost,
+ cache_write_cost=cache_write_cost,
  input_cost=input_cost,
  output_cost=output_cost,
  supports_json=supports_json,
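
A hedged sketch of registering a custom model with the new cache_write_cost field (the model id, endpoint, and the name/api_base parameter names are assumptions; only the api_spec, api_key_env_var, and cost parameters are visible in the hunks above):

    from lm_deluge.models import register_model  # assumed import path

    register_model(
        name="my-claude-variant",               # hypothetical model id
        api_base="https://api.example.com/v1",  # hypothetical endpoint
        api_key_env_var="MY_PROVIDER_API_KEY",
        api_spec="anthropic",
        input_cost=3.0,          # $ per million non-cached input tokens
        cached_input_cost=0.30,  # $ per million cache-read tokens
        cache_write_cost=3.75,   # $ per million cache-write tokens (new in this release)
        output_cost=15.0,        # $ per million output tokens
    )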
src/lm_deluge/models/anthropic.py
@@ -18,6 +18,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 3.0,
+ "cached_input_cost": 0.30,
+ "cache_write_cost": 3.75,
  "output_cost": 15.0,
  "requests_per_minute": 4_000,
  "tokens_per_minute": 400_000,
@@ -30,6 +32,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 15.0,
+ "cached_input_cost": 1.50,
+ "cache_write_cost": 18.75,
  "output_cost": 75.0,
  "requests_per_minute": 4_000,
  "tokens_per_minute": 400_000,
@@ -43,6 +47,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 15.0,
+ "cached_input_cost": 1.50,
+ "cache_write_cost": 18.75,
  "output_cost": 75.0,
  "requests_per_minute": 4_000,
  "tokens_per_minute": 400_000,
@@ -56,6 +62,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 3.0,
+ "cached_input_cost": 0.30,
+ "cache_write_cost": 3.75,
  "output_cost": 15.0,
  "requests_per_minute": 4_000,
  "tokens_per_minute": 400_000,
@@ -68,6 +76,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 3.0,
+ "cached_input_cost": 0.30,
+ "cache_write_cost": 3.75,
  "output_cost": 15.0,
  "requests_per_minute": 4_000,
  "tokens_per_minute": 400_000,
@@ -81,6 +91,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 3.0,
+ "cached_input_cost": 0.30,
+ "cache_write_cost": 3.75,
  "output_cost": 15.0,
  "requests_per_minute": 4_000,
  "tokens_per_minute": 400_000,
@@ -93,6 +105,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 3.0,
+ "cached_input_cost": 0.30,
+ "cache_write_cost": 3.75,
  "output_cost": 15.0,
  "requests_per_minute": 4_000,
  "tokens_per_minute": 400_000,
@@ -116,8 +130,10 @@ ANTHROPIC_MODELS = {
  "api_key_env_var": "ANTHROPIC_API_KEY",
  "supports_json": False,
  "api_spec": "anthropic",
- "input_cost": 1.00,
- "output_cost": 5.00,
+ "input_cost": 0.8,
+ "cached_input_cost": 0.08,
+ "cache_write_cost": 1.00,
+ "output_cost": 4.00,
  "requests_per_minute": 20_000,
  "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
  },
@@ -129,6 +145,8 @@ ANTHROPIC_MODELS = {
  "supports_json": False,
  "api_spec": "anthropic",
  "input_cost": 0.25,
+ "cache_write_cost": 0.30,
+ "cached_input_cost": 0.03,
  "output_cost": 1.25,
  "requests_per_minute": 10_000,
  "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
src/lm_deluge/models/google.py
@@ -18,6 +18,7 @@ GOOGLE_MODELS = {
  "supports_logprobs": False,
  "api_spec": "openai",
  "input_cost": 0.1,
+ "cached_input_cost": 0.025,
  "output_cost": 0.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -31,8 +32,8 @@ GOOGLE_MODELS = {
  "supports_json": True,
  "supports_logprobs": False,
  "api_spec": "openai",
- "input_cost": 0.1,
- "output_cost": 0.4,
+ "input_cost": 0.075,
+ "output_cost": 0.3,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": False,
@@ -45,8 +46,9 @@ GOOGLE_MODELS = {
  "supports_json": True,
  "supports_logprobs": False,
  "api_spec": "openai",
- "input_cost": 0.1,
- "output_cost": 0.4,
+ "input_cost": 1.25,
+ "cached_input_cost": 0.31,
+ "output_cost": 10.0,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": True,
@@ -59,8 +61,9 @@ GOOGLE_MODELS = {
  "supports_json": True,
  "supports_logprobs": False,
  "api_spec": "openai",
- "input_cost": 0.1,
- "output_cost": 0.4,
+ "input_cost": 0.3,
+ "cached_input_cost": 0.075,
+ "output_cost": 2.5,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": True,
@@ -74,6 +77,7 @@ GOOGLE_MODELS = {
  "supports_logprobs": False,
  "api_spec": "openai",
  "input_cost": 0.1,
+ "cached_input_cost": 0.025,
  "output_cost": 0.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -89,6 +93,7 @@ GOOGLE_MODELS = {
  "supports_logprobs": False,
  "api_spec": "gemini",
  "input_cost": 0.1,
+ "cached_input_cost": 0.025,
  "output_cost": 0.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -102,8 +107,8 @@ GOOGLE_MODELS = {
  "supports_json": True,
  "supports_logprobs": False,
  "api_spec": "gemini",
- "input_cost": 0.1,
- "output_cost": 0.4,
+ "input_cost": 0.075,
+ "output_cost": 0.3,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": False,
@@ -116,8 +121,9 @@ GOOGLE_MODELS = {
  "supports_json": True,
  "supports_logprobs": False,
  "api_spec": "gemini",
- "input_cost": 0.1,
- "output_cost": 0.4,
+ "input_cost": 1.25,
+ "cached_input_cost": 0.31,
+ "output_cost": 10.0,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": True,
@@ -130,8 +136,9 @@ GOOGLE_MODELS = {
  "supports_json": True,
  "supports_logprobs": False,
  "api_spec": "gemini",
- "input_cost": 0.1,
- "output_cost": 0.4,
+ "input_cost": 0.3,
+ "cached_input_cost": 0.075,
+ "output_cost": 2.5,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": True,
@@ -145,6 +152,7 @@ GOOGLE_MODELS = {
  "supports_logprobs": False,
  "api_spec": "gemini",
  "input_cost": 0.1,
+ "cached_input_cost": 0.025,
  "output_cost": 0.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
src/lm_deluge/models/openai.py
@@ -75,8 +75,8 @@ OPENAI_MODELS = {
  "supports_logprobs": False,
  "supports_responses": True,
  "api_spec": "openai",
- "input_cost": 2.0,
- "output_cost": 8.0,
+ "input_cost": 3.0,
+ "output_cost": 12.0,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": False,
@@ -90,8 +90,9 @@ OPENAI_MODELS = {
  "supports_logprobs": True,
  "supports_responses": True,
  "api_spec": "openai",
- "input_cost": 10.0,
- "output_cost": 40.0,
+ "input_cost": 2.0,
+ "cached_input_cost": 0.50,
+ "output_cost": 8.0,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": True,
@@ -106,6 +107,7 @@ OPENAI_MODELS = {
  "supports_responses": True,
  "api_spec": "openai",
  "input_cost": 1.1,
+ "cached_input_cost": 0.275,
  "output_cost": 4.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -121,6 +123,7 @@ OPENAI_MODELS = {
  "supports_responses": True,
  "api_spec": "openai",
  "input_cost": 2.0,
+ "cached_input_cost": 0.50,
  "output_cost": 8.0,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -136,6 +139,7 @@ OPENAI_MODELS = {
  "supports_responses": True,
  "api_spec": "openai",
  "input_cost": 0.4,
+ "cached_input_cost": 0.10,
  "output_cost": 1.6,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -151,6 +155,7 @@ OPENAI_MODELS = {
  "supports_responses": True,
  "api_spec": "openai",
  "input_cost": 0.1,
+ "cached_input_cost": 0.025,
  "output_cost": 0.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -181,6 +186,7 @@ OPENAI_MODELS = {
  "supports_responses": True,
  "api_spec": "openai",
  "input_cost": 1.1,
+ "cached_input_cost": 0.55,
  "output_cost": 4.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -196,6 +202,7 @@ OPENAI_MODELS = {
  "supports_responses": True,
  "api_spec": "openai",
  "input_cost": 15.0,
+ "cached_input_cost": 7.50,
  "output_cost": 60.0,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
@@ -225,8 +232,9 @@ OPENAI_MODELS = {
  "supports_logprobs": True,
  "supports_responses": True,
  "api_spec": "openai",
- "input_cost": 3.0,
- "output_cost": 15.0,
+ "input_cost": 1.1,
+ "cached_input_cost": 0.55,
+ "output_cost": 4.4,
  "requests_per_minute": 20,
  "tokens_per_minute": 100_000,
  "reasoning_model": True,
@@ -240,8 +248,9 @@ OPENAI_MODELS = {
  "supports_logprobs": True,
  "supports_responses": True,
  "api_spec": "openai",
- "input_cost": 5.0,
- "output_cost": 15.0,
+ "input_cost": 2.50,
+ "cached_input_cost": 1.25,
+ "output_cost": 10.0,
  "requests_per_minute": 10_000,
  "tokens_per_minute": 30_000_000,
  },
@@ -255,6 +264,7 @@ OPENAI_MODELS = {
  "supports_responses": True,
  "api_spec": "openai",
  "input_cost": 0.15,
+ "cached_input_cost": 0.075,
  "output_cost": 0.6,
  "requests_per_minute": 60_000,
  "tokens_per_minute": 250_000_000,
src/lm_deluge/tracker.py
@@ -13,7 +13,6 @@ from rich.progress import (
  TaskID,
  TextColumn,
  )
- from rich.text import Text
  from tqdm.auto import tqdm

  SECONDS_TO_PAUSE_AFTER_RATE_LIMIT_ERROR = 5
@@ -24,6 +23,7 @@ class StatusTracker:
  max_requests_per_minute: int
  max_tokens_per_minute: int
  max_concurrent_requests: int
+ client_name: str = "LLMClient"
  num_tasks_started: int = 0
  num_tasks_in_progress: int = 0
  num_tasks_succeeded: int = 0
@@ -33,6 +33,13 @@ class StatusTracker:
  total_requests: int = 0
  retry_queue: asyncio.Queue = field(default_factory=asyncio.Queue)

+ # Cumulative usage tracking
+ total_cost: float = 0.0
+ total_input_tokens: int = 0 # non-cached input tokens
+ total_cache_read_tokens: int = 0
+ total_cache_write_tokens: int = 0
+ total_output_tokens: int = 0
+
  # Progress bar configuration
  use_progress_bar: bool = True
  progress_bar_total: int | None = None
@@ -131,6 +138,25 @@ class StatusTracker:
  self.num_tasks_in_progress -= 1
  self.num_tasks_failed += 1

+ def track_usage(self, response):
+ """Accumulate usage statistics from a completed request.
+
+ Args:
+ response: APIResponse object containing usage and cost information
+ """
+ if response.cost:
+ self.total_cost += response.cost
+
+ if response.usage:
+ self.total_output_tokens += response.usage.output_tokens
+ self.total_input_tokens += response.usage.input_tokens
+
+ if response.usage.cache_read_tokens:
+ self.total_cache_read_tokens += response.usage.cache_read_tokens
+
+ if response.usage.cache_write_tokens:
+ self.total_cache_write_tokens += response.usage.cache_write_tokens
+
  def log_final_status(self):
  # Close progress bar before printing final status
  self.close_progress_bar()
@@ -144,6 +170,22 @@ class StatusTracker:
  f"{self.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate."
  )

+ # Display cumulative usage stats if available
+ if self.total_cost > 0 or self.total_input_tokens > 0 or self.total_output_tokens > 0:
+ usage_parts = []
+ if self.total_cost > 0:
+ usage_parts.append(f"Cost: ${self.total_cost:.4f}")
+ if self.total_input_tokens > 0 or self.total_output_tokens > 0:
+ usage_parts.append(
+ f"Tokens: {self.total_input_tokens:,} in / {self.total_output_tokens:,} out"
+ )
+ if self.total_cache_read_tokens > 0:
+ usage_parts.append(f"Cache: {self.total_cache_read_tokens:,} read")
+ if self.total_cache_write_tokens > 0:
+ usage_parts.append(f"{self.total_cache_write_tokens:,} write")
+
+ print(" | ".join(usage_parts))
+
  @property
  def pbar(self) -> tqdm | None:
  """Backward compatibility property to access progress bar."""
@@ -187,14 +229,16 @@ class StatusTracker:

  def _init_rich_display(self, total: int):
  """Initialize Rich display components."""
- self._rich_console = Console()
+ self._rich_console = Console(highlight=False)
+ # Escape square brackets so Rich doesn't interpret them as markup
+ description = f"[bold blue]\\[{self.client_name}][/bold blue] Processing..."
  self._rich_progress = Progress(
  SpinnerColumn(),
- TextColumn("Processing requests..."),
+ TextColumn("[progress.description]{task.description}"),
  BarColumn(),
  MofNCompleteColumn(),
  )
- self._rich_task_id = self._rich_progress.add_task("requests", total=total)
+ self._rich_task_id = self._rich_progress.add_task(description, total=total)
  self._rich_stop_event = asyncio.Event()
  self._rich_display_task = asyncio.create_task(self._rich_display_updater())

@@ -217,12 +261,36 @@ class StatusTracker:
  total=self.progress_bar_total,
  )

- tokens_info = f"TPM Capacity: {self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k"
- reqs_info = f"RPM Capacity: {int(self.available_request_capacity)}/{self.max_requests_per_minute}"
- in_progress = f"In Progress: {int(self.num_tasks_in_progress)}"
- capacity_text = Text(f"{in_progress} {tokens_info} • {reqs_info}")
+ tokens_info = f"{self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k TPM"
+ reqs_info = f"{int(self.available_request_capacity)}/{self.max_requests_per_minute} RPM"
+ in_progress = (
+ f" [gold3]In Progress:[/gold3] {int(self.num_tasks_in_progress)} "
+ + ("requests" if self.num_tasks_in_progress != 1 else "request")
+ )
+ capacity_text = (
+ f" [gold3]Capacity:[/gold3] {tokens_info} • {reqs_info}"
+ )

- display = Group(self._rich_progress, capacity_text)
+ # Format usage stats
+ usage_parts = []
+ if self.total_cost > 0:
+ usage_parts.append(f"${self.total_cost:.4f}")
+ if self.total_input_tokens > 0 or self.total_output_tokens > 0:
+ input_k = self.total_input_tokens / 1000
+ output_k = self.total_output_tokens / 1000
+ usage_parts.append(f"{input_k:.1f}k in • {output_k:.1f}k out")
+ if self.total_cache_read_tokens > 0:
+ cache_k = self.total_cache_read_tokens / 1000
+ usage_parts.append(f"{cache_k:.1f}k cached")
+
+ usage_text = ""
+ if usage_parts:
+ usage_text = f" [gold3]Usage:[/gold3] {' • '.join(usage_parts)}"
+
+ if usage_text:
+ display = Group(self._rich_progress, in_progress, capacity_text, usage_text)
+ else:
+ display = Group(self._rich_progress, in_progress, capacity_text)
  live.update(display)

  await asyncio.sleep(0.1)
@@ -252,7 +320,7 @@ class StatusTracker:
  return
  while not self._manual_stop_event.is_set():
  print(
- f"Completed {self.num_tasks_succeeded}/{self.progress_bar_total} requests"
+ f"[{self.client_name}] Completed {self.num_tasks_succeeded}/{self.progress_bar_total} requests"
  )
  await asyncio.sleep(self.progress_print_interval)

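
With illustrative totals, the summary line that the new log_final_status code prints is assembled like this:

    total_cost, total_input, total_output = 0.021, 1_000, 500
    total_cache_read, total_cache_write = 10_000, 2_000

    parts = [
        f"Cost: ${total_cost:.4f}",
        f"Tokens: {total_input:,} in / {total_output:,} out",
        f"Cache: {total_cache_read:,} read",
        f"{total_cache_write:,} write",
    ]
    print(" | ".join(parts))
    # Cost: $0.0210 | Tokens: 1,000 in / 500 out | Cache: 10,000 read | 2,000 write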
src/lm_deluge/usage.py
@@ -1,5 +1,4 @@
  from dataclasses import dataclass
- from typing import Optional


  @dataclass
@@ -13,8 +12,8 @@ class Usage:

  input_tokens: int = 0
  output_tokens: int = 0
- cache_read_tokens: Optional[int] = None # Tokens read from cache (Anthropic)
- cache_write_tokens: Optional[int] = None # Tokens written to cache (Anthropic)
+ cache_read_tokens: int = 0
+ cache_write_tokens: int = 0

  @property
  def total_input_tokens(self) -> int:
@@ -47,18 +46,29 @@ class Usage:
  return cls(
  input_tokens=usage_data.get("input_tokens", 0),
  output_tokens=usage_data.get("output_tokens", 0),
- cache_read_tokens=usage_data.get("cache_read_input_tokens"),
- cache_write_tokens=usage_data.get("cache_creation_input_tokens"),
+ cache_read_tokens=usage_data.get("cache_read_input_tokens", 0),
+ cache_write_tokens=usage_data.get("cache_creation_input_tokens", 0),
  )

  @classmethod
  def from_openai_usage(cls, usage_data: dict) -> "Usage":
- """Create Usage from OpenAI API response usage data."""
+ """Create Usage from OpenAI API response usage data.
+
+ OpenAI supports prompt caching - cached tokens appear in prompt_tokens_details.cached_tokens.
+ Caching is automatic for prompts over 1024 tokens.
+ """
+ prompt_tokens_details = usage_data.get("prompt_tokens_details", {})
+ cached_tokens = (
+ prompt_tokens_details.get("cached_tokens", 0)
+ if prompt_tokens_details
+ else 0
+ )
+
  return cls(
  input_tokens=usage_data.get("prompt_tokens", 0),
  output_tokens=usage_data.get("completion_tokens", 0),
- cache_read_tokens=None, # OpenAI doesn't support caching yet
- cache_write_tokens=None,
+ cache_read_tokens=cached_tokens if cached_tokens > 0 else 0,
+ cache_write_tokens=0, # OpenAI doesn't charge separately for cache writes
  )

  @classmethod
@@ -67,18 +77,23 @@ class Usage:
  return cls(
  input_tokens=usage_data.get("prompt_tokens", 0),
  output_tokens=usage_data.get("completion_tokens", 0),
- cache_read_tokens=None, # Mistral doesn't support caching
- cache_write_tokens=None,
+ cache_read_tokens=0, # Mistral doesn't support caching
+ cache_write_tokens=0,
  )

  @classmethod
  def from_gemini_usage(cls, usage_data: dict) -> "Usage":
- """Create Usage from Gemini API response usage data."""
+ """Create Usage from Gemini API response usage data.
+
+ Gemini supports context caching - cached tokens appear in cachedContentTokenCount.
+ """
+ cached_tokens = usage_data.get("cachedContentTokenCount", 0)
+
  return cls(
  input_tokens=usage_data.get("promptTokenCount", 0),
  output_tokens=usage_data.get("candidatesTokenCount", 0),
- cache_read_tokens=None, # Gemini doesn't support caching yet
- cache_write_tokens=None,
+ cache_read_tokens=cached_tokens if cached_tokens > 0 else 0,
+ cache_write_tokens=0, # Gemini doesn't charge separately for cache writes
  )

  def to_dict(self) -> dict:
@@ -100,8 +115,8 @@ class Usage:
  return cls(
  input_tokens=data.get("input_tokens", 0),
  output_tokens=data.get("output_tokens", 0),
- cache_read_tokens=data.get("cache_read_tokens"),
- cache_write_tokens=data.get("cache_write_tokens"),
+ cache_read_tokens=data.get("cache_read_tokens", 0),
+ cache_write_tokens=data.get("cache_write_tokens", 0),
  )

  def __add__(self, other: "Usage") -> "Usage":
@@ -111,14 +126,8 @@ class Usage:
  output_tokens=self.output_tokens + other.output_tokens,
  cache_read_tokens=(
  (self.cache_read_tokens or 0) + (other.cache_read_tokens or 0)
- if self.cache_read_tokens is not None
- or other.cache_read_tokens is not None
- else None
  ),
  cache_write_tokens=(
  (self.cache_write_tokens or 0) + (other.cache_write_tokens or 0)
- if self.cache_write_tokens is not None
- or other.cache_write_tokens is not None
- else None
  ),
  )
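
A quick sketch of how the reworked constructors pick up cached tokens from each provider's usage payload (abbreviated example payloads, not captured responses; the import path is assumed):

    from lm_deluge.usage import Usage  # assumed import path

    # OpenAI: cached tokens are a subset of prompt_tokens, under prompt_tokens_details
    u1 = Usage.from_openai_usage(
        {"prompt_tokens": 11_000, "completion_tokens": 500,
         "prompt_tokens_details": {"cached_tokens": 10_000}}
    )

    # Gemini: cached tokens are reported as cachedContentTokenCount
    u2 = Usage.from_gemini_usage(
        {"promptTokenCount": 2_000, "candidatesTokenCount": 300,
         "cachedContentTokenCount": 1_500}
    )

    total = u1 + u2  # __add__ now always produces plain ints, never None
    print(total.input_tokens, total.output_tokens, total.cache_read_tokens)
    # 13000 800 11500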
{lm_deluge-0.0.54 → lm_deluge-0.0.56/src/lm_deluge.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.54
+ Version: 0.0.56
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
The remaining files (13-80 in the list above) are unchanged between 0.0.54 and 0.0.56.