lm-deluge 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff shows the changes between package versions that have been publicly released to one of the supported registries, as they appear in those registries. It is provided for informational purposes only.

lm_deluge/tracker.py CHANGED
@@ -1,21 +1,109 @@
+import asyncio
 import time
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from rich.console import Console, Group
+from rich.live import Live
+from rich.progress import (
+    BarColumn,
+    MofNCompleteColumn,
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+)
+from rich.text import Text
+from tqdm import tqdm
+
+SECONDS_TO_PAUSE_AFTER_RATE_LIMIT_ERROR = 5
 
 
 @dataclass
 class StatusTracker:
+    max_requests_per_minute: int
+    max_tokens_per_minute: int
+    max_concurrent_requests: int
     num_tasks_started: int = 0
     num_tasks_in_progress: int = 0
     num_tasks_succeeded: int = 0
     num_tasks_failed: int = 0
     num_rate_limit_errors: int = 0
     time_of_last_rate_limit_error: int | float = 0
-    total_requests = 0
+    total_requests: int = 0
+    retry_queue: asyncio.Queue = field(default_factory=asyncio.Queue)
+
+    # Progress bar configuration
+    use_progress_bar: bool = True
+    progress_bar_total: int | None = None
+    progress_bar_disable: bool = False
+    _pbar: tqdm | None = None
+
+    # Rich display configuration
+    use_rich: bool = True
+    _rich_console: Console | None = None
+    _rich_live: object | None = None
+    _rich_progress: object | None = None
+    _rich_task_id: object | None = None
+    _rich_display_task: asyncio.Task | None = None
+    _rich_stop_event: asyncio.Event | None = None
+
+    def __post_init__(self):
+        self.available_request_capacity = self.max_requests_per_minute
+        self.available_token_capacity = self.max_tokens_per_minute
+        self.last_update_time = time.time() - 1
+        self.last_pbar_update_time = time.time() - 1
+        self.limiting_factor = None
 
     @property
     def time_since_rate_limit_error(self):
         return time.time() - self.time_of_last_rate_limit_error
 
+    @property
+    def seconds_to_pause(self):
+        return max(
+            0,
+            SECONDS_TO_PAUSE_AFTER_RATE_LIMIT_ERROR - self.time_since_rate_limit_error,
+        )
+
+    def set_limiting_factor(self, factor):
+        self.limiting_factor = factor
+
+    def check_capacity(self, num_tokens: int):
+        request_available = self.available_request_capacity >= 1
+        tokens_available = self.available_token_capacity >= num_tokens
+        concurrent_request_available = (
+            self.num_tasks_in_progress < self.max_concurrent_requests
+        )
+        if request_available and tokens_available and concurrent_request_available:
+            self.available_request_capacity -= 1
+            self.available_token_capacity -= num_tokens
+            self.num_tasks_started += 1
+            self.num_tasks_in_progress += 1
+            self.set_limiting_factor(None)
+            return True
+        else:
+            # update reason why
+            if not request_available:
+                self.set_limiting_factor("Requests")
+            elif not concurrent_request_available:
+                self.set_limiting_factor("Concurrent Requests")
+            elif not tokens_available:
+                self.set_limiting_factor("Tokens")
+
+    def update_capacity(self):
+        current_time = time.time()
+        seconds_since_update = current_time - self.last_update_time
+        self.available_request_capacity = min(
+            self.available_request_capacity
+            + self.max_requests_per_minute * seconds_since_update / 60.0,
+            self.max_requests_per_minute,
+        )
+        self.available_token_capacity = min(
+            self.available_token_capacity
+            + self.max_tokens_per_minute * seconds_since_update / 60.0,
+            self.max_tokens_per_minute,
+        )
+        self.last_update_time = current_time
+
     def start_task(self, task_id):
         self.num_tasks_started += 1
         self.num_tasks_in_progress += 1
@@ -27,12 +115,16 @@ class StatusTracker:
     def task_succeeded(self, task_id):
         self.num_tasks_in_progress -= 1
         self.num_tasks_succeeded += 1
+        self.increment_pbar()
 
     def task_failed(self, task_id):
         self.num_tasks_in_progress -= 1
         self.num_tasks_failed += 1
 
     def log_final_status(self):
+        # Close progress bar before printing final status
+        self.close_progress_bar()
+
         if self.num_tasks_failed > 0:
             print(
                 f"{self.num_tasks_failed} / {self.num_tasks_started} requests failed."
@@ -41,3 +133,121 @@ class StatusTracker:
             print(
                 f"{self.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate."
             )
+
+    @property
+    def pbar(self) -> tqdm | None:
+        """Backward compatibility property to access progress bar."""
+        return self._pbar
+
+    def init_progress_bar(self, total: int | None = None, disable: bool | None = None):
+        """Initialize progress bar if enabled."""
+        if not self.use_progress_bar:
+            return
+
+        if self.use_rich:
+            self._init_rich_display(total, disable)
+        else:
+            # Use provided values or fall back to instance defaults
+            pbar_total = total if total is not None else self.progress_bar_total
+            pbar_disable = disable if disable is not None else self.progress_bar_disable
+            self._pbar = tqdm(total=pbar_total, disable=pbar_disable)
+            self.update_pbar()
+
+    def close_progress_bar(self):
+        """Close progress bar if it exists."""
+        if self.use_rich and self._rich_stop_event:
+            self._close_rich_display()
+        elif self._pbar is not None:
+            self._pbar.close()
+            self._pbar = None
+
+    def _init_rich_display(self, total: int | None = None, disable: bool | None = None):
+        """Initialize Rich display components."""
+        if disable:
+            return
+
+        pbar_total = total if total is not None else self.progress_bar_total
+        if pbar_total is None:
+            pbar_total = 100  # Default fallback
+
+        self._rich_console = Console()
+        self._rich_stop_event = asyncio.Event()
+
+        # Start the display updater task
+        self._rich_display_task = asyncio.create_task(
+            self._rich_display_updater(pbar_total)
+        )
+
+    async def _rich_display_updater(self, total: int):
+        """Update Rich display independently."""
+        if not self._rich_console or self._rich_stop_event is None:
+            return
+
+        # Create progress bar without console so we can use it in Live
+        progress = Progress(
+            SpinnerColumn(),
+            TextColumn("Processing requests..."),
+            BarColumn(),
+            MofNCompleteColumn(),
+        )
+        main_task = progress.add_task("requests", total=total)
+
+        # Use Live to combine progress + text
+
+        with Live(console=self._rich_console, refresh_per_second=10) as live:
+            while not self._rich_stop_event.is_set():
+                completed = self.num_tasks_succeeded
+                progress.update(main_task, completed=completed)
+
+                # Create capacity info text
+                tokens_info = f"TPM Capacity: {self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k"
+                reqs_info = f"RPM Capacity: {int(self.available_request_capacity)}/{self.max_requests_per_minute}"
+                in_progress = f"In Progress: {int(self.num_tasks_in_progress)}"
+                capacity_text = Text(f"{in_progress} • {tokens_info} • {reqs_info}")
+
+                # Group progress bar and text
+                display = Group(progress, capacity_text)
+                live.update(display)
+
+                await asyncio.sleep(0.1)
+
+    def _close_rich_display(self):
+        """Clean up Rich display."""
+        if self._rich_stop_event:
+            self._rich_stop_event.set()
+        if self._rich_display_task and not self._rich_display_task.done():
+            self._rich_display_task.cancel()
+
+        self._rich_console = None
+        self._rich_live = None
+        self._rich_display_task = None
+        self._rich_stop_event = None
+
+    def update_pbar(self, n: int = 0):
+        """Update progress bar status and optionally increment.
+
+        Args:
+            n: Number of items to increment (0 means just update postfix)
+        """
+        current_time = time.time()
+        if self._pbar and (current_time - self.last_pbar_update_time > 1):
+            self.last_pbar_update_time = current_time
+            self._pbar.set_postfix(
+                {
+                    "Token Capacity": f"{self.available_token_capacity / 1_000:.1f}k",
+                    "Req. Capacity": f"{int(self.available_request_capacity)}",
+                    "Reqs. in Progress": self.num_tasks_in_progress,
+                    "Limiting Factor": self.limiting_factor,
+                }
+            )
+
+        if n > 0 and self._pbar:
+            self._pbar.update(n)
+
+    def increment_pbar(self):
+        """Increment progress bar by 1."""
+        if self.use_rich:
+            # Rich display is updated automatically by the display updater
+            pass
+        elif self._pbar:
+            self._pbar.update(1)
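
The new `StatusTracker` fields and methods implement a token-bucket rate limiter: `update_capacity` refills the request and token budgets in proportion to elapsed time, `check_capacity` reserves one request plus an estimated token count (recording the limiting factor when it can't), and the task callbacks feed the progress display. A minimal, hypothetical driver loop is sketched below; it is not taken from this package, and the token estimate and sleep interval are assumptions.

```python
import asyncio

from lm_deluge.tracker import StatusTracker


async def run_requests(prompts: list[str]) -> None:
    tracker = StatusTracker(
        max_requests_per_minute=1_000,
        max_tokens_per_minute=100_000,
        max_concurrent_requests=50,
    )
    tracker.init_progress_bar(total=len(prompts))

    pending = list(enumerate(prompts))
    while pending:
        tracker.update_capacity()  # replenish request/token budgets based on elapsed time
        task_id, prompt = pending[0]
        estimated_tokens = len(prompt) // 4  # rough token estimate (assumption)
        if tracker.check_capacity(estimated_tokens):
            pending.pop(0)
            # ...dispatch the actual API call here, then report the outcome via
            # tracker.task_succeeded(task_id) or tracker.task_failed(task_id).
            tracker.task_succeeded(task_id)
        else:
            # back off briefly; seconds_to_pause is nonzero after a rate limit error
            await asyncio.sleep(tracker.seconds_to_pause or 0.05)

    tracker.log_final_status()


asyncio.run(run_requests(["hello"] * 5))
```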
lm_deluge/usage.py ADDED
@@ -0,0 +1,114 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class Usage:
+    """
+    Unified usage tracking for all API providers.
+
+    Tracks token usage including cache hits and writes for providers that support it.
+    For providers that don't support caching, cache_read and cache_write will be None.
+    """
+
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_read_tokens: Optional[int] = None  # Tokens read from cache (Anthropic)
+    cache_write_tokens: Optional[int] = None  # Tokens written to cache (Anthropic)
+
+    @property
+    def total_input_tokens(self) -> int:
+        """Total input tokens including both fresh input, cache writes, and cache reads."""
+        result = self.input_tokens
+        if self.cache_read_tokens is not None:
+            result += self.cache_read_tokens
+        if self.cache_write_tokens is not None:
+            result += self.cache_write_tokens
+        return result
+
+    @property
+    def total_tokens(self) -> int:
+        """Total tokens processed (input + output)."""
+        return self.total_input_tokens + self.output_tokens
+
+    @property
+    def has_cache_hit(self) -> bool:
+        """Whether this request had any cache hits."""
+        return self.cache_read_tokens is not None and self.cache_read_tokens > 0
+
+    @property
+    def has_cache_write(self) -> bool:
+        """Whether this request wrote to cache."""
+        return self.cache_write_tokens is not None and self.cache_write_tokens > 0
+
+    @classmethod
+    def from_anthropic_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from Anthropic API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("input_tokens", 0),
+            output_tokens=usage_data.get("output_tokens", 0),
+            cache_read_tokens=usage_data.get("cache_read_input_tokens"),
+            cache_write_tokens=usage_data.get("cache_creation_input_tokens"),
+        )
+
+    @classmethod
+    def from_openai_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from OpenAI API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("prompt_tokens", 0),
+            output_tokens=usage_data.get("completion_tokens", 0),
+            cache_read_tokens=None,  # OpenAI doesn't support caching yet
+            cache_write_tokens=None,
+        )
+
+    @classmethod
+    def from_mistral_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from Mistral API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("prompt_tokens", 0),
+            output_tokens=usage_data.get("completion_tokens", 0),
+            cache_read_tokens=None,  # Mistral doesn't support caching
+            cache_write_tokens=None,
+        )
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for serialization."""
+        return {
+            "input_tokens": self.input_tokens,
+            "output_tokens": self.output_tokens,
+            "cache_read_tokens": self.cache_read_tokens,
+            "cache_write_tokens": self.cache_write_tokens,
+            "total_input_tokens": self.total_input_tokens,
+            "total_tokens": self.total_tokens,
+            "has_cache_hit": self.has_cache_hit,
+            "has_cache_write": self.has_cache_write,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "Usage":
+        """Create Usage from dictionary."""
+        return cls(
+            input_tokens=data.get("input_tokens", 0),
+            output_tokens=data.get("output_tokens", 0),
+            cache_read_tokens=data.get("cache_read_tokens"),
+            cache_write_tokens=data.get("cache_write_tokens"),
+        )
+
+    def __add__(self, other: "Usage") -> "Usage":
+        """Add two Usage objects together."""
+        return Usage(
+            input_tokens=self.input_tokens + other.input_tokens,
+            output_tokens=self.output_tokens + other.output_tokens,
+            cache_read_tokens=(
+                (self.cache_read_tokens or 0) + (other.cache_read_tokens or 0)
+                if self.cache_read_tokens is not None
+                or other.cache_read_tokens is not None
+                else None
+            ),
+            cache_write_tokens=(
+                (self.cache_write_tokens or 0) + (other.cache_write_tokens or 0)
+                if self.cache_write_tokens is not None
+                or other.cache_write_tokens is not None
+                else None
+            ),
+        )
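
The new `Usage` dataclass normalizes provider-specific usage payloads and supports aggregation with `+`. A small illustrative sketch follows; the raw usage dicts are made-up values in each provider's response shape.

```python
from lm_deluge.usage import Usage

# Parse provider-shaped usage payloads (values here are invented for illustration).
anthropic_usage = Usage.from_anthropic_usage(
    {"input_tokens": 12, "output_tokens": 40, "cache_read_input_tokens": 900}
)
openai_usage = Usage.from_openai_usage({"prompt_tokens": 25, "completion_tokens": 18})

# Aggregate across responses; cache fields stay None unless some provider reported them.
total = anthropic_usage + openai_usage
print(total.total_input_tokens)  # 12 + 900 + 25 = 937
print(total.total_tokens)        # 937 + 40 + 18 = 995
print(total.has_cache_hit)       # True, since cache_read_tokens > 0
```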
lm_deluge/util/json.py CHANGED
@@ -1,5 +1,6 @@
-import re
 import json
+import re
+
 import json5
 
 
@@ -166,3 +167,19 @@ def load_json(
         pass
 
     raise ValueError(f"Invalid JSON string: {json_string}")
+
+
+def try_load_json(
+    json_string: str | None,
+    allow_json5: bool = True,
+    allow_partial: bool = False,
+    allow_healing: bool = True,
+):
+    """
+    Like the above, except it returns None instead of raising an error.
+    """
+    try:
+        return load_json(json_string, allow_json5, allow_partial, allow_healing)
+    except Exception as e:
+        print(f"Failed to load json: {e}. Returning None.")
+        return None
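
The new `try_load_json` wraps `load_json` so callers can branch on `None` instead of catching exceptions. A brief illustrative sketch; the sample string is invented, and whether the json5/healing fallbacks recover it depends on `load_json`'s behavior.

```python
from lm_deluge.util.json import try_load_json

reply = '{"city": "Paris", "temp": 21,}'  # trailing comma: invalid as strict JSON
data = try_load_json(reply)  # json5/healing fallbacks may still recover it
if data is None:
    print("model did not return usable JSON")
else:
    print(data["city"])
```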
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.11
+Version: 0.0.13
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -22,8 +22,7 @@ Requires-Dist: lxml
 Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
-Requires-Dist: fasttext-wheel
-Requires-Dist: fasttext-langdetect
+Requires-Dist: rich
 Dynamic: license-file
 
 # lm-deluge
@@ -35,6 +34,7 @@ Dynamic: license-file
 - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
 - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
 - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
+- **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
 - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
 - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
 - **Sync and async APIs** – Use the client from sync or async code.
@@ -47,7 +47,7 @@ Dynamic: license-file
 pip install lm-deluge
 ```
 
-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GOOGLE_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables.
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GOOGLE_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
 
 ## Quickstart
 
@@ -63,7 +63,7 @@ print(resp[0].completion)
 
 ## Spraying Across Models
 
-To distribute your requests across models, just provide a list of more than one model to the constructor. The rate limits for the client apply to the client as a whole, not per-model, so you may want to increase them:
+To distribute your requests across models, just provide a list of more than one model to the constructor. See all available models in `models.py`. The rate limits for the client apply to the client as a whole, not per-model, so you may want to increase them:
 
 ```python
 from lm_deluge import LLMClient
@@ -84,7 +84,7 @@ API calls can be customized in a few ways.
 
 1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models. You can also pass many of these arguments directly to `LLMClient.basic` so you don't have to construct an entire `SamplingParams` object.
 2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and caching.
-3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object).
+3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.
 
 Putting it all together:
 
@@ -123,7 +123,9 @@ resps = client.process_prompts_sync([prompt])
 
 This just works. Images can be local images on disk, URLs, bytes, base64 data URLs... go wild. You can use `Conversation.to_openai` or `Conversation.to_anthropic` to format your messages for the OpenAI or Anthropic clients directly.
 
-## Basic Tool Use
+See a full multi-turn chat example in `examples/multiturn.md`.
+
+## Tool Use
 
 Define tools from Python functions and use them with any model:
 
@@ -135,27 +137,83 @@ def get_weather(city: str) -> str:
 
 tool = Tool.from_function(get_weather)
 client = LLMClient.basic("claude-3-haiku")
-resp = client.process_prompts_sync(["What's the weather in Paris?"], tools=[tool])
-```
+resps = client.process_prompts_sync(
+    ["What's the weather in Paris?"],
+    tools=[tool]
+)
 
-## MCP Integration
+# you can iterate over the tool calls in the response automatically
+for tool_call in resps[0].tool_calls:
+    print(tool_call.name, tool_call.arguments)
+```
 
-Connect to MCP servers to extend your models with external tools:
+You can also automatically instantiate tools from MCP servers. Under the hood, the constructor connects to the server, asks it what tools it has, and then creates a `Tool` from each of them, *with a built-in `call` and `acall` interface*.
 
 ```python
 from lm_deluge import LLMClient, Tool
 
-# Connect to a local MCP server
-mcp_tool = Tool.from_mcp("filesystem", command="npx -y @modelcontextprotocol/server-filesystem", args=["/path/to/directory"])
-client = LLMClient.basic("gpt-4o-mini", tools=[mcp_tool])
-resp = client.process_prompts_sync(["List the files in the current directory"])
+# Connect to a local MCP server and get all of its tools
+filesystem_tools = Tool.from_mcp(
+    "filesystem",
+    command="npx",
+    args=["-y", "@modelcontextprotocol/server-filesystem", "/path/to/directory"]
+)
+
+# or load ALL the tools from a Claude Desktop like config
+config = {
+    "mcpServers": {
+        "exa": {
+            "url": f"https://mcp.exa.ai/mcp?exaApiKey={os.getenv('EXA_API_KEY')}"
+        },
+        "zapier": {
+            "url": f"https://mcp.zapier.com/api/mcp/s/{os.getenv('ZAPIER_MCP_SECRET')}/mcp"
+        }
+    }
+}
+all_tools = Tool.from_mcp_config(config)
+
+# let the model use the tools
+client = LLMClient.basic("gpt-4o-mini")
+resps = client.process_prompts_sync(
+    ["List the files in the current directory"],
+    tools=all_tools
+)
+
+# call the tools
+for tool_call in resps[0].tool_calls:
+    # this is dumb sorry will make it better
+    tool_to_call = [x for x in all_tools if x.name == tool_call.name][0]
+    tool_to_call.call(**tool_call.arguments)  # in async code, use .acall()
 ```
 
-## Caching
+### Prompt Caching (Anthropic)
+
+For Anthropic models, you can use prompt caching to reduce costs and latency for repeated context. This uses Anthropic's server-side prompt caching. Other providers like OpenAI and Google do this automatically, but Anthropic requires you to manually set cache-control on messages. You can do this in lm-deluge with a simple "cache" argument to `process_prompts_sync` or `process_prompts_async`:
+
+```python
+from lm_deluge import LLMClient, Conversation, Message
+
+# Create a conversation with system message
+conv = (
+    Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+    .add(Message.user("How do I use asyncio.gather?"))
+)
+
+# Use prompt caching to cache system message and tools
+client = LLMClient.basic("claude-3-5-sonnet")
+resps = client.process_prompts_sync(
+    [conv],
+    cache="system_and_tools"  # Cache system message and any tools
+)
+```
+
+Available cache patterns: `"system_and_tools"`, `"tools_only"`, `"last_user_message"`, `"last_2_user_messages"`, `"last_3_user_messages"`.
+
+## Local Caching
 
-`lm_deluge.cache` includes LevelDB, SQLite and custom dictionary based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re‑sent across different `process_prompts_[...]` calls.
+Besides caching from model providers (which provides cache reads at a discount, but not for free) `lm_deluge.cache` includes LevelDB, SQLite and custom dictionary based caches to cache prompts locally. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re‑sent across different `process_prompts_[...]` calls.
 
-**IMPORTANT:** Caching does not currently work for prompts in the SAME batch. That is, if you call `process_prompts_sync` with the same prompt 100 times, there will be 0 cache hits. If you call `process_prompts_sync` a *second* time with those same 100 prompts, all 100 will be cache hits. The cache is intended to be persistent and help you save costs across many invocations, but it can't help with a single batch-inference session (yet!).
+**IMPORTANT:** Caching does not currently work for prompts in the SAME batch. That is, if you call `process_prompts_sync` with the same prompt 100 times, there will be 0 cache hits. If you call `process_prompts_sync` a *second* time with those same 100 prompts, all 100 will be cache hits. The local cache is intended to be persistent and help you save costs across many invocations, but it can't help with a single batch-inference session (yet!).
 
 ## Asynchronous Client
 Use this in asynchronous code, or in a Jupyter notebook. If you try to use the sync client in a Jupyter notebook, you'll have to use `nest-asyncio`, because internally the sync client uses async code. Don't do it! Just use the async client!
@@ -175,11 +233,11 @@ asyncio.run(main())
 
 ## Available Models
 
-We support all models in `src/lm_deluge/models.py`. An older version of this client supported Bedrock and Vertex. We plan to re-implement Bedrock support (our previous support was spotty and we need to figure out cross-region inference in order to support the newest Claude models). Vertex support is not currently planned, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
+We support all models in `src/lm_deluge/models.py`. Vertex support is not planned in the short term, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
 
 ## Feature Support
 
-We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We don't support tool use yet, but support is planned (keep an eye out for a unified tool definition spec that works for all models!). We support logprobs for OpenAI models that return them via the `logprobs` argument to the `LLMClient`.
+We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
 
 ## Built‑in tools
 
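
The README's new Computer Use bullet names a `computer_use` argument but does not show it in code. Below is a hypothetical sketch, assuming the argument is a boolean flag accepted by `process_prompts_sync` alongside the prompts (not verified against the package).

```python
from lm_deluge import LLMClient

client = LLMClient.basic("claude-3-5-sonnet")
resps = client.process_prompts_sync(
    ["Open a browser and check the weather in Paris."],
    computer_use=True,  # assumption: enables Anthropic's computer-use tools per the README bullet
)
for tool_call in resps[0].tool_calls:
    print(tool_call.name, tool_call.arguments)
```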
@@ -0,0 +1,42 @@
+lm_deluge/__init__.py,sha256=XR_EuBvJM4LggqfWdsrdQij1-UIGAFwyvHW9Rp8tnQA,280
+lm_deluge/agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lm_deluge/batches.py,sha256=dI5G9uvmoDU9hMohrkEhlIDyJPsmsVwZPwxx6qETxxk,17728
+lm_deluge/cache.py,sha256=VB1kv8rM2t5XWPR60uhszFcxLDnVKOe1oA5hYjVDjIo,4375
+lm_deluge/client.py,sha256=nkYO_wsGgUkFfqfb_8JrDzcU39RL9FfplKEK6zrncAo,20564
+lm_deluge/config.py,sha256=E47daVMvqMicoY2CDcgUnN5nVGDLAQejR358B-pRHZk,923
+lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
+lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
+lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
+lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
+lm_deluge/models.py,sha256=gW9ZhKYjwC-ZF-SzWqagFUE_7Mqerdtt_T5NxGo040E,46583
+lm_deluge/prompt.py,sha256=dKaV4gI9yLB0w0Ukdz14kGl34yMm5JNm6Sc-24WQPcg,32202
+lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
+lm_deluge/tool.py,sha256=C2zwU9-7fldfYT0TZDoVVGGSC6dN_It9GSxnfkN6Z_w,9822
+lm_deluge/tracker.py,sha256=Un2uthRNZk3dl2fODvvR6CCyFW3IKWfR0GjvpB_dxoM,9095
+lm_deluge/usage.py,sha256=oS-rmF3ZJ1RMtR7WI6BB2uVOAjJg0scvGF3zZRahWVg,4449
+lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
+lm_deluge/api_requests/anthropic.py,sha256=itKPu1cqCYcrr4fkLarlvSYr6tqLEAGVLGXEG05QXWM,8345
+lm_deluge/api_requests/base.py,sha256=ixI326EtRadoVCbmvIddzzzIp6E_zPfPOIfDEnucZrc,18060
+lm_deluge/api_requests/bedrock.py,sha256=yh4-zMrjlQfmxoBbrc2WYJ8gEqVkTP_-tMR7-XbTAtQ,11753
+lm_deluge/api_requests/common.py,sha256=pcOpODL4heoaNLjbA6_ogkrOAbUSKY3F37D2EyMLW10,359
+lm_deluge/api_requests/mistral.py,sha256=PkuoKbOJAB6DOK_NvzbxpWPAktfvonf69QjC0tVCYuE,5366
+lm_deluge/api_requests/openai.py,sha256=fj-ioXeK6-OGl9VIFpVy6XJRYOvf6TgMv7eu5mkC8RE,16482
+lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
+lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
+lm_deluge/api_requests/deprecated/deepseek.py,sha256=FEApI93VAWDwuaqTooIyKMgONYqRhdUmiAPBRme-IYs,4582
+lm_deluge/api_requests/deprecated/mistral.py,sha256=pOfOZUM4U35I3Plch84SnAFpDAzouHcSNNMtgxRvjy4,4709
+lm_deluge/api_requests/deprecated/vertex.py,sha256=ygXz2RjdXErPCSBbiHLEWbf5_sSTIi31WoX0UaoYzRI,15275
+lm_deluge/computer_use/anthropic_tools.py,sha256=p1CgHw1htX0PTdDW9Tni9N1azVMCoyA_ei-fMT6HHis,2478
+lm_deluge/llm_tools/__init__.py,sha256=TbZTETq9i_9yYskFWQKOG4pGh5ZiyE_D-h3RArfhGp4,231
+lm_deluge/llm_tools/extract.py,sha256=-GtyqJUxKvB567tk_NnCMklazz18xZBCPlAjYHTVUWg,3649
+lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
+lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
+lm_deluge/util/json.py,sha256=_4Oar2Cmz2L1DK3EtPLPDxD6rsYHxjROmV8ZpmMjQ-4,5822
+lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
+lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
+lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
+lm_deluge-0.0.13.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.13.dist-info/METADATA,sha256=GEkP9_w0VcPOGEKad9Yh24WOhiW4TQvC2pX4wK1x0jk,11549
+lm_deluge-0.0.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.13.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.13.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.8.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -1,38 +0,0 @@
-lm_deluge/__init__.py,sha256=rndOr4Rcfnpttz-onWU3vVEm-MM0WDFgz6KexKPAx0k,222
-lm_deluge/cache.py,sha256=VB1kv8rM2t5XWPR60uhszFcxLDnVKOe1oA5hYjVDjIo,4375
-lm_deluge/client.py,sha256=lGD4rqT7qHkTKddjRvKK_1bh7s8GNIzXzQ52GCZhfCg,28932
-lm_deluge/embed.py,sha256=m-X8UK4gV9KKD7Wv3yarAceMQaj7gR1JwzD_sB0MOQY,13183
-lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
-lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
-lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
-lm_deluge/models.py,sha256=oYrt0x0iVfTwoHjP-l1WWennzEDGwnZczj6ds6a6-xc,45406
-lm_deluge/prompt.py,sha256=_pJYwgjL39lDzMNmae8pPIBoORm_ekSM_9qU2iGGpOc,25445
-lm_deluge/rerank.py,sha256=tW1c3gQCAqaF8Ez-r-4qxYAcdKqxnLMxwHApKOUKwk4,11289
-lm_deluge/sampling_params.py,sha256=E2kewh1vz-1Qcy5xNBCzihfGgT_GcHYMfzaWb3FLiXs,739
-lm_deluge/tool.py,sha256=5nFbHchv12C1jkL8nkEh6v9WfxpC0O6rALP25z60WsI,9476
-lm_deluge/tracker.py,sha256=Dk99scN_NeDEO0gkLO5efXiZq11Ga-k6cerUHWN7IWY,1292
-lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
-lm_deluge/api_requests/anthropic.py,sha256=MMI_w9hVbevQpcqP3NVVindpTmLb2KHqjJQpIzCi5RM,7240
-lm_deluge/api_requests/base.py,sha256=w0MEOCIccxxy2c67Y2Y-QBox9rinIxQ7MLnp8953sjQ,15954
-lm_deluge/api_requests/bedrock.py,sha256=cvB85BFvL9HKTUsP9qFUCLQzJh83IQNAcLXuW6ReZK8,10520
-lm_deluge/api_requests/common.py,sha256=U0mX_wC3Tzg2-1u9nYUCTQqYzuYJqvLrICCNW_dbbJM,287
-lm_deluge/api_requests/mistral.py,sha256=lU9AOyb46uTzRjKw6Sd5iojEbBIMF432fRex7q6Xtwk,5423
-lm_deluge/api_requests/openai.py,sha256=BuMiM_2zJQXfnUjTT94JxJi3ZX5V-KQQueRG-R0SGuc,7361
-lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
-lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
-lm_deluge/api_requests/deprecated/deepseek.py,sha256=FEApI93VAWDwuaqTooIyKMgONYqRhdUmiAPBRme-IYs,4582
-lm_deluge/api_requests/deprecated/mistral.py,sha256=pOfOZUM4U35I3Plch84SnAFpDAzouHcSNNMtgxRvjy4,4709
-lm_deluge/api_requests/deprecated/vertex.py,sha256=ygXz2RjdXErPCSBbiHLEWbf5_sSTIi31WoX0UaoYzRI,15275
-lm_deluge/llm_tools/__init__.py,sha256=TbZTETq9i_9yYskFWQKOG4pGh5ZiyE_D-h3RArfhGp4,231
-lm_deluge/llm_tools/extract.py,sha256=-GtyqJUxKvB567tk_NnCMklazz18xZBCPlAjYHTVUWg,3649
-lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
-lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
-lm_deluge/util/json.py,sha256=dCeG9j1D17rXmQJbKJH79X0CGof4Wlqd55TDg4D6ky8,5388
-lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
-lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
-lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.11.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.11.dist-info/METADATA,sha256=jdPdmbo_F8ecKTAHtPTg2GeyCOFmmsJ6T4-4RUleU24,9210
-lm_deluge-0.0.11.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-lm_deluge-0.0.11.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.11.dist-info/RECORD,,