lm-deluge 0.0.32__tar.gz → 0.0.33__tar.gz
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of lm-deluge might be problematic.
- {lm_deluge-0.0.32/src/lm_deluge.egg-info → lm_deluge-0.0.33}/PKG-INFO +25 -12
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/README.md +24 -11
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/pyproject.toml +1 -1
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/client.py +95 -11
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/models.py +32 -2
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/tracker.py +108 -47
- {lm_deluge-0.0.32 → lm_deluge-0.0.33/src/lm_deluge.egg-info}/PKG-INFO +25 -12
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/LICENSE +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/setup.cfg +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/anthropic.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/gemini.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/prompt.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.32 → lm_deluge-0.0.33}/tests/test_native_mcp_server.py +0 -0
{lm_deluge-0.0.32/src/lm_deluge.egg-info → lm_deluge-0.0.33}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.32
+Version: 0.0.33
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -54,12 +54,12 @@ The package relies on environment variables for API keys. Typical variables incl
 
 ## Quickstart
 
-
+`LLMClient` uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.
 
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
 print(resp[0].completion)
 ```
@@ -71,7 +71,7 @@ To distribute your requests across models, just provide a list of more than one
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient(
     ["gpt-4o-mini", "claude-3-haiku"],
     max_requests_per_minute=10_000
 )
@@ -85,8 +85,8 @@ print(resp[0].completion)
 
 API calls can be customized in a few ways.
 
-1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
-2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and
+1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
+2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, caching, **and progress display style**. Set `progress="rich"` (default), `"tqdm"`, or `"manual"` to choose how progress is reported. The manual option prints an update every 30 seconds.
 3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.
 
 Putting it all together:
@@ -109,6 +109,19 @@ await client.process_prompts_async(
 )
 ```
 
+### Queueing individual prompts
+
+You can queue prompts one at a time and track progress explicitly:
+
+```python
+client = LLMClient("gpt-4.1-mini", progress="tqdm")
+client.open()
+task_id = client.start_nowait("hello there")
+# ... queue more tasks ...
+results = await client.wait_for_all()
+client.close()
+```
+
 ## Multi-Turn Conversations
 
 Constructing conversations to pass to models is notoriously annoying. Each provider has a slightly different way of defining a list of messages, and with the introduction of images/multi-part messages it's only gotten worse. We provide convenience constructors so you don't have to remember all that stuff.
@@ -120,7 +133,7 @@ prompt = Conversation.system("You are a helpful assistant.").add(
     Message.user("What's in this image?").add_image("tests/image.jpg")
 )
 
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync([prompt])
 ```
 
@@ -136,9 +149,9 @@ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can ea
 from lm_deluge import LLMClient, Conversation
 
 # Simple file upload
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 conversation = Conversation.user(
-    "Please summarize this document",
+    "Please summarize this document",
     file="path/to/document.pdf"
 )
 resps = client.process_prompts_sync([conversation])
@@ -163,7 +176,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient
+client = LLMClient("claude-3-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -200,7 +213,7 @@ config = {
 all_tools = Tool.from_mcp_config(config)
 
 # let the model use the tools
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(
     ["List the files in the current directory"],
     tools=tools
@@ -237,7 +250,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient
+client = LLMClient("claude-3-5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools"  # Cache system message and any tools
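
The README changes above introduce a `progress` argument on `LLMClient` with three display styles. Below is a minimal sketch of selecting each style, based only on the signatures shown in this diff; the model name and prompt are placeholders, and running it requires the corresponding API key in the environment.

```python
from lm_deluge import LLMClient

# "rich" (default) renders a live progress display with capacity info,
# "tqdm" uses a plain tqdm bar, and "manual" prints an update every 30 seconds.
for style in ("rich", "tqdm", "manual"):
    client = LLMClient("gpt-4o-mini", progress=style)
    resps = client.process_prompts_sync([f"Say hello from the {style} progress style."])
    print(style, resps[0].completion)
```

(The Quickstart snippet prints `resp[0].completion` while the variable is named `resps`; the sketch uses `resps` consistently.)
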
{lm_deluge-0.0.32 → lm_deluge-0.0.33}/README.md

@@ -27,12 +27,12 @@ The package relies on environment variables for API keys. Typical variables incl
 
 ## Quickstart
 
-
+`LLMClient` uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.
 
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
 print(resp[0].completion)
 ```
@@ -44,7 +44,7 @@ To distribute your requests across models, just provide a list of more than one
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient(
     ["gpt-4o-mini", "claude-3-haiku"],
     max_requests_per_minute=10_000
 )
@@ -58,8 +58,8 @@ print(resp[0].completion)
 
 API calls can be customized in a few ways.
 
-1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
-2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and
+1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
+2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, caching, **and progress display style**. Set `progress="rich"` (default), `"tqdm"`, or `"manual"` to choose how progress is reported. The manual option prints an update every 30 seconds.
 3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.
 
 Putting it all together:
@@ -82,6 +82,19 @@ await client.process_prompts_async(
 )
 ```
 
+### Queueing individual prompts
+
+You can queue prompts one at a time and track progress explicitly:
+
+```python
+client = LLMClient("gpt-4.1-mini", progress="tqdm")
+client.open()
+task_id = client.start_nowait("hello there")
+# ... queue more tasks ...
+results = await client.wait_for_all()
+client.close()
+```
+
 ## Multi-Turn Conversations
 
 Constructing conversations to pass to models is notoriously annoying. Each provider has a slightly different way of defining a list of messages, and with the introduction of images/multi-part messages it's only gotten worse. We provide convenience constructors so you don't have to remember all that stuff.
@@ -93,7 +106,7 @@ prompt = Conversation.system("You are a helpful assistant.").add(
     Message.user("What's in this image?").add_image("tests/image.jpg")
 )
 
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync([prompt])
 ```
 
@@ -109,9 +122,9 @@ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can ea
 from lm_deluge import LLMClient, Conversation
 
 # Simple file upload
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 conversation = Conversation.user(
-    "Please summarize this document",
+    "Please summarize this document",
     file="path/to/document.pdf"
 )
 resps = client.process_prompts_sync([conversation])
@@ -136,7 +149,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient
+client = LLMClient("claude-3-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -173,7 +186,7 @@ config = {
 all_tools = Tool.from_mcp_config(config)
 
 # let the model use the tools
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(
     ["List the files in the current directory"],
     tools=tools
@@ -210,7 +223,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient
+client = LLMClient("claude-3-5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools"  # Cache system message and any tools
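
The "Queueing individual prompts" section added to the README awaits `wait_for_all()`, so the snippet is presumably meant to run inside an async function. Here is a hedged sketch of that workflow, assuming `start_nowait` accepts a plain string prompt (as shown) and that `wait_for_all` returns one response object per queued task:

```python
import asyncio

from lm_deluge import LLMClient


async def main():
    client = LLMClient("gpt-4.1-mini", progress="tqdm")
    client.open()  # sets up the StatusTracker and progress display up front

    # Queue work incrementally; each call returns a task id immediately.
    task_ids = [client.start_nowait(f"Name one famous flood, #{i}.") for i in range(5)]

    results = await client.wait_for_all()  # gather everything queued so far
    client.close()  # logs final status and releases the tracker

    for task_id, result in zip(task_ids, results):
        print(task_id, getattr(result, "completion", None))


asyncio.run(main())
```

Because `open()` creates the tracker before any work is queued and `start_nowait` bumps the tracked total one task at a time (see the `tracker.add_to_total(1)` call in client.py below), the progress display grows as prompts are enqueued rather than requiring the total up front.
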
{lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/client.py

@@ -25,11 +25,10 @@ from .tracker import StatusTracker
 
 # TODO: get completions as they finish, not all at once at the end.
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
-class LLMClient(BaseModel):
+class _LLMClient(BaseModel):
     """
-    LLMClient
-
-    Handles models, sampling params for each model, model weights, rate limits, etc.
+    Internal LLMClient implementation using Pydantic.
+    Keeps all validation, serialization, and existing functionality.
     """
 
     model_names: str | list[str] = ["gpt-4.1-mini"]
@@ -53,6 +52,9 @@ class LLMClient(BaseModel):
     top_logprobs: int | None = None
     force_local_mcp: bool = False
 
+    # Progress configuration
+    progress: Literal["rich", "tqdm", "manual"] = "rich"
+
     # Internal state for async task handling
     _next_task_id: int = PrivateAttr(default=0)
     _tasks: dict[int, asyncio.Task] = PrivateAttr(default_factory=dict)
@@ -60,6 +62,23 @@ class LLMClient(BaseModel):
     _tracker: StatusTracker | None = PrivateAttr(default=None)
     _capacity_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)
 
+    # Progress management for queueing API
+    def open(self, total: int | None = None, show_progress: bool = True):
+        self._tracker = StatusTracker(
+            max_requests_per_minute=self.max_requests_per_minute,
+            max_tokens_per_minute=self.max_tokens_per_minute,
+            max_concurrent_requests=self.max_concurrent_requests,
+            progress_style=self.progress,
+            use_progress_bar=show_progress,
+        )
+        self._tracker.init_progress_bar(total)
+        return self
+
+    def close(self):
+        if self._tracker:
+            self._tracker.log_final_status()
+        self._tracker = None
+
     # NEW! Builder methods
     def with_model(self, model: str):
         self.model_names = [model]
@@ -90,7 +109,7 @@ class LLMClient(BaseModel):
             max_concurrent_requests=self.max_concurrent_requests,
             use_progress_bar=False,
             progress_bar_disable=True,
-
+            progress_style=self.progress,
         )
         return self._tracker
 
@@ -100,7 +119,7 @@ class LLMClient(BaseModel):
 
     @model_validator(mode="before")
     @classmethod
-    def fix_lists(cls, data) -> "LLMClient":
+    def fix_lists(cls, data) -> "_LLMClient":
         if isinstance(data.get("model_names"), str):
             data["model_names"] = [data["model_names"]]
         if not isinstance(data.get("sampling_params", []), list):
@@ -343,13 +362,10 @@ class LLMClient(BaseModel):
            max_requests_per_minute=self.max_requests_per_minute,
            max_tokens_per_minute=self.max_tokens_per_minute,
            max_concurrent_requests=self.max_concurrent_requests,
+           progress_style=self.progress,
            use_progress_bar=show_progress,
-           progress_bar_total=len(prompts),
-           progress_bar_disable=not show_progress,
-           use_rich=show_progress,
        )
-
-       tracker.init_progress_bar()
+       tracker.init_progress_bar(total=len(prompts), disable=not show_progress)
 
        # Create retry queue for failed requests
        retry_queue: asyncio.Queue[RequestContext] = asyncio.Queue()
@@ -510,6 +526,7 @@ class LLMClient(BaseModel):
        )
        task = asyncio.create_task(self._run_context(context))
        self._tasks[task_id] = task
+       tracker.add_to_total(1)
        return task_id
 
    async def start(
@@ -752,3 +769,70 @@ class LLMClient(BaseModel):
    # combined_results["limiting_factor"] = limiting_factor
 
    # return combined_results
+
+
+# Clean factory function with perfect IDE support
+@overload
+def LLMClient(model_names: str, **kwargs) -> _LLMClient: ...
+
+@overload
+def LLMClient(model_names: list[str], **kwargs) -> _LLMClient: ...
+
+def LLMClient(
+    model_names: str | list[str] = "gpt-4.1-mini",
+    *,
+    max_requests_per_minute: int = 1_000,
+    max_tokens_per_minute: int = 100_000,
+    max_concurrent_requests: int = 225,
+    sampling_params: list[SamplingParams] | None = None,
+    model_weights: list[float] | Literal["uniform", "dynamic"] = "uniform",
+    max_attempts: int = 5,
+    request_timeout: int = 30,
+    cache: Any = None,
+    extra_headers: dict[str, str] | None = None,
+    temperature: float = 0.75,
+    top_p: float = 1.0,
+    json_mode: bool = False,
+    max_new_tokens: int = 512,
+    reasoning_effort: Literal["low", "medium", "high", None] = None,
+    logprobs: bool = False,
+    top_logprobs: int | None = None,
+    force_local_mcp: bool = False,
+    progress: Literal["rich", "tqdm", "manual"] = "rich",
+) -> _LLMClient:
+    """
+    Create an LLMClient with model_names as a positional argument.
+
+    Args:
+        model_names: Model name(s) to use - can be a single string or list of strings
+        **kwargs: All other LLMClient configuration options (keyword-only)
+
+    Returns:
+        Configured LLMClient instance
+    """
+    # Handle default for mutable argument
+    if sampling_params is None:
+        sampling_params = []
+
+    # Simply pass everything to the Pydantic constructor
+    return _LLMClient(
+        model_names=model_names,
+        max_requests_per_minute=max_requests_per_minute,
+        max_tokens_per_minute=max_tokens_per_minute,
+        max_concurrent_requests=max_concurrent_requests,
+        sampling_params=sampling_params,
+        model_weights=model_weights,
+        max_attempts=max_attempts,
+        request_timeout=request_timeout,
+        cache=cache,
+        extra_headers=extra_headers,
+        temperature=temperature,
+        top_p=top_p,
+        json_mode=json_mode,
+        max_new_tokens=max_new_tokens,
+        reasoning_effort=reasoning_effort,
+        logprobs=logprobs,
+        top_logprobs=top_logprobs,
+        force_local_mcp=force_local_mcp,
+        progress=progress,
+    )
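
The client.py changes above split the public name in two: the Pydantic model is now `_LLMClient`, and `LLMClient` becomes a factory function with `@overload` stubs so that the model name(s) can be passed positionally while every other option stays keyword-only. A sketch of the resulting call surface; the `_LLMClient` import is shown only to illustrate the relationship and assumes the private class remains importable from `lm_deluge.client`:

```python
from lm_deluge import LLMClient
from lm_deluge.client import _LLMClient  # internal Pydantic model behind the factory

# str overload: one model, options keyword-only.
single = LLMClient("gpt-4.1-mini", progress="manual", max_new_tokens=256)

# list[str] overload: several models with shared rate limits.
multi = LLMClient(
    ["gpt-4o-mini", "claude-3-haiku"],
    max_requests_per_minute=5_000,
    request_timeout=60,
)

# Both calls return the same internal class; existing methods are unchanged.
assert isinstance(single, _LLMClient)
assert isinstance(multi, _LLMClient)
print(single.progress, multi.model_names)
```

Since the implementation enumerates every keyword explicitly instead of forwarding `**kwargs`, unknown options fail fast at runtime; note, though, that the `@overload` stubs advertise `**kwargs`, so static checkers will not flag misspelled keywords.
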
{lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/models.py

@@ -1261,9 +1261,39 @@ class APIModel:
 registry: dict[str, APIModel] = {}
 
 
-def register_model(
+def register_model(
+    id: str,
+    name: str,
+    api_base: str,
+    api_key_env_var: str,
+    api_spec: str,
+    input_cost: float | None = 0,  # $ per million input tokens
+    output_cost: float | None = 0,  # $ per million output tokens
+    supports_json: bool = False,
+    supports_logprobs: bool = False,
+    supports_responses: bool = False,
+    reasoning_model: bool = False,
+    regions: list[str] | dict[str, int] = field(default_factory=list),
+    tokens_per_minute: int | None = None,
+    requests_per_minute: int | None = None
+) -> APIModel:
     """Register a model configuration and return the created APIModel."""
-    model = APIModel(
+    model = APIModel(
+        id=id,
+        name=name,
+        api_base=api_base,
+        api_key_env_var=api_key_env_var,
+        api_spec=api_spec,
+        input_cost=input_cost,
+        output_cost=output_cost,
+        supports_json=supports_json,
+        supports_logprobs=supports_logprobs,
+        supports_responses=supports_responses,
+        reasoning_model=reasoning_model,
+        regions=regions,
+        tokens_per_minute=tokens_per_minute,
+        requests_per_minute=requests_per_minute
+    )
     registry[model.id] = model
     return model
 
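
`register_model` now takes explicit keyword parameters instead of an implicit signature, which makes it usable for wiring up custom endpoints. A hedged sketch of registering a self-hosted, OpenAI-compatible server with the new signature; every value below is illustrative, and `"openai"` as the `api_spec` string is an assumption rather than something this diff confirms:

```python
from lm_deluge.models import register_model, registry

model = register_model(
    id="my-vllm-llama",                   # hypothetical registry key
    name="llama-3.1-8b-instruct",         # hypothetical upstream model name
    api_base="http://localhost:8000/v1",  # hypothetical OpenAI-compatible endpoint
    api_key_env_var="MY_VLLM_API_KEY",
    api_spec="openai",                    # assumed spec identifier
    input_cost=0.1,                       # $ per million input tokens
    output_cost=0.2,                      # $ per million output tokens
    supports_json=True,
    regions=[],                           # passed explicitly; see note below
    tokens_per_minute=500_000,
    requests_per_minute=600,
)
print(model.id in registry)  # register_model also inserts the model into the registry
```

One caveat visible in the new signature: the default for `regions` is written as `field(default_factory=list)`, which in a plain function (unlike a dataclass field declaration) evaluates to a `dataclasses.Field` object rather than an empty list, so callers that care about `regions` should pass it explicitly.
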
{lm_deluge-0.0.32 → lm_deluge-0.0.33}/src/lm_deluge/tracker.py

@@ -1,6 +1,7 @@
 import asyncio
 import time
 from dataclasses import dataclass, field
+from typing import Literal
 
 from rich.console import Console, Group
 from rich.live import Live
@@ -35,17 +36,22 @@ class StatusTracker:
     use_progress_bar: bool = True
     progress_bar_total: int | None = None
     progress_bar_disable: bool = False
+    progress_style: Literal["rich", "tqdm", "manual"] = "rich"
+    progress_print_interval: float = 30.0
     _pbar: tqdm | None = None
 
     # Rich display configuration
-    use_rich: bool = True
     _rich_console: Console | None = None
     _rich_live: object | None = None
-    _rich_progress:
-    _rich_task_id:
+    _rich_progress: Progress | None = None
+    _rich_task_id: int | None = None
     _rich_display_task: asyncio.Task | None = None
     _rich_stop_event: asyncio.Event | None = None
 
+    # Manual print configuration
+    _manual_display_task: asyncio.Task | None = None
+    _manual_stop_event: asyncio.Event | None = None
+
     def __post_init__(self):
         self.available_request_capacity = self.max_requests_per_minute
         self.available_token_capacity = self.max_tokens_per_minute
@@ -147,69 +153,75 @@ class StatusTracker:
         if not self.use_progress_bar:
             return
 
-        if self.
-
-
-
-
-
+        pbar_total = total if total is not None else self.progress_bar_total
+        pbar_disable = disable if disable is not None else self.progress_bar_disable
+        if pbar_total is None:
+            pbar_total = 0
+        self.progress_bar_total = pbar_total
+
+        if self.progress_style == "rich":
+            if pbar_disable:
+                return
+            self._init_rich_display(pbar_total)
+        elif self.progress_style == "tqdm":
             self._pbar = tqdm(total=pbar_total, disable=pbar_disable)
+        elif self.progress_style == "manual":
+            self._init_manual_display(pbar_total)
+
         self.update_pbar()
 
     def close_progress_bar(self):
         """Close progress bar if it exists."""
-        if
-            self._close_rich_display()
-        elif self._pbar is not None:
-            self._pbar.close()
-            self._pbar = None
-
-    def _init_rich_display(self, total: int | None = None, disable: bool | None = None):
-        """Initialize Rich display components."""
-        if disable:
+        if not self.use_progress_bar:
             return
-
-
-
-
-
+        if self.progress_style == "rich":
+            if self._rich_stop_event:
+                self._close_rich_display()
+        elif self.progress_style == "tqdm":
+            if self._pbar is not None:
+                self._pbar.close()
+                self._pbar = None
+        elif self.progress_style == "manual":
+            self._close_manual_display()
+
+    def _init_rich_display(self, total: int):
+        """Initialize Rich display components."""
         self._rich_console = Console()
-        self.
-
-        # Start the display updater task
-        self._rich_display_task = asyncio.create_task(
-            self._rich_display_updater(pbar_total)
-        )
-
-    async def _rich_display_updater(self, total: int):
-        """Update Rich display independently."""
-        if not self._rich_console or self._rich_stop_event is None:
-            return
-
-        # Create progress bar without console so we can use it in Live
-        progress = Progress(
+        self._rich_progress = Progress(
            SpinnerColumn(),
            TextColumn("Processing requests..."),
            BarColumn(),
            MofNCompleteColumn(),
         )
-
+        self._rich_task_id = self._rich_progress.add_task("requests", total=total)
+        self._rich_stop_event = asyncio.Event()
+        self._rich_display_task = asyncio.create_task(self._rich_display_updater())
 
-
+    async def _rich_display_updater(self):
+        """Update Rich display independently."""
+        if (
+            not self._rich_console
+            or self._rich_progress is None
+            or self._rich_task_id is None
+            or self._rich_stop_event is None
+        ):
+            return
 
         with Live(console=self._rich_console, refresh_per_second=10) as live:
             while not self._rich_stop_event.is_set():
                 completed = self.num_tasks_succeeded
-
+                self._rich_progress.update(
+                    self._rich_task_id,
+                    completed=completed,
+                    total=self.progress_bar_total,
+                )
 
-                # Create capacity info text
                 tokens_info = f"TPM Capacity: {self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k"
                 reqs_info = f"RPM Capacity: {int(self.available_request_capacity)}/{self.max_requests_per_minute}"
                 in_progress = f"In Progress: {int(self.num_tasks_in_progress)}"
                 capacity_text = Text(f"{in_progress} • {tokens_info} • {reqs_info}")
 
-
-                display = Group(progress, capacity_text)
+                display = Group(self._rich_progress, capacity_text)
                 live.update(display)
 
                 await asyncio.sleep(0.1)
@@ -223,15 +235,45 @@ class StatusTracker:
 
         self._rich_console = None
         self._rich_live = None
+        self._rich_progress = None
+        self._rich_task_id = None
         self._rich_display_task = None
         self._rich_stop_event = None
 
+    def _init_manual_display(self, total: int):
+        """Initialize manual progress printer."""
+        self.progress_bar_total = total
+        self._manual_stop_event = asyncio.Event()
+        self._manual_display_task = asyncio.create_task(
+            self._manual_display_updater()
+        )
+
+    async def _manual_display_updater(self):
+        if self._manual_stop_event is None:
+            return
+        while not self._manual_stop_event.is_set():
+            print(
+                f"Completed {self.num_tasks_succeeded}/{self.progress_bar_total} requests"
+            )
+            await asyncio.sleep(self.progress_print_interval)
+
+    def _close_manual_display(self):
+        if self._manual_stop_event:
+            self._manual_stop_event.set()
+        if self._manual_display_task and not self._manual_display_task.done():
+            self._manual_display_task.cancel()
+        self._manual_display_task = None
+        self._manual_stop_event = None
+
     def update_pbar(self, n: int = 0):
         """Update progress bar status and optionally increment.
 
         Args:
            n: Number of items to increment (0 means just update postfix)
         """
+        if self.progress_style != "tqdm":
+            return
+
         current_time = time.time()
         if self._pbar and (current_time - self.last_pbar_update_time > 1):
             self.last_pbar_update_time = current_time
@@ -249,8 +291,27 @@ class StatusTracker:
 
     def increment_pbar(self):
         """Increment progress bar by 1."""
-        if self.
-
-
-        elif self._pbar:
+        if not self.use_progress_bar:
+            return
+        if self.progress_style == "tqdm" and self._pbar:
             self._pbar.update(1)
+        # rich and manual are updated elsewhere
+
+    def add_to_total(self, n: int = 1):
+        """Increase the total number of tasks being tracked."""
+        if self.progress_bar_total is None:
+            self.progress_bar_total = 0
+        self.progress_bar_total += n
+        if not self.use_progress_bar:
+            return
+        if self.progress_style == "tqdm" and self._pbar:
+            self._pbar.total = self.progress_bar_total
+            self._pbar.refresh()
+        elif (
+            self.progress_style == "rich"
+            and self._rich_progress
+            and self._rich_task_id is not None
+        ):
+            self._rich_progress.update(
+                self._rich_task_id, total=self.progress_bar_total
+            )
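
tracker.py is where the three progress styles are actually implemented: `progress_style` selects between the Rich live display, a tqdm bar, and a background task that prints a summary every `progress_print_interval` seconds. A minimal sketch of driving the tracker directly with the manual style, assuming the constructor arguments used by client.py elsewhere in this diff are sufficient and that `num_tasks_succeeded` is an ordinary integer attribute:

```python
import asyncio

from lm_deluge.tracker import StatusTracker


async def demo():
    tracker = StatusTracker(
        max_requests_per_minute=1_000,
        max_tokens_per_minute=100_000,
        max_concurrent_requests=225,
        progress_style="manual",
        progress_print_interval=1.0,  # shortened from the 30 s default for the demo
    )
    tracker.init_progress_bar(total=3)    # starts the background printer task

    for _ in range(3):
        await asyncio.sleep(1.2)          # pretend a request finished
        tracker.num_tasks_succeeded += 1
        tracker.increment_pbar()          # only moves a tqdm bar; no-op for "manual"

    tracker.add_to_total(1)               # late-queued work grows the tracked total
    tracker.close_progress_bar()          # stops the manual printer


asyncio.run(demo())
```

The same `add_to_total` hook is what lets `start_nowait` register work after the display is already open, for all three styles.
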
{lm_deluge-0.0.32 → lm_deluge-0.0.33/src/lm_deluge.egg-info}/PKG-INFO

The changes to this file are identical to the PKG-INFO diff shown above: the Version field moves from 0.0.32 to 0.0.33, and the embedded README long description picks up the same Quickstart, progress-display, and prompt-queueing updates.