lm-deluge 0.0.22__tar.gz → 0.0.70__tar.gz
This diff compares publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in those registries.
- {lm_deluge-0.0.22/src/lm_deluge.egg-info → lm_deluge-0.0.70}/PKG-INFO +31 -13
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/README.md +28 -12
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/pyproject.toml +9 -2
- lm_deluge-0.0.70/src/lm_deluge/__init__.py +41 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/anthropic.py +24 -8
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/base.py +93 -5
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/bedrock.py +153 -32
- lm_deluge-0.0.70/src/lm_deluge/api_requests/chat_reasoning.py +4 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/gemini.py +21 -14
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/mistral.py +8 -9
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/openai.py +212 -119
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/response.py +33 -5
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/batches.py +256 -45
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/cache.py +10 -1
- lm_deluge-0.0.70/src/lm_deluge/cli.py +300 -0
- lm_deluge-0.0.70/src/lm_deluge/client.py +1064 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/config.py +1 -1
- lm_deluge-0.0.70/src/lm_deluge/file.py +527 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/image.py +30 -1
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/extract.py +7 -5
- lm_deluge-0.0.70/src/lm_deluge/mock_openai.py +641 -0
- lm_deluge-0.0.70/src/lm_deluge/models/__init__.py +151 -0
- lm_deluge-0.0.70/src/lm_deluge/models/anthropic.py +146 -0
- lm_deluge-0.0.70/src/lm_deluge/models/bedrock.py +114 -0
- lm_deluge-0.0.70/src/lm_deluge/models/cerebras.py +58 -0
- lm_deluge-0.0.70/src/lm_deluge/models/cohere.py +82 -0
- lm_deluge-0.0.70/src/lm_deluge/models/deepseek.py +27 -0
- lm_deluge-0.0.70/src/lm_deluge/models/fireworks.py +18 -0
- lm_deluge-0.0.70/src/lm_deluge/models/google.py +141 -0
- lm_deluge-0.0.70/src/lm_deluge/models/grok.py +82 -0
- lm_deluge-0.0.70/src/lm_deluge/models/groq.py +76 -0
- lm_deluge-0.0.70/src/lm_deluge/models/kimi.py +34 -0
- lm_deluge-0.0.70/src/lm_deluge/models/meta.py +57 -0
- lm_deluge-0.0.70/src/lm_deluge/models/minimax.py +10 -0
- lm_deluge-0.0.70/src/lm_deluge/models/mistral.py +110 -0
- lm_deluge-0.0.70/src/lm_deluge/models/openai.py +322 -0
- lm_deluge-0.0.70/src/lm_deluge/models/openrouter.py +64 -0
- lm_deluge-0.0.70/src/lm_deluge/models/together.py +96 -0
- lm_deluge-0.0.70/src/lm_deluge/presets/cerebras.py +17 -0
- lm_deluge-0.0.70/src/lm_deluge/presets/meta.py +13 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/prompt.py +679 -50
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/request_context.py +13 -10
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/tool.py +415 -27
- lm_deluge-0.0.70/src/lm_deluge/tracker.py +390 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/usage.py +30 -21
- lm_deluge-0.0.70/src/lm_deluge/util/harmony.py +47 -0
- lm_deluge-0.0.70/src/lm_deluge/warnings.py +46 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70/src/lm_deluge.egg-info}/PKG-INFO +31 -13
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/SOURCES.txt +28 -4
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/requires.txt +3 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/tests/test_builtin_tools.py +2 -2
- lm_deluge-0.0.70/tests/test_file_upload.py +627 -0
- lm_deluge-0.0.70/tests/test_mock_openai.py +479 -0
- lm_deluge-0.0.70/tests/test_openrouter_generic.py +238 -0
- lm_deluge-0.0.22/src/lm_deluge/__init__.py +0 -17
- lm_deluge-0.0.22/src/lm_deluge/agent.py +0 -0
- lm_deluge-0.0.22/src/lm_deluge/client.py +0 -658
- lm_deluge-0.0.22/src/lm_deluge/file.py +0 -154
- lm_deluge-0.0.22/src/lm_deluge/gemini_limits.py +0 -65
- lm_deluge-0.0.22/src/lm_deluge/models.py +0 -1247
- lm_deluge-0.0.22/src/lm_deluge/tracker.py +0 -256
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/LICENSE +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/setup.cfg +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.22 → lm_deluge-0.0.70}/tests/test_native_mcp_server.py +0 -0
{lm_deluge-0.0.22/src/lm_deluge.egg-info → lm_deluge-0.0.70}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.22
+Version: 0.0.70
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -23,6 +23,8 @@ Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
 Requires-Dist: rich
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == "openai"
 Dynamic: license-file
 
 # lm-deluge
@@ -54,12 +56,12 @@ The package relies on environment variables for API keys. Typical variables incl
 
 ## Quickstart
 
-
+`LLMClient` uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.
 
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
 print(resp[0].completion)
 ```
@@ -71,7 +73,7 @@ To distribute your requests across models, just provide a list of more than one
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient(
     ["gpt-4o-mini", "claude-3-haiku"],
     max_requests_per_minute=10_000
 )
@@ -85,8 +87,8 @@ print(resp[0].completion)
 
 API calls can be customized in a few ways.
 
-1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
-2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and
+1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
+2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, caching, **and progress display style**. Set `progress="rich"` (default), `"tqdm"`, or `"manual"` to choose how progress is reported. The manual option prints an update every 30 seconds.
 3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.
 
 Putting it all together:
@@ -109,6 +111,22 @@ await client.process_prompts_async(
 )
 ```
 
+### Queueing individual prompts
+
+You can queue prompts one at a time and track progress explicitly. Iterate over
+results as they finish with `as_completed` (or gather them all at once with
+`wait_for_all`):
+
+```python
+client = LLMClient("gpt-4.1-mini", progress="tqdm")
+client.open()
+client.start_nowait("hello there")
+# ... queue more tasks ...
+async for task_id, result in client.as_completed():
+    print(task_id, result.completion)
+client.close()
+```
+
 ## Multi-Turn Conversations
 
 Constructing conversations to pass to models is notoriously annoying. Each provider has a slightly different way of defining a list of messages, and with the introduction of images/multi-part messages it's only gotten worse. We provide convenience constructors so you don't have to remember all that stuff.
@@ -120,7 +138,7 @@ prompt = Conversation.system("You are a helpful assistant.").add(
     Message.user("What's in this image?").add_image("tests/image.jpg")
 )
 
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync([prompt])
 ```
 
@@ -136,9 +154,9 @@ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can ea
 from lm_deluge import LLMClient, Conversation
 
 # Simple file upload
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 conversation = Conversation.user(
-    "Please summarize this document",
+    "Please summarize this document",
     file="path/to/document.pdf"
 )
 resps = client.process_prompts_sync([conversation])
@@ -163,7 +181,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient
+client = LLMClient("claude-3-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -200,7 +218,7 @@ config = {
 all_tools = Tool.from_mcp_config(config)
 
 # let the model use the tools
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(
     ["List the files in the current directory"],
     tools=tools
@@ -237,7 +255,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient
+client = LLMClient("claude-3-5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools" # Cache system message and any tools
@@ -274,7 +292,7 @@ We support all models in `src/lm_deluge/models.py`. Vertex support is not planne
 
 ## Feature Support
 
-We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
+We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Passing `None` (or the string `"none"`) disables Gemini thoughts entirely. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
 
 ## Built‑in tools
 
{lm_deluge-0.0.22 → lm_deluge-0.0.70}/README.md

@@ -27,12 +27,12 @@ The package relies on environment variables for API keys. Typical variables incl
 
 ## Quickstart
 
-
+`LLMClient` uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.
 
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
 print(resp[0].completion)
 ```
@@ -44,7 +44,7 @@ To distribute your requests across models, just provide a list of more than one
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient
+client = LLMClient(
     ["gpt-4o-mini", "claude-3-haiku"],
     max_requests_per_minute=10_000
 )
@@ -58,8 +58,8 @@ print(resp[0].completion)
 
 API calls can be customized in a few ways.
 
-1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
-2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and
+1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
+2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, caching, **and progress display style**. Set `progress="rich"` (default), `"tqdm"`, or `"manual"` to choose how progress is reported. The manual option prints an update every 30 seconds.
 3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.
 
 Putting it all together:
@@ -82,6 +82,22 @@ await client.process_prompts_async(
 )
 ```
 
+### Queueing individual prompts
+
+You can queue prompts one at a time and track progress explicitly. Iterate over
+results as they finish with `as_completed` (or gather them all at once with
+`wait_for_all`):
+
+```python
+client = LLMClient("gpt-4.1-mini", progress="tqdm")
+client.open()
+client.start_nowait("hello there")
+# ... queue more tasks ...
+async for task_id, result in client.as_completed():
+    print(task_id, result.completion)
+client.close()
+```
+
 ## Multi-Turn Conversations
 
 Constructing conversations to pass to models is notoriously annoying. Each provider has a slightly different way of defining a list of messages, and with the introduction of images/multi-part messages it's only gotten worse. We provide convenience constructors so you don't have to remember all that stuff.
@@ -93,7 +109,7 @@ prompt = Conversation.system("You are a helpful assistant.").add(
     Message.user("What's in this image?").add_image("tests/image.jpg")
 )
 
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync([prompt])
 ```
 
@@ -109,9 +125,9 @@ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can ea
 from lm_deluge import LLMClient, Conversation
 
 # Simple file upload
-client = LLMClient
+client = LLMClient("gpt-4.1-mini")
 conversation = Conversation.user(
-    "Please summarize this document",
+    "Please summarize this document",
     file="path/to/document.pdf"
 )
 resps = client.process_prompts_sync([conversation])
@@ -136,7 +152,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient
+client = LLMClient("claude-3-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -173,7 +189,7 @@ config = {
 all_tools = Tool.from_mcp_config(config)
 
 # let the model use the tools
-client = LLMClient
+client = LLMClient("gpt-4o-mini")
 resps = client.process_prompts_sync(
     ["List the files in the current directory"],
     tools=tools
@@ -210,7 +226,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient
+client = LLMClient("claude-3-5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools" # Cache system message and any tools
@@ -247,7 +263,7 @@ We support all models in `src/lm_deluge/models.py`. Vertex support is not planne
 
 ## Feature Support
 
-We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
+We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Passing `None` (or the string `"none"`) disables Gemini thoughts entirely. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
 
 ## Built‑in tools
 
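The queueing hunk above shows only the streaming `as_completed` form; the accompanying prose also names `wait_for_all` for gathering everything at once. A minimal sketch of that batch style, assuming `start_nowait` returns a task id and `wait_for_all` returns the collected results (neither signature appears in this diff):

```python
# Hedged sketch based on the README text above. start_nowait()'s return value and
# wait_for_all()'s exact signature/return shape are assumptions, not shown in the diff.
import asyncio

from lm_deluge import LLMClient


async def main():
    client = LLMClient("gpt-4.1-mini", progress="manual")
    client.open()
    task_ids = [client.start_nowait(p) for p in ["hello", "goodbye", "what's new?"]]
    results = await client.wait_for_all()  # gather all queued prompts at once
    for task_id, result in zip(task_ids, results):
        print(task_id, result.completion)
    client.close()


asyncio.run(main())
```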
{lm_deluge-0.0.22 → lm_deluge-0.0.70}/pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.22"
+version = "0.0.70"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -28,5 +28,12 @@ dependencies = [
     "pdf2image",
     "pillow",
     "fastmcp>=2.4",
-    "rich"
+    "rich",
+    # "textual>=0.58.0"
 ]
+
+[project.optional-dependencies]
+openai = ["openai>=1.0.0"]
+
+# [project.scripts]
+# deluge = "lm_deluge.cli:main"
lm_deluge-0.0.70/src/lm_deluge/__init__.py (new file)

@@ -0,0 +1,41 @@
+from .client import APIResponse, LLMClient, SamplingParams
+from .file import File
+from .prompt import Conversation, Message
+from .tool import Tool, ToolParams
+
+try:
+    from .mock_openai import (  # noqa
+        APIError,
+        APITimeoutError,
+        BadRequestError,
+        MockAsyncOpenAI,
+        RateLimitError,
+    )
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
+
+# dotenv.load_dotenv() - don't do this, fucks with other packages
+
+__all__ = [
+    "LLMClient",
+    "SamplingParams",
+    "APIResponse",
+    "Conversation",
+    "Message",
+    "Tool",
+    "ToolParams",
+    "File",
+]
+
+if _has_openai:
+    __all__.extend(
+        [
+            "MockAsyncOpenAI",
+            "APIError",
+            "APITimeoutError",
+            "BadRequestError",
+            "RateLimitError",
+        ]
+    )
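The new `__init__.py` only re-exports `MockAsyncOpenAI` and the error classes when the optional `openai` dependency can be imported. A consumer can feature-detect the extra the same way; this sketch assumes nothing beyond the names exported above:

```python
# Mirrors the package's own guard: these names are only present in lm_deluge's
# namespace when the optional extra is installed (pip install "lm_deluge[openai]").
try:
    from lm_deluge import MockAsyncOpenAI, RateLimitError  # noqa: F401
    HAS_MOCK_OPENAI = True
except ImportError:
    HAS_MOCK_OPENAI = False

print("mock OpenAI client available:", HAS_MOCK_OPENAI)
```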
{lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/anthropic.py

@@ -28,24 +28,28 @@ def _add_beta(headers: dict, beta: str):
 def _build_anthropic_request(
     model: APIModel,
     context: RequestContext,
-    # prompt: Conversation,
-    # tools: list[Tool | dict | MCPServer] | None,
-    # sampling_params: SamplingParams,
-    # cache_pattern: CachePattern | None = None,
 ):
     prompt = context.prompt
     cache_pattern = context.cache
     tools = context.tools
     sampling_params = context.sampling_params
     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
-    if not system_message:
-        print("WARNING: system_message is None")
+    # if not system_message:
+    #     print("WARNING: system_message is None")
     base_headers = {
         "x-api-key": os.getenv(model.api_key_env_var),
         "anthropic-version": "2023-06-01",
         "content-type": "application/json",
     }
 
+    # Check if any messages contain uploaded files (file_id)
+    # If so, add the files-api beta header
+    for msg in prompt.messages:
+        for file in msg.files:
+            if file.is_remote and file.remote_provider == "anthropic":
+                _add_beta(base_headers, "files-api-2025-04-14")
+                break
+
     request_json = {
         "model": model.name,
         "messages": messages,
@@ -57,14 +61,15 @@ def _build_anthropic_request(
     # handle thinking
     if model.reasoning_model and sampling_params.reasoning_effort:
         # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
             sampling_params.reasoning_effort
         )
         request_json["thinking"] = {
             "type": "enabled",
             "budget_tokens": budget,
         }
-
+        if "top_p" in request_json:
+            request_json["top_p"] = max(request_json["top_p"], 0.95)
         request_json["temperature"] = 1.0
         request_json["max_tokens"] += budget
     else:
@@ -74,12 +79,20 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message
 
+    # handle temp + top_p for opus 4.1/sonnet 4.5
+    if "4-1" in model.name or "4-5" in model.name:
+        if "temperature" in request_json and "top_p" in request_json:
+            request_json.pop("top_p")
+
     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
                 tool_definitions.append(tool.dump_for("anthropic"))
+            elif isinstance(tool, dict) and "url" in tool:
+                _add_beta(base_headers, "mcp-client-2025-04-04")
+                mcp_servers.append(tool)
             elif isinstance(tool, dict):
                 tool_definitions.append(tool)
                 # add betas if needed
@@ -93,6 +106,9 @@ def _build_anthropic_request(
                     _add_beta(base_headers, "computer-use-2025-01-24")
                 elif tool["type"] == "code_execution_20250522":
                     _add_beta(base_headers, "code-execution-2025-05-22")
+                elif tool["type"] in ["memory_20250818", "clear_tool_uses_20250919"]:
+                    _add_beta(base_headers, "context-management-2025-06-27")
+
             elif isinstance(tool, MCPServer):
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool.for_anthropic())
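Read on its own, the thinking change in the first two hunks is a mapping from `reasoning_effort` to an Anthropic thinking budget, plus a top_p floor. A standalone sketch of just that mapping (the real code mutates `request_json` in place; this helper is illustrative, not part of the package):

```python
# Illustrative restatement of the reasoning_effort -> "thinking" translation in the
# hunks above; assumes reasoning_effort is one of the four tiers shown there.
def thinking_fields(reasoning_effort: str, max_tokens: int, top_p: float | None) -> dict:
    budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[reasoning_effort]
    fields: dict = {
        "thinking": {"type": "enabled", "budget_tokens": budget},
        "temperature": 1.0,                 # thinking requests are pinned to temperature 1.0
        "max_tokens": max_tokens + budget,  # budget is added on top of the completion cap
    }
    if top_p is not None:
        fields["top_p"] = max(top_p, 0.95)  # top_p is raised to at least 0.95 when thinking is on
    return fields


# e.g. thinking_fields("medium", 2048, 0.9) yields a 4096-token budget and top_p=0.95
```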
{lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/base.py

@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod
 
@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse
 
@@ -52,6 +54,9 @@ class APIRequestBase(ABC):
         self, base_headers: dict[str, str], exclude_patterns: list[str] | None = None
     ) -> dict[str, str]:
         """Merge extra_headers with base headers, giving priority to extra_headers."""
+        # Filter out None values from base headers (e.g., missing API keys)
+        base_headers = {k: v for k, v in base_headers.items() if v is not None}
+
         if not self.context.extra_headers:
             return base_headers
 
@@ -69,6 +74,9 @@ class APIRequestBase(ABC):
         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
         merged.update(filtered_extra)
+
+        # Filter out None values from final merged headers
+        merged = {k: v for k, v in merged.items() if v is not None}
         return merged
 
     def handle_success(self, data):
@@ -76,15 +84,95 @@ class APIRequestBase(ABC):
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)
 
+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                        last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-
-
-
-
-
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)