lm-deluge 0.0.22__tar.gz → 0.0.70__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. {lm_deluge-0.0.22/src/lm_deluge.egg-info → lm_deluge-0.0.70}/PKG-INFO +31 -13
  2. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/README.md +28 -12
  3. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/pyproject.toml +9 -2
  4. lm_deluge-0.0.70/src/lm_deluge/__init__.py +41 -0
  5. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/anthropic.py +24 -8
  6. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/base.py +93 -5
  7. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/bedrock.py +153 -32
  8. lm_deluge-0.0.70/src/lm_deluge/api_requests/chat_reasoning.py +4 -0
  9. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/gemini.py +21 -14
  10. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/mistral.py +8 -9
  11. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/openai.py +212 -119
  12. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/response.py +33 -5
  13. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/batches.py +256 -45
  14. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/cache.py +10 -1
  15. lm_deluge-0.0.70/src/lm_deluge/cli.py +300 -0
  16. lm_deluge-0.0.70/src/lm_deluge/client.py +1064 -0
  17. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/config.py +1 -1
  18. lm_deluge-0.0.70/src/lm_deluge/file.py +527 -0
  19. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/image.py +30 -1
  20. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/extract.py +7 -5
  21. lm_deluge-0.0.70/src/lm_deluge/mock_openai.py +641 -0
  22. lm_deluge-0.0.70/src/lm_deluge/models/__init__.py +151 -0
  23. lm_deluge-0.0.70/src/lm_deluge/models/anthropic.py +146 -0
  24. lm_deluge-0.0.70/src/lm_deluge/models/bedrock.py +114 -0
  25. lm_deluge-0.0.70/src/lm_deluge/models/cerebras.py +58 -0
  26. lm_deluge-0.0.70/src/lm_deluge/models/cohere.py +82 -0
  27. lm_deluge-0.0.70/src/lm_deluge/models/deepseek.py +27 -0
  28. lm_deluge-0.0.70/src/lm_deluge/models/fireworks.py +18 -0
  29. lm_deluge-0.0.70/src/lm_deluge/models/google.py +141 -0
  30. lm_deluge-0.0.70/src/lm_deluge/models/grok.py +82 -0
  31. lm_deluge-0.0.70/src/lm_deluge/models/groq.py +76 -0
  32. lm_deluge-0.0.70/src/lm_deluge/models/kimi.py +34 -0
  33. lm_deluge-0.0.70/src/lm_deluge/models/meta.py +57 -0
  34. lm_deluge-0.0.70/src/lm_deluge/models/minimax.py +10 -0
  35. lm_deluge-0.0.70/src/lm_deluge/models/mistral.py +110 -0
  36. lm_deluge-0.0.70/src/lm_deluge/models/openai.py +322 -0
  37. lm_deluge-0.0.70/src/lm_deluge/models/openrouter.py +64 -0
  38. lm_deluge-0.0.70/src/lm_deluge/models/together.py +96 -0
  39. lm_deluge-0.0.70/src/lm_deluge/presets/cerebras.py +17 -0
  40. lm_deluge-0.0.70/src/lm_deluge/presets/meta.py +13 -0
  41. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/prompt.py +679 -50
  42. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/request_context.py +13 -10
  43. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/tool.py +415 -27
  44. lm_deluge-0.0.70/src/lm_deluge/tracker.py +390 -0
  45. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/usage.py +30 -21
  46. lm_deluge-0.0.70/src/lm_deluge/util/harmony.py +47 -0
  47. lm_deluge-0.0.70/src/lm_deluge/warnings.py +46 -0
  48. {lm_deluge-0.0.22 → lm_deluge-0.0.70/src/lm_deluge.egg-info}/PKG-INFO +31 -13
  49. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/SOURCES.txt +28 -4
  50. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/requires.txt +3 -0
  51. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/tests/test_builtin_tools.py +2 -2
  52. lm_deluge-0.0.70/tests/test_file_upload.py +627 -0
  53. lm_deluge-0.0.70/tests/test_mock_openai.py +479 -0
  54. lm_deluge-0.0.70/tests/test_openrouter_generic.py +238 -0
  55. lm_deluge-0.0.22/src/lm_deluge/__init__.py +0 -17
  56. lm_deluge-0.0.22/src/lm_deluge/agent.py +0 -0
  57. lm_deluge-0.0.22/src/lm_deluge/client.py +0 -658
  58. lm_deluge-0.0.22/src/lm_deluge/file.py +0 -154
  59. lm_deluge-0.0.22/src/lm_deluge/gemini_limits.py +0 -65
  60. lm_deluge-0.0.22/src/lm_deluge/models.py +0 -1247
  61. lm_deluge-0.0.22/src/lm_deluge/tracker.py +0 -256
  62. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/LICENSE +0 -0
  63. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/setup.cfg +0 -0
  64. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/__init__.py +0 -0
  65. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/common.py +0 -0
  66. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  67. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  68. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  69. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  70. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  71. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  72. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  73. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  74. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  75. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/base.py +0 -0
  76. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/openai.py +0 -0
  77. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/embed.py +0 -0
  78. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/errors.py +0 -0
  79. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/__init__.py +0 -0
  80. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/classify.py +0 -0
  81. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/locate.py +0 -0
  82. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/ocr.py +0 -0
  83. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/score.py +0 -0
  84. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/translate.py +0 -0
  85. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/rerank.py +0 -0
  86. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/json.py +0 -0
  87. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/logprobs.py +0 -0
  88. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/spatial.py +0 -0
  89. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/validation.py +0 -0
  90. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge/util/xml.py +0 -0
  91. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  92. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/top_level.txt +0 -0
  93. {lm_deluge-0.0.22 → lm_deluge-0.0.70}/tests/test_native_mcp_server.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.22
+ Version: 0.0.70
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -23,6 +23,8 @@ Requires-Dist: pdf2image
  Requires-Dist: pillow
  Requires-Dist: fastmcp>=2.4
  Requires-Dist: rich
+ Provides-Extra: openai
+ Requires-Dist: openai>=1.0.0; extra == "openai"
  Dynamic: license-file

  # lm-deluge
@@ -54,12 +56,12 @@ The package relies on environment variables for API keys. Typical variables incl

  ## Quickstart

- The easiest way to get started is with the `.basic` constructor. This uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.
+ `LLMClient` uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.

  ```python
  from lm_deluge import LLMClient

- client = LLMClient.basic("gpt-4o-mini")
+ client = LLMClient("gpt-4o-mini")
  resps = client.process_prompts_sync(["Hello, world!"])
  print(resp[0].completion)
  ```
@@ -71,7 +73,7 @@ To distribute your requests across models, just provide a list of more than one
  ```python
  from lm_deluge import LLMClient

- client = LLMClient.basic(
+ client = LLMClient(
      ["gpt-4o-mini", "claude-3-haiku"],
      max_requests_per_minute=10_000
  )
@@ -85,8 +87,8 @@ print(resp[0].completion)

  API calls can be customized in a few ways.

- 1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models. You can also pass many of these arguments directly to `LLMClient.basic` so you don't have to construct an entire `SamplingParams` object.
- 2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and caching.
+ 1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
+ 2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, caching, **and progress display style**. Set `progress="rich"` (default), `"tqdm"`, or `"manual"` to choose how progress is reported. The manual option prints an update every 30 seconds.
  3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.

  Putting it all together:
@@ -109,6 +111,22 @@ await client.process_prompts_async(
  )
  ```

+ ### Queueing individual prompts
+
+ You can queue prompts one at a time and track progress explicitly. Iterate over
+ results as they finish with `as_completed` (or gather them all at once with
+ `wait_for_all`):
+
+ ```python
+ client = LLMClient("gpt-4.1-mini", progress="tqdm")
+ client.open()
+ client.start_nowait("hello there")
+ # ... queue more tasks ...
+ async for task_id, result in client.as_completed():
+     print(task_id, result.completion)
+ client.close()
+ ```
+
  ## Multi-Turn Conversations

  Constructing conversations to pass to models is notoriously annoying. Each provider has a slightly different way of defining a list of messages, and with the introduction of images/multi-part messages it's only gotten worse. We provide convenience constructors so you don't have to remember all that stuff.
@@ -120,7 +138,7 @@ prompt = Conversation.system("You are a helpful assistant.").add(
      Message.user("What's in this image?").add_image("tests/image.jpg")
  )

- client = LLMClient.basic("gpt-4.1-mini")
+ client = LLMClient("gpt-4.1-mini")
  resps = client.process_prompts_sync([prompt])
  ```

@@ -136,9 +154,9 @@ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can ea
  from lm_deluge import LLMClient, Conversation

  # Simple file upload
- client = LLMClient.basic("gpt-4.1-mini")
+ client = LLMClient("gpt-4.1-mini")
  conversation = Conversation.user(
-     "Please summarize this document",
+     "Please summarize this document",
      file="path/to/document.pdf"
  )
  resps = client.process_prompts_sync([conversation])
@@ -163,7 +181,7 @@ def get_weather(city: str) -> str:
      return f"The weather in {city} is sunny and 72°F"

  tool = Tool.from_function(get_weather)
- client = LLMClient.basic("claude-3-haiku")
+ client = LLMClient("claude-3-haiku")
  resps = client.process_prompts_sync(
      ["What's the weather in Paris?"],
      tools=[tool]
@@ -200,7 +218,7 @@ config = {
  all_tools = Tool.from_mcp_config(config)

  # let the model use the tools
- client = LLMClient.basic("gpt-4o-mini")
+ client = LLMClient("gpt-4o-mini")
  resps = client.process_prompts_sync(
      ["List the files in the current directory"],
      tools=tools
@@ -237,7 +255,7 @@ conv = (
  )

  # Use prompt caching to cache system message and tools
- client = LLMClient.basic("claude-3-5-sonnet")
+ client = LLMClient("claude-3-5-sonnet")
  resps = client.process_prompts_sync(
      [conv],
      cache="system_and_tools" # Cache system message and any tools
@@ -274,7 +292,7 @@ We support all models in `src/lm_deluge/models.py`. Vertex support is not planne

  ## Feature Support

- We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
+ We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Passing `None` (or the string `"none"`) disables Gemini thoughts entirely. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.

  ## Built‑in tools

@@ -27,12 +27,12 @@ The package relies on environment variables for API keys. Typical variables incl

  ## Quickstart

- The easiest way to get started is with the `.basic` constructor. This uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.
+ `LLMClient` uses sensible default arguments for rate limits and sampling parameters so that you don't have to provide a ton of arguments.

  ```python
  from lm_deluge import LLMClient

- client = LLMClient.basic("gpt-4o-mini")
+ client = LLMClient("gpt-4o-mini")
  resps = client.process_prompts_sync(["Hello, world!"])
  print(resp[0].completion)
  ```
@@ -44,7 +44,7 @@ To distribute your requests across models, just provide a list of more than one
  ```python
  from lm_deluge import LLMClient

- client = LLMClient.basic(
+ client = LLMClient(
      ["gpt-4o-mini", "claude-3-haiku"],
      max_requests_per_minute=10_000
  )
@@ -58,8 +58,8 @@ print(resp[0].completion)

  API calls can be customized in a few ways.

- 1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models. You can also pass many of these arguments directly to `LLMClient.basic` so you don't have to construct an entire `SamplingParams` object.
- 2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, and caching.
+ 1. **Sampling Parameters.** This determines things like structured outputs, maximum completion tokens, nucleus sampling, etc. Provide a custom `SamplingParams` to the `LLMClient` to set temperature, top_p, json_mode, max_new_tokens, and/or reasoning_effort. You can pass 1 `SamplingParams` to use for all models, or a list of `SamplingParams` that's the same length as the list of models.
+ 2. **Arguments to LLMClient.** This is where you set request timeout, rate limits, model name(s), model weight(s) for distributing requests across models, retries, caching, **and progress display style**. Set `progress="rich"` (default), `"tqdm"`, or `"manual"` to choose how progress is reported. The manual option prints an update every 30 seconds.
  3. **Arguments to process_prompts.** Per-call, you can set verbosity, whether to display progress, and whether to return just completions (rather than the full APIResponse object). This is also where you provide tools.

  Putting it all together:
@@ -82,6 +82,22 @@ await client.process_prompts_async(
  )
  ```

+ ### Queueing individual prompts
+
+ You can queue prompts one at a time and track progress explicitly. Iterate over
+ results as they finish with `as_completed` (or gather them all at once with
+ `wait_for_all`):
+
+ ```python
+ client = LLMClient("gpt-4.1-mini", progress="tqdm")
+ client.open()
+ client.start_nowait("hello there")
+ # ... queue more tasks ...
+ async for task_id, result in client.as_completed():
+     print(task_id, result.completion)
+ client.close()
+ ```
+
  ## Multi-Turn Conversations

  Constructing conversations to pass to models is notoriously annoying. Each provider has a slightly different way of defining a list of messages, and with the introduction of images/multi-part messages it's only gotten worse. We provide convenience constructors so you don't have to remember all that stuff.
@@ -93,7 +109,7 @@ prompt = Conversation.system("You are a helpful assistant.").add(
      Message.user("What's in this image?").add_image("tests/image.jpg")
  )

- client = LLMClient.basic("gpt-4.1-mini")
+ client = LLMClient("gpt-4.1-mini")
  resps = client.process_prompts_sync([prompt])
  ```

@@ -109,9 +125,9 @@ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can ea
  from lm_deluge import LLMClient, Conversation

  # Simple file upload
- client = LLMClient.basic("gpt-4.1-mini")
+ client = LLMClient("gpt-4.1-mini")
  conversation = Conversation.user(
-     "Please summarize this document",
+     "Please summarize this document",
      file="path/to/document.pdf"
  )
  resps = client.process_prompts_sync([conversation])
@@ -136,7 +152,7 @@ def get_weather(city: str) -> str:
      return f"The weather in {city} is sunny and 72°F"

  tool = Tool.from_function(get_weather)
- client = LLMClient.basic("claude-3-haiku")
+ client = LLMClient("claude-3-haiku")
  resps = client.process_prompts_sync(
      ["What's the weather in Paris?"],
      tools=[tool]
@@ -173,7 +189,7 @@ config = {
  all_tools = Tool.from_mcp_config(config)

  # let the model use the tools
- client = LLMClient.basic("gpt-4o-mini")
+ client = LLMClient("gpt-4o-mini")
  resps = client.process_prompts_sync(
      ["List the files in the current directory"],
      tools=tools
@@ -210,7 +226,7 @@ conv = (
  )

  # Use prompt caching to cache system message and tools
- client = LLMClient.basic("claude-3-5-sonnet")
+ client = LLMClient("claude-3-5-sonnet")
  resps = client.process_prompts_sync(
      [conv],
      cache="system_and_tools" # Cache system message and any tools
@@ -247,7 +263,7 @@ We support all models in `src/lm_deluge/models.py`. Vertex support is not planne

  ## Feature Support

- We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
+ We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Passing `None` (or the string `"none"`) disables Gemini thoughts entirely. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.

  ## Built‑in tools

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.22"
+ version = "0.0.70"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
@@ -28,5 +28,12 @@ dependencies = [
      "pdf2image",
      "pillow",
      "fastmcp>=2.4",
-     "rich"
+     "rich",
+     # "textual>=0.58.0"
  ]
+
+ [project.optional-dependencies]
+ openai = ["openai>=1.0.0"]
+
+ # [project.scripts]
+ # deluge = "lm_deluge.cli:main"
@@ -0,0 +1,41 @@
+ from .client import APIResponse, LLMClient, SamplingParams
+ from .file import File
+ from .prompt import Conversation, Message
+ from .tool import Tool, ToolParams
+
+ try:
+     from .mock_openai import (  # noqa
+         APIError,
+         APITimeoutError,
+         BadRequestError,
+         MockAsyncOpenAI,
+         RateLimitError,
+     )
+
+     _has_openai = True
+ except ImportError:
+     _has_openai = False
+
+ # dotenv.load_dotenv() - don't do this, fucks with other packages
+
+ __all__ = [
+     "LLMClient",
+     "SamplingParams",
+     "APIResponse",
+     "Conversation",
+     "Message",
+     "Tool",
+     "ToolParams",
+     "File",
+ ]
+
+ if _has_openai:
+     __all__.extend(
+         [
+             "MockAsyncOpenAI",
+             "APIError",
+             "APITimeoutError",
+             "BadRequestError",
+             "RateLimitError",
+         ]
+     )
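The new `src/lm_deluge/__init__.py` above only re-exports the mock OpenAI symbols when the optional `openai` extra (declared in the pyproject.toml hunk) is importable. A minimal downstream sketch of how that conditional export is expected to behave; this snippet is illustrative and not part of the diff:

```python
# Hedged sketch: consuming the guarded exports added in __init__.py above.
# MockAsyncOpenAI is assumed to be exported only when the "openai" extra is
# installed (e.g. `pip install "lm-deluge[openai]"`); core exports always work.
from lm_deluge import Conversation, LLMClient

client = LLMClient("gpt-4o-mini")  # core API, no extra required

try:
    from lm_deluge import MockAsyncOpenAI  # present only with the openai extra
except ImportError:
    MockAsyncOpenAI = None  # degrade gracefully when the extra is missing
```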
@@ -28,24 +28,28 @@ def _add_beta(headers: dict, beta: str):
  def _build_anthropic_request(
      model: APIModel,
      context: RequestContext,
-     # prompt: Conversation,
-     # tools: list[Tool | dict | MCPServer] | None,
-     # sampling_params: SamplingParams,
-     # cache_pattern: CachePattern | None = None,
  ):
      prompt = context.prompt
      cache_pattern = context.cache
      tools = context.tools
      sampling_params = context.sampling_params
      system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
-     if not system_message:
-         print("WARNING: system_message is None")
+     # if not system_message:
+     #     print("WARNING: system_message is None")
      base_headers = {
          "x-api-key": os.getenv(model.api_key_env_var),
          "anthropic-version": "2023-06-01",
          "content-type": "application/json",
      }

+     # Check if any messages contain uploaded files (file_id)
+     # If so, add the files-api beta header
+     for msg in prompt.messages:
+         for file in msg.files:
+             if file.is_remote and file.remote_provider == "anthropic":
+                 _add_beta(base_headers, "files-api-2025-04-14")
+                 break
+
      request_json = {
          "model": model.name,
          "messages": messages,
@@ -57,14 +61,15 @@
      # handle thinking
      if model.reasoning_model and sampling_params.reasoning_effort:
          # translate reasoning effort of low, medium, high to budget tokens
-         budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+         budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
              sampling_params.reasoning_effort
          )
          request_json["thinking"] = {
              "type": "enabled",
              "budget_tokens": budget,
          }
-         request_json.pop("top_p")
+         if "top_p" in request_json:
+             request_json["top_p"] = max(request_json["top_p"], 0.95)
          request_json["temperature"] = 1.0
          request_json["max_tokens"] += budget
      else:
@@ -74,12 +79,20 @@
      if system_message is not None:
          request_json["system"] = system_message

+     # handle temp + top_p for opus 4.1/sonnet 4.5
+     if "4-1" in model.name or "4-5" in model.name:
+         if "temperature" in request_json and "top_p" in request_json:
+             request_json.pop("top_p")
+
      if tools:
          mcp_servers = []
          tool_definitions = []
          for tool in tools:
              if isinstance(tool, Tool):
                  tool_definitions.append(tool.dump_for("anthropic"))
+             elif isinstance(tool, dict) and "url" in tool:
+                 _add_beta(base_headers, "mcp-client-2025-04-04")
+                 mcp_servers.append(tool)
              elif isinstance(tool, dict):
                  tool_definitions.append(tool)
                  # add betas if needed
@@ -93,6 +106,9 @@
                      _add_beta(base_headers, "computer-use-2025-01-24")
                  elif tool["type"] == "code_execution_20250522":
                      _add_beta(base_headers, "code-execution-2025-05-22")
+                 elif tool["type"] in ["memory_20250818", "clear_tool_uses_20250919"]:
+                     _add_beta(base_headers, "context-management-2025-06-27")
+
              elif isinstance(tool, MCPServer):
                  _add_beta(base_headers, "mcp-client-2025-04-04")
                  mcp_servers.append(tool.for_anthropic())
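The thinking changes in the anthropic.py hunks above map `reasoning_effort` to a `budget_tokens` value, force `temperature` to 1.0, grow `max_tokens` by the budget, and (as of 0.0.70) clamp `top_p` upward instead of dropping it. A standalone sketch of that translation, using an illustrative helper name that is not taken from the package:

```python
# Hedged sketch of the effort -> thinking-budget translation shown above.
# The mapping values come from the diff; the function itself is hypothetical.
BUDGETS = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}


def thinking_fields(reasoning_effort: str, max_tokens: int, top_p: float | None) -> dict:
    budget = BUDGETS[reasoning_effort]
    fields = {
        "thinking": {"type": "enabled", "budget_tokens": budget},
        "temperature": 1.0,                 # thinking requests pin temperature to 1.0
        "max_tokens": max_tokens + budget,  # leave room for the thinking block
    }
    if top_p is not None:
        fields["top_p"] = max(top_p, 0.95)  # 0.0.70 raises top_p rather than removing it
    return fields


print(thinking_fields("medium", 2048, 0.9))
# budget_tokens 4096, max_tokens 6144, top_p 0.95
```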
@@ -1,4 +1,5 @@
  import asyncio
+ import time
  import traceback
  from abc import ABC, abstractmethod

@@ -6,6 +7,7 @@ import aiohttp
  from aiohttp import ClientResponse

  from ..errors import raise_if_modal_exception
+ from ..models.openai import OPENAI_MODELS
  from ..request_context import RequestContext
  from .response import APIResponse

@@ -52,6 +54,9 @@ class APIRequestBase(ABC):
          self, base_headers: dict[str, str], exclude_patterns: list[str] | None = None
      ) -> dict[str, str]:
          """Merge extra_headers with base headers, giving priority to extra_headers."""
+         # Filter out None values from base headers (e.g., missing API keys)
+         base_headers = {k: v for k, v in base_headers.items() if v is not None}
+
          if not self.context.extra_headers:
              return base_headers

@@ -69,6 +74,9 @@ class APIRequestBase(ABC):
          # Start with base headers, then overlay filtered extra headers (extra takes precedence)
          merged = dict(base_headers)
          merged.update(filtered_extra)
+
+         # Filter out None values from final merged headers
+         merged = {k: v for k, v in merged.items() if v is not None}
          return merged

      def handle_success(self, data):
@@ -76,15 +84,95 @@
          if self.context.status_tracker:
              self.context.status_tracker.task_succeeded(self.context.task_id)

+     async def _execute_once_background_mode(self) -> APIResponse:
+         """
+         ONLY for OpenAI responses API. Implement the
+         start -> poll -> result style of request.
+         """
+         assert self.context.status_tracker, "no status tracker"
+         start_time = time.time()
+         async with aiohttp.ClientSession() as session:
+             last_status: str | None = None
+
+             try:
+                 self.context.status_tracker.total_requests += 1
+                 assert self.url is not None, "URL is not set"
+                 async with session.post(
+                     url=self.url,
+                     headers=self.request_header,
+                     json=self.request_json,
+                 ) as http_response:
+                     # make sure we created the Response object
+                     http_response.raise_for_status()
+                     data = await http_response.json()
+                     response_id = data["id"]
+                     last_status = data["status"]
+
+                 while True:
+                     if time.time() - start_time > self.context.request_timeout:
+                         # cancel the response
+                         async with session.post(
+                             url=f"{self.url}/{response_id}/cancel",
+                             headers=self.request_header,
+                         ) as http_response:
+                             http_response.raise_for_status()
+
+                         return APIResponse(
+                             id=self.context.task_id,
+                             model_internal=self.context.model_name,
+                             prompt=self.context.prompt,
+                             sampling_params=self.context.sampling_params,
+                             status_code=None,
+                             is_error=True,
+                             error_message="Request timed out (terminated by client).",
+                             content=None,
+                             usage=None,
+                         )
+                     # poll for the response
+                     await asyncio.sleep(5.0)
+                     async with session.get(
+                         url=f"{self.url}/{response_id}",
+                         headers=self.request_header,
+                     ) as http_response:
+                         http_response.raise_for_status()
+                         data = await http_response.json()
+
+                         if data["status"] != last_status:
+                             print(
+                                 f"Background req {response_id} status updated to: {data['status']}"
+                             )
+                         last_status = data["status"]
+                         if last_status not in ["queued", "in_progress"]:
+                             return await self.handle_response(http_response)
+
+             except Exception as e:
+                 raise_if_modal_exception(e)
+                 tb = traceback.format_exc()
+                 print(tb)
+                 return APIResponse(
+                     id=self.context.task_id,
+                     model_internal=self.context.model_name,
+                     prompt=self.context.prompt,
+                     sampling_params=self.context.sampling_params,
+                     status_code=None,
+                     is_error=True,
+                     error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                     content=None,
+                     usage=None,
+                 )
+
      async def execute_once(self) -> APIResponse:
          """Send the HTTP request once and return the parsed APIResponse."""
          await self.build_request()
          assert self.context.status_tracker
-         # try:
-         #     dumped = json.dumps(self.request_json)
-         # except Exception:
-         #     print("couldn't serialize request json")
-         #     print(self.request_json)
+
+         if (
+             self.context.background
+             and self.context.use_responses_api
+             and self.context.model_name in OPENAI_MODELS
+         ):
+             return await self._execute_once_background_mode()
+
          try:
              self.context.status_tracker.total_requests += 1
              timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)