lm-deluge 0.0.58__tar.gz → 0.0.59__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge might be problematic.

Files changed (78)
  1. {lm_deluge-0.0.58/src/lm_deluge.egg-info → lm_deluge-0.0.59}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/base.py +87 -5
  4. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/openai.py +41 -3
  5. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/batches.py +25 -9
  6. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/client.py +57 -29
  7. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/__init__.py +1 -1
  8. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/prompt.py +19 -7
  9. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/request_context.py +9 -11
  10. {lm_deluge-0.0.58 → lm_deluge-0.0.59/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  11. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/LICENSE +0 -0
  12. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/README.md +0 -0
  13. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/setup.cfg +0 -0
  14. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/__init__.py +0 -0
  15. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/__init__.py +0 -0
  16. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/anthropic.py +0 -0
  17. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/bedrock.py +0 -0
  18. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/common.py +0 -0
  19. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  20. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  21. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  22. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  23. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  24. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/gemini.py +0 -0
  25. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/mistral.py +0 -0
  26. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/response.py +0 -0
  27. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  28. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  29. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  30. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  31. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/base.py +0 -0
  32. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/openai.py +0 -0
  33. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/cache.py +0 -0
  34. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/cli.py +0 -0
  35. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/config.py +0 -0
  36. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/embed.py +0 -0
  37. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/errors.py +0 -0
  38. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/file.py +0 -0
  39. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/image.py +0 -0
  40. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/__init__.py +0 -0
  41. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/classify.py +0 -0
  42. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/extract.py +0 -0
  43. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/locate.py +0 -0
  44. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/ocr.py +0 -0
  45. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/score.py +0 -0
  46. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/translate.py +0 -0
  47. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/anthropic.py +0 -0
  48. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/bedrock.py +0 -0
  49. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/cerebras.py +0 -0
  50. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/cohere.py +0 -0
  51. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/deepseek.py +0 -0
  52. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/fireworks.py +0 -0
  53. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/google.py +0 -0
  54. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/grok.py +0 -0
  55. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/groq.py +0 -0
  56. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/meta.py +0 -0
  57. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/mistral.py +0 -0
  58. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/openai.py +0 -0
  59. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/openrouter.py +0 -0
  60. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/together.py +0 -0
  61. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/presets/cerebras.py +0 -0
  62. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/presets/meta.py +0 -0
  63. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/rerank.py +0 -0
  64. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/tool.py +0 -0
  65. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/tracker.py +0 -0
  66. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/usage.py +0 -0
  67. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/harmony.py +0 -0
  68. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/json.py +0 -0
  69. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/logprobs.py +0 -0
  70. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/spatial.py +0 -0
  71. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/validation.py +0 -0
  72. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/xml.py +0 -0
  73. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
  74. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  75. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/requires.txt +0 -0
  76. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/top_level.txt +0 -0
  77. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/tests/test_builtin_tools.py +0 -0
  78. {lm_deluge-0.0.58 → lm_deluge-0.0.59}/tests/test_native_mcp_server.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.58
+Version: 0.0.59
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.58"
+version = "0.0.59"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
src/lm_deluge/api_requests/base.py

@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod
 
@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse
 
@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)
 
+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                            last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-        # try:
-        #     dumped = json.dumps(self.request_json)
-        # except Exception:
-        #     print("couldn't serialize request json")
-        #     print(self.request_json)
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
src/lm_deluge/api_requests/openai.py

@@ -30,6 +30,26 @@ async def _build_oa_chat_request(
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Using 'auto' instead for model {model.id}."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -213,9 +233,6 @@ class OpenAIRequest(APIRequestBase):
 async def _build_oa_responses_request(
     model: APIModel,
     context: RequestContext,
-    # prompt: Conversation,
-    # tools: list[Tool] | None,
-    # sampling_params: SamplingParams,
 ):
     prompt = context.prompt
     sampling_params = context.sampling_params
@@ -226,7 +243,28 @@ async def _build_oa_responses_request(
         "input": openai_responses_format["input"],
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
+        "background": context.background or False,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Model {model.id} doesn't support flex. Using 'auto' instead."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     if sampling_params.max_new_tokens:
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
src/lm_deluge/batches.py

@@ -3,7 +3,7 @@ import json
 import os
 import tempfile
 import time
-from typing import Literal, Sequence
+from typing import Literal, Sequence, cast
 
 import aiohttp
 from rich.console import Console
@@ -16,7 +16,12 @@ from lm_deluge.api_requests.anthropic import _build_anthropic_request
 from lm_deluge.api_requests.openai import _build_oa_chat_request
 from lm_deluge.config import SamplingParams
 from lm_deluge.models import APIModel, registry
-from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+from lm_deluge.prompt import (
+    CachePattern,
+    Conversation,
+    Prompt,
+    prompts_to_conversations,
+)
 from lm_deluge.request_context import RequestContext
 
 
@@ -166,14 +171,18 @@ async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
 async def create_batch_files_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts: Sequence[str | list[dict] | Conversation],
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
     destination: str | None = None,  # if none provided, temp files
 ):
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size
 
-    prompts = prompts_to_conversations(prompts)
+    if not isinstance(prompts, list):
+        prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
 
@@ -251,14 +260,18 @@ async def create_batch_files_oa(
 async def submit_batches_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts: Sequence[str | list[dict] | Conversation],
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
 ):
     """Write OpenAI batch requests to a file and submit."""
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size
 
-    prompts = prompts_to_conversations(prompts)
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
 
@@ -342,7 +355,7 @@ async def submit_batches_oa(
 async def submit_batches_anthropic(
     model: str,
     sampling_params: SamplingParams,
-    prompts: Sequence[str | list[dict] | Conversation],
+    prompts: Prompt | Sequence[Prompt],
     *,
     cache: CachePattern | None = None,
     batch_size=100_000,
@@ -362,13 +375,16 @@ async def submit_batches_anthropic(
     MAX_BATCH_SIZE_ITEMS = batch_size
 
     # Convert prompts to Conversations
-    prompts = prompts_to_conversations(prompts)
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
 
     request_headers = None
     batch_tasks = []
     current_batch = []
     current_batch_size = 0
-
+    assert isinstance(prompts, Sequence)
     for idx, prompt in enumerate(prompts):
         assert isinstance(prompt, Conversation)
         context = RequestContext(
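With the widened Prompt | Sequence[Prompt] signatures, the batch helpers accept a single bare prompt as well as a sequence; a lone prompt is wrapped into a one-element list before conversion. A hedged usage sketch (model name and prompts are illustrative, and SamplingParams is assumed to be constructible with its defaults):

    import asyncio

    from lm_deluge.batches import submit_batches_oa
    from lm_deluge.config import SamplingParams

    async def main():
        # single prompt: wrapped internally into [prompt]
        await submit_batches_oa("gpt-4.1-mini", SamplingParams(), "Summarize this paragraph.")
        # sequence of prompts: passed through to prompts_to_conversations as-is
        await submit_batches_oa("gpt-4.1-mini", SamplingParams(), ["Prompt A", "Prompt B"])

    # asyncio.run(main())  # requires OPENAI_API_KEY and network access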
src/lm_deluge/client.py

@@ -1,5 +1,14 @@
 import asyncio
-from typing import Any, AsyncGenerator, Callable, Literal, Self, Sequence, overload
+from typing import (
+    Any,
+    AsyncGenerator,
+    Callable,
+    Literal,
+    Self,
+    Sequence,
+    cast,
+    overload,
+)
 
 import numpy as np
 import yaml
@@ -12,7 +21,12 @@ from lm_deluge.batches import (
     submit_batches_oa,
     wait_for_batch_completion_async,
 )
-from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+from lm_deluge.prompt import (
+    CachePattern,
+    Conversation,
+    Prompt,
+    prompts_to_conversations,
+)
 from lm_deluge.tool import MCPServer, Tool
 
 from .api_requests.base import APIResponse
@@ -40,6 +54,9 @@ class _LLMClient(BaseModel):
     request_timeout: int = 30
     cache: Any = None
     extra_headers: dict[str, str] | None = None
+    extra_body: dict[str, str] | None = None
+    use_responses_api: bool = False
+    background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
     temperature: float = 0.75
@@ -171,6 +188,11 @@ class _LLMClient(BaseModel):
         # normalize weights
         self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]
 
+        # background mode only allowed for responses api
+        if self.background:
+            assert (
+                self.use_responses_api
+            ), "background mode only allowed for responses api"
         # Auto-generate name if not provided
         if self.name is None:
             if len(self.model_names) == 1:
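The two new client-level flags wire through to each RequestContext: use_responses_api routes OpenAI requests through the responses API, and background additionally switches eligible requests to the start -> poll -> result path added in api_requests/base.py. The assertion above enforces that background implies use_responses_api. A hedged construction sketch (model name is illustrative; LLMClient is assumed to be importable from the package root as in earlier releases):

    from lm_deluge import LLMClient

    # valid: background mode on top of the responses API
    client = LLMClient("gpt-4.1-mini", use_responses_api=True, background=True)

    # invalid: background without use_responses_api trips the assertion above
    # LLMClient("gpt-4.1-mini", background=True)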
@@ -256,13 +278,6 @@ class _LLMClient(BaseModel):
             # Idle wait before next capacity check. Aim for ~RPM spacing.
             await asyncio.sleep(max(60.0 / self.max_requests_per_minute, 0.01))
 
-    async def _execute_request(self, context: RequestContext) -> APIResponse:
-        """Create and send a single API request using the provided context."""
-        model_obj = APIModel.from_registry(context.model_name)
-        request = model_obj.make_request(context)
-        response = await request.execute_once()
-        return response
-
     async def process_single_request(
         self, context: RequestContext, retry_queue: asyncio.Queue | None = None
     ) -> APIResponse:
@@ -290,7 +305,9 @@ class _LLMClient(BaseModel):
             # Execute single request
             assert context.status_tracker
             context.status_tracker.update_pbar()
-            response = await self._execute_request(context)
+            model_obj = APIModel.from_registry(context.model_name)
+            request = model_obj.make_request(context)
+            response = await request.execute_once()
 
             # Handle successful response
             if not response.is_error:
@@ -350,36 +367,36 @@ class _LLMClient(BaseModel):
     @overload
     async def process_prompts_async(
         self,
-        prompts: Sequence[str | list[dict] | Conversation],
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: Literal[True],
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
-        use_responses_api: bool = ...,
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[str | None]: ...
 
     @overload
     async def process_prompts_async(
         self,
-        prompts: Sequence[str | list[dict] | Conversation],
+        prompts: Prompt | Sequence[Prompt],
        *,
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
-        use_responses_api: bool = ...,
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[APIResponse]: ...
 
     async def process_prompts_async(
         self,
-        prompts: Sequence[str | list[dict] | Conversation],
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: bool = False,
         show_progress: bool = True,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-        use_responses_api: bool = False,
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> list[APIResponse] | list[str | None] | dict[str, int]:
         """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.
 
@@ -387,7 +404,9 @@ class _LLMClient(BaseModel):
         avoiding issues with tracker state accumulating across multiple calls.
         """
         # Convert prompts to Conversations
-        prompts = prompts_to_conversations(prompts)
+        if not isinstance(prompts, list):
+            prompts = prompts = cast(Sequence[Prompt], [prompts])
+        prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
 
         # Ensure tracker exists (start_nowait will call add_to_total for each task)
         if self._tracker is None:
@@ -398,13 +417,14 @@ class _LLMClient(BaseModel):
 
         # Start all tasks using start_nowait - tasks will coordinate via shared capacity lock
         task_ids = []
+        assert isinstance(prompts, Sequence)
         for prompt in prompts:
             assert isinstance(prompt, Conversation)
             task_id = self.start_nowait(
                 prompt,
                 tools=tools,
                 cache=cache,
-                use_responses_api=use_responses_api,
+                service_tier=service_tier,
             )
             task_ids.append(task_id)
 
@@ -443,13 +463,12 @@ class _LLMClient(BaseModel):
 
     def process_prompts_sync(
         self,
-        prompts: Sequence[str | list[dict] | Conversation],
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: bool = False,
         show_progress=True,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-        use_responses_api: bool = False,
     ):
         return asyncio.run(
             self.process_prompts_async(
@@ -458,7 +477,6 @@ class _LLMClient(BaseModel):
                 show_progress=show_progress,
                 tools=tools,
                 cache=cache,
-                use_responses_api=use_responses_api,
             )
         )
 
@@ -478,18 +496,18 @@ class _LLMClient(BaseModel):
 
     def start_nowait(
         self,
-        prompt: str | Conversation,
+        prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-        use_responses_api: bool = False,
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> int:
         tracker = self._get_tracker()
         task_id = self._next_task_id
         self._next_task_id += 1
         model, sampling_params = self._select_model()
-        if isinstance(prompt, str):
-            prompt = Conversation.user(prompt)
+        prompt = prompts_to_conversations([prompt])[0]
+        assert isinstance(prompt, Conversation)
         context = RequestContext(
             task_id=task_id,
             model_name=model,
@@ -500,7 +518,9 @@ class _LLMClient(BaseModel):
             status_tracker=tracker,
             tools=tools,
             cache=cache,
-            use_responses_api=use_responses_api,
+            use_responses_api=self.use_responses_api,
+            background=self.background,
+            service_tier=service_tier,
             extra_headers=self.extra_headers,
             force_local_mcp=self.force_local_mcp,
         )
@@ -515,10 +535,10 @@ class _LLMClient(BaseModel):
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-        use_responses_api: bool = False,
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> APIResponse:
         task_id = self.start_nowait(
-            prompt, tools=tools, cache=cache, use_responses_api=use_responses_api
+            prompt, tools=tools, cache=cache, service_tier=service_tier
         )
         return await self.wait_for(task_id)
 
@@ -698,7 +718,7 @@ class _LLMClient(BaseModel):
 
     async def submit_batch_job(
         self,
-        prompts: Sequence[str | list[dict] | Conversation],
+        prompts: Prompt | Sequence[Prompt],
         *,
         tools: list[Tool] | None = None,
         cache: CachePattern | None = None,
@@ -760,6 +780,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -787,6 +809,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -813,6 +837,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -851,6 +877,8 @@ def LLMClient(
         request_timeout=request_timeout,
         cache=cache,
         extra_headers=extra_headers,
+        use_responses_api=use_responses_api,
+        background=background,
         temperature=temperature,
         top_p=top_p,
         json_mode=json_mode,
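Taken together, the client.py changes move use_responses_api (and the new background flag) to the client constructor and replace the per-call use_responses_api argument with service_tier. A hedged end-to-end sketch of the new calling convention (model and prompts are illustrative; an API key for the chosen model is required to actually run it):

    import asyncio

    from lm_deluge import LLMClient

    async def main():
        client = LLMClient("o4-mini")  # a model that supports the flex tier
        completions = await client.process_prompts_async(
            ["What is 2 + 2?", "Name a prime number."],
            service_tier="flex",
            return_completions_only=True,
        )
        print(completions)

    # asyncio.run(main())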
src/lm_deluge/models/__init__.py

@@ -62,7 +62,7 @@ class APIModel:
             raise ValueError("no regions to sample")
         random.sample(regions, 1, counts=weights)[0]
 
-    def make_request(self, context: RequestContext):  # -> "APIRequestBase"
+    def make_request(self, context: RequestContext):
         from ..api_requests.common import CLASSES
 
         api_spec = self.api_spec
src/lm_deluge/prompt.py

@@ -2,7 +2,7 @@ import io
 import json
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Literal, Sequence
+from typing import Literal, Sequence, TypeAlias
 
 import tiktoken
 import xxhash
@@ -1495,9 +1495,21 @@ class Conversation:
         return cls(msgs)
 
 
-def prompts_to_conversations(prompts: Sequence[str | list[dict] | Conversation]):
-    if any(isinstance(x, list) for x in prompts):
-        raise ValueError("can't convert list[dict] to conversation yet")
-    return [  # type: ignore
-        Conversation.user(p) if isinstance(p, str) else p for p in prompts
-    ]
+Prompt: TypeAlias = str | list[dict] | Message | Conversation
+
+
+def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[Prompt]:
+    converted = []
+    for prompt in prompts:
+        if isinstance(prompt, Conversation):
+            converted.append(prompt)
+        elif isinstance(prompt, Message):
+            converted.append(Conversation([prompt]))
+        elif isinstance(prompt, str):
+            converted.append(Conversation.user(prompt))
+        elif isinstance(prompt, list):
+            conv, provider = Conversation.from_unknown(prompt)
+            converted.append(conv)
+        else:
+            raise ValueError(f"Unknown prompt type {type(prompt)}")
+    return converted
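The new Prompt alias lets prompts_to_conversations normalize plain strings, Message objects, raw provider-format list[dict] payloads (routed through Conversation.from_unknown), and existing Conversation instances. A small sketch of the accepted forms (string contents are illustrative):

    from lm_deluge.prompt import Conversation, prompts_to_conversations

    convs = prompts_to_conversations(
        [
            "a plain string prompt",                        # wrapped via Conversation.user
            Conversation.user("an existing Conversation"),  # passed through unchanged
            # a Message instance or a provider-format list[dict]
            # would also be accepted, per the branches above
        ]
    )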
src/lm_deluge/request_context.py

@@ -26,28 +26,22 @@ class RequestContext:
 
     # Infrastructure
     status_tracker: StatusTracker | None = None
-    results_arr: list[Any] | None = (
-        None  # list["APIRequestBase"] but avoiding circular import
-    )
+    # avoiding circular import
+    results_arr: list[Any] | None = None  # list["APIRequestBase"]
     callback: Callable | None = None
 
     # Optional features
     tools: list | None = None
     cache: CachePattern | None = None
     use_responses_api: bool = False
+    background: bool = False
+    service_tier: str | None = None
     extra_headers: dict[str, str] | None = None
+    extra_body: dict[str, Any] | None = None
     force_local_mcp: bool = False
 
     # Computed properties
     cache_key: str = field(init=False)
-    # num_tokens: int = field(init=False)
-
-    # def __post_init__(self):
-    #     # Compute cache key from prompt fingerprint
-    #     # self.cache_key = self.prompt.fingerprint
-
-    #     # Compute token count
-    #     self.num_tokens =
 
     @cached_property
     def num_tokens(self):
@@ -74,6 +68,10 @@ class RequestContext:
             "tools": self.tools,
             "cache": self.cache,
             "use_responses_api": self.use_responses_api,
+            "background": self.background,
+            "service_tier": self.service_tier,
+            "extra_headers": self.extra_headers,
+            "extra_body": self.extra_body,
             "force_local_mcp": self.force_local_mcp,
         }
 
src/lm_deluge.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.58
+Version: 0.0.59
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10