lm-deluge 0.0.58__tar.gz → 0.0.60__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge has been flagged as potentially problematic.

Files changed (79)
  1. {lm_deluge-0.0.58/src/lm_deluge.egg-info → lm_deluge-0.0.60}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/base.py +87 -5
  4. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/bedrock.py +3 -4
  5. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/gemini.py +7 -6
  6. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/mistral.py +8 -9
  7. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/openai.py +57 -16
  8. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/batches.py +25 -9
  9. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/client.py +187 -31
  10. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/__init__.py +1 -1
  11. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/openai.py +28 -0
  12. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/prompt.py +89 -21
  13. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/request_context.py +9 -11
  14. lm_deluge-0.0.60/src/lm_deluge/warnings.py +46 -0
  15. {lm_deluge-0.0.58 → lm_deluge-0.0.60/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  16. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge.egg-info/SOURCES.txt +1 -0
  17. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/LICENSE +0 -0
  18. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/README.md +0 -0
  19. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/setup.cfg +0 -0
  20. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/__init__.py +0 -0
  21. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/__init__.py +0 -0
  22. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/anthropic.py +0 -0
  23. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/common.py +0 -0
  24. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  25. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  26. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  27. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  28. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  29. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/api_requests/response.py +0 -0
  30. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  31. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  32. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  33. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  34. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/built_in_tools/base.py +0 -0
  35. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/built_in_tools/openai.py +0 -0
  36. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/cache.py +0 -0
  37. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/cli.py +0 -0
  38. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/config.py +0 -0
  39. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/embed.py +0 -0
  40. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/errors.py +0 -0
  41. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/file.py +0 -0
  42. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/image.py +0 -0
  43. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/llm_tools/__init__.py +0 -0
  44. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/llm_tools/classify.py +0 -0
  45. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/llm_tools/extract.py +0 -0
  46. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/llm_tools/locate.py +0 -0
  47. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/llm_tools/ocr.py +0 -0
  48. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/llm_tools/score.py +0 -0
  49. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/llm_tools/translate.py +0 -0
  50. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/anthropic.py +0 -0
  51. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/bedrock.py +0 -0
  52. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/cerebras.py +0 -0
  53. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/cohere.py +0 -0
  54. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/deepseek.py +0 -0
  55. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/fireworks.py +0 -0
  56. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/google.py +0 -0
  57. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/grok.py +0 -0
  58. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/groq.py +0 -0
  59. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/meta.py +0 -0
  60. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/mistral.py +0 -0
  61. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/openrouter.py +0 -0
  62. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/models/together.py +0 -0
  63. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/presets/cerebras.py +0 -0
  64. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/presets/meta.py +0 -0
  65. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/rerank.py +0 -0
  66. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/tool.py +0 -0
  67. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/tracker.py +0 -0
  68. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/usage.py +0 -0
  69. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/util/harmony.py +0 -0
  70. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/util/json.py +0 -0
  71. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/util/logprobs.py +0 -0
  72. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/util/spatial.py +0 -0
  73. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/util/validation.py +0 -0
  74. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge/util/xml.py +0 -0
  75. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  76. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge.egg-info/requires.txt +0 -0
  77. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/src/lm_deluge.egg-info/top_level.txt +0 -0
  78. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/tests/test_builtin_tools.py +0 -0
  79. {lm_deluge-0.0.58 → lm_deluge-0.0.60}/tests/test_native_mcp_server.py +0 -0

--- lm_deluge-0.0.58/src/lm_deluge.egg-info/PKG-INFO
+++ lm_deluge-0.0.60/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.58
+Version: 0.0.60
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

--- lm_deluge-0.0.58/pyproject.toml
+++ lm_deluge-0.0.60/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.58"
+version = "0.0.60"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

--- lm_deluge-0.0.58/src/lm_deluge/api_requests/base.py
+++ lm_deluge-0.0.60/src/lm_deluge/api_requests/base.py
@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod
 
@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse
 
@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)
 
+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                            last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-        # try:
-        #     dumped = json.dumps(self.request_json)
-        # except Exception:
-        #     print("couldn't serialize request json")
-        #     print(self.request_json)
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
            self.context.status_tracker.total_requests += 1
            timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
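
The new _execute_once_background_mode above implements a start -> poll -> cancel lifecycle against the OpenAI Responses API: it creates the response, re-fetches it every five seconds while the status is "queued" or "in_progress", and cancels it once the client-side timeout is exceeded. Below is a minimal standalone sketch of the same flow, independent of lm-deluge's request classes; the model name and timeout are illustrative, not taken from the package.

import asyncio
import os
import time

import aiohttp

API_URL = "https://api.openai.com/v1/responses"
HEADERS = {"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"}


async def background_request(payload: dict, timeout_s: float = 600.0) -> dict:
    # Start the background response, then poll until it reaches a terminal status.
    start = time.time()
    async with aiohttp.ClientSession() as session:
        async with session.post(API_URL, headers=HEADERS, json={**payload, "background": True}) as r:
            r.raise_for_status()
            data = await r.json()
        response_id = data["id"]
        while data["status"] in ("queued", "in_progress"):
            if time.time() - start > timeout_s:
                # Cancel the server-side work before giving up, mirroring the timeout branch above.
                async with session.post(f"{API_URL}/{response_id}/cancel", headers=HEADERS) as r:
                    r.raise_for_status()
                raise TimeoutError("Request timed out (terminated by client).")
            await asyncio.sleep(5.0)  # same 5-second poll interval as the diff
            async with session.get(f"{API_URL}/{response_id}", headers=HEADERS) as r:
                r.raise_for_status()
                data = await r.json()
    return data  # terminal status: completed, failed, cancelled, or incomplete


# Illustrative usage:
# asyncio.run(background_request({"model": "gpt-5-mini", "input": "Say hello."}))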

--- lm_deluge-0.0.58/src/lm_deluge/api_requests/bedrock.py
+++ lm_deluge-0.0.60/src/lm_deluge/api_requests/bedrock.py
@@ -1,10 +1,11 @@
 import asyncio
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 try:
     from requests_aws4auth import AWS4Auth
 except ImportError:
@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
     # Note: GPT-OSS on Bedrock doesn't support response_format parameter
     # Even though the model supports JSON, we can't use the response_format parameter
     if sampling_params.json_mode and model.supports_json:
-        warnings.warn(
-            f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-        )
+        maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)
 
     if tools:
         request_tools = []
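
These call sites depend on the new src/lm_deluge/warnings.py module (listed above as +46 lines, new in this release), whose contents are not shown in this diff. A hypothetical sketch consistent with the call signature maybe_warn(code, **context) might look like the following; the message templates, the warn-once behavior, and the LM_DELUGE_SILENCE_WARNINGS variable are assumptions, not the actual implementation.

# Hypothetical sketch only -- the real src/lm_deluge/warnings.py is not shown in this diff.
import os
import warnings

_TEMPLATES = {
    "WARN_JSON_MODE_UNSUPPORTED": "JSON mode requested for {model_name} but response_format is not supported.",
    "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort for non-reasoning model: {model_name}.",
    "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs for model without logprobs support: {model_name}.",
    "WARN_CACHING_UNSUPPORTED": "Cache parameter {cache_param!r} is not supported for {model_name}; ignoring.",
}
_already_warned: set[str] = set()


def maybe_warn(code: str, **context) -> None:
    # Emit each warning code at most once per process, unless silenced via an env var (assumed name).
    if os.environ.get("LM_DELUGE_SILENCE_WARNINGS") or code in _already_warned:
        return
    _already_warned.add(code)
    warnings.warn(_TEMPLATES.get(code, code).format(**context), stacklevel=2)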

--- lm_deluge-0.0.58/src/lm_deluge/api_requests/gemini.py
+++ lm_deluge-0.0.60/src/lm_deluge/api_requests/gemini.py
@@ -1,11 +1,12 @@
 import json
 import os
-import warnings
 from typing import Any
+
 from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import Tool
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -54,9 +55,7 @@ async def _build_gemini_request(
 
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)
 
     # Add tools if provided
     if tools:
@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):
 
         # Warn if cache is specified for Gemini model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is not supported for Gemini models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
 
         self.model = APIModel.from_registry(self.context.model_name)

--- lm_deluge-0.0.58/src/lm_deluge/api_requests/mistral.py
+++ lm_deluge-0.0.60/src/lm_deluge/api_requests/mistral.py
@@ -1,9 +1,10 @@
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 from ..models import APIModel
 from ..prompt import Message
 from ..request_context import RequestContext
@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):
 
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)
 
@@ -38,13 +41,9 @@
             "max_tokens": self.context.sampling_params.max_new_tokens,
         }
         if self.context.sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.logprobs:
-            warnings.warn(
-                f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.json_mode and self.model.supports_json:
             self.request_json["response_format"] = {"type": "json_object"}
 

--- lm_deluge-0.0.58/src/lm_deluge/api_requests/openai.py
+++ lm_deluge-0.0.60/src/lm_deluge/api_requests/openai.py
@@ -1,7 +1,6 @@
 import json
 import os
 import traceback as tb
-import warnings
 from types import SimpleNamespace
 
 import aiohttp
@@ -9,6 +8,7 @@ from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -30,6 +30,26 @@ async def _build_oa_chat_request(
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Using 'auto' instead for model {model.id}."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -55,9 +75,8 @@
             request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
+
     if sampling_params.logprobs:
        request_json["logprobs"] = True
        if sampling_params.top_logprobs is not None:
@@ -85,8 +104,10 @@ class OpenAIRequest(APIRequestBase):
 
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)
 
@@ -213,9 +234,6 @@
 async def _build_oa_responses_request(
     model: APIModel,
     context: RequestContext,
-    # prompt: Conversation,
-    # tools: list[Tool] | None,
-    # sampling_params: SamplingParams,
 ):
     prompt = context.prompt
     sampling_params = context.sampling_params
@@ -226,7 +244,28 @@
         "input": openai_responses_format["input"],
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
+        "background": context.background or False,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Model {model.id} doesn't support flex. Using 'auto' instead."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     if sampling_params.max_new_tokens:
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
@@ -245,9 +284,7 @@
         }
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
 
     if sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}
@@ -284,8 +321,10 @@ class OpenAIResponsesRequest(APIRequestBase):
         super().__init__(context)
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
            )
         self.model = APIModel.from_registry(self.context.model_name)
 
@@ -488,8 +527,10 @@
     extra_headers: dict[str, str] | None = None,
 ):
     if cache is not None:
-        warnings.warn(
-            f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+        maybe_warn(
+            "WARN_CACHING_UNSUPPORTED",
+            model_name=model_name,
+            cache_param=cache,
         )
 
     model = APIModel.from_registry(model_name)
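
Both request builders now validate context.service_tier and fall back to "auto" when "flex" is requested for a model outside the o3 / o4-mini / gpt-5 family. Restated in isolation (the model IDs in the usage comments are illustrative, not taken from the package registry):

def resolve_service_tier(model_id: str, service_tier: str) -> str:
    # Same substring check the diff uses to decide whether a model accepts the flex tier.
    assert service_tier in ("auto", "default", "flex", "priority"), f"Invalid service tier: {service_tier}"
    if service_tier == "flex" and not any(x in model_id for x in ("o3", "o4-mini", "gpt-5")):
        return "auto"
    return service_tier


# resolve_service_tier("o4-mini", "flex")      -> "flex"
# resolve_service_tier("gpt-4.1-mini", "flex") -> "auto"  (hypothetical non-flex model id)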

--- lm_deluge-0.0.58/src/lm_deluge/batches.py
+++ lm_deluge-0.0.60/src/lm_deluge/batches.py
@@ -3,7 +3,7 @@ import json
 import os
 import tempfile
 import time
-from typing import Literal, Sequence
+from typing import Literal, Sequence, cast
 
 import aiohttp
 from rich.console import Console
@@ -16,7 +16,12 @@ from lm_deluge.api_requests.anthropic import _build_anthropic_request
 from lm_deluge.api_requests.openai import _build_oa_chat_request
 from lm_deluge.config import SamplingParams
 from lm_deluge.models import APIModel, registry
-from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+from lm_deluge.prompt import (
+    CachePattern,
+    Conversation,
+    Prompt,
+    prompts_to_conversations,
+)
 from lm_deluge.request_context import RequestContext
 
 
@@ -166,14 +171,18 @@ async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
 async def create_batch_files_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts: Sequence[str | list[dict] | Conversation],
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
     destination: str | None = None,  # if none provided, temp files
 ):
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size
 
-    prompts = prompts_to_conversations(prompts)
+    if not isinstance(prompts, list):
+        prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
 
@@ -251,14 +260,18 @@ async def create_batch_files_oa(
 async def submit_batches_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts: Sequence[str | list[dict] | Conversation],
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
 ):
     """Write OpenAI batch requests to a file and submit."""
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size
 
-    prompts = prompts_to_conversations(prompts)
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
 
@@ -342,7 +355,7 @@ async def submit_batches_oa(
 async def submit_batches_anthropic(
     model: str,
     sampling_params: SamplingParams,
-    prompts: Sequence[str | list[dict] | Conversation],
+    prompts: Prompt | Sequence[Prompt],
     *,
     cache: CachePattern | None = None,
     batch_size=100_000,
@@ -362,13 +375,16 @@
     MAX_BATCH_SIZE_ITEMS = batch_size
 
     # Convert prompts to Conversations
-    prompts = prompts_to_conversations(prompts)
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
 
     request_headers = None
     batch_tasks = []
     current_batch = []
     current_batch_size = 0
-
+    assert isinstance(prompts, Sequence)
     for idx, prompt in enumerate(prompts):
         assert isinstance(prompt, Conversation)
         context = RequestContext(
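
The batch helpers above now accept either a single prompt or a sequence of prompts (Prompt | Sequence[Prompt]), wrapping a bare prompt in a one-element list before handing it to prompts_to_conversations. A hedged, self-contained sketch of that normalization step follows; Prompt is assumed to still cover roughly str | list[dict] | Conversation as in the old annotations, and the converter is stubbed out rather than imported.

from typing import Callable, Sequence, Union, cast

Prompt = Union[str, list, dict]  # stand-in for lm_deluge.prompt.Prompt (assumption)


def normalize_prompts(
    prompts: Union[Prompt, Sequence[Prompt]],
    to_conversations: Callable[[Sequence[Prompt]], Sequence],
) -> Sequence:
    # A bare prompt is wrapped so downstream batching code always iterates over a sequence.
    if not isinstance(prompts, list):
        prompts = cast(Sequence[Prompt], [prompts])
    conversations = to_conversations(cast(Sequence[Prompt], prompts))
    assert isinstance(conversations, Sequence)
    return conversations


# normalize_prompts("one prompt", list) and normalize_prompts(["a", "b"], list) both return lists.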