lm-deluge 0.0.58__py3-none-any.whl → 0.0.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

lm_deluge/api_requests/base.py CHANGED
@@ -1,4 +1,5 @@
  import asyncio
+ import time
  import traceback
  from abc import ABC, abstractmethod

@@ -6,6 +7,7 @@ import aiohttp
  from aiohttp import ClientResponse

  from ..errors import raise_if_modal_exception
+ from ..models.openai import OPENAI_MODELS
  from ..request_context import RequestContext
  from .response import APIResponse

@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
          if self.context.status_tracker:
              self.context.status_tracker.task_succeeded(self.context.task_id)

+     async def _execute_once_background_mode(self) -> APIResponse:
+         """
+         ONLY for OpenAI responses API. Implement the
+         start -> poll -> result style of request.
+         """
+         assert self.context.status_tracker, "no status tracker"
+         start_time = time.time()
+         async with aiohttp.ClientSession() as session:
+             last_status: str | None = None
+
+             try:
+                 self.context.status_tracker.total_requests += 1
+                 assert self.url is not None, "URL is not set"
+                 async with session.post(
+                     url=self.url,
+                     headers=self.request_header,
+                     json=self.request_json,
+                 ) as http_response:
+                     # make sure we created the Response object
+                     http_response.raise_for_status()
+                     data = await http_response.json()
+                     response_id = data["id"]
+                     last_status = data["status"]
+
+                 while True:
+                     if time.time() - start_time > self.context.request_timeout:
+                         # cancel the response
+                         async with session.post(
+                             url=f"{self.url}/{response_id}/cancel",
+                             headers=self.request_header,
+                         ) as http_response:
+                             http_response.raise_for_status()
+
+                         return APIResponse(
+                             id=self.context.task_id,
+                             model_internal=self.context.model_name,
+                             prompt=self.context.prompt,
+                             sampling_params=self.context.sampling_params,
+                             status_code=None,
+                             is_error=True,
+                             error_message="Request timed out (terminated by client).",
+                             content=None,
+                             usage=None,
+                         )
+                     # poll for the response
+                     await asyncio.sleep(5.0)
+                     async with session.get(
+                         url=f"{self.url}/{response_id}",
+                         headers=self.request_header,
+                     ) as http_response:
+                         http_response.raise_for_status()
+                         data = await http_response.json()
+
+                         if data["status"] != last_status:
+                             print(
+                                 f"Background req {response_id} status updated to: {data['status']}"
+                             )
+                             last_status = data["status"]
+                         if last_status not in ["queued", "in_progress"]:
+                             return await self.handle_response(http_response)
+
+             except Exception as e:
+                 raise_if_modal_exception(e)
+                 tb = traceback.format_exc()
+                 print(tb)
+                 return APIResponse(
+                     id=self.context.task_id,
+                     model_internal=self.context.model_name,
+                     prompt=self.context.prompt,
+                     sampling_params=self.context.sampling_params,
+                     status_code=None,
+                     is_error=True,
+                     error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                     content=None,
+                     usage=None,
+                 )
+
      async def execute_once(self) -> APIResponse:
          """Send the HTTP request once and return the parsed APIResponse."""
          await self.build_request()
          assert self.context.status_tracker
-         # try:
-         # dumped = json.dumps(self.request_json)
-         # except Exception:
-         # print("couldn't serialize request json")
-         # print(self.request_json)
+
+         if (
+             self.context.background
+             and self.context.use_responses_api
+             and self.context.model_name in OPENAI_MODELS
+         ):
+             return await self._execute_once_background_mode()
+
          try:
              self.context.status_tracker.total_requests += 1
              timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
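For orientation, here is a minimal standalone sketch of the start -> poll -> result flow that `_execute_once_background_mode` implements. It is illustrative only: the endpoint and field names follow the public OpenAI Responses API and the diff above, while the model name and polling interval are arbitrary choices, not taken from the package.

    import asyncio
    import os

    import aiohttp

    RESPONSES_URL = "https://api.openai.com/v1/responses"  # assumed endpoint
    HEADERS = {"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"}


    async def run_in_background(prompt: str) -> dict:
        async with aiohttp.ClientSession(headers=HEADERS) as session:
            # 1. start: submit the request with background=True
            async with session.post(
                RESPONSES_URL,
                json={"model": "gpt-5", "input": prompt, "background": True},
            ) as resp:
                resp.raise_for_status()
                data = await resp.json()

            # 2. poll: re-fetch the response until it leaves queued/in_progress
            while data["status"] in ("queued", "in_progress"):
                await asyncio.sleep(5.0)
                async with session.get(f"{RESPONSES_URL}/{data['id']}") as resp:
                    resp.raise_for_status()
                    data = await resp.json()

            # 3. result: the terminal payload (completed, failed, or cancelled)
            return data


    if __name__ == "__main__":
        print(asyncio.run(run_in_background("Say hello."))["status"])
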
lm_deluge/api_requests/bedrock.py CHANGED
@@ -1,10 +1,11 @@
  import asyncio
  import json
  import os
- import warnings

  from aiohttp import ClientResponse

+ from lm_deluge.warnings import maybe_warn
+
  try:
      from requests_aws4auth import AWS4Auth
  except ImportError:
@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
      # Note: GPT-OSS on Bedrock doesn't support response_format parameter
      # Even though the model supports JSON, we can't use the response_format parameter
      if sampling_params.json_mode and model.supports_json:
-         warnings.warn(
-             f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-         )
+         maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)

      if tools:
          request_tools = []
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,11 +1,12 @@
  import json
  import os
- import warnings
  from typing import Any
+
  from aiohttp import ClientResponse

  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import Tool
+ from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
@@ -54,9 +55,7 @@ async def _build_gemini_request(

      else:
          if sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)

      # Add tools if provided
      if tools:
@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):

          # Warn if cache is specified for Gemini model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is not supported for Gemini models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )

          self.model = APIModel.from_registry(self.context.model_name)
lm_deluge/api_requests/mistral.py CHANGED
@@ -1,9 +1,10 @@
  import json
  import os
- import warnings

  from aiohttp import ClientResponse

+ from lm_deluge.warnings import maybe_warn
+
  from ..models import APIModel
  from ..prompt import Message
  from ..request_context import RequestContext
@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):

          # Warn if cache is specified for non-Anthropic model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )
          self.model = APIModel.from_registry(self.context.model_name)

@@ -38,13 +41,9 @@ class MistralRequest(APIRequestBase):
              "max_tokens": self.context.sampling_params.max_new_tokens,
          }
          if self.context.sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
          if self.context.sampling_params.logprobs:
-             warnings.warn(
-                 f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-             )
+             maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
          if self.context.sampling_params.json_mode and self.model.supports_json:
              self.request_json["response_format"] = {"type": "json_object"}

lm_deluge/api_requests/openai.py CHANGED
@@ -1,7 +1,6 @@
  import json
  import os
  import traceback as tb
- import warnings
  from types import SimpleNamespace

  import aiohttp
@@ -9,6 +8,7 @@ from aiohttp import ClientResponse

  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
+ from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
@@ -30,6 +30,26 @@ async def _build_oa_chat_request(
          "temperature": sampling_params.temperature,
          "top_p": sampling_params.top_p,
      }
+     if context.service_tier:
+         assert context.service_tier in [
+             "auto",
+             "default",
+             "flex",
+             "priority",
+         ], f"Invalid service tier: {context.service_tier}"
+         # flex is only supported for o3, o4-mini, gpt-5 models
+         if context.service_tier == "flex":
+             model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+             if not model_supports_flex:
+                 print(
+                     f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                     f"Using 'auto' instead for model {model.id}."
+                 )
+                 request_json["service_tier"] = "auto"
+             else:
+                 request_json["service_tier"] = context.service_tier
+         else:
+             request_json["service_tier"] = context.service_tier
      # set max_tokens or max_completion_tokens dep. on provider
      if "cohere" in model.api_base:
          request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -55,9 +75,8 @@ async def _build_oa_chat_request(
          request_json["reasoning_effort"] = effort
      else:
          if sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
+
      if sampling_params.logprobs:
          request_json["logprobs"] = True
          if sampling_params.top_logprobs is not None:
@@ -85,8 +104,10 @@ class OpenAIRequest(APIRequestBase):

          # Warn if cache is specified for non-Anthropic model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )
          self.model = APIModel.from_registry(self.context.model_name)

@@ -213,9 +234,6 @@ class OpenAIRequest(APIRequestBase):
  async def _build_oa_responses_request(
      model: APIModel,
      context: RequestContext,
-     # prompt: Conversation,
-     # tools: list[Tool] | None,
-     # sampling_params: SamplingParams,
  ):
      prompt = context.prompt
      sampling_params = context.sampling_params
@@ -226,7 +244,28 @@ async def _build_oa_responses_request(
          "input": openai_responses_format["input"],
          "temperature": sampling_params.temperature,
          "top_p": sampling_params.top_p,
+         "background": context.background or False,
      }
+     if context.service_tier:
+         assert context.service_tier in [
+             "auto",
+             "default",
+             "flex",
+             "priority",
+         ], f"Invalid service tier: {context.service_tier}"
+         # flex is only supported for o3, o4-mini, gpt-5 models
+         if context.service_tier == "flex":
+             model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+             if not model_supports_flex:
+                 print(
+                     f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                     f"Model {model.id} doesn't support flex. Using 'auto' instead."
+                 )
+                 request_json["service_tier"] = "auto"
+             else:
+                 request_json["service_tier"] = context.service_tier
+         else:
+             request_json["service_tier"] = context.service_tier
      if sampling_params.max_new_tokens:
          request_json["max_output_tokens"] = sampling_params.max_new_tokens

@@ -245,9 +284,7 @@ async def _build_oa_responses_request(
          }
      else:
          if sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

      if sampling_params.json_mode and model.supports_json:
          request_json["text"] = {"format": {"type": "json_object"}}
@@ -284,8 +321,10 @@ class OpenAIResponsesRequest(APIRequestBase):
          super().__init__(context)
          # Warn if cache is specified for non-Anthropic model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )
          self.model = APIModel.from_registry(self.context.model_name)

@@ -488,8 +527,10 @@ async def stream_chat(
      extra_headers: dict[str, str] | None = None,
  ):
      if cache is not None:
-         warnings.warn(
-             f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+         maybe_warn(
+             "WARN_CACHING_UNSUPPORTED",
+             model_name=model_name,
+             cache_param=cache,
          )

      model = APIModel.from_registry(model_name)
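Putting the two Responses-API additions together, the request body assembled by `_build_oa_responses_request` now looks roughly like this (illustrative values; only the field names are taken from the diff above):

    request_json = {
        "model": "gpt-5",
        "input": [{"role": "user", "content": "Summarize this document."}],
        "temperature": 0.75,
        "top_p": 1.0,
        "background": True,       # run server-side; the client polls for the result
        "service_tier": "flex",    # falls back to "auto" for models without flex
        "max_output_tokens": 512,
    }
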
lm_deluge/batches.py CHANGED
@@ -3,7 +3,7 @@ import json
  import os
  import tempfile
  import time
- from typing import Literal, Sequence
+ from typing import Literal, Sequence, cast

  import aiohttp
  from rich.console import Console
@@ -16,7 +16,12 @@ from lm_deluge.api_requests.anthropic import _build_anthropic_request
  from lm_deluge.api_requests.openai import _build_oa_chat_request
  from lm_deluge.config import SamplingParams
  from lm_deluge.models import APIModel, registry
- from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+ from lm_deluge.prompt import (
+     CachePattern,
+     Conversation,
+     Prompt,
+     prompts_to_conversations,
+ )
  from lm_deluge.request_context import RequestContext


@@ -166,14 +171,18 @@ async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
  async def create_batch_files_oa(
      model: str,
      sampling_params: SamplingParams,
-     prompts: Sequence[str | list[dict] | Conversation],
+     prompts: Prompt | Sequence[Prompt],
      batch_size: int = 50_000,
      destination: str | None = None,  # if none provided, temp files
  ):
      MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
      MAX_BATCH_SIZE_ITEMS = batch_size

-     prompts = prompts_to_conversations(prompts)
+     if not isinstance(prompts, list):
+         prompts = cast(Sequence[Prompt], [prompts])
+
+     prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+     assert isinstance(prompts, Sequence)
      if any(p is None for p in prompts):
          raise ValueError("All prompts must be valid.")

@@ -251,14 +260,18 @@ async def create_batch_files_oa(
  async def submit_batches_oa(
      model: str,
      sampling_params: SamplingParams,
-     prompts: Sequence[str | list[dict] | Conversation],
+     prompts: Prompt | Sequence[Prompt],
      batch_size: int = 50_000,
  ):
      """Write OpenAI batch requests to a file and submit."""
      MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
      MAX_BATCH_SIZE_ITEMS = batch_size

-     prompts = prompts_to_conversations(prompts)
+     if not isinstance(prompts, list):
+         prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+     prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+     assert isinstance(prompts, Sequence)
      if any(p is None for p in prompts):
          raise ValueError("All prompts must be valid.")

@@ -342,7 +355,7 @@ async def submit_batches_oa(
  async def submit_batches_anthropic(
      model: str,
      sampling_params: SamplingParams,
-     prompts: Sequence[str | list[dict] | Conversation],
+     prompts: Prompt | Sequence[Prompt],
      *,
      cache: CachePattern | None = None,
      batch_size=100_000,
@@ -362,13 +375,16 @@ async def submit_batches_anthropic(
      MAX_BATCH_SIZE_ITEMS = batch_size

      # Convert prompts to Conversations
-     prompts = prompts_to_conversations(prompts)
+     if not isinstance(prompts, list):
+         prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+     prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))

      request_headers = None
      batch_tasks = []
      current_batch = []
      current_batch_size = 0
-
+     assert isinstance(prompts, Sequence)
      for idx, prompt in enumerate(prompts):
          assert isinstance(prompt, Conversation)
          context = RequestContext(
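Because the batch helpers now accept `Prompt | Sequence[Prompt]`, a single bare prompt is wrapped into a one-element list before conversion. A hedged usage sketch (the model name and the no-argument `SamplingParams()` construction are assumptions, not taken from the package docs):

    import asyncio

    from lm_deluge.batches import submit_batches_oa
    from lm_deluge.config import SamplingParams

    # A single string prompt no longer needs to be wrapped in a list by the caller.
    batches = asyncio.run(
        submit_batches_oa(
            "gpt-5",                            # model name as registered in lm_deluge
            SamplingParams(),                   # assumed to have usable defaults
            "Summarize the attached report.",   # one prompt, passed bare
        )
    )
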
lm_deluge/client.py CHANGED
@@ -1,5 +1,14 @@
  import asyncio
- from typing import Any, AsyncGenerator, Callable, Literal, Self, Sequence, overload
+ from typing import (
+     Any,
+     AsyncGenerator,
+     Callable,
+     Literal,
+     Self,
+     Sequence,
+     cast,
+     overload,
+ )

  import numpy as np
  import yaml
@@ -12,7 +21,12 @@ from lm_deluge.batches import (
      submit_batches_oa,
      wait_for_batch_completion_async,
  )
- from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+ from lm_deluge.prompt import (
+     CachePattern,
+     Conversation,
+     Prompt,
+     prompts_to_conversations,
+ )
  from lm_deluge.tool import MCPServer, Tool

  from .api_requests.base import APIResponse
@@ -40,6 +54,9 @@ class _LLMClient(BaseModel):
      request_timeout: int = 30
      cache: Any = None
      extra_headers: dict[str, str] | None = None
+     extra_body: dict[str, str] | None = None
+     use_responses_api: bool = False
+     background: bool = False
      # sampling params - if provided, and sampling_params is not,
      # these override the defaults
      temperature: float = 0.75
@@ -100,13 +117,120 @@ class _LLMClient(BaseModel):

      # NEW! Builder methods
      def with_model(self, model: str):
-         self.model_names = [model]
+         self._update_models([model])
          return self

      def with_models(self, models: list[str]):
-         self.model_names = models
+         self._update_models(models)
          return self

+     def _update_models(self, models: list[str]) -> None:
+         normalized, per_model_efforts = self._normalize_model_names(models)
+         if self.reasoning_effort is None:
+             unique_efforts = {eff for eff in per_model_efforts if eff is not None}
+             if len(normalized) == 1 and per_model_efforts[0] is not None:
+                 self.reasoning_effort = per_model_efforts[0]
+             elif (
+                 len(unique_efforts) == 1
+                 and len(unique_efforts) != 0
+                 and None not in per_model_efforts
+             ):
+                 self.reasoning_effort = next(iter(unique_efforts))  # type: ignore
+         self.model_names = normalized
+         self._align_sampling_params(per_model_efforts)
+         self._reset_model_weights()
+
+     def _normalize_model_names(
+         self, models: list[str]
+     ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+         reasoning_effort_suffixes: dict[str, Literal["low", "medium", "high"]] = {
+             "-low": "low",
+             "-medium": "medium",
+             "-high": "high",
+         }
+         normalized: list[str] = []
+         efforts: list[Literal["low", "medium", "high"] | None] = []
+
+         for name in models:
+             base_name = name
+             effort: Literal["low", "medium", "high"] | None = None
+             for suffix, candidate in reasoning_effort_suffixes.items():
+                 if name.endswith(suffix) and len(name) > len(suffix):
+                     base_name = name[: -len(suffix)]
+                     effort = candidate
+                     break
+             normalized.append(base_name)
+             efforts.append(effort)
+
+         return normalized, efforts
+
+     def _align_sampling_params(
+         self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+     ) -> None:
+         if len(per_model_efforts) < len(self.model_names):
+             per_model_efforts = per_model_efforts + [None] * (
+                 len(self.model_names) - len(per_model_efforts)
+             )
+
+         if not self.model_names:
+             self.sampling_params = []
+             return
+
+         if not self.sampling_params:
+             self.sampling_params = []
+
+         if len(self.sampling_params) == 0:
+             for _ in self.model_names:
+                 self.sampling_params.append(
+                     SamplingParams(
+                         temperature=self.temperature,
+                         top_p=self.top_p,
+                         json_mode=self.json_mode,
+                         max_new_tokens=self.max_new_tokens,
+                         reasoning_effort=self.reasoning_effort,
+                         logprobs=self.logprobs,
+                         top_logprobs=self.top_logprobs,
+                     )
+                 )
+         elif len(self.sampling_params) == 1 and len(self.model_names) > 1:
+             base_param = self.sampling_params[0]
+             self.sampling_params = [
+                 base_param.model_copy(deep=True) for _ in self.model_names
+             ]
+         elif len(self.sampling_params) != len(self.model_names):
+             base_param = self.sampling_params[0]
+             self.sampling_params = [
+                 base_param.model_copy(deep=True) for _ in self.model_names
+             ]
+
+         if self.reasoning_effort is not None:
+             for sp in self.sampling_params:
+                 sp.reasoning_effort = self.reasoning_effort
+         else:
+             for sp, effort in zip(self.sampling_params, per_model_efforts):
+                 if effort is not None:
+                     sp.reasoning_effort = effort
+
+     def _reset_model_weights(self) -> None:
+         if not self.model_names:
+             self.model_weights = []
+             return
+
+         if isinstance(self.model_weights, list):
+             if len(self.model_weights) == len(self.model_names) and any(
+                 self.model_weights
+             ):
+                 total = sum(self.model_weights)
+                 if total == 0:
+                     self.model_weights = [
+                         1 / len(self.model_names) for _ in self.model_names
+                     ]
+                 else:
+                     self.model_weights = [w / total for w in self.model_weights]
+                 return
+         # Fallback to uniform distribution
+         self.model_weights = [1 / len(self.model_names) for _ in self.model_names]
+
      def with_limits(
          self,
          max_requests_per_minute: int | None = None,
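The suffix convention these helpers implement fits in a few lines; a standalone restatement for reference (illustrative, mirroring `_normalize_model_names` above):

    def split_reasoning_suffix(name: str) -> tuple[str, str | None]:
        # "gpt-5-high" -> ("gpt-5", "high"); names without a suffix pass through.
        for suffix in ("-low", "-medium", "-high"):
            if name.endswith(suffix) and len(name) > len(suffix):
                return name[: -len(suffix)], suffix.lstrip("-")
        return name, None


    assert split_reasoning_suffix("gpt-5-high") == ("gpt-5", "high")
    assert split_reasoning_suffix("o3") == ("o3", None)
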
@@ -133,8 +257,29 @@ class _LLMClient(BaseModel):
      @model_validator(mode="before")
      @classmethod
      def fix_lists(cls, data) -> "_LLMClient":
+         # Parse reasoning effort from model name suffixes (e.g., "gpt-5-high")
+         # Only applies when a single model string is provided
          if isinstance(data.get("model_names"), str):
+             model_name = data["model_names"]
+             reasoning_effort_suffixes = {
+                 "-low": "low",
+                 "-medium": "medium",
+                 "-high": "high",
+             }
+
+             for suffix, effort in reasoning_effort_suffixes.items():
+                 if model_name.endswith(suffix):
+                     # Extract base model name by removing suffix
+                     base_model = model_name[: -len(suffix)]
+                     data["model_names"] = base_model
+
+                     # Set reasoning_effort if not already explicitly set
+                     if data.get("reasoning_effort") is None:
+                         data["reasoning_effort"] = effort
+                     break
+
              data["model_names"] = [data["model_names"]]
+
          if not isinstance(data.get("sampling_params", []), list):
              data["sampling_params"] = [data["sampling_params"]]
          if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
@@ -171,6 +316,11 @@ class _LLMClient(BaseModel):
          # normalize weights
          self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]

+         # background mode only allowed for responses api
+         if self.background:
+             assert (
+                 self.use_responses_api
+             ), "background mode only allowed for responses api"
          # Auto-generate name if not provided
          if self.name is None:
              if len(self.model_names) == 1:
@@ -256,13 +406,6 @@ class _LLMClient(BaseModel):
              # Idle wait before next capacity check. Aim for ~RPM spacing.
              await asyncio.sleep(max(60.0 / self.max_requests_per_minute, 0.01))

-     async def _execute_request(self, context: RequestContext) -> APIResponse:
-         """Create and send a single API request using the provided context."""
-         model_obj = APIModel.from_registry(context.model_name)
-         request = model_obj.make_request(context)
-         response = await request.execute_once()
-         return response
-
      async def process_single_request(
          self, context: RequestContext, retry_queue: asyncio.Queue | None = None
      ) -> APIResponse:
@@ -290,7 +433,9 @@ class _LLMClient(BaseModel):
              # Execute single request
              assert context.status_tracker
              context.status_tracker.update_pbar()
-             response = await self._execute_request(context)
+             model_obj = APIModel.from_registry(context.model_name)
+             request = model_obj.make_request(context)
+             response = await request.execute_once()

              # Handle successful response
              if not response.is_error:
@@ -350,36 +495,36 @@ class _LLMClient(BaseModel):
      @overload
      async def process_prompts_async(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: Literal[True],
          show_progress: bool = ...,
          tools: list[Tool | dict | MCPServer] | None = ...,
          cache: CachePattern | None = ...,
-         use_responses_api: bool = ...,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
      ) -> list[str | None]: ...

      @overload
      async def process_prompts_async(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: Literal[False] = ...,
          show_progress: bool = ...,
          tools: list[Tool | dict | MCPServer] | None = ...,
          cache: CachePattern | None = ...,
-         use_responses_api: bool = ...,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
      ) -> list[APIResponse]: ...

      async def process_prompts_async(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: bool = False,
          show_progress: bool = True,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
      ) -> list[APIResponse] | list[str | None] | dict[str, int]:
          """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.

@@ -387,7 +532,9 @@ class _LLMClient(BaseModel):
          avoiding issues with tracker state accumulating across multiple calls.
          """
          # Convert prompts to Conversations
-         prompts = prompts_to_conversations(prompts)
+         if not isinstance(prompts, list):
+             prompts = prompts = cast(Sequence[Prompt], [prompts])
+         prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))

          # Ensure tracker exists (start_nowait will call add_to_total for each task)
          if self._tracker is None:
@@ -398,13 +545,14 @@

          # Start all tasks using start_nowait - tasks will coordinate via shared capacity lock
          task_ids = []
+         assert isinstance(prompts, Sequence)
          for prompt in prompts:
              assert isinstance(prompt, Conversation)
              task_id = self.start_nowait(
                  prompt,
                  tools=tools,
                  cache=cache,
-                 use_responses_api=use_responses_api,
+                 service_tier=service_tier,
              )
              task_ids.append(task_id)

@@ -443,13 +591,12 @@

      def process_prompts_sync(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: bool = False,
          show_progress=True,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
      ):
          return asyncio.run(
              self.process_prompts_async(
@@ -458,7 +605,6 @@
                  show_progress=show_progress,
                  tools=tools,
                  cache=cache,
-                 use_responses_api=use_responses_api,
              )
          )

@@ -478,18 +624,18 @@

      def start_nowait(
          self,
-         prompt: str | Conversation,
+         prompt: Prompt,
          *,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
      ) -> int:
          tracker = self._get_tracker()
          task_id = self._next_task_id
          self._next_task_id += 1
          model, sampling_params = self._select_model()
-         if isinstance(prompt, str):
-             prompt = Conversation.user(prompt)
+         prompt = prompts_to_conversations([prompt])[0]
+         assert isinstance(prompt, Conversation)
          context = RequestContext(
              task_id=task_id,
              model_name=model,
@@ -500,7 +646,9 @@
              status_tracker=tracker,
              tools=tools,
              cache=cache,
-             use_responses_api=use_responses_api,
+             use_responses_api=self.use_responses_api,
+             background=self.background,
+             service_tier=service_tier,
              extra_headers=self.extra_headers,
              force_local_mcp=self.force_local_mcp,
          )
@@ -515,10 +663,10 @@
          *,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
      ) -> APIResponse:
          task_id = self.start_nowait(
-             prompt, tools=tools, cache=cache, use_responses_api=use_responses_api
+             prompt, tools=tools, cache=cache, service_tier=service_tier
          )
          return await self.wait_for(task_id)

@@ -698,7 +846,7 @@

      async def submit_batch_job(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          tools: list[Tool] | None = None,
          cache: CachePattern | None = None,
@@ -760,6 +908,8 @@ def LLMClient(
      request_timeout: int = 30,
      cache: Any = None,
      extra_headers: dict[str, str] | None = None,
+     use_responses_api: bool = False,
+     background: bool = False,
      temperature: float = 0.75,
      top_p: float = 1.0,
      json_mode: bool = False,
@@ -787,6 +937,8 @@ def LLMClient(
      request_timeout: int = 30,
      cache: Any = None,
      extra_headers: dict[str, str] | None = None,
+     use_responses_api: bool = False,
+     background: bool = False,
      temperature: float = 0.75,
      top_p: float = 1.0,
      json_mode: bool = False,
@@ -813,6 +965,8 @@ def LLMClient(
      request_timeout: int = 30,
      cache: Any = None,
      extra_headers: dict[str, str] | None = None,
+     use_responses_api: bool = False,
+     background: bool = False,
      temperature: float = 0.75,
      top_p: float = 1.0,
      json_mode: bool = False,
@@ -851,6 +1005,8 @@ def LLMClient(
          request_timeout=request_timeout,
          cache=cache,
          extra_headers=extra_headers,
+         use_responses_api=use_responses_api,
+         background=background,
          temperature=temperature,
          top_p=top_p,
          json_mode=json_mode,
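A hedged usage sketch of the new client-level switches. The keyword names match the `LLMClient` factory signature in the diff above, but the positional model argument, the import path, and the prompt are assumptions for illustration:

    from lm_deluge.client import LLMClient

    # background=True without use_responses_api=True would trip the validator's
    # assertion, since background mode is only allowed for the Responses API.
    client = LLMClient(
        "gpt-5",                 # registry name; "-low/-medium/-high" suffixes also work
        use_responses_api=True,  # route requests through the OpenAI Responses API
        background=True,         # submit server-side, then poll until completion
        request_timeout=600,     # background polling is bounded by this timeout
    )
    responses = client.process_prompts_sync(["Write a haiku about rate limits."])
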
lm_deluge/models/__init__.py CHANGED
@@ -62,7 +62,7 @@ class APIModel:
              raise ValueError("no regions to sample")
          random.sample(regions, 1, counts=weights)[0]

-     def make_request(self, context: RequestContext):  # -> "APIRequestBase"
+     def make_request(self, context: RequestContext):
          from ..api_requests.common import CLASSES

          api_spec = self.api_spec
lm_deluge/models/openai.py CHANGED
@@ -10,6 +10,20 @@ OPENAI_MODELS = {
      # ░███
      # █████
      # ░░░░░
+     "gpt-5-codex": {
+         "id": "gpt-5-codex",
+         "name": "gpt-5-codex",
+         "api_base": "https://api.openai.com/v1",
+         "api_key_env_var": "OPENAI_API_KEY",
+         "supports_json": False,
+         "supports_logprobs": True,
+         "supports_responses": True,
+         "api_spec": "openai",
+         "input_cost": 1.25,
+         "cached_input_cost": 0.125,
+         "output_cost": 10.0,
+         "reasoning_model": True,
+     },
      "gpt-5": {
          "id": "gpt-5",
          "name": "gpt-5",
@@ -79,6 +93,20 @@ OPENAI_MODELS = {
          "output_cost": 12.0,
          "reasoning_model": False,
      },
+     "codex-mini-latest": {
+         "id": "codex-mini-latest",
+         "name": "codex-mini-latest",
+         "api_base": "https://api.openai.com/v1",
+         "api_key_env_var": "OPENAI_API_KEY",
+         "supports_json": True,
+         "supports_logprobs": False,
+         "supports_responses": True,
+         "api_spec": "openai",
+         "input_cost": 1.5,
+         "cached_input_cost": 0.375,
+         "output_cost": 6.0,
+         "reasoning_model": True,
+     },
      "o3": {
          "id": "o3",
          "name": "o3-2025-04-16",
lm_deluge/prompt.py CHANGED
@@ -2,13 +2,14 @@ import io
  import json
  from dataclasses import dataclass, field
  from pathlib import Path
- from typing import Literal, Sequence
+ from typing import Literal, Sequence, TypeAlias

  import tiktoken
  import xxhash

  from lm_deluge.file import File
  from lm_deluge.image import Image, MediaType
+ from lm_deluge.warnings import deprecated

  CachePattern = Literal[
      "tools_only",
@@ -415,12 +416,17 @@ class Message:

          return cls(role, parts)

-     def add_text(self, content: str) -> "Message":
+     def with_text(self, content: str) -> "Message":
          """Append a text block and return self for chaining."""
          self.parts.append(Text(content))
          return self

-     def add_image(
+     @deprecated("with_text")
+     def add_text(self, content: str) -> "Message":
+         """Append a text block and return self for chaining."""
+         return self.with_text(content)
+
+     def with_image(
          self,
          data: bytes | str | Path | io.BytesIO | Image,
          *,
@@ -446,7 +452,27 @@ class Message:
          self.parts.append(img)
          return self

-     def add_file(
+     @deprecated("with_image")
+     def add_image(
+         self,
+         data: bytes | str | Path | io.BytesIO | Image,
+         *,
+         media_type: MediaType | None = None,
+         detail: Literal["low", "high", "auto"] = "auto",
+         max_size: int | None = None,
+     ) -> "Message":
+         """
+         Append an image block and return self for chaining.
+
+         If max_size is provided, the image will be resized so that its longer
+         dimension equals max_size, but only if the longer dimension is currently
+         larger than max_size.
+         """
+         return self.with_image(
+             data=data, media_type=media_type, detail=detail, max_size=max_size
+         )
+
+     def with_file(
          self,
          data: bytes | str | Path | io.BytesIO,
          *,
@@ -460,11 +486,29 @@ class Message:
          self.parts.append(file)
          return self

-     def add_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
+     @deprecated("with_file")
+     def add_file(
+         self,
+         data: bytes | str | Path | io.BytesIO,
+         *,
+         media_type: str | None = None,
+         filename: str | None = None,
+     ) -> "Message":
+         """
+         Append a file block and return self for chaining.
+         """
+         return self.with_file(data, media_type=media_type, filename=filename)
+
+     def with_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
          """Append a tool call block and return self for chaining."""
          self.parts.append(ToolCall(id=id, name=name, arguments=arguments))
          return self

+     @deprecated("with_tool_call")
+     def add_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
+         """Append a tool call block and return self for chaining."""
+         return self.with_tool_call(id, name, arguments)
+
      def with_tool_result(
          self, tool_call_id: str, result: str | list[ToolResultPart]
      ) -> "Message":
@@ -472,11 +516,23 @@ class Message:
          self.parts.append(ToolResult(tool_call_id=tool_call_id, result=result))
          return self

-     def add_thinking(self, content: str) -> "Message":
+     @deprecated("with_tool_result")
+     def add_tool_result(
+         self, tool_call_id: str, result: str | list[ToolResultPart]
+     ) -> "Message":
+         """Append a tool result block and return self for chaining."""
+         return self.with_tool_result(tool_call_id, result)
+
+     def with_thinking(self, content: str) -> "Message":
          """Append a thinking block and return self for chaining."""
          self.parts.append(Thinking(content=content))
          return self

+     @deprecated("with_thinking")
+     def add_thinking(self, content: str) -> "Message":
+         """Append a thinking block and return self for chaining."""
+         return self.with_thinking(content)
+
      # -------- convenient constructors --------
      @classmethod
      def user(
@@ -488,25 +544,25 @@ class Message:
      ) -> "Message":
          res = cls("user", [])
          if text is not None:
-             res.add_text(text)
+             res.with_text(text)
          if image is not None:
-             res.add_image(image)
+             res.with_image(image)
          if file is not None:
-             res.add_file(file)
+             res.with_file(file)
          return res

      @classmethod
      def system(cls, text: str | None = None) -> "Message":
          res = cls("system", [])
          if text is not None:
-             res.add_text(text)
+             res.with_text(text)
          return res

      @classmethod
      def ai(cls, text: str | None = None) -> "Message":
          res = cls("assistant", [])
          if text is not None:
-             res.add_text(text)
+             res.with_text(text)
          return res

      # ──── provider-specific constructors ───
@@ -698,9 +754,9 @@ class Conversation:
      ) -> "Conversation":
          msg = Message.user(text)
          if image is not None:
-             msg.add_image(image)
+             msg.with_image(image)
          if file is not None:
-             msg.add_file(file)
+             msg.with_file(file)
          return cls([msg])

      @classmethod
@@ -1211,11 +1267,11 @@ class Conversation:
                  for i, tool_result in enumerate(m.tool_results):
                      images = tool_result.get_images()
                      if len(images) > 0:
-                         user_msg.add_text(
+                         user_msg.with_text(
                              f"[Images for Tool Call {tool_result.tool_call_id}]"
                          )
                          for img in images:
-                             user_msg.add_image(img)
+                             user_msg.with_image(img)

              else:
                  result.append(m.oa_chat())
@@ -1495,9 +1551,21 @@ class Conversation:
      return cls(msgs)


- def prompts_to_conversations(prompts: Sequence[str | list[dict] | Conversation]):
-     if any(isinstance(x, list) for x in prompts):
-         raise ValueError("can't convert list[dict] to conversation yet")
-     return [  # type: ignore
-         Conversation.user(p) if isinstance(p, str) else p for p in prompts
-     ]
+ Prompt: TypeAlias = str | list[dict] | Message | Conversation
+
+
+ def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[Prompt]:
+     converted = []
+     for prompt in prompts:
+         if isinstance(prompt, Conversation):
+             converted.append(prompt)
+         elif isinstance(prompt, Message):
+             converted.append(Conversation([prompt]))
+         elif isinstance(prompt, str):
+             converted.append(Conversation.user(prompt))
+         elif isinstance(prompt, list):
+             conv, provider = Conversation.from_unknown(prompt)
+             converted.append(conv)
+         else:
+             raise ValueError(f"Unknown prompt type {type(prompt)}")
+     return converted
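Taken together, the renamed builder methods and the widened Prompt alias allow chains like the following (a small illustrative sketch, not taken from the package docs):

    from lm_deluge.prompt import Conversation, Message, prompts_to_conversations

    # with_* methods return self, so blocks can be chained; the old add_* names
    # still work but now emit a one-time DeprecationWarning.
    msg = Message.user("Summarize the attached notes.").with_text("Keep it under 50 words.")

    # prompts_to_conversations accepts str, Message, or Conversation entries
    # (and provider-style list[dict]) and yields Conversation objects.
    convs = prompts_to_conversations(["plain string prompt", msg, Conversation([msg])])
    assert all(isinstance(c, Conversation) for c in convs)
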
lm_deluge/request_context.py CHANGED
@@ -26,28 +26,22 @@ class RequestContext:

      # Infrastructure
      status_tracker: StatusTracker | None = None
-     results_arr: list[Any] | None = (
-         None  # list["APIRequestBase"] but avoiding circular import
-     )
+     # avoiding circular import
+     results_arr: list[Any] | None = None  # list["APIRequestBase"]
      callback: Callable | None = None

      # Optional features
      tools: list | None = None
      cache: CachePattern | None = None
      use_responses_api: bool = False
+     background: bool = False
+     service_tier: str | None = None
      extra_headers: dict[str, str] | None = None
+     extra_body: dict[str, Any] | None = None
      force_local_mcp: bool = False

      # Computed properties
      cache_key: str = field(init=False)
-     # num_tokens: int = field(init=False)
-
-     # def __post_init__(self):
-     # # Compute cache key from prompt fingerprint
-     # # self.cache_key = self.prompt.fingerprint
-
-     # # Compute token count
-     # self.num_tokens =

      @cached_property
      def num_tokens(self):
@@ -74,6 +68,10 @@ class RequestContext:
              "tools": self.tools,
              "cache": self.cache,
              "use_responses_api": self.use_responses_api,
+             "background": self.background,
+             "service_tier": self.service_tier,
+             "extra_headers": self.extra_headers,
+             "extra_body": self.extra_body,
              "force_local_mcp": self.force_local_mcp,
          }

lm_deluge/warnings.py ADDED
@@ -0,0 +1,46 @@
+ import functools
+ import os
+ import warnings
+
+ WARNINGS: dict[str, str] = {
+     "WARN_JSON_MODE_UNSUPPORTED": "JSON mode requested for {model_name} but response_format parameter not supported.",
+     "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}.",
+     "WARN_CACHING_UNSUPPORTED": "Cache parameter '{cache_param}' is not supported, ignoring for {model_name}.",
+     "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
+ }
+
+
+ def maybe_warn(warning: str, **kwargs):
+     if os.getenv(warning):
+         pass
+     else:
+         warnings.warn(WARNINGS[warning].format(**kwargs))
+         os.environ[warning] = "1"
+
+
+ def deprecated(replacement: str):
+     """Decorator to mark methods as deprecated and suggest replacement.
+
+     Only shows the warning once per method to avoid spam.
+
+     Args:
+         replacement: The name of the replacement method to suggest
+     """
+
+     def decorator(func):
+         warning_key = f"DEPRECATED_{func.__module__}_{func.__qualname__}"
+
+         @functools.wraps(func)
+         def wrapper(*args, **kwargs):
+             if not os.getenv(warning_key):
+                 warnings.warn(
+                     f"{func.__name__} is deprecated, use {replacement} instead",
+                     DeprecationWarning,
+                     stacklevel=2,
+                 )
+                 os.environ[warning_key] = "1"
+             return func(*args, **kwargs)
+
+         return wrapper
+
+     return decorator
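A quick usage sketch of the new warnings helpers (illustrative; the Greeter class is made up, while the warning key and one-shot behavior come from the module above):

    from lm_deluge.warnings import deprecated, maybe_warn

    # Each warning key fires at most once per process: maybe_warn sets an
    # environment variable of the same name after the first emission.
    maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name="mistral-large")  # warns
    maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name="mistral-large")  # silent


    class Greeter:
        def with_greeting(self, name: str) -> str:
            return f"hello, {name}"

        @deprecated("with_greeting")
        def add_greeting(self, name: str) -> str:  # emits DeprecationWarning once
            return self.with_greeting(name)


    print(Greeter().add_greeting("world"))
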
lm_deluge-0.0.60.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.58
+ Version: 0.0.60
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
lm_deluge-0.0.60.dist-info/RECORD CHANGED
@@ -1,27 +1,28 @@
  lm_deluge/__init__.py,sha256=LKKIcqQoQyDpTck6fnB7iAs75BnfNNa3Bj5Nz7KU4Hk,376
- lm_deluge/batches.py,sha256=rQocJLyIs3Ko_nRdAE9jT__5cKWYxiIRAH_Lw3L0E1k,24653
+ lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
  lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
  lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
- lm_deluge/client.py,sha256=1ZxQAWkmtz-zhW4E8rfU2V4BfzvqGsKhvqz_CB63-lc,32894
+ lm_deluge/client.py,sha256=nxVxN0oXYLvOiMgiF7b_qmqQk6Hohnf4ZTtSx1SI_PQ,38845
  lm_deluge/config.py,sha256=H1tQyJDNHGFuwxqQNL5Z-CjWAC0luHSBA3iY_pxmACM,932
  lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
  lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
  lm_deluge/file.py,sha256=FGomcG8s2go_55Z2CChflHgmU-UqgFftgFY8c7f_G70,5631
  lm_deluge/image.py,sha256=5AMXmn2x47yXeYNfMSMAOWcnlrOxxOel-4L8QCJwU70,8928
- lm_deluge/prompt.py,sha256=RgZBcCiAtThqjILkPa4X530sR53SUK03U-6TWWk07tc,59607
- lm_deluge/request_context.py,sha256=o33LSEwnK6YPhZeulUoSE_VrdKCXiCQa0tjjixK2K6M,2540
+ lm_deluge/prompt.py,sha256=1hGLOIwdyGFokKv0dPiVpke3OPHD6vK5qO6q9E8H89Y,62020
+ lm_deluge/request_context.py,sha256=cBayMFWupWhde2OjRugW3JH-Gin-WFGc6DK2Mb4Prdc,2576
  lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
  lm_deluge/tool.py,sha256=eZpzgkSIlGD7KdZQwzLF-UdyRJpRnNNXpceGJrNhRrE,26421
  lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
  lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
+ lm_deluge/warnings.py,sha256=nlDJMCw30VhDEFxqLO2-bfXH_Tv5qmlglzUSbokCSw8,1498
  lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  lm_deluge/api_requests/anthropic.py,sha256=7tTb_NMPodDHrCzakrLd9LyXuLqeTQyAGU-FvMoV3gI,8437
- lm_deluge/api_requests/base.py,sha256=1et-5SdRqfnvXZT3b9fBEx0vvbCwbVunHBWtQr7Wurg,5878
- lm_deluge/api_requests/bedrock.py,sha256=GmVxXz3ERAeQ7e52Nlztt81O4H9eJOQeOnS6b65vjm4,15453
+ lm_deluge/api_requests/base.py,sha256=GCcydwBRx4_xAuYLvasXlyj-TgqvKAVhVvxRfJkvPbY,9471
+ lm_deluge/api_requests/bedrock.py,sha256=Uppne03GcIEk1tVYzoGu7GXK2Sg94a_xvFTLDRN_phY,15412
  lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
- lm_deluge/api_requests/gemini.py,sha256=COHqPWmeaq9fpg0YwOZqQTUbijKnXNF4cvMLnW9kLl8,7857
- lm_deluge/api_requests/mistral.py,sha256=S_LpOfCGbCVEROH_od3P-tYeNYTKFMamMTL-c_wFCBI,4597
- lm_deluge/api_requests/openai.py,sha256=frxSdQn9ZAAweSO-HMKRZ6gKU3Wdl1PqTVPhwy-iNA8,23202
+ lm_deluge/api_requests/gemini.py,sha256=4uD7fQl0yWyAvYkPNi3oO1InBnvYfo5_QR6k-va-2GI,7838
+ lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
+ lm_deluge/api_requests/openai.py,sha256=qRBakHOOMYJWvKO0HeeE5C1Dv_dbokuizZin9Ca4k_k,24855
  lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
  lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
  lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
@@ -41,7 +42,7 @@ lm_deluge/llm_tools/locate.py,sha256=lYNbKTmy9dTvj0lEQkOQ7yrxyqsgYzjD0C_byJKI_4w
  lm_deluge/llm_tools/ocr.py,sha256=7fDlvs6uUOvbxMasvGGNJx5Fj6biM6z3lijKZaGN26k,23
  lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
  lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
- lm_deluge/models/__init__.py,sha256=7HNEnpxpEguZYjcudY_9oJ79hOOLo0oNUvG-kwkEpv4,4539
+ lm_deluge/models/__init__.py,sha256=a2xzQNG2axdMaSzoLbzdOKBM5EVOLztvlo8E1k-brqM,4516
  lm_deluge/models/anthropic.py,sha256=5j75sB40yZzT1wwKC7Dh0f2Y2cXnp8yxHuXW63PCuns,6285
  lm_deluge/models/bedrock.py,sha256=g1PbfceSRH2lWST3ja0mUlF3oTq4e4T-si6RMe7qXgg,4888
  lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
@@ -53,7 +54,7 @@ lm_deluge/models/grok.py,sha256=TDzr8yfTaHbdJhwMA-Du6L-efaKFJhjTQViuVElCCHI,2566
  lm_deluge/models/groq.py,sha256=Mi5WE1xOBGoZlymD0UN6kzhH_NOmfJYU4N2l-TO0Z8Q,2552
  lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
  lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
- lm_deluge/models/openai.py,sha256=HC_oNLmKkmShkcfeUgyhesACtXGg__I2WiIIDrN-X84,10176
+ lm_deluge/models/openai.py,sha256=6J4eAt6Iu5RopokyldUQzRlviFBXBqhLqpVP5tztzqI,11074
  lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
  lm_deluge/models/together.py,sha256=AjKhPsazqBgqyLwHkNQW07COM1n_oSrYQRp2BFVvn9o,4381
  lm_deluge/presets/cerebras.py,sha256=MDkqj15qQRrj8wxSCDNNe_Cs7h1WN1UjV6lTmSY1olQ,479
@@ -64,8 +65,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
  lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
  lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
  lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
- lm_deluge-0.0.58.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
- lm_deluge-0.0.58.dist-info/METADATA,sha256=jyhXeGVPAMMYBGm3omp6MKZfQGlRX-ow_9fI58ZZNGg,13443
- lm_deluge-0.0.58.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- lm_deluge-0.0.58.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
- lm_deluge-0.0.58.dist-info/RECORD,,
+ lm_deluge-0.0.60.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+ lm_deluge-0.0.60.dist-info/METADATA,sha256=uBr_1y__E5eT9sL6rOo3qf0MZ4rNKZe0hKVj4WMcqKE,13443
+ lm_deluge-0.0.60.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lm_deluge-0.0.60.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+ lm_deluge-0.0.60.dist-info/RECORD,,