lm-deluge 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -1,19 +1,56 @@
-import asyncio
 import warnings
 from aiohttp import ClientResponse
 import json
 import os
-from tqdm.auto import tqdm
 from typing import Callable
 
+from lm_deluge.tool import Tool
+
 from .base import APIRequestBase, APIResponse
 from ..prompt import Conversation, Message, Text, ToolCall, Thinking, CachePattern
 from ..usage import Usage
 from ..tracker import StatusTracker
-from ..sampling_params import SamplingParams
+from ..config import SamplingParams
 from ..models import APIModel
 
 
+def _build_oa_chat_request(
+    model: APIModel,
+    prompt: Conversation,
+    tools: list[Tool] | None,
+    sampling_params: SamplingParams,
+) -> dict:
+    request_json = {
+        "model": model.name,
+        "messages": prompt.to_openai(),
+        "temperature": sampling_params.temperature,
+        "top_p": sampling_params.top_p,
+    }
+    # set max_tokens or max_completion_tokens dep. on provider
+    if "cohere" in model.api_base:
+        request_json["max_tokens"] = sampling_params.max_new_tokens
+    else:
+        request_json["max_completion_tokens"] = sampling_params.max_new_tokens
+    if model.reasoning_model:
+        request_json["temperature"] = 1.0
+        request_json["top_p"] = 1.0
+        request_json["reasoning_effort"] = sampling_params.reasoning_effort
+    else:
+        if sampling_params.reasoning_effort:
+            warnings.warn(
+                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
+            )
+    if sampling_params.logprobs:
+        request_json["logprobs"] = True
+        if sampling_params.top_logprobs is not None:
+            request_json["top_logprobs"] = sampling_params.top_logprobs
+    if sampling_params.json_mode and model.supports_json:
+        request_json["response_format"] = {"type": "json_object"}
+    if tools:
+        request_json["tools"] = [tool.dump_for("openai-completions") for tool in tools]
+    return request_json
+
+
 class OpenAIRequest(APIRequestBase):
     def __init__(
         self,
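
The hunk above pulls the chat-completions payload construction out of OpenAIRequest.__init__ into the module-level _build_oa_chat_request helper. A minimal usage sketch follows; the registry key and the Conversation constructor are assumptions for illustration, not shown in this diff:

# Hypothetical usage sketch. "gpt-4o-mini" as a registry key and
# Conversation.user(...) as a constructor are assumed; only
# APIModel.from_registry, SamplingParams, and the helper itself
# appear in the diff. Assumes _build_oa_chat_request is in scope.
from lm_deluge.models import APIModel
from lm_deluge.config import SamplingParams
from lm_deluge.prompt import Conversation

model = APIModel.from_registry("gpt-4o-mini")       # assumed registry key
prompt = Conversation.user("Summarize this file.")  # assumed constructor
params = SamplingParams()                           # defaults, as in the diff

body = _build_oa_chat_request(model, prompt, tools=None, sampling_params=params)
# body now carries "model", "messages", "temperature", "top_p", and either
# "max_tokens" (Cohere API bases) or "max_completion_tokens" (all others).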
@@ -24,15 +61,10 @@ class OpenAIRequest(APIRequestBase):
         prompt: Conversation,
         attempts_left: int,
         status_tracker: StatusTracker,
-        retry_queue: asyncio.Queue,
         results_arr: list,
         request_timeout: int = 30,
         sampling_params: SamplingParams = SamplingParams(),
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-        pbar: tqdm | None = None,
         callback: Callable | None = None,
-        debug: bool = False,
         all_model_names: list[str] | None = None,
         all_sampling_params: list[SamplingParams] | None = None,
         tools: list | None = None,
@@ -44,15 +76,10 @@ class OpenAIRequest(APIRequestBase):
             prompt=prompt,
             attempts_left=attempts_left,
             status_tracker=status_tracker,
-            retry_queue=retry_queue,
             results_arr=results_arr,
             request_timeout=request_timeout,
             sampling_params=sampling_params,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            pbar=pbar,
             callback=callback,
-            debug=debug,
             all_model_names=all_model_names,
             all_sampling_params=all_sampling_params,
             tools=tools,
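
These two hunks slim the constructor: retry_queue, pbar, and debug are dropped, and the logprobs and top_logprobs flags now travel on SamplingParams (imported from lm_deluge.config rather than lm_deluge.sampling_params). A minimal sketch of the new call pattern; both field names appear elsewhere in this diff, the values here are illustrative:

# logprobs/top_logprobs are now SamplingParams fields, not constructor args.
from lm_deluge.config import SamplingParams

params = SamplingParams(logprobs=True, top_logprobs=5)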
@@ -70,36 +97,9 @@ class OpenAIRequest(APIRequestBase):
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
         }
 
-        self.request_json = {
-            "model": self.model.name,
-            "messages": prompt.to_openai(),
-            "temperature": sampling_params.temperature,
-            "top_p": sampling_params.top_p,
-        }
-        # set max_tokens or max_completion_tokens dep. on provider
-        if "cohere" in self.model.api_base:
-            self.request_json["max_tokens"] = sampling_params.max_new_tokens
-        elif "openai" in self.model.api_base:
-            self.request_json["max_completion_tokens"] = sampling_params.max_new_tokens
-        if self.model.reasoning_model:
-            self.request_json["temperature"] = 1.0
-            self.request_json["top_p"] = 1.0
-            self.request_json["reasoning_effort"] = sampling_params.reasoning_effort
-        else:
-            if sampling_params.reasoning_effort:
-                warnings.warn(
-                    f"Ignoring reasoning_effort param for non-reasoning model: {model_name}"
-                )
-        if logprobs:
-            self.request_json["logprobs"] = True
-            if top_logprobs is not None:
-                self.request_json["top_logprobs"] = top_logprobs
-        if sampling_params.json_mode and self.model.supports_json:
-            self.request_json["response_format"] = {"type": "json_object"}
-        if tools:
-            self.request_json["tools"] = [
-                tool.dump_for("openai-completions") for tool in tools
-            ]
+        self.request_json = _build_oa_chat_request(
+            self.model, prompt, tools, sampling_params
+        )
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
@@ -151,7 +151,10 @@ class OpenAIRequest(APIRequestBase):
                 content = Message("assistant", parts)
 
                 usage = Usage.from_openai_usage(data["usage"])
-                if self.logprobs and "logprobs" in data["choices"][0]:
+                if (
+                    self.sampling_params.logprobs
+                    and "logprobs" in data["choices"][0]
+                ):
                     logprobs = data["choices"][0]["logprobs"]["content"]
             except Exception:
                 is_error = True
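
The parser now consults self.sampling_params.logprobs instead of the removed self.logprobs attribute. For orientation, the structure it indexes into is the standard chat-completions logprobs payload; roughly (a sketch of the provider response, not code from this package):

# Approximate shape of data["choices"][0]["logprobs"]["content"] in an
# OpenAI-style chat-completions response:
# [
#     {"token": "Hello", "logprob": -0.012, "top_logprobs": [...]},
#     ...
# ]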
@@ -186,4 +189,227 @@ class OpenAIRequest(APIRequestBase):
             model_internal=self.model_name,
             sampling_params=self.sampling_params,
             usage=usage,
+            raw_response=data,
+        )
+
+
+class OpenAIResponsesRequest(APIRequestBase):
+    def __init__(
+        self,
+        task_id: int,
+        model_name: str,
+        prompt: Conversation,
+        attempts_left: int,
+        status_tracker: StatusTracker,
+        results_arr: list,
+        request_timeout: int = 30,
+        sampling_params: SamplingParams = SamplingParams(),
+        callback: Callable | None = None,
+        all_model_names: list[str] | None = None,
+        all_sampling_params: list[SamplingParams] | None = None,
+        tools: list | None = None,
+        cache: CachePattern | None = None,
+        computer_use: bool = False,
+        display_width: int = 1024,
+        display_height: int = 768,
+    ):
+        super().__init__(
+            task_id=task_id,
+            model_name=model_name,
+            prompt=prompt,
+            attempts_left=attempts_left,
+            status_tracker=status_tracker,
+            results_arr=results_arr,
+            request_timeout=request_timeout,
+            sampling_params=sampling_params,
+            callback=callback,
+            all_model_names=all_model_names,
+            all_sampling_params=all_sampling_params,
+            tools=tools,
+            cache=cache,
+        )
+
+        # Store computer use parameters
+        self.computer_use = computer_use
+        self.display_width = display_width
+        self.display_height = display_height
+
+        # Validate computer use requirements
+        if computer_use and model_name != "openai-computer-use-preview":
+            raise ValueError(
+                f"Computer use is only supported with openai-computer-use-preview model, got {model_name}"
+            )
+
+        # Warn if cache is specified for non-Anthropic model
+        if cache is not None:
+            warnings.warn(
+                f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+            )
+        self.model = APIModel.from_registry(model_name)
+        self.url = f"{self.model.api_base}/responses"
+        self.request_header = {
+            "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
+        }
+
+        # Convert conversation to input format for Responses API
+        openai_responses_format = prompt.to_openai_responses()
+
+        self.request_json = {
+            "model": self.model.name,
+            "input": openai_responses_format["input"],
+            "temperature": sampling_params.temperature,
+            "top_p": sampling_params.top_p,
+        }
+
+        # Add max_output_tokens for responses API
+        if sampling_params.max_new_tokens:
+            self.request_json["max_output_tokens"] = sampling_params.max_new_tokens
+
+        if self.model.reasoning_model:
+            self.request_json["temperature"] = 1.0
+            self.request_json["top_p"] = 1.0
+            self.request_json["reasoning"] = {
+                "effort": sampling_params.reasoning_effort
+            }
+        else:
+            if sampling_params.reasoning_effort:
+                warnings.warn(
+                    f"Ignoring reasoning_effort param for non-reasoning model: {model_name}"
+                )
+
+        if sampling_params.json_mode and self.model.supports_json:
+            self.request_json["text"] = {"format": {"type": "json_object"}}
+
+        # Handle tools
+        request_tools = []
+        if computer_use:
+            # Add computer use tool
+            request_tools.append(
+                {
+                    "type": "computer_use_preview",
+                    "display_width": display_width,
+                    "display_height": display_height,
+                    "environment": "browser",  # Default to browser, could be configurable
+                }
+            )
+            # Set truncation to auto as required for computer use
+            self.request_json["truncation"] = "auto"
+
+        if tools:
+            # Add regular function tools
+            request_tools.extend([tool.dump_for("openai-responses") for tool in tools])
+
+        if request_tools:
+            self.request_json["tools"] = request_tools
+
+    async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+        is_error = False
+        error_message = None
+        thinking = None
+        content = None
+        usage = None
+        logprobs = None
+        status_code = http_response.status
+        mimetype = http_response.headers.get("Content-Type", None)
+        data = None
+
+        if status_code >= 200 and status_code < 300:
+            try:
+                data = await http_response.json()
+            except Exception:
+                is_error = True
+                error_message = (
+                    f"Error calling .json() on response w/ status {status_code}"
+                )
+            if not is_error:
+                assert data is not None, "data is None"
+                try:
+                    # Parse Responses API format
+                    parts = []
+
+                    # Get the output array from the response
+                    output = data.get("output", [])
+                    if not output:
+                        is_error = True
+                        error_message = "No output in response"
+                    else:
+                        # Process each output item
+                        for item in output:
+                            if item.get("type") == "message":
+                                message_content = item.get("content", [])
+                                for content_item in message_content:
+                                    if content_item.get("type") == "output_text":
+                                        parts.append(Text(content_item["text"]))
+                                    # Handle tool calls if present
+                                    elif content_item.get("type") == "tool_call":
+                                        tool_call = content_item["tool_call"]
+                                        parts.append(
+                                            ToolCall(
+                                                id=tool_call["id"],
+                                                name=tool_call["function"]["name"],
+                                                arguments=json.loads(
+                                                    tool_call["function"]["arguments"]
+                                                ),
+                                            )
+                                        )
+                            elif item.get("type") == "computer_call":
+                                # Handle computer use actions
+                                action = item.get("action", {})
+                                parts.append(
+                                    ToolCall(
+                                        id=item["call_id"],
+                                        name=f"_computer_{action.get('type', 'action')}",
+                                        arguments=action,
+                                    )
+                                )
+
+                    # Handle reasoning if present
+                    if "reasoning" in data and data["reasoning"].get("summary"):
+                        thinking = data["reasoning"]["summary"]
+                        parts.append(Thinking(thinking))
+
+                    content = Message("assistant", parts)
+
+                    # Extract usage information
+                    if "usage" in data:
+                        usage = Usage.from_openai_usage(data["usage"])
+
+                    # Extract response_id for computer use continuation
+                    # response_id = data.get("id")
+
+                except Exception as e:
+                    is_error = True
+                    error_message = f"Error parsing {self.model.name} responses API response: {str(e)}"
+
+        elif mimetype and "json" in mimetype.lower():
+            is_error = True
+            data = await http_response.json()
+            error_message = json.dumps(data)
+        else:
+            is_error = True
+            text = await http_response.text()
+            error_message = text
+
+        # Handle special kinds of errors
+        if is_error and error_message is not None:
+            if "rate limit" in error_message.lower() or status_code == 429:
+                error_message += " (Rate limit error, triggering cooldown.)"
+                self.status_tracker.rate_limit_exceeded()
+            if "context length" in error_message:
+                error_message += " (Context length exceeded, set retries to 0.)"
+                self.attempts_left = 0
+
+        return APIResponse(
+            id=self.task_id,
+            status_code=status_code,
+            is_error=is_error,
+            error_message=error_message,
+            prompt=self.prompt,
+            logprobs=logprobs,
+            thinking=thinking,
+            content=content,
+            model_internal=self.model_name,
+            sampling_params=self.sampling_params,
+            usage=usage,
+            raw_response=data,
         )
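
When computer_use=True, the OpenAIResponsesRequest constructor above registers the preview tool and forces truncation. The resulting request fields, restating the defaults shown in the signature of the diff above:

# Entries added to request_json when computer_use=True, with the
# constructor defaults (display 1024x768, environment "browser").
request_json["truncation"] = "auto"
request_json["tools"] = [
    {
        "type": "computer_use_preview",
        "display_width": 1024,
        "display_height": 768,
        "environment": "browser",
    }
]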