lm-deluge 0.0.21__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic. Click here for more details.

lm_deluge/batches.py CHANGED
@@ -3,7 +3,7 @@ import json
 import time
 import asyncio
 import aiohttp
-import pandas as pd
+import tempfile
 from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
 from lm_deluge.config import SamplingParams
 from lm_deluge.models import APIModel
@@ -79,11 +79,8 @@ def _create_batch_status_display(
     return grid


-async def submit_batch_oa(batch_requests: list[dict]):
-    """Submit one batch asynchronously."""
-    pd.DataFrame(batch_requests).to_json(
-        "requests_temp.jsonl", orient="records", lines=True
-    )
+async def submit_batch_oa(file_path: str):
+    """Upload a JSONL file and create one OpenAI batch."""

     # upload the file
     api_key = os.environ.get("OPENAI_API_KEY", None)
@@ -99,21 +96,22 @@ async def submit_batch_oa(batch_requests: list[dict]):
         url = "https://api.openai.com/v1/files"
         data = aiohttp.FormData()
         data.add_field("purpose", "batch")
-        data.add_field(
-            "file",
-            open("requests_temp.jsonl", "rb"),
-            filename="requests_temp.jsonl",
-            content_type="application/json",
-        )
-
-        async with session.post(url, data=data, headers=headers) as response:
-            if response.status != 200:
-                text = await response.text()
-                raise ValueError(f"Error uploading file: {text}")
+        with open(file_path, "rb") as f:
+            data.add_field(
+                "file",
+                f,
+                filename=os.path.basename(file_path),
+                content_type="application/json",
+            )

-            print("File uploaded successfully")
-            response_data = await response.json()
-            file_id = response_data["id"]
+            async with session.post(url, data=data, headers=headers) as response:
+                if response.status != 200:
+                    text = await response.text()
+                    raise ValueError(f"Error uploading file: {text}")
+
+                print("File uploaded successfully")
+                response_data = await response.json()
+                file_id = response_data["id"]

         # Create batch
         url = "https://api.openai.com/v1/batches"
@@ -131,7 +129,36 @@ async def submit_batch_oa(batch_requests: list[dict]):
             response_data = await response.json()
             batch_id = response_data["id"]
             print("Batch job started successfully: id = ", batch_id)
-            return batch_id
+
+    os.remove(file_path)
+    return batch_id
+
+
+async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
+    """Upload a JSONL file and create one Anthropic batch."""
+
+    async with aiohttp.ClientSession() as session:
+        url = f"{registry[model].api_base}/messages/batches"
+        data = aiohttp.FormData()
+        with open(file_path, "rb") as f:
+            data.add_field(
+                "file",
+                f,
+                filename=os.path.basename(file_path),
+                content_type="application/json",
+            )
+
+            async with session.post(url, data=data, headers=headers) as response:
+                if response.status != 200:
+                    text = await response.text()
+                    raise ValueError(f"Error creating batch: {text}")
+
+                batch_data = await response.json()
+                batch_id = batch_data["id"]
+                print(f"Anthropic batch job started successfully: id = {batch_id}")
+
+    os.remove(file_path)
+    return batch_id


 async def submit_batches_oa(
@@ -139,38 +166,38 @@ async def submit_batches_oa(
     sampling_params: SamplingParams,
     prompts: Sequence[str | list[dict] | Conversation],
 ):
-    # if prompts are strings, convert them to message lists
+    """Write OpenAI batch requests to a file and submit."""
+
     prompts = prompts_to_conversations(prompts)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
-    ids = [i for i, _ in enumerate(prompts)]

-    # create file with requests to send to batch api
-    batch_requests = []
     model_obj = APIModel.from_registry(model)
-    for id, prompt in zip(ids, prompts):
-        assert isinstance(prompt, Conversation)
-        batch_requests.append(
-            {
-                "custom_id": str(id),
-                "method": "POST",
-                "url": "/v1/chat/completions",
-                "body": _build_oa_chat_request(model_obj, prompt, [], sampling_params),
-            }
-        )
-
-    # since the api only accepts up to 50,000 requests per batch job, we chunk into 50k chunks
+
     BATCH_SIZE = 50_000
-    batches = [
-        batch_requests[i : i + BATCH_SIZE]
-        for i in range(0, len(batch_requests), BATCH_SIZE)
-    ]
     tasks = []
-    for batch in batches:
-        tasks.append(asyncio.create_task(submit_batch_oa(batch)))
+
+    for start in range(0, len(prompts), BATCH_SIZE):
+        batch_prompts = prompts[start : start + BATCH_SIZE]
+        with tempfile.NamedTemporaryFile(mode="w+", suffix=".jsonl", delete=False) as f:
+            for idx, prompt in enumerate(batch_prompts, start=start):
+                assert isinstance(prompt, Conversation)
+                request = {
+                    "custom_id": str(idx),
+                    "method": "POST",
+                    "url": "/v1/chat/completions",
+                    "body": _build_oa_chat_request(model_obj, prompt, [], sampling_params),
+                }
+                json.dump(request, f)
+                f.write("\n")
+
+            file_path = f.name
+
+        tasks.append(asyncio.create_task(submit_batch_oa(file_path)))
+
     batch_ids = await asyncio.gather(*tasks)

-    print(f"Submitted {len(batches)} batch jobs.")
+    print(f"Submitted {len(tasks)} batch jobs.")

     return batch_ids

@@ -196,47 +223,29 @@ async def submit_batches_anthropic(

     # Convert prompts to Conversations
     prompts = prompts_to_conversations(prompts)
-    # Create batch requests
-    request_headers = None
-    batch_requests = []
-    for i, prompt in enumerate(prompts):
-        assert isinstance(prompt, Conversation)
-        # Build request body
-        request_body, request_headers = _build_anthropic_request(
-            APIModel.from_registry(model), prompt, [], sampling_params, cache
-        )

-        batch_requests.append({"custom_id": str(i), "params": request_body})
-
-    # Chunk into batches of 100k requests (Anthropic's limit)
+    request_headers = None
     BATCH_SIZE = 100_000
-    batches = [
-        batch_requests[i : i + BATCH_SIZE]
-        for i in range(0, len(batch_requests), BATCH_SIZE)
-    ]
-    batch_ids = []
     batch_tasks = []
-    async with aiohttp.ClientSession() as session:
-        for batch in batches:
-            url = f"{registry[model].api_base}/messages/batches"
-            data = {"requests": batch}

-            async def submit_batch(data, url, headers):
-                async with session.post(url, json=data, headers=headers) as response:
-                    if response.status != 200:
-                        text = await response.text()
-                        raise ValueError(f"Error creating batch: {text}")
+    for start in range(0, len(prompts), BATCH_SIZE):
+        batch_prompts = prompts[start : start + BATCH_SIZE]
+        with tempfile.NamedTemporaryFile(mode="w+", suffix=".jsonl", delete=False) as f:
+            for idx, prompt in enumerate(batch_prompts, start=start):
+                assert isinstance(prompt, Conversation)
+                request_body, request_headers = _build_anthropic_request(
+                    APIModel.from_registry(model), prompt, [], sampling_params, cache
+                )
+                json.dump({"custom_id": str(idx), "params": request_body}, f)
+                f.write("\n")

-                    batch_data = await response.json()
-                    batch_id = batch_data["id"]
-                    print(f"Anthropic batch job started successfully: id = {batch_id}")
-                    return batch_id
+            file_path = f.name

-            batch_tasks.append(submit_batch(data, url, request_headers))
+        batch_tasks.append(asyncio.create_task(_submit_anthropic_batch(file_path, request_headers, model)))

-        batch_ids = await asyncio.gather(*batch_tasks)
+    batch_ids = await asyncio.gather(*batch_tasks)

-    print(f"Submitted {len(batches)} batch jobs.")
+    print(f"Submitted {len(batch_tasks)} batch jobs.")
     return batch_ids

lm_deluge/client.py CHANGED
@@ -22,11 +22,8 @@ from .models import APIModel, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker

-# from .cache import LevelDBCache, SqliteCache
-

 # TODO: get completions as they finish, not all at once at the end.
-# relatedly, would be nice to cache them as they finish too.
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class LLMClient(BaseModel):
     """
@@ -60,6 +57,7 @@ class LLMClient(BaseModel):
     reasoning_effort: Literal["low", "medium", "high", None] = None
     logprobs: bool = False
     top_logprobs: int | None = None
+    force_local_mcp: bool = False

     # NEW! Builder methods
     def with_model(self, model: str):
@@ -113,6 +111,7 @@ class LLMClient(BaseModel):
         if isinstance(self.model_names, str):
             self.model_names = [self.model_names]
         if any(m not in registry for m in self.model_names):
+            print("got model names:", self.model_names)
             raise ValueError("all model_names must be in registry")
         if isinstance(self.sampling_params, SamplingParams):
             self.sampling_params = [self.sampling_params for _ in self.model_names]
@@ -368,6 +367,7 @@ class LLMClient(BaseModel):
                     cache=cache,
                     use_responses_api=use_responses_api,
                     extra_headers=self.extra_headers,
+                    force_local_mcp=self.force_local_mcp,
                 )
             except StopIteration:
                 prompts_not_finished = False
@@ -389,8 +389,6 @@ class LLMClient(BaseModel):
                     results[ctx.task_id] = response
                 except Exception as e:
                     # Create an error response for validation errors and other exceptions
-                    from .api_requests.response import APIResponse
-
                     error_response = APIResponse(
                         id=ctx.task_id,
                         model_internal=ctx.model_name,
@@ -421,7 +419,8 @@ class LLMClient(BaseModel):

             # Sleep - original logic
             await asyncio.sleep(seconds_to_sleep_each_loop + tracker.seconds_to_pause)
-            tracker.log_final_status()
+
+        tracker.log_final_status()

         if return_completions_only:
             return [r.completion if r is not None else None for r in results]
@@ -468,7 +467,7 @@ class LLMClient(BaseModel):
         self,
         conversation: str | Conversation,
         *,
-        tools: list[Tool | dict] | None = None,
+        tools: list[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -482,6 +481,16 @@ class LLMClient(BaseModel):
         if isinstance(conversation, str):
            conversation = Conversation.user(conversation)

+        # Expand MCPServer objects to their constituent tools for tool execution
+        expanded_tools: list[Tool] = []
+        if tools:
+            for tool in tools:
+                if isinstance(tool, Tool):
+                    expanded_tools.append(tool)
+                elif isinstance(tool, MCPServer):
+                    mcp_tools = await tool.to_tools()
+                    expanded_tools.extend(mcp_tools)
+
         last_response: APIResponse | None = None

         for _ in range(max_rounds):
@@ -504,9 +513,9 @@

             for call in tool_calls:
                 tool_obj = None
-                if tools:
-                    for t in tools:
-                        if isinstance(t, Tool) and t.name == call.name:
+                if expanded_tools:
+                    for t in expanded_tools:
+                        if t.name == call.name:
                             tool_obj = t
                             break

lm_deluge/image.py CHANGED
@@ -10,7 +10,7 @@ from typing import Literal
 import requests
 from PIL import Image as PILImage  # type: ignore

-MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
+MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"] | str


 @dataclass(slots=True)
@@ -23,6 +23,9 @@ class Image:
     _fingerprint_cache: str | None = field(init=False, default=None)
     _size_cache: tuple[int, int] | None = field(init=False, default=None)

+    def __repr__(self):
+        return f"Image(data=[{type(self.data)}], media_type={self.media_type}, detail={self.detail})"
+
     @classmethod
     def from_pdf(
         cls,
@@ -69,10 +72,11 @@ class Image:
         elif isinstance(self.data, Path) and self.data.exists():
             return Path(self.data).read_bytes()
         elif isinstance(self.data, str) and self.data.startswith("data:"):
+            # print("base64 path selected")
            header, encoded = self.data.split(",", 1)
             return base64.b64decode(encoded)
         else:
-            raise ValueError("unreadable image format")
+            raise ValueError(f"unreadable image format. type: {type(self.data)}")

     def _mime(self) -> str:
         if self.media_type:
lm_deluge/models.py CHANGED
@@ -42,7 +42,7 @@ BUILTIN_MODELS = {
         "reasoning_model": False,
     },
     "llama-3.3-70b": {
-        "id": "llama-3.3-70B",
+        "id": "llama-3.3-70b",
         "name": "Llama-3.3-70B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
         "api_key_env_var": "META_API_KEY",
@@ -56,7 +56,7 @@ BUILTIN_MODELS = {
         "reasoning_model": False,
     },
     "llama-3.3-8b": {
-        "id": "llama-3.3-8B",
+        "id": "llama-3.3-8b",
         "name": "Llama-3.3-8B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
         "api_key_env_var": "META_API_KEY",
@@ -670,62 +670,62 @@ BUILTIN_MODELS = {
     # "requests_per_minute": 120,
     # "tokens_per_minute": None,
     # },
-    "gemini-2.5-pro-vertex": {
-        "id": "gemini-2.5-pro",
-        "name": "gemini-2.5-pro-preview-05-06",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 1.25,
-        "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": True,
-    },
-    "gemini-2.5-flash-vertex": {
-        "id": "gemini-2.5-flash",
-        "name": "gemini-2.5-flash-preview-05-20",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.15,
-        "output_cost": 0.6,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": True,
-    },
-    "gemini-2.0-flash-vertex": {
-        "id": "gemini-2.0-flash",
-        "name": "gemini-2.0-flash",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.10,
-        "output_cost": 0.40,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": False,
-    },
-    "gemini-2.0-flash-lite-vertex": {
-        "id": "gemini-2.0-flash-lite",
-        "name": "gemini-2.0-flash-lite",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.075,
-        "output_cost": 0.30,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": False,
-    },
+    # "gemini-2.5-pro-vertex": {
+    #     "id": "gemini-2.5-pro",
+    #     "name": "gemini-2.5-pro-preview-05-06",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 1.25,
+    #     "output_cost": 10.0,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": True,
+    # },
+    # "gemini-2.5-flash-vertex": {
+    #     "id": "gemini-2.5-flash",
+    #     "name": "gemini-2.5-flash-preview-05-20",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.15,
+    #     "output_cost": 0.6,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": True,
+    # },
+    # "gemini-2.0-flash-vertex": {
+    #     "id": "gemini-2.0-flash",
+    #     "name": "gemini-2.0-flash",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.10,
+    #     "output_cost": 0.40,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": False,
+    # },
+    # "gemini-2.0-flash-lite-vertex": {
+    #     "id": "gemini-2.0-flash-lite",
+    #     "name": "gemini-2.0-flash-lite",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.075,
+    #     "output_cost": 0.30,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": False,
+    # },
     # ███████████ █████ █████
     # ░░███░░░░░███ ░░███ ░░███
     # ░███ ░███ ██████ ███████ ████████ ██████ ██████ ░███ █████
@@ -1138,7 +1138,7 @@ BUILTIN_MODELS = {
         "output_cost": 0.7,
     },
     "mixtral-8x22b": {
-        "id": "mistral-8x22b",
+        "id": "mixtral-8x22b",
         "name": "open-mixtral-8x22b",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
@@ -1243,3 +1243,5 @@ def register_model(**kwargs) -> APIModel:
 # Populate registry with builtin models
 for cfg in BUILTIN_MODELS.values():
     register_model(**cfg)
+
+# print("Valid models:", registry.keys())
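Every entry in BUILTIN_MODELS above is fed through register_model(**cfg). A hedged sketch of registering a custom OpenAI-compatible endpoint with the same keyword fields follows; the field names are taken from the entries visible in this diff, but the endpoint, model id, env var, and the "openai" api_spec value are placeholders and assumptions, not values confirmed by the package.

# A sketch of registering a custom model with lm_deluge, reusing the field names
# that appear in the BUILTIN_MODELS entries above. The endpoint, ids, env var,
# and api_spec value below are placeholders/assumptions, not part of the package.
from lm_deluge.models import register_model

register_model(
    id="my-local-llama",                   # placeholder model id
    name="llama-3.3-70b-instruct",         # name sent to the API (placeholder)
    api_base="http://localhost:8000/v1",   # OpenAI-compatible server (placeholder)
    api_key_env_var="MY_LOCAL_API_KEY",    # placeholder env var
    supports_json=True,
    supports_logprobs=False,
    api_spec="openai",                     # assumed spec name for OpenAI-style APIs
    input_cost=0.0,
    output_cost=0.0,
    requests_per_minute=60,
    tokens_per_minute=100_000,
    reasoning_model=False,
)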