lm-deluge 0.0.58__tar.gz → 0.0.59__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- {lm_deluge-0.0.58/src/lm_deluge.egg-info → lm_deluge-0.0.59}/PKG-INFO +1 -1
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/pyproject.toml +1 -1
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/base.py +87 -5
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/openai.py +41 -3
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/batches.py +25 -9
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/client.py +57 -29
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/__init__.py +1 -1
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/prompt.py +19 -7
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/request_context.py +9 -11
- {lm_deluge-0.0.58 → lm_deluge-0.0.59/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/LICENSE +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/README.md +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/setup.cfg +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/anthropic.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/gemini.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/anthropic.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/google.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/openai.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/openrouter.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.58 → lm_deluge-0.0.59}/tests/test_native_mcp_server.py +0 -0
src/lm_deluge/api_requests/base.py (+87 -5)

@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod
 
@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse
 
@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)
 
+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                        last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-
-
-
-
-
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
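The new _execute_once_background_mode follows OpenAI's background mode for the Responses API: the initial POST returns immediately with an "id" and a "status" of "queued" or "in_progress", the client polls the response resource until the status settles, and cancels it if the client-side timeout is exceeded. A minimal standalone sketch of the same start -> poll -> cancel loop, outside the library (the helper name, model id, and the 5-second interval are illustrative assumptions, not part of the package):

import asyncio
import os
import time

import aiohttp

BASE_URL = "https://api.openai.com/v1/responses"
HEADERS = {"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"}


async def run_background_request(payload: dict, timeout_s: float = 600.0) -> dict:
    """Start a background response, poll until it leaves queued/in_progress,
    and cancel it if the client-side timeout is exceeded (hypothetical helper)."""
    start = time.time()
    async with aiohttp.ClientSession() as session:
        # start: the POST returns right away with an id and an initial status
        async with session.post(BASE_URL, headers=HEADERS, json=payload) as resp:
            resp.raise_for_status()
            data = await resp.json()
        response_id, status = data["id"], data["status"]

        # poll: re-fetch the response every few seconds until it settles
        while status in ("queued", "in_progress"):
            if time.time() - start > timeout_s:
                async with session.post(
                    f"{BASE_URL}/{response_id}/cancel", headers=HEADERS
                ) as resp:
                    resp.raise_for_status()
                raise TimeoutError("background request cancelled by client")
            await asyncio.sleep(5.0)
            async with session.get(f"{BASE_URL}/{response_id}", headers=HEADERS) as resp:
                resp.raise_for_status()
                data = await resp.json()
            status = data["status"]
        return data


# asyncio.run(run_background_request(
#     {"model": "o4-mini", "input": "Say hello.", "background": True}
# ))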
src/lm_deluge/api_requests/openai.py (+41 -3)

@@ -30,6 +30,26 @@ async def _build_oa_chat_request(
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Using 'auto' instead for model {model.id}."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -213,9 +233,6 @@ class OpenAIRequest(APIRequestBase):
 async def _build_oa_responses_request(
     model: APIModel,
     context: RequestContext,
-    # prompt: Conversation,
-    # tools: list[Tool] | None,
-    # sampling_params: SamplingParams,
 ):
     prompt = context.prompt
     sampling_params = context.sampling_params
@@ -226,7 +243,28 @@ async def _build_oa_responses_request(
         "input": openai_responses_format["input"],
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
+        "background": context.background or False,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Model {model.id} doesn't support flex. Using 'auto' instead."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     if sampling_params.max_new_tokens:
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
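Both builders apply the same tier handling: validate the value, pass "auto"/"default"/"priority" through unchanged, and silently downgrade "flex" to "auto" on models outside the o3 / o4-mini / gpt-5 families. Distilled into a standalone helper (the function name and the substring matching mirror the diff above but are otherwise our own sketch):

VALID_TIERS = {"auto", "default", "flex", "priority"}
FLEX_FAMILIES = ("o3", "o4-mini", "gpt-5")  # flex is only offered for these families


def resolve_service_tier(model_id: str, requested: str | None) -> str | None:
    """Return the service_tier to put in the request body; None means omit the field."""
    if not requested:
        return None
    if requested not in VALID_TIERS:
        raise ValueError(f"Invalid service tier: {requested}")
    if requested == "flex" and not any(family in model_id for family in FLEX_FAMILIES):
        return "auto"  # model doesn't support flex, fall back
    return requested


assert resolve_service_tier("gpt-4.1", "flex") == "auto"
assert resolve_service_tier("o4-mini", "flex") == "flex"
assert resolve_service_tier("gpt-4.1", None) is None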
src/lm_deluge/batches.py (+25 -9)

@@ -3,7 +3,7 @@ import json
 import os
 import tempfile
 import time
-from typing import Literal, Sequence
+from typing import Literal, Sequence, cast
 
 import aiohttp
 from rich.console import Console
@@ -16,7 +16,12 @@ from lm_deluge.api_requests.anthropic import _build_anthropic_request
 from lm_deluge.api_requests.openai import _build_oa_chat_request
 from lm_deluge.config import SamplingParams
 from lm_deluge.models import APIModel, registry
-from lm_deluge.prompt import
+from lm_deluge.prompt import (
+    CachePattern,
+    Conversation,
+    Prompt,
+    prompts_to_conversations,
+)
 from lm_deluge.request_context import RequestContext
 
 
@@ -166,14 +171,18 @@ async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
 async def create_batch_files_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts:
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
     destination: str | None = None,  # if none provided, temp files
 ):
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size
 
-
+    if not isinstance(prompts, list):
+        prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
 
@@ -251,14 +260,18 @@ async def create_batch_files_oa(
 async def submit_batches_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts:
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
 ):
     """Write OpenAI batch requests to a file and submit."""
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size
 
-
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
 
@@ -342,7 +355,7 @@ async def submit_batches_oa(
 async def submit_batches_anthropic(
     model: str,
     sampling_params: SamplingParams,
-    prompts:
+    prompts: Prompt | Sequence[Prompt],
     *,
     cache: CachePattern | None = None,
     batch_size=100_000,
@@ -362,13 +375,16 @@ async def submit_batches_anthropic(
     MAX_BATCH_SIZE_ITEMS = batch_size
 
     # Convert prompts to Conversations
-
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
 
     request_headers = None
     batch_tasks = []
     current_batch = []
     current_batch_size = 0
-
+    assert isinstance(prompts, Sequence)
     for idx, prompt in enumerate(prompts):
         assert isinstance(prompt, Conversation)
         context = RequestContext(
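With prompts widened to Prompt | Sequence[Prompt], a single prompt no longer needs to be wrapped in a list before batch submission; a hedged usage sketch (the model key and the bare SamplingParams() defaults are placeholders, not values taken from this release):

import asyncio

from lm_deluge.batches import submit_batches_oa
from lm_deluge.config import SamplingParams

params = SamplingParams()  # assumption: default sampling parameters are acceptable here

# a bare string is wrapped into a list and converted to a Conversation internally
asyncio.run(submit_batches_oa("gpt-4.1-mini", params, "Summarize the plot of Hamlet."))

# equivalent explicit form
asyncio.run(submit_batches_oa("gpt-4.1-mini", params, ["Summarize the plot of Hamlet."]))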
src/lm_deluge/client.py (+57 -29)

@@ -1,5 +1,14 @@
 import asyncio
-from typing import
+from typing import (
+    Any,
+    AsyncGenerator,
+    Callable,
+    Literal,
+    Self,
+    Sequence,
+    cast,
+    overload,
+)
 
 import numpy as np
 import yaml
@@ -12,7 +21,12 @@ from lm_deluge.batches import (
     submit_batches_oa,
     wait_for_batch_completion_async,
 )
-from lm_deluge.prompt import
+from lm_deluge.prompt import (
+    CachePattern,
+    Conversation,
+    Prompt,
+    prompts_to_conversations,
+)
 from lm_deluge.tool import MCPServer, Tool
 
 from .api_requests.base import APIResponse
@@ -40,6 +54,9 @@ class _LLMClient(BaseModel):
     request_timeout: int = 30
     cache: Any = None
     extra_headers: dict[str, str] | None = None
+    extra_body: dict[str, str] | None = None
+    use_responses_api: bool = False
+    background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
     temperature: float = 0.75
@@ -171,6 +188,11 @@ class _LLMClient(BaseModel):
         # normalize weights
         self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]
 
+        # background mode only allowed for responses api
+        if self.background:
+            assert (
+                self.use_responses_api
+            ), "background mode only allowed for responses api"
         # Auto-generate name if not provided
         if self.name is None:
             if len(self.model_names) == 1:
@@ -256,13 +278,6 @@ class _LLMClient(BaseModel):
             # Idle wait before next capacity check. Aim for ~RPM spacing.
             await asyncio.sleep(max(60.0 / self.max_requests_per_minute, 0.01))
 
-    async def _execute_request(self, context: RequestContext) -> APIResponse:
-        """Create and send a single API request using the provided context."""
-        model_obj = APIModel.from_registry(context.model_name)
-        request = model_obj.make_request(context)
-        response = await request.execute_once()
-        return response
-
     async def process_single_request(
         self, context: RequestContext, retry_queue: asyncio.Queue | None = None
     ) -> APIResponse:
@@ -290,7 +305,9 @@ class _LLMClient(BaseModel):
         # Execute single request
         assert context.status_tracker
         context.status_tracker.update_pbar()
-
+        model_obj = APIModel.from_registry(context.model_name)
+        request = model_obj.make_request(context)
+        response = await request.execute_once()
 
         # Handle successful response
         if not response.is_error:
@@ -350,36 +367,36 @@ class _LLMClient(BaseModel):
     @overload
     async def process_prompts_async(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: Literal[True],
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
-
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[str | None]: ...
 
     @overload
     async def process_prompts_async(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
        *,
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
-
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[APIResponse]: ...
 
     async def process_prompts_async(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: bool = False,
         show_progress: bool = True,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> list[APIResponse] | list[str | None] | dict[str, int]:
         """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.
 
@@ -387,7 +404,9 @@ class _LLMClient(BaseModel):
         avoiding issues with tracker state accumulating across multiple calls.
         """
         # Convert prompts to Conversations
-
+        if not isinstance(prompts, list):
+            prompts = prompts = cast(Sequence[Prompt], [prompts])
+        prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
 
         # Ensure tracker exists (start_nowait will call add_to_total for each task)
         if self._tracker is None:
@@ -398,13 +417,14 @@ class _LLMClient(BaseModel):
 
         # Start all tasks using start_nowait - tasks will coordinate via shared capacity lock
         task_ids = []
+        assert isinstance(prompts, Sequence)
         for prompt in prompts:
             assert isinstance(prompt, Conversation)
             task_id = self.start_nowait(
                 prompt,
                 tools=tools,
                 cache=cache,
-
+                service_tier=service_tier,
             )
             task_ids.append(task_id)
 
@@ -443,13 +463,12 @@ class _LLMClient(BaseModel):
 
     def process_prompts_sync(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: bool = False,
         show_progress=True,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-        use_responses_api: bool = False,
     ):
         return asyncio.run(
             self.process_prompts_async(
@@ -458,7 +477,6 @@ class _LLMClient(BaseModel):
                 show_progress=show_progress,
                 tools=tools,
                 cache=cache,
-                use_responses_api=use_responses_api,
             )
         )
 
@@ -478,18 +496,18 @@ class _LLMClient(BaseModel):
 
     def start_nowait(
         self,
-        prompt:
+        prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> int:
         tracker = self._get_tracker()
         task_id = self._next_task_id
         self._next_task_id += 1
         model, sampling_params = self._select_model()
-
-
+        prompt = prompts_to_conversations([prompt])[0]
+        assert isinstance(prompt, Conversation)
         context = RequestContext(
             task_id=task_id,
             model_name=model,
@@ -500,7 +518,9 @@ class _LLMClient(BaseModel):
             status_tracker=tracker,
             tools=tools,
             cache=cache,
-            use_responses_api=use_responses_api,
+            use_responses_api=self.use_responses_api,
+            background=self.background,
+            service_tier=service_tier,
             extra_headers=self.extra_headers,
             force_local_mcp=self.force_local_mcp,
         )
@@ -515,10 +535,10 @@ class _LLMClient(BaseModel):
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> APIResponse:
         task_id = self.start_nowait(
-            prompt, tools=tools, cache=cache,
+            prompt, tools=tools, cache=cache, service_tier=service_tier
         )
         return await self.wait_for(task_id)
 
@@ -698,7 +718,7 @@ class _LLMClient(BaseModel):
 
     async def submit_batch_job(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         tools: list[Tool] | None = None,
         cache: CachePattern | None = None,
@@ -760,6 +780,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -787,6 +809,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -813,6 +837,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -851,6 +877,8 @@ def LLMClient(
         request_timeout=request_timeout,
         cache=cache,
         extra_headers=extra_headers,
+        use_responses_api=use_responses_api,
+        background=background,
         temperature=temperature,
         top_p=top_p,
         json_mode=json_mode,
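Taken together, the client changes expose background mode and service tiers end to end; a hedged usage sketch (we assume LLMClient is importable from the package root and takes a model identifier as its first argument, as in earlier releases, and the model key shown is a placeholder):

import asyncio

from lm_deluge import LLMClient

# background=True requires use_responses_api=True; the client asserts this at init time
client = LLMClient(
    "gpt-4.1-mini",
    use_responses_api=True,
    background=True,
)


async def main():
    responses = await client.process_prompts_async(
        ["What is 2 + 2?", "Name three prime numbers."],
        service_tier="flex",  # downgraded to 'auto' if the model doesn't support flex
    )
    for r in responses:
        print(r.error_message if r.is_error else r)


asyncio.run(main())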
src/lm_deluge/models/__init__.py (+1 -1)

@@ -62,7 +62,7 @@ class APIModel:
             raise ValueError("no regions to sample")
         random.sample(regions, 1, counts=weights)[0]
 
-    def make_request(self, context: RequestContext):
+    def make_request(self, context: RequestContext):
         from ..api_requests.common import CLASSES
 
         api_spec = self.api_spec
src/lm_deluge/prompt.py (+19 -7)

@@ -2,7 +2,7 @@ import io
 import json
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Literal, Sequence
+from typing import Literal, Sequence, TypeAlias
 
 import tiktoken
 import xxhash
@@ -1495,9 +1495,21 @@ class Conversation:
         return cls(msgs)
 
 
-
-
-
-
-
-
+Prompt: TypeAlias = str | list[dict] | Message | Conversation
+
+
+def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[Prompt]:
+    converted = []
+    for prompt in prompts:
+        if isinstance(prompt, Conversation):
+            converted.append(prompt)
+        elif isinstance(prompt, Message):
+            converted.append(Conversation([prompt]))
+        elif isinstance(prompt, str):
+            converted.append(Conversation.user(prompt))
+        elif isinstance(prompt, list):
+            conv, provider = Conversation.from_unknown(prompt)
+            converted.append(conv)
+        else:
+            raise ValueError(f"Unknown prompt type {type(prompt)}")
+    return converted
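The new Prompt alias and prompts_to_conversations helper accept the four shapes handled above - str, Message, Conversation, and a provider-style list[dict] (routed through Conversation.from_unknown) - and normalize them to Conversation objects; a brief usage sketch:

from lm_deluge.prompt import Conversation, prompts_to_conversations

mixed = [
    "plain string",                           # wrapped via Conversation.user(...)
    Conversation.user("already a conversation"),
    # Message instances and provider-style list[dict] payloads are also accepted
]

for conv in prompts_to_conversations(mixed):
    assert isinstance(conv, Conversation)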
src/lm_deluge/request_context.py (+9 -11)

@@ -26,28 +26,22 @@ class RequestContext:
 
     # Infrastructure
     status_tracker: StatusTracker | None = None
-
-
-    )
+    # avoiding circular import
+    results_arr: list[Any] | None = None  # list["APIRequestBase"]
     callback: Callable | None = None
 
     # Optional features
     tools: list | None = None
     cache: CachePattern | None = None
     use_responses_api: bool = False
+    background: bool = False
+    service_tier: str | None = None
     extra_headers: dict[str, str] | None = None
+    extra_body: dict[str, Any] | None = None
     force_local_mcp: bool = False
 
     # Computed properties
     cache_key: str = field(init=False)
-    # num_tokens: int = field(init=False)
-
-    # def __post_init__(self):
-    #     # Compute cache key from prompt fingerprint
-    #     # self.cache_key = self.prompt.fingerprint
-
-    #     # Compute token count
-    #     self.num_tokens =
 
     @cached_property
     def num_tokens(self):
@@ -74,6 +68,10 @@ class RequestContext:
             "tools": self.tools,
             "cache": self.cache,
             "use_responses_api": self.use_responses_api,
+            "background": self.background,
+            "service_tier": self.service_tier,
+            "extra_headers": self.extra_headers,
+            "extra_body": self.extra_body,
             "force_local_mcp": self.force_local_mcp,
         }
 