lm-deluge 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/__init__.py +2 -0
- lm_deluge/api_requests/base.py +2 -148
- lm_deluge/api_requests/openai.py +72 -6
- lm_deluge/api_requests/response.py +153 -0
- lm_deluge/client.py +36 -48
- lm_deluge/config.py +3 -2
- lm_deluge/file.py +149 -0
- lm_deluge/prompt.py +70 -9
- lm_deluge/tracker.py +5 -3
- {lm_deluge-0.0.13.dist-info → lm_deluge-0.0.14.dist-info}/METADATA +4 -1
- {lm_deluge-0.0.13.dist-info → lm_deluge-0.0.14.dist-info}/RECORD +14 -12
- {lm_deluge-0.0.13.dist-info → lm_deluge-0.0.14.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.13.dist-info → lm_deluge-0.0.14.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.13.dist-info → lm_deluge-0.0.14.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,6 +1,7 @@
 from .client import LLMClient, SamplingParams, APIResponse
 from .prompt import Conversation, Message
 from .tool import Tool
+from .file import File
 import dotenv

 dotenv.load_dotenv()
@@ -12,4 +13,5 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
+    "File",
 ]

lm_deluge/api_requests/base.py CHANGED
@@ -1,165 +1,19 @@
 import asyncio
-import json
 import random
 import traceback
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Callable

 import aiohttp
 from aiohttp import ClientResponse

-from lm_deluge.prompt import CachePattern, Conversation
-from lm_deluge.usage import Usage
+from lm_deluge.prompt import CachePattern, Conversation

 from ..config import SamplingParams
 from ..errors import raise_if_modal_exception
 from ..models import APIModel
 from ..tracker import StatusTracker
-
-
-@dataclass
-class APIResponse:
-    # request information
-    id: int  # should be unique to the request within a given prompt-processing call
-    model_internal: str  # our internal model tag
-    prompt: Conversation
-    sampling_params: SamplingParams
-
-    # http response information
-    status_code: int | None
-    is_error: bool | None
-    error_message: str | None
-
-    # completion information - unified usage tracking
-    usage: Usage | None = None
-
-    # response content - structured format
-    content: Message | None = None
-
-    # optional or calculated automatically
-    thinking: str | None = None  # if model shows thinking tokens
-    model_external: str | None = None  # the model tag used by the API
-    region: str | None = None
-    logprobs: list | None = None
-    finish_reason: str | None = None  # make required later
-    cost: float | None = None  # calculated automatically
-    cache_hit: bool = False  # manually set if true
-    # set to true if is_error and should be retried with a different model
-    retry_with_different_model: bool | None = False
-    # set to true if should NOT retry with the same model (unrecoverable error)
-    give_up_if_no_other_models: bool | None = False
-    # OpenAI Responses API specific - used for computer use continuation
-    response_id: str | None = None
-    # Raw API response for debugging
-    raw_response: dict | None = None
-
-    @property
-    def completion(self) -> str | None:
-        """Backward compatibility: extract text from content Message."""
-        if self.content is not None:
-            return self.content.completion
-        return None
-
-    @property
-    def input_tokens(self) -> int | None:
-        """Get input tokens from usage object."""
-        return self.usage.input_tokens if self.usage else None
-
-    @property
-    def output_tokens(self) -> int | None:
-        """Get output tokens from usage object."""
-        return self.usage.output_tokens if self.usage else None
-
-    @property
-    def cache_read_tokens(self) -> int | None:
-        """Get cache read tokens from usage object."""
-        return self.usage.cache_read_tokens if self.usage else None
-
-    @property
-    def cache_write_tokens(self) -> int | None:
-        """Get cache write tokens from usage object."""
-        return self.usage.cache_write_tokens if self.usage else None
-
-    def __post_init__(self):
-        # calculate cost & get external model name
-        self.id = int(self.id)
-        api_model = APIModel.from_registry(self.model_internal)
-        self.model_external = api_model.name
-        self.cost = None
-        if (
-            self.usage is not None
-            and api_model.input_cost is not None
-            and api_model.output_cost is not None
-        ):
-            self.cost = (
-                self.usage.input_tokens * api_model.input_cost / 1e6
-                + self.usage.output_tokens * api_model.output_cost / 1e6
-            )
-        elif self.content is not None and self.completion is not None:
-            print(
-                f"Warning: Completion provided without token counts for model {self.model_internal}."
-            )
-
-    def to_dict(self):
-        return {
-            "id": self.id,
-            "model_internal": self.model_internal,
-            "model_external": self.model_external,
-            "region": self.region,
-            "prompt": self.prompt.to_log(),  # destroys image if present
-            "sampling_params": self.sampling_params.__dict__,
-            "status_code": self.status_code,
-            "is_error": self.is_error,
-            "error_message": self.error_message,
-            "completion": self.completion,  # computed property
-            "content": self.content.to_log() if self.content else None,
-            "usage": self.usage.to_dict() if self.usage else None,
-            "finish_reason": self.finish_reason,
-            "cost": self.cost,
-        }
-
-    @classmethod
-    def from_dict(cls, data: dict):
-        # Handle backward compatibility for content/completion
-        content = None
-        if "content" in data and data["content"] is not None:
-            # Reconstruct message from log format
-            content = Message.from_log(data["content"])
-        elif "completion" in data and data["completion"] is not None:
-            # Backward compatibility: create a Message with just text
-            content = Message.ai(data["completion"])
-
-        usage = None
-        if "usage" in data and data["usage"] is not None:
-            usage = Usage.from_dict(data["usage"])
-
-        return cls(
-            id=data.get("id", random.randint(0, 1_000_000_000)),
-            model_internal=data["model_internal"],
-            prompt=Conversation.from_log(data["prompt"]),
-            sampling_params=SamplingParams(**data["sampling_params"]),
-            status_code=data["status_code"],
-            is_error=data["is_error"],
-            error_message=data["error_message"],
-            usage=usage,
-            content=content,
-            thinking=data.get("thinking"),
-            model_external=data.get("model_external"),
-            region=data.get("region"),
-            logprobs=data.get("logprobs"),
-            finish_reason=data.get("finish_reason"),
-            cost=data.get("cost"),
-            cache_hit=data.get("cache_hit", False),
-        )
-
-    def write_to_file(self, filename):
-        """
-        Writes the APIResponse as a line to a file.
-        If file exists, appends to it.
-        """
-        with open(filename, "a") as f:
-            f.write(json.dumps(self.to_dict()) + "\n")
+from .response import APIResponse


 class APIRequestBase(ABC):

lm_deluge/api_requests/openai.py CHANGED
@@ -1,17 +1,19 @@
-import warnings
-from aiohttp import ClientResponse
 import json
 import os
+import warnings
 from typing import Callable

+import aiohttp
+from aiohttp import ClientResponse
+
 from lm_deluge.tool import Tool

-from .base import APIRequestBase, APIResponse
-from ..prompt import Conversation, Message, Text, ToolCall, Thinking, CachePattern
-from ..usage import Usage
-from ..tracker import StatusTracker
 from ..config import SamplingParams
 from ..models import APIModel
+from ..prompt import CachePattern, Conversation, Message, Text, Thinking, ToolCall
+from ..tracker import StatusTracker
+from ..usage import Usage
+from .base import APIRequestBase, APIResponse


 def _build_oa_chat_request(
@@ -111,6 +113,7 @@ class OpenAIRequest(APIRequestBase):
         status_code = http_response.status
         mimetype = http_response.headers.get("Content-Type", None)
         data = None
+        finish_reason = None
         if status_code >= 200 and status_code < 300:
             try:
                 data = await http_response.json()
@@ -125,6 +128,7 @@ class OpenAIRequest(APIRequestBase):
             # Parse response into Message with parts
             parts = []
             message = data["choices"][0]["message"]
+            finish_reason = data["choices"][0]["finish_reason"]

             # Add text content if present
             if message.get("content"):
@@ -190,6 +194,7 @@ class OpenAIRequest(APIRequestBase):
             sampling_params=self.sampling_params,
             usage=usage,
             raw_response=data,
+            finish_reason=finish_reason,
         )


@@ -266,6 +271,13 @@ class OpenAIResponsesRequest(APIRequestBase):
         self.request_json["max_output_tokens"] = sampling_params.max_new_tokens

         if self.model.reasoning_model:
+            if sampling_params.reasoning_effort in [None, "none"]:
+                # gemini models can switch reasoning off
+                if "gemini" in self.model.id:
+                    self.sampling_params.reasoning_effort = "none"  # expects string
+                # openai models can only go down to "low"
+                else:
+                    self.sampling_params.reasoning_effort = "low"
             self.request_json["temperature"] = 1.0
             self.request_json["top_p"] = 1.0
             self.request_json["reasoning"] = {
@@ -413,3 +425,57 @@
             usage=usage,
             raw_response=data,
         )
+
+
+async def stream_chat(
+    model_name: str,  # must correspond to registry
+    prompt: Conversation,
+    sampling_params: SamplingParams = SamplingParams(),
+    tools: list | None = None,
+    cache: CachePattern | None = None,
+):
+    if cache is not None:
+        warnings.warn(
+            f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+        )
+
+    model = APIModel.from_registry(model_name)
+    if model.api_spec != "openai":
+        raise ValueError("streaming only supported on openai models for now")
+    url = f"{model.api_base}/chat/completions"
+    request_header = {"Authorization": f"Bearer {os.getenv(model.api_key_env_var)}"}
+    request_json = _build_oa_chat_request(model, prompt, tools, sampling_params)
+    request_json["stream"] = True
+
+    async with aiohttp.ClientSession() as s:
+        async with s.post(url, headers=request_header, json=request_json) as r:
+            r.raise_for_status()  # bail on 4xx/5xx
+            content = ""
+            buf = ""
+            async for chunk in r.content.iter_any():  # raw bytes
+                buf += chunk.decode()
+                while "\n\n" in buf:  # full SSE frame
+                    event, buf = buf.split("\n\n", 1)
+                    if not event.startswith("data:"):
+                        continue  # ignore comments
+                    data = event[5:].strip()  # after "data:"
+                    if data == "[DONE]":
+                        yield APIResponse(
+                            id=0,
+                            status_code=None,
+                            is_error=False,
+                            error_message=None,
+                            prompt=prompt,
+                            content=Message(
+                                role="assistant", parts=[Text(text=content)]
+                            ),
+                            model_internal=model.id,
+                            sampling_params=sampling_params,
+                            usage=None,
+                            raw_response=None,
+                        )
+                    msg = json.loads(data)  # SSE payload
+                    delta = msg["choices"][0]["delta"].get("content")
+                    if delta:
+                        content += delta
+                        yield delta

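For context, the new stream_chat helper above is an async generator that yields text deltas as strings and, once the SSE stream sends [DONE], yields a final APIResponse built from the accumulated text. A minimal consumption sketch based only on the signature in this diff; the "gpt-4.1-mini" tag is an illustrative registry entry, not a documented requirement:

import asyncio

from lm_deluge import Conversation
from lm_deluge.api_requests.openai import stream_chat

async def main():
    prompt = Conversation.user("Write a haiku about rate limits.")
    final = None
    async for item in stream_chat("gpt-4.1-mini", prompt):
        if isinstance(item, str):
            print(item, end="", flush=True)  # incremental token delta
        else:
            final = item  # terminal APIResponse carrying the full assistant message
    if final is not None:
        print("\ncompletion length:", len(final.completion or ""))

asyncio.run(main())
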
lm_deluge/api_requests/response.py ADDED
@@ -0,0 +1,153 @@
+import json
+import random
+from dataclasses import dataclass
+
+from lm_deluge.prompt import Conversation, Message
+from lm_deluge.usage import Usage
+
+from ..config import SamplingParams
+from ..models import APIModel
+
+
+@dataclass
+class APIResponse:
+    # request information
+    id: int  # should be unique to the request within a given prompt-processing call
+    model_internal: str  # our internal model tag
+    prompt: Conversation
+    sampling_params: SamplingParams
+
+    # http response information
+    status_code: int | None
+    is_error: bool | None
+    error_message: str | None
+
+    # completion information - unified usage tracking
+    usage: Usage | None = None
+
+    # response content - structured format
+    content: Message | None = None
+
+    # optional or calculated automatically
+    thinking: str | None = None  # if model shows thinking tokens
+    model_external: str | None = None  # the model tag used by the API
+    region: str | None = None
+    logprobs: list | None = None
+    finish_reason: str | None = None  # make required later
+    cost: float | None = None  # calculated automatically
+    cache_hit: bool = False  # manually set if true
+    # set to true if is_error and should be retried with a different model
+    retry_with_different_model: bool | None = False
+    # set to true if should NOT retry with the same model (unrecoverable error)
+    give_up_if_no_other_models: bool | None = False
+    # OpenAI Responses API specific - used for computer use continuation
+    response_id: str | None = None
+    # Raw API response for debugging
+    raw_response: dict | None = None
+
+    @property
+    def completion(self) -> str | None:
+        """Backward compatibility: extract text from content Message."""
+        if self.content is not None:
+            return self.content.completion
+        return None
+
+    @property
+    def input_tokens(self) -> int | None:
+        """Get input tokens from usage object."""
+        return self.usage.input_tokens if self.usage else None
+
+    @property
+    def output_tokens(self) -> int | None:
+        """Get output tokens from usage object."""
+        return self.usage.output_tokens if self.usage else None
+
+    @property
+    def cache_read_tokens(self) -> int | None:
+        """Get cache read tokens from usage object."""
+        return self.usage.cache_read_tokens if self.usage else None
+
+    @property
+    def cache_write_tokens(self) -> int | None:
+        """Get cache write tokens from usage object."""
+        return self.usage.cache_write_tokens if self.usage else None
+
+    def __post_init__(self):
+        # calculate cost & get external model name
+        self.id = int(self.id)
+        api_model = APIModel.from_registry(self.model_internal)
+        self.model_external = api_model.name
+        self.cost = None
+        if (
+            self.usage is not None
+            and api_model.input_cost is not None
+            and api_model.output_cost is not None
+        ):
+            self.cost = (
+                self.usage.input_tokens * api_model.input_cost / 1e6
+                + self.usage.output_tokens * api_model.output_cost / 1e6
+            )
+        elif self.content is not None and self.completion is not None:
+            print(
+                f"Warning: Completion provided without token counts for model {self.model_internal}."
+            )
+
+    def to_dict(self):
+        return {
+            "id": self.id,
+            "model_internal": self.model_internal,
+            "model_external": self.model_external,
+            "region": self.region,
+            "prompt": self.prompt.to_log(),  # destroys image if present
+            "sampling_params": self.sampling_params.__dict__,
+            "status_code": self.status_code,
+            "is_error": self.is_error,
+            "error_message": self.error_message,
+            "completion": self.completion,  # computed property
+            "content": self.content.to_log() if self.content else None,
+            "usage": self.usage.to_dict() if self.usage else None,
+            "finish_reason": self.finish_reason,
+            "cost": self.cost,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict):
+        # Handle backward compatibility for content/completion
+        content = None
+        if "content" in data and data["content"] is not None:
+            # Reconstruct message from log format
+            content = Message.from_log(data["content"])
+        elif "completion" in data and data["completion"] is not None:
+            # Backward compatibility: create a Message with just text
+            content = Message.ai(data["completion"])
+
+        usage = None
+        if "usage" in data and data["usage"] is not None:
+            usage = Usage.from_dict(data["usage"])
+
+        return cls(
+            id=data.get("id", random.randint(0, 1_000_000_000)),
+            model_internal=data["model_internal"],
+            prompt=Conversation.from_log(data["prompt"]),
+            sampling_params=SamplingParams(**data["sampling_params"]),
+            status_code=data["status_code"],
+            is_error=data["is_error"],
+            error_message=data["error_message"],
+            usage=usage,
+            content=content,
+            thinking=data.get("thinking"),
+            model_external=data.get("model_external"),
+            region=data.get("region"),
+            logprobs=data.get("logprobs"),
+            finish_reason=data.get("finish_reason"),
+            cost=data.get("cost"),
+            cache_hit=data.get("cache_hit", False),
+        )
+
+    def write_to_file(self, filename):
+        """
+        Writes the APIResponse as a line to a file.
+        If file exists, appends to it.
+        """
+        with open(filename, "a") as f:
+            f.write(json.dumps(self.to_dict()) + "\n")

lm_deluge/client.py CHANGED
@@ -6,6 +6,7 @@ import yaml
 from pydantic import BaseModel
 from pydantic.functional_validators import model_validator

+from lm_deluge.api_requests.openai import stream_chat
 from lm_deluge.batches import (
     submit_batches_anthropic,
     submit_batches_oa,
@@ -34,6 +35,12 @@ class LLMClient(BaseModel):
     """

     model_names: list[str] = ["gpt-4.1-mini"]
+
+    def __init__(self, model_name: str | list[str] | None = None, **kwargs):
+        if model_name is not None:
+            kwargs["model_names"] = model_name
+        super().__init__(**kwargs)
+
     max_requests_per_minute: int = 1_000
     max_tokens_per_minute: int = 100_000
     max_concurrent_requests: int = 225
@@ -81,7 +88,7 @@ class LLMClient(BaseModel):
     @model_validator(mode="before")
     @classmethod
     def fix_lists(cls, data) -> "LLMClient":
-        if isinstance(data
+        if isinstance(data.get("model_names"), str):
             data["model_names"] = [data["model_names"]]
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
@@ -162,6 +169,11 @@
             kwargs["model_names"] = model
         return cls(**kwargs)

+    def _select_model(self):
+        assert isinstance(self.model_weights, list)
+        model_idx = np.random.choice(range(len(self.models)), p=self.model_weights)
+        return self.models[model_idx], self.sampling_params[model_idx]
+
     @overload
     async def process_prompts_async(
         self,
@@ -249,41 +261,6 @@
         if len(cache_hit_ids) > 0:
             tracker.update_pbar(len(cache_hit_ids))

-        # api_task = asyncio.create_task(
-        #     process_api_prompts_async(
-        #         ids,
-        #         prompts,  # type: ignore -- fix later for dry running conversations
-        #         self.models,
-        #         self.model_weights,  # type: ignore
-        #         self.sampling_params,  # type: ignore
-        #         max_attempts=self.max_attempts,
-        #         max_concurrent_requests=self.max_concurrent_requests,
-        #         request_timeout=self.request_timeout,
-        #         status_tracker=tracker,
-        #         tools=tools,
-        #         cache=cache,
-        #         computer_use=computer_use,
-        #         display_width=display_width,
-        #         display_height=display_height,
-        #         use_responses_api=use_responses_api,
-        #     )
-        # )
-        # async def process_api_prompts_async(
-
-        #     models: str | list[str],
-        #     model_weights: list[float],
-        #     sampling_params: list[SamplingParams],
-        #     max_attempts: int = 5,
-        #     max_concurrent_requests: int = 1_000,
-        #     request_timeout: int = 30,
-        #     status_tracker: StatusTracker | None = None,
-        #     tools: list[Tool] | None = None,
-        #     cache: CachePattern | None = None,
-        #     computer_use: bool = False,
-        #     display_width: int = 1024,
-        #     display_height: int = 768,
-        #     use_responses_api: bool = False,
-        # ):
         if isinstance(ids, np.ndarray):
             ids = ids.tolist()  # pyright: ignore

@@ -296,28 +273,28 @@
         assert tracker.retry_queue, "retry queue not initialized"
         while True:
            # get next request (if one is not already waiting for capacity)
+            retry_request = False
            if next_request is None:
                if not tracker.retry_queue.empty():
                    next_request = tracker.retry_queue.get_nowait()
+                    retry_request = True
                    print(f"Retrying request {next_request.task_id}.")
                elif prompts_not_finished:
                    try:
                        # get new request
                        id, prompt = next(prompts_iter)
                        # select model
-
-
-                            range(len(self.models)), p=self.model_weights
-                        )
+                        model, sampling_params = self._select_model()
+
                        next_request = create_api_request(
                            task_id=id,
-                            model_name=
+                            model_name=model,
                            prompt=prompt,  # type: ignore
                            request_timeout=self.request_timeout,
                            attempts_left=self.max_attempts,
                            status_tracker=tracker,
                            results_arr=requests,
-                            sampling_params=
+                            sampling_params=sampling_params,
                            all_model_names=self.models,
                            all_sampling_params=self.sampling_params,
                            tools=tools,
@@ -339,10 +316,9 @@
            # if enough capacity available, call API
            if next_request:
                next_request_tokens = next_request.num_tokens
-                if tracker.check_capacity(next_request_tokens):
+                if tracker.check_capacity(next_request_tokens, retry=retry_request):
                    tracker.set_limiting_factor(None)
-
-                    # call API
+                    # call API (attempts_left will be decremented in handle_error if it fails)
                    asyncio.create_task(next_request.call_api())
                    next_request = None  # reset next_request to empty
            # update pbar status
@@ -360,9 +336,10 @@
                await asyncio.sleep(tracker.seconds_to_pause)
                print(f"Pausing {tracker.seconds_to_pause}s to cool down.")

-
-
-
+        # after finishing, log final status
+        tracker.log_final_status()
+
+        # deduplicate results by id
        api_results = deduplicate_responses(requests)
        for res in api_results:
            results[res.id] = res
@@ -399,6 +376,17 @@
            )
        )

+    async def stream(self, prompt: str | Conversation, tools: list[Tool] | None = None):
+        model, sampling_params = self._select_model()
+        if isinstance(prompt, str):
+            prompt = Conversation.user(prompt)
+        async for item in stream_chat(model, prompt, sampling_params, tools, None):
+            if isinstance(item, str):
+                print(item, end="", flush=True)
+            else:
+                # final item
+                return item
+
    async def submit_batch_job(
        self,
        prompts: Sequence[str | list[dict] | Conversation],

lm_deluge/config.py CHANGED
@@ -1,13 +1,14 @@
-from pydantic import BaseModel
 from typing import Literal

+from pydantic import BaseModel
+

 class SamplingParams(BaseModel):
     temperature: float = 0.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
-    reasoning_effort: Literal["low", "medium", "high", None] = None
+    reasoning_effort: Literal["low", "medium", "high", "none", None] = None
     logprobs: bool = False
     top_logprobs: int | None = None

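The widened Literal means "none" is now an accepted reasoning_effort value; per the openai.py hunk above it is passed through for Gemini-style models and bumped up to "low" for OpenAI reasoning models. A small illustrative sketch:

from lm_deluge import SamplingParams

# "none" asks the request builder to switch extended reasoning off where supported;
# models that cannot disable reasoning get clamped to "low" instead.
params = SamplingParams(max_new_tokens=256, reasoning_effort="none")
print(params.reasoning_effort)  # -> "none"
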
lm_deluge/file.py ADDED
@@ -0,0 +1,149 @@
+import os
+import io
+import requests
+import base64
+import mimetypes
+import xxhash
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+@dataclass(slots=True)
+class File:
+    # raw bytes, pathlike, http url, base64 data url, or file_id
+    data: bytes | io.BytesIO | Path | str
+    media_type: str | None = None  # inferred if None
+    filename: str | None = None  # optional filename for uploads
+    file_id: str | None = None  # for OpenAI file uploads or Anthropic file API
+    type: str = field(init=False, default="file")
+
+    # helpers -----------------------------------------------------------------
+    def _bytes(self) -> bytes:
+        if isinstance(self.data, bytes):
+            return self.data
+        elif isinstance(self.data, io.BytesIO):
+            return self.data.getvalue()
+        elif isinstance(self.data, str) and self.data.startswith("http"):
+            res = requests.get(self.data)
+            res.raise_for_status()
+            return res.content
+        elif isinstance(self.data, str) and os.path.exists(self.data):
+            with open(self.data, "rb") as f:
+                return f.read()
+        elif isinstance(self.data, Path) and self.data.exists():
+            return Path(self.data).read_bytes()
+        elif isinstance(self.data, str) and self.data.startswith("data:"):
+            header, encoded = self.data.split(",", 1)
+            return base64.b64decode(encoded)
+        else:
+            raise ValueError("unreadable file format")
+
+    def _mime(self) -> str:
+        if self.media_type:
+            return self.media_type
+        if isinstance(self.data, (Path, str)):
+            # For URL or path, try to guess from the string
+            path_str = str(self.data)
+            guess = mimetypes.guess_type(path_str)[0]
+            if guess:
+                return guess
+        return "application/pdf"  # default to PDF
+
+    def _filename(self) -> str:
+        if self.filename:
+            return self.filename
+        if isinstance(self.data, (Path, str)):
+            path_str = str(self.data)
+            if path_str.startswith("http"):
+                # Extract filename from URL
+                return path_str.split("/")[-1].split("?")[0] or "document.pdf"
+            else:
+                # Extract from local path
+                return os.path.basename(path_str) or "document.pdf"
+        return "document.pdf"
+
+    def _base64(self, include_header: bool = True) -> str:
+        encoded = base64.b64encode(self._bytes()).decode("utf-8")
+        if not include_header:
+            return encoded
+        return f"data:{self._mime()};base64,{encoded}"
+
+    @property
+    def fingerprint(self) -> str:
+        # Hash the file contents for fingerprinting
+        file_bytes = self._bytes()
+        return xxhash.xxh64(file_bytes).hexdigest()
+
+    @property
+    def size(self) -> int:
+        """Return file size in bytes."""
+        return len(self._bytes())
+
+    # ── provider-specific emission ────────────────────────────────────────────
+    def oa_chat(self) -> dict:
+        """For OpenAI Chat Completions - file content as base64 or file_id."""
+        if self.file_id:
+            return {
+                "type": "file",
+                "file": {
+                    "file_id": self.file_id,
+                },
+            }
+        else:
+            return {
+                "type": "file",
+                "file": {
+                    "filename": self._filename(),
+                    "file_data": self._base64(),
+                },
+            }
+
+    def oa_resp(self) -> dict:
+        """For OpenAI Responses API - file content as base64 or file_id."""
+        if self.file_id:
+            return {
+                "type": "input_file",
+                "file_id": self.file_id,
+            }
+        else:
+            return {
+                "type": "input_file",
+                "filename": self._filename(),
+                "file_data": self._base64(),
+            }
+
+    def anthropic(self) -> dict:
+        """For Anthropic Messages API - file content as base64 or file_id."""
+        if self.file_id:
+            return {
+                "type": "document",
+                "source": {
+                    "type": "file",
+                    "file_id": self.file_id,
+                },
+            }
+        else:
+            b64 = base64.b64encode(self._bytes()).decode()
+            return {
+                "type": "document",
+                "source": {
+                    "type": "base64",
+                    "media_type": self._mime(),
+                    "data": b64,
+                },
+            }
+
+    def anthropic_file_upload(self) -> tuple[str, bytes, str]:
+        """For Anthropic Files API - return tuple for file upload."""
+        filename = self._filename()
+        content = self._bytes()
+        media_type = self._mime()
+        return filename, content, media_type
+
+    def gemini(self) -> dict:
+        """For Gemini API - not yet supported."""
+        raise NotImplementedError("File support for Gemini is not yet implemented")
+
+    def mistral(self) -> dict:
+        """For Mistral API - not yet supported."""
+        raise NotImplementedError("File support for Mistral is not yet implemented")

lm_deluge/prompt.py CHANGED
@@ -1,12 +1,15 @@
 import io
 import json
-import tiktoken
-import xxhash
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Literal, Sequence
-
+
+import tiktoken
+import xxhash
+
+from lm_deluge.file import File
 from lm_deluge.image import Image
+from lm_deluge.models import APIModel

 CachePattern = Literal[
     "tools_only",
@@ -203,7 +206,7 @@ class Thinking:
         return {"type": "text", "text": f"[Thinking: {self.content}]"}


-Part = Text | Image | ToolCall | ToolResult | Thinking
+Part = Text | Image | File | ToolCall | ToolResult | Thinking


 ###############################################################################
@@ -246,6 +249,11 @@ class Message:
         """Get all image parts with proper typing."""
         return [part for part in self.parts if part.type == "image"]  # type: ignore

+    @property
+    def files(self) -> list[File]:
+        """Get all file parts with proper typing."""
+        return [part for part in self.parts if part.type == "file"]  # type: ignore
+
     @property
     def thinking_parts(self) -> list["Thinking"]:
         """Get all thinking parts with proper typing."""
@@ -262,6 +270,9 @@ class Message:
            elif isinstance(p, Image):  # Image – redact the bytes, keep a hint
                w, h = p.size
                content_blocks.append({"type": "image", "tag": f"<Image ({w}×{h})>"})
+            elif isinstance(p, File):  # File – redact the bytes, keep a hint
+                size = p.size
+                content_blocks.append({"type": "file", "tag": f"<File ({size} bytes)>"})
            elif isinstance(p, ToolCall):
                content_blocks.append(
                    {
@@ -296,6 +307,9 @@ class Message:
            elif p["type"] == "image":
                # We only stored a placeholder tag, so keep that placeholder.
                parts.append(Image(p["tag"], detail="low"))
+            elif p["type"] == "file":
+                # We only stored a placeholder tag, so keep that placeholder.
+                parts.append(File(p["tag"]))
            elif p["type"] == "tool_call":
                parts.append(
                    ToolCall(id=p["id"], name=p["name"], arguments=p["arguments"])
@@ -340,6 +354,20 @@ class Message:
        self.parts.append(img)
        return self

+    def add_file(
+        self,
+        data: bytes | str | Path | io.BytesIO,
+        *,
+        media_type: str | None = None,
+        filename: str | None = None,
+    ) -> "Message":
+        """
+        Append a file block and return self for chaining.
+        """
+        file = File(data, media_type=media_type, filename=filename)
+        self.parts.append(file)
+        return self
+
    def add_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
        """Append a tool call block and return self for chaining."""
        self.parts.append(ToolCall(id=id, name=name, arguments=arguments))
@@ -362,12 +390,15 @@ class Message:
        text: str | None = None,
        *,
        image: str | bytes | Path | io.BytesIO | None = None,
+        file: str | bytes | Path | io.BytesIO | None = None,
    ) -> "Message":
        res = cls("user", [])
        if text is not None:
            res.add_text(text)
        if image is not None:
            res.add_image(image)
+        if file is not None:
+            res.add_file(file)
        return res

    @classmethod
@@ -403,6 +434,19 @@ class Message:
                    part_list.append(Text(item["text"]))
                elif item["type"] == "image_url":
                    part_list.append(Image(data=item["image_url"]["url"]))
+                elif item["type"] == "file":
+                    file_data = item["file"]
+                    if "file_id" in file_data:
+                        # Handle file ID reference (not implemented yet)
+                        part_list.append(File(data=file_data["file_id"]))
+                    elif "file_data" in file_data:
+                        # Handle base64 file data
+                        part_list.append(
+                            File(
+                                data=file_data["file_data"],
+                                filename=file_data.get("filename"),
+                            )
+                        )
            parts = part_list

        # Handle tool calls (assistant messages)
@@ -511,11 +555,17 @@ class Conversation:

    @classmethod
    def user(
-        cls,
+        cls,
+        text: str,
+        *,
+        image: bytes | str | Path | None = None,
+        file: bytes | str | Path | None = None,
    ) -> "Conversation":
-        msg = (
-
-
+        msg = Message.user(text)
+        if image is not None:
+            msg.add_image(image)
+        if file is not None:
+            msg.add_file(file)
        return cls([msg])

    @classmethod
@@ -677,6 +727,9 @@ class Conversation:
                if isinstance(part, Image):
                    # Force conversion to bytes if not already
                    part.data = part._bytes()
+                elif isinstance(part, File):
+                    # Force conversion to bytes if not already
+                    part.data = part._bytes()
        return self

    def _add_cache_control_to_message(self, message: dict) -> None:
@@ -765,6 +818,11 @@ class Conversation:
                        content_blocks.append(
                            {"type": "image", "tag": f"<Image ({w}×{h})>"}
                        )
+                    elif isinstance(p, File):  # File – redact the bytes, keep a hint
+                        size = p.size
+                        content_blocks.append(
+                            {"type": "file", "tag": f"<File ({size} bytes)>"}
+                        )
                    elif isinstance(p, ToolCall):
                        content_blocks.append(
                            {
@@ -795,7 +853,7 @@ class Conversation:

        for m in payload.get("messages", []):
            role: Role = m["role"]  # 'system' | 'user' | 'assistant'
-            parts: list[
+            parts: list[Part] = []

            for p in m["content"]:
                if p["type"] == "text":
@@ -804,6 +862,9 @@ class Conversation:
                    # We only stored a placeholder tag, so keep that placeholder.
                    # You could raise instead if real image bytes are required.
                    parts.append(Image(p["tag"], detail="low"))
+                elif p["type"] == "file":
+                    # We only stored a placeholder tag, so keep that placeholder.
+                    parts.append(File(p["tag"]))
                elif p["type"] == "tool_call":
                    parts.append(
                        ToolCall(id=p["id"], name=p["name"], arguments=p["arguments"])

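Tying the prompt changes together, a file can now ride along in a user message the same way an image does. A short sketch based on the hunks above; the PDF paths are hypothetical.

from lm_deluge import Conversation, Message

# one-liner: Conversation.user now accepts keyword-only image/file attachments
convo = Conversation.user("What are the key findings?", file="findings.pdf")

# or build the message explicitly and chain parts
msg = (
    Message.user("Compare these two documents.")
    .add_file("doc_a.pdf")
    .add_file("doc_b.pdf", media_type="application/pdf")
)
print(len(msg.files))  # -> 2
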
lm_deluge/tracker.py CHANGED
@@ -67,7 +67,7 @@ class StatusTracker:
     def set_limiting_factor(self, factor):
         self.limiting_factor = factor

-    def check_capacity(self, num_tokens: int):
+    def check_capacity(self, num_tokens: int, retry: bool = False):
         request_available = self.available_request_capacity >= 1
         tokens_available = self.available_token_capacity >= num_tokens
         concurrent_request_available = (
@@ -76,8 +76,10 @@ class StatusTracker:
         if request_available and tokens_available and concurrent_request_available:
             self.available_request_capacity -= 1
             self.available_token_capacity -= num_tokens
-
-
+            if not retry:
+                # Only count new tasks, not retries
+                self.num_tasks_started += 1
+            self.num_tasks_in_progress += 1
             self.set_limiting_factor(None)
             return True
         else:

{lm_deluge-0.0.13.dist-info → lm_deluge-0.0.14.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.13
+Version: 0.0.14
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -30,6 +30,7 @@ Dynamic: license-file
 `lm-deluge` is a lightweight helper library for maxing out your rate limits with LLM providers. It provides the following:

 - **Unified client** – Send prompts to all relevant models with a single client.
+- **Files and Images** - Include images easily for multimodal models, and PDF files for models that support them (OpenAI and Anthropic).
 - **Massive concurrency with throttling** – Set `max_tokens_per_minute` and `max_requests_per_minute` and let it fly. The client will process as many requests as possible while respecting rate limits and retrying failures.
 - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
 - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
@@ -41,6 +42,8 @@ Dynamic: license-file

 **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!

+**Update 06/02/2025:** I lied, it supports (very basic) streaming now via client.stream(...). It will print tokens as they arrive, then return an APIResponse at the end. More sophisticated streaming may or may not be implemented later, don't count on it.
+
 ## Installation

 ```bash

{lm_deluge-0.0.13.dist-info → lm_deluge-0.0.14.dist-info}/RECORD CHANGED
@@ -1,26 +1,28 @@
-lm_deluge/__init__.py,sha256=
+lm_deluge/__init__.py,sha256=mAztMuxINmh7dGbYnT8tsmw1eryQAvd0jpY8yHzd0EE,315
 lm_deluge/agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lm_deluge/batches.py,sha256=dI5G9uvmoDU9hMohrkEhlIDyJPsmsVwZPwxx6qETxxk,17728
 lm_deluge/cache.py,sha256=VB1kv8rM2t5XWPR60uhszFcxLDnVKOe1oA5hYjVDjIo,4375
-lm_deluge/client.py,sha256=
-lm_deluge/config.py,sha256=
+lm_deluge/client.py,sha256=kMHA3VlCRk_Ly1CiJ6rRz2GxttxhVuw6WEQtdMVrK-4,19806
+lm_deluge/config.py,sha256=H1tQyJDNHGFuwxqQNL5Z-CjWAC0luHSBA3iY_pxmACM,932
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
+lm_deluge/file.py,sha256=9l-zWKoHPnPhTL_CZNbxyoKwbLxlXHkRU2bz43qxaV4,5311
 lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
 lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
 lm_deluge/models.py,sha256=gW9ZhKYjwC-ZF-SzWqagFUE_7Mqerdtt_T5NxGo040E,46583
-lm_deluge/prompt.py,sha256=
+lm_deluge/prompt.py,sha256=KOuJFwpRKuz2F5WLniZzjOTW05I--mzYyMglr-s47F8,34601
 lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
 lm_deluge/tool.py,sha256=C2zwU9-7fldfYT0TZDoVVGGSC6dN_It9GSxnfkN6Z_w,9822
-lm_deluge/tracker.py,sha256=
+lm_deluge/tracker.py,sha256=4QQ0-H01KQp8x8KccidBIJWA5zfSQyA0kgTynvSG0gk,9202
 lm_deluge/usage.py,sha256=oS-rmF3ZJ1RMtR7WI6BB2uVOAjJg0scvGF3zZRahWVg,4449
 lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
 lm_deluge/api_requests/anthropic.py,sha256=itKPu1cqCYcrr4fkLarlvSYr6tqLEAGVLGXEG05QXWM,8345
-lm_deluge/api_requests/base.py,sha256=
+lm_deluge/api_requests/base.py,sha256=THgCceZ_z9YjA_E9WWME5f2tIRSOOI2OAQCAWVlV-Xg,12448
 lm_deluge/api_requests/bedrock.py,sha256=yh4-zMrjlQfmxoBbrc2WYJ8gEqVkTP_-tMR7-XbTAtQ,11753
 lm_deluge/api_requests/common.py,sha256=pcOpODL4heoaNLjbA6_ogkrOAbUSKY3F37D2EyMLW10,359
 lm_deluge/api_requests/mistral.py,sha256=PkuoKbOJAB6DOK_NvzbxpWPAktfvonf69QjC0tVCYuE,5366
-lm_deluge/api_requests/openai.py,sha256=
+lm_deluge/api_requests/openai.py,sha256=HUn83Y_Roo3pCUTBnrQhL9skW_PJ4OvS5gr5rIg58dU,19366
+lm_deluge/api_requests/response.py,sha256=X6AHXv-4dWHLKkPv7J0MSesweunqxIqJED6UY6ypdzE,5770
 lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
 lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
 lm_deluge/api_requests/deprecated/deepseek.py,sha256=FEApI93VAWDwuaqTooIyKMgONYqRhdUmiAPBRme-IYs,4582
@@ -35,8 +37,8 @@ lm_deluge/util/json.py,sha256=_4Oar2Cmz2L1DK3EtPLPDxD6rsYHxjROmV8ZpmMjQ-4,5822
 lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
+lm_deluge-0.0.14.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.14.dist-info/METADATA,sha256=iK9UuTpf235TbQQ6CkrLX725loOMSdwTscZJQgEHeoo,11942
+lm_deluge-0.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.14.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.14.dist-info/RECORD,,

File without changes
|
|
File without changes
|
|
File without changes
|