lm-deluge 0.0.15__tar.gz → 0.0.17__tar.gz
This diff compares the contents of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between those versions as published.
Potentially problematic release: this version of lm-deluge has been flagged as potentially problematic.
- {lm_deluge-0.0.15/src/lm_deluge.egg-info → lm_deluge-0.0.17}/PKG-INFO +35 -1
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/README.md +34 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/pyproject.toml +1 -1
- lm_deluge-0.0.17/src/lm_deluge/api_requests/__init__.py +1 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/anthropic.py +58 -84
- lm_deluge-0.0.17/src/lm_deluge/api_requests/base.py +120 -0
- lm_deluge-0.0.17/src/lm_deluge/api_requests/bedrock.py +298 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/gemini.py +18 -44
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/mistral.py +30 -60
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/openai.py +147 -148
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/response.py +2 -1
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/batches.py +1 -1
- lm_deluge-0.0.15/src/lm_deluge/computer_use/anthropic_tools.py → lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/__init__.py +58 -5
- lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/editor.py +559 -0
- lm_deluge-0.0.17/src/lm_deluge/built_in_tools/base.py +9 -0
- lm_deluge-0.0.17/src/lm_deluge/built_in_tools/openai.py +28 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/client.py +304 -150
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/image.py +13 -8
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/extract.py +23 -4
- lm_deluge-0.0.17/src/lm_deluge/llm_tools/ocr.py +1 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/models.py +39 -2
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/prompt.py +43 -27
- lm_deluge-0.0.17/src/lm_deluge/request_context.py +75 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/tool.py +97 -15
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/tracker.py +1 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17/src/lm_deluge.egg-info}/PKG-INFO +35 -1
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/SOURCES.txt +10 -33
- lm_deluge-0.0.17/tests/test_builtin_tools.py +58 -0
- lm_deluge-0.0.17/tests/test_native_mcp_server.py +66 -0
- lm_deluge-0.0.15/src/lm_deluge/api_requests/__init__.py +0 -3
- lm_deluge-0.0.15/src/lm_deluge/api_requests/base.py +0 -306
- lm_deluge-0.0.15/src/lm_deluge/api_requests/bedrock.py +0 -320
- lm_deluge-0.0.15/tests/test_all_models.py +0 -88
- lm_deluge-0.0.15/tests/test_batch_real.py +0 -95
- lm_deluge-0.0.15/tests/test_bedrock_computer_use.py +0 -378
- lm_deluge-0.0.15/tests/test_bedrock_models.py +0 -205
- lm_deluge-0.0.15/tests/test_cache.py +0 -56
- lm_deluge-0.0.15/tests/test_client_tracker_integration.py +0 -43
- lm_deluge-0.0.15/tests/test_computer_use.py +0 -103
- lm_deluge-0.0.15/tests/test_computer_use_integration.py +0 -277
- lm_deluge-0.0.15/tests/test_debug_format.py +0 -47
- lm_deluge-0.0.15/tests/test_file_integration.py +0 -156
- lm_deluge-0.0.15/tests/test_file_support.py +0 -210
- lm_deluge-0.0.15/tests/test_gemini_integration.py +0 -238
- lm_deluge-0.0.15/tests/test_image_models.py +0 -57
- lm_deluge-0.0.15/tests/test_image_utils.py +0 -21
- lm_deluge-0.0.15/tests/test_json_utils.py +0 -78
- lm_deluge-0.0.15/tests/test_logprobs_refactor.py +0 -306
- lm_deluge-0.0.15/tests/test_max_concurrent_requests.py +0 -38
- lm_deluge-0.0.15/tests/test_mcp_tools.py +0 -221
- lm_deluge-0.0.15/tests/test_openai_responses.py +0 -356
- lm_deluge-0.0.15/tests/test_prompt_caching.py +0 -257
- lm_deluge-0.0.15/tests/test_real_caching.py +0 -305
- lm_deluge-0.0.15/tests/test_real_caching_bedrock.py +0 -307
- lm_deluge-0.0.15/tests/test_retry_fix.py +0 -67
- lm_deluge-0.0.15/tests/test_rich_display.py +0 -114
- lm_deluge-0.0.15/tests/test_sampling_params.py +0 -13
- lm_deluge-0.0.15/tests/test_simple_gemini.py +0 -32
- lm_deluge-0.0.15/tests/test_tool_calls.py +0 -401
- lm_deluge-0.0.15/tests/test_tool_from_function.py +0 -150
- lm_deluge-0.0.15/tests/test_tool_validation.py +0 -36
- lm_deluge-0.0.15/tests/test_tracker_refactor.py +0 -99
- lm_deluge-0.0.15/tests/test_translate.py +0 -31
- lm_deluge-0.0.15/tests/test_xml_utils.py +0 -35
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/LICENSE +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/setup.cfg +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/top_level.txt +0 -0
{lm_deluge-0.0.15/src/lm_deluge.egg-info → lm_deluge-0.0.17}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.15
+Version: 0.0.17
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -128,6 +128,30 @@ This just works. Images can be local images on disk, URLs, bytes, base64 data UR
 
 See a full multi-turn chat example in `examples/multiturn.md`.
 
+## Files
+
+For models that support file uploads (OpenAI, Anthropic, and Gemini), you can easily include PDF files and other documents:
+
+```python
+from lm_deluge import LLMClient, Conversation
+
+# Simple file upload
+client = LLMClient.basic("gpt-4.1-mini")
+conversation = Conversation.user(
+    "Please summarize this document",
+    file="path/to/document.pdf"
+)
+resps = client.process_prompts_sync([conversation])
+
+# You can also create File objects for more control
+from lm_deluge import File
+file = File("path/to/report.pdf", filename="Q4_Report.pdf")
+conversation = Conversation.user("Analyze this financial report")
+conversation.messages[0].parts.append(file)
+```
+
+Files can be local paths, URLs, bytes, or base64 data URLs, just like images.
+
 ## Tool Use
 
 Define tools from Python functions and use them with any model:
@@ -187,6 +211,16 @@ for tool_call in resps[0].tool_calls:
     # this is dumb sorry will make it better
     tool_to_call = [x for x in tools if x.name == tool_call.name][0]
     tool_to_call.call(**tool_call.arguments) # in async code, use .acall()
+
+# or use the built-in agent loop to handle this automatically
+import asyncio
+
+async def main():
+    conv = Conversation.user("List the files in the current directory")
+    conv, resp = await client.run_agent_loop(conv, tools=tools)
+    print(resp.content.completion)
+
+asyncio.run(main())
 ```
 
 ### Prompt Caching (Anthropic)
{lm_deluge-0.0.15 → lm_deluge-0.0.17}/README.md
@@ -101,6 +101,30 @@ This just works. Images can be local images on disk, URLs, bytes, base64 data UR
 
 See a full multi-turn chat example in `examples/multiturn.md`.
 
+## Files
+
+For models that support file uploads (OpenAI, Anthropic, and Gemini), you can easily include PDF files and other documents:
+
+```python
+from lm_deluge import LLMClient, Conversation
+
+# Simple file upload
+client = LLMClient.basic("gpt-4.1-mini")
+conversation = Conversation.user(
+    "Please summarize this document",
+    file="path/to/document.pdf"
+)
+resps = client.process_prompts_sync([conversation])
+
+# You can also create File objects for more control
+from lm_deluge import File
+file = File("path/to/report.pdf", filename="Q4_Report.pdf")
+conversation = Conversation.user("Analyze this financial report")
+conversation.messages[0].parts.append(file)
+```
+
+Files can be local paths, URLs, bytes, or base64 data URLs, just like images.
+
 ## Tool Use
 
 Define tools from Python functions and use them with any model:
@@ -160,6 +184,16 @@ for tool_call in resps[0].tool_calls:
     # this is dumb sorry will make it better
     tool_to_call = [x for x in tools if x.name == tool_call.name][0]
     tool_to_call.call(**tool_call.arguments) # in async code, use .acall()
+
+# or use the built-in agent loop to handle this automatically
+import asyncio
+
+async def main():
+    conv = Conversation.user("List the files in the current directory")
+    conv, resp = await client.run_agent_loop(conv, tools=tools)
+    print(resp.content.completion)
+
+asyncio.run(main())
 ```
 
 ### Prompt Caching (Anthropic)
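The new Files section states that files can be given as local paths, URLs, bytes, or base64 data URLs. Only the path-based form appears in the diff, so the following is a hedged sketch of the URL and bytes cases; the argument handling for those inputs is an assumption, not confirmed by this release.

```python
# Hedged sketch based on the README text above; only the path form is shown in
# the diff, so URL and bytes handling here are assumptions.
from lm_deluge import LLMClient, Conversation, File

client = LLMClient.basic("gpt-4.1-mini")

# file given as a URL (assumption: URL strings are accepted like image URLs)
conv_url = Conversation.user(
    "Summarize this paper", file="https://example.com/paper.pdf"
)

# file given as raw bytes wrapped in a File part (assumption: bytes are accepted)
with open("report.pdf", "rb") as f:
    pdf_bytes = f.read()
conv_bytes = Conversation.user("Analyze this report")
conv_bytes.messages[0].parts.append(File(pdf_bytes, filename="report.pdf"))

resps = client.process_prompts_sync([conv_url, conv_bytes])
```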
lm_deluge-0.0.17/src/lm_deluge/api_requests/__init__.py
@@ -0,0 +1 @@
+
{lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/anthropic.py
@@ -1,35 +1,39 @@
-from aiohttp import ClientResponse
 import json
 import os
-
+
+from aiohttp import ClientResponse
 
 from lm_deluge.prompt import (
+    CachePattern,
     Conversation,
     Message,
     Text,
-    ToolCall,
     Thinking,
-
+    ToolCall,
 )
-from lm_deluge.
+from lm_deluge.request_context import RequestContext
+from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
-from .base import APIRequestBase, APIResponse
 
-from ..tracker import StatusTracker
 from ..config import SamplingParams
 from ..models import APIModel
-from
+from .base import APIRequestBase, APIResponse
+
+
+def _add_beta(headers: dict, beta: str):
+    if "anthropic-beta" in headers and headers["anthropic-beta"]:
+        if beta not in headers["anthropic-beta"]:
+            headers["anthropic-beta"] += f",{beta}"
+    else:
+        headers["anthropic-beta"] = beta
 
 
 def _build_anthropic_request(
     model: APIModel,
     prompt: Conversation,
-    tools: list[Tool] | None,
+    tools: list[Tool | dict | MCPServer] | None,
     sampling_params: SamplingParams,
     cache_pattern: CachePattern | None = None,
-    computer_use: bool = False,
-    display_width: int = 1024,
-    display_height: int = 768,
 ):
     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
     request_header = {
@@ -38,10 +42,6 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }
 
-    # Add beta header for Computer Use
-    if computer_use:
-        request_header["anthropic-beta"] = "computer-use-2025-01-24"
-
     request_json = {
         "model": model.name,
         "messages": messages,
@@ -69,89 +69,61 @@
         print("ignoring reasoning_effort for non-reasoning model")
     if system_message is not None:
         request_json["system"] = system_message
-    if tools
+    if tools:
+        mcp_servers = []
         tool_definitions = []
-
-
-
-
-
-
-
-
-
-
+        for tool in tools:
+            if isinstance(tool, Tool):
+                tool_definitions.append(tool.dump_for("anthropic"))
+            elif isinstance(tool, dict):
+                tool_definitions.append(tool)
+                # add betas if needed
+                if tool["type"] in [
+                    "computer_20241022",
+                    "text_editor_20241022",
+                    "bash_20241022",
+                ]:
+                    _add_beta(request_header, "computer-use-2024-10-22")
+                elif tool["type"] == "computer_20250124":
+                    _add_beta(request_header, "computer-use-2025-01-24")
+                elif tool["type"] == "code_execution_20250522":
+                    _add_beta(request_header, "code-execution-2025-05-22")
+            elif isinstance(tool, MCPServer):
+                _add_beta(request_header, "mcp-client-2025-04-04")
+                mcp_servers.append(tool.for_anthropic())
 
         # Add cache control to last tool if tools_only caching is specified
         if cache_pattern == "tools_only" and tool_definitions:
             tool_definitions[-1]["cache_control"] = {"type": "ephemeral"}
 
         request_json["tools"] = tool_definitions
+        if len(mcp_servers) > 0:
+            request_json["mcp_servers"] = mcp_servers
 
     return request_json, request_header
 
 
 class AnthropicRequest(APIRequestBase):
-    def __init__(
-
-
-
-        # internal logic should handle translating to specific API format
-        model_name: str, # must correspond to registry
-        prompt: Conversation,
-        attempts_left: int,
-        status_tracker: StatusTracker,
-        results_arr: list,
-        request_timeout: int = 30,
-        sampling_params: SamplingParams = SamplingParams(),
-        callback: Callable | None = None,
-        # for retries
-        all_model_names: list[str] | None = None,
-        all_sampling_params: list[SamplingParams] | None = None,
-        tools: list | None = None,
-        cache: CachePattern | None = None,
-        # Computer Use support
-        computer_use: bool = False,
-        display_width: int = 1024,
-        display_height: int = 768,
-    ):
-        super().__init__(
-            task_id=task_id,
-            model_name=model_name,
-            prompt=prompt,
-            attempts_left=attempts_left,
-            status_tracker=status_tracker,
-            results_arr=results_arr,
-            request_timeout=request_timeout,
-            sampling_params=sampling_params,
-            callback=callback,
-            all_model_names=all_model_names,
-            all_sampling_params=all_sampling_params,
-            tools=tools,
-            cache=cache,
-        )
-        self.computer_use = computer_use
-        self.display_width = display_width
-        self.display_height = display_height
-        self.model = APIModel.from_registry(model_name)
+    def __init__(self, context: RequestContext):
+        super().__init__(context=context)
+
+        self.model = APIModel.from_registry(self.context.model_name)
         self.url = f"{self.model.api_base}/messages"
 
         # Lock images as bytes if caching is enabled
-        if cache is not None:
-            prompt.lock_images_as_bytes()
+        if self.context.cache is not None:
+            self.context.prompt.lock_images_as_bytes()
 
         self.request_json, self.request_header = _build_anthropic_request(
             self.model,
-            prompt,
-            tools,
-            sampling_params,
-            cache,
-            computer_use,
-            display_width,
-            display_height,
+            self.context.prompt,
+            self.context.tools,
+            self.context.sampling_params,
+            self.context.cache,
         )
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+        data = None
         is_error = False
         error_message = None
         thinking = None
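To make the header handling above concrete, here is a small standalone sketch (plain dict tools only; the `Tool` and `MCPServer` branches are omitted) showing how repeated `_add_beta` calls accumulate into a single comma-separated `anthropic-beta` header. The `_add_beta` body is copied from the hunk above; the tool dicts are illustrative stand-ins, not full Anthropic tool definitions.

```python
# Standalone illustration of the dispatch above; _add_beta is copied from the hunk,
# the tool dicts are illustrative stand-ins for Anthropic built-in tool definitions.
def _add_beta(headers: dict, beta: str):
    if "anthropic-beta" in headers and headers["anthropic-beta"]:
        if beta not in headers["anthropic-beta"]:
            headers["anthropic-beta"] += f",{beta}"
    else:
        headers["anthropic-beta"] = beta

request_header = {"content-type": "application/json"}
tools = [
    {"type": "computer_20250124", "name": "computer"},
    {"type": "code_execution_20250522", "name": "code_execution"},
]
for tool in tools:
    if tool["type"] in ["computer_20241022", "text_editor_20241022", "bash_20241022"]:
        _add_beta(request_header, "computer-use-2024-10-22")
    elif tool["type"] == "computer_20250124":
        _add_beta(request_header, "computer-use-2025-01-24")
    elif tool["type"] == "code_execution_20250522":
        _add_beta(request_header, "code-execution-2025-05-22")

print(request_header["anthropic-beta"])
# -> computer-use-2025-01-24,code-execution-2025-05-22
```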
@@ -160,6 +132,7 @@ class AnthropicRequest(APIRequestBase):
         status_code = http_response.status
         mimetype = http_response.headers.get("Content-Type", None)
         rate_limits = {}
+        assert self.context.status_tracker
         for header in [
            "anthropic-ratelimit-requests-limit",
            "anthropic-ratelimit-requests-remaining",
@@ -215,20 +188,21 @@
                 or "overloaded" in error_message.lower()
             ):
                 error_message += " (Rate limit error, triggering cooldown.)"
-                self.status_tracker.rate_limit_exceeded()
+                self.context.status_tracker.rate_limit_exceeded()
             if "context length" in error_message:
                 error_message += " (Context length exceeded, set retries to 0.)"
-                self.attempts_left = 0
+                self.context.attempts_left = 0
 
         return APIResponse(
-            id=self.task_id,
+            id=self.context.task_id,
             status_code=status_code,
             is_error=is_error,
             error_message=error_message,
-            prompt=self.prompt,
+            prompt=self.context.prompt,
             content=content,
             thinking=thinking,
-            model_internal=self.model_name,
-            sampling_params=self.sampling_params,
+            model_internal=self.context.model_name,
+            sampling_params=self.context.sampling_params,
             usage=usage,
+            raw_response=data,
        )
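The constructor change above is the visible edge of a larger refactor: per-request keyword arguments are folded into a single `RequestContext` object (new file `src/lm_deluge/request_context.py`, +75 lines, not shown in this excerpt). Below is a rough sketch of the shape implied by the `self.context.*` accesses in these hunks; the field names and defaults are inferred, not taken from the real dataclass.

```python
# Inferred sketch only: the real RequestContext lives in src/lm_deluge/request_context.py
# and may differ. Fields below are the attributes read via `self.context` in the diffs.
from dataclasses import dataclass
from typing import Any, Callable, Optional

@dataclass
class RequestContextSketch:
    task_id: int
    model_name: str                     # must exist in the model registry
    prompt: Any                         # Conversation
    sampling_params: Any                # SamplingParams
    attempts_left: int = 5
    request_timeout: int = 30
    tools: Optional[list] = None
    cache: Any = None                   # CachePattern | None
    status_tracker: Any = None          # StatusTracker | None
    callback: Optional[Callable] = None

# Before: AnthropicRequest(task_id=..., model_name=..., prompt=..., status_tracker=..., ...)
# After:  AnthropicRequest(context) - every provider subclass reads the same fields.
```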
lm_deluge-0.0.17/src/lm_deluge/api_requests/base.py
@@ -0,0 +1,120 @@
+import asyncio
+import traceback
+from abc import ABC, abstractmethod
+
+import aiohttp
+from aiohttp import ClientResponse
+
+from ..errors import raise_if_modal_exception
+from ..request_context import RequestContext
+from .response import APIResponse
+
+
+class APIRequestBase(ABC):
+    """
+    Class for handling API requests. All model/endpoint-specific logic should be
+    handled by overriding __init__ and implementing the handle_response method.
+    For call_api to work, the __init__ must handle setting:
+    - url
+    - request_header
+    - request_json
+    """
+
+    def __init__(
+        self,
+        context: RequestContext,
+    ):
+        # If context is provided, use it; otherwise construct one from individual parameters
+        self.context = context
+
+        # Everything is now accessed through self.context - no copying!
+        self.system_prompt = None
+        self.result = [] # list of APIResponse objects from each attempt
+
+        # these should be set in the __init__ of the subclass
+        self.url = None
+        self.request_header = None
+        self.request_json = None
+        self.region = None
+
+    def increment_pbar(self):
+        if self.context.status_tracker:
+            self.context.status_tracker.increment_pbar()
+
+    def call_callback(self):
+        if self.context.callback is not None:
+            # the APIResponse in self.result includes all the information
+            self.context.callback(self.result[-1], self.context.status_tracker)
+
+    def handle_success(self, data):
+        self.call_callback()
+        if self.context.status_tracker:
+            self.context.status_tracker.task_succeeded(self.context.task_id)
+
+    async def execute_once(self) -> APIResponse:
+        """Send the HTTP request once and return the parsed APIResponse."""
+        assert self.context.status_tracker
+        try:
+            self.context.status_tracker.total_requests += 1
+            timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    response: APIResponse = await self.handle_response(http_response)
+                    return response
+
+        except asyncio.TimeoutError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Request timed out (terminated by client).",
+                content=None,
+                usage=None,
+            )
+
+        except Exception as e:
+            raise_if_modal_exception(e)
+            tb = traceback.format_exc()
+            print(tb)
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                content=None,
+                usage=None,
+            )
+
+    @abstractmethod
+    async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+        raise NotImplementedError
+
+
+def deduplicate_responses(results: list[APIRequestBase]) -> list[APIResponse]:
+    deduplicated = {}
+    for request in results:
+        if request.context.task_id not in deduplicated:
+            deduplicated[request.context.task_id] = request.result[-1]
+        else:
+            current_response: APIResponse = deduplicated[request.context.task_id]
+            # only replace if the current request has no completion and the new one does
+            if (
+                request.result[-1].completion is not None
+                and current_response.completion is None
+            ):
+                deduplicated[request.context.task_id] = request.result[-1]
+
+    output = [deduplicated[request.context.task_id] for request in results]
+
+    return output