lm-deluge 0.0.12__tar.gz → 0.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- {lm_deluge-0.0.12/src/lm_deluge.egg-info → lm_deluge-0.0.14}/PKG-INFO +8 -5
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/README.md +6 -2
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/pyproject.toml +2 -3
- lm_deluge-0.0.14/src/lm_deluge/__init__.py +17 -0
- lm_deluge-0.0.14/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/anthropic.py +90 -58
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/base.py +63 -180
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/bedrock.py +34 -10
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/common.py +2 -1
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/mistral.py +6 -15
- lm_deluge-0.0.14/src/lm_deluge/api_requests/openai.py +481 -0
- lm_deluge-0.0.14/src/lm_deluge/api_requests/response.py +153 -0
- lm_deluge-0.0.14/src/lm_deluge/batches.py +498 -0
- lm_deluge-0.0.14/src/lm_deluge/client.py +489 -0
- lm_deluge-0.0.14/src/lm_deluge/computer_use/anthropic_tools.py +75 -0
- lm_deluge-0.0.12/src/lm_deluge/sampling_params.py → lm_deluge-0.0.14/src/lm_deluge/config.py +12 -4
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/embed.py +17 -11
- lm_deluge-0.0.14/src/lm_deluge/file.py +149 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/models.py +33 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/prompt.py +156 -15
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/rerank.py +18 -12
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/tool.py +11 -1
- lm_deluge-0.0.14/src/lm_deluge/tracker.py +255 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/json.py +18 -1
- {lm_deluge-0.0.12 → lm_deluge-0.0.14/src/lm_deluge.egg-info}/PKG-INFO +8 -5
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/SOURCES.txt +21 -1
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/requires.txt +1 -2
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_all_models.py +24 -24
- lm_deluge-0.0.14/tests/test_batch_real.py +95 -0
- lm_deluge-0.0.14/tests/test_bedrock_computer_use.py +378 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_cache.py +5 -4
- lm_deluge-0.0.14/tests/test_client_tracker_integration.py +43 -0
- lm_deluge-0.0.14/tests/test_computer_use.py +103 -0
- lm_deluge-0.0.14/tests/test_computer_use_integration.py +277 -0
- lm_deluge-0.0.14/tests/test_debug_format.py +47 -0
- lm_deluge-0.0.14/tests/test_file_integration.py +156 -0
- lm_deluge-0.0.14/tests/test_file_support.py +210 -0
- lm_deluge-0.0.14/tests/test_logprobs_refactor.py +306 -0
- lm_deluge-0.0.14/tests/test_max_concurrent_requests.py +38 -0
- lm_deluge-0.0.14/tests/test_openai_responses.py +356 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_prompt_caching.py +9 -13
- lm_deluge-0.0.14/tests/test_retry_fix.py +67 -0
- lm_deluge-0.0.14/tests/test_rich_display.py +114 -0
- lm_deluge-0.0.14/tests/test_tool_validation.py +36 -0
- lm_deluge-0.0.14/tests/test_tracker_refactor.py +99 -0
- lm_deluge-0.0.12/src/lm_deluge/__init__.py +0 -7
- lm_deluge-0.0.12/src/lm_deluge/api_requests/openai.py +0 -189
- lm_deluge-0.0.12/src/lm_deluge/client.py +0 -771
- lm_deluge-0.0.12/src/lm_deluge/tracker.py +0 -43
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/LICENSE +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/setup.cfg +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_bedrock_models.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_image_models.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_image_utils.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_json_utils.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_mcp_tools.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_real_caching.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_real_caching_bedrock.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_sampling_params.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_tool_calls.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_tool_from_function.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_translate.py +0 -0
- {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_xml_utils.py +0 -0
--- lm_deluge-0.0.12/src/lm_deluge.egg-info/PKG-INFO
+++ lm_deluge-0.0.14/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.12
+Version: 0.0.14
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -22,8 +22,7 @@ Requires-Dist: lxml
 Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
-Requires-Dist:
-Requires-Dist: fasttext-langdetect
+Requires-Dist: rich
 Dynamic: license-file
 
 # lm-deluge
@@ -31,16 +30,20 @@ Dynamic: license-file
 `lm-deluge` is a lightweight helper library for maxing out your rate limits with LLM providers. It provides the following:
 
 - **Unified client** – Send prompts to all relevant models with a single client.
+- **Files and Images** - Include images easily for multimodal models, and PDF files for models that support them (OpenAI and Anthropic).
 - **Massive concurrency with throttling** – Set `max_tokens_per_minute` and `max_requests_per_minute` and let it fly. The client will process as many requests as possible while respecting rate limits and retrying failures.
 - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
 - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
 - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
+- **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
 - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
 - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
 - **Sync and async APIs** – Use the client from sync or async code.
 
 **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
 
+**Update 06/02/2025:** I lied, it supports (very basic) streaming now via client.stream(...). It will print tokens as they arrive, then return an APIResponse at the end. More sophisticated streaming may or may not be implemented later, don't count on it.
+
 ## Installation
 
 ```bash
@@ -233,11 +236,11 @@ asyncio.run(main())
 
 ## Available Models
 
-We support all models in `src/lm_deluge/models.py`.
+We support all models in `src/lm_deluge/models.py`. Vertex support is not planned in the short term, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
 
 ## Feature Support
 
-We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We
+We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
 
 ## Built‑in tools
 
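The Feature Support paragraph above refers to `SamplingParams` options (`json_mode`, `reasoning_effort`). As a rough sketch of how those options fit together: the field names are taken from the README text and the `anthropic.py` hunks further down, while how the object is passed to `LLMClient` is an assumption, not something this diff documents.

```python
# Sketch only. SamplingParams field names come from the README and the anthropic.py
# diff below; the LLMClient call is a hypothetical illustration, not the documented API.
from lm_deluge import LLMClient, SamplingParams

params = SamplingParams(
    temperature=0.75,
    top_p=1.0,
    max_new_tokens=512,
    json_mode=True,             # structured (JSON) outputs
    reasoning_effort="medium",  # translated to a thinking budget for Claude/Gemini
)

client = LLMClient("claude-3.7-sonnet", sampling_params=params)  # hypothetical signature
```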
--- lm_deluge-0.0.12/README.md
+++ lm_deluge-0.0.14/README.md
@@ -3,16 +3,20 @@
 `lm-deluge` is a lightweight helper library for maxing out your rate limits with LLM providers. It provides the following:
 
 - **Unified client** – Send prompts to all relevant models with a single client.
+- **Files and Images** - Include images easily for multimodal models, and PDF files for models that support them (OpenAI and Anthropic).
 - **Massive concurrency with throttling** – Set `max_tokens_per_minute` and `max_requests_per_minute` and let it fly. The client will process as many requests as possible while respecting rate limits and retrying failures.
 - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
 - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
 - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
+- **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
 - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
 - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
 - **Sync and async APIs** – Use the client from sync or async code.
 
 **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
 
+**Update 06/02/2025:** I lied, it supports (very basic) streaming now via client.stream(...). It will print tokens as they arrive, then return an APIResponse at the end. More sophisticated streaming may or may not be implemented later, don't count on it.
+
 ## Installation
 
 ```bash
@@ -205,11 +209,11 @@ asyncio.run(main())
 
 ## Available Models
 
-We support all models in `src/lm_deluge/models.py`.
+We support all models in `src/lm_deluge/models.py`. Vertex support is not planned in the short term, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
 
 ## Feature Support
 
-We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We
+We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.
 
 ## Built‑in tools
 
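The new Computer Use bullet and the streaming update above mention `process_prompts_sync`/`process_prompts_async` and `client.stream(...)`. A minimal sketch of what that might look like; only the names `process_prompts_sync`, `computer_use`, `stream`, and `APIResponse` come from the README text, everything else is an assumption.

```python
# Sketch only: argument shapes beyond computer_use are assumptions, not from this diff.
from lm_deluge import LLMClient

client = LLMClient("claude-3.7-sonnet")  # hypothetical constructor call

# Computer Use is enabled per call via the computer_use argument
# (works against Anthropic's API; Bedrock currently rejects the tool definitions).
responses = client.process_prompts_sync(
    ["Open a browser and check the weather in Tokyo."],
    computer_use=True,
)

# Very basic streaming (new in this release): prints tokens as they arrive,
# then returns an APIResponse. Whether stream() must be awaited isn't shown in this diff.
response = client.stream("Write a haiku about rate limits.")
```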
--- lm_deluge-0.0.12/pyproject.toml
+++ lm_deluge-0.0.14/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.12"
+version = "0.0.14"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -28,6 +28,5 @@ dependencies = [
     "pdf2image",
     "pillow",
     "fastmcp>=2.4",
-    "
-    "fasttext-langdetect",
+    "rich"
 ]
--- /dev/null
+++ lm_deluge-0.0.14/src/lm_deluge/__init__.py
@@ -0,0 +1,17 @@
+from .client import LLMClient, SamplingParams, APIResponse
+from .prompt import Conversation, Message
+from .tool import Tool
+from .file import File
+import dotenv
+
+dotenv.load_dotenv()
+
+__all__ = [
+    "LLMClient",
+    "SamplingParams",
+    "APIResponse",
+    "Conversation",
+    "Message",
+    "Tool",
+    "File",
+]
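The new top-level `__init__.py` above re-exports the public API and calls `dotenv.load_dotenv()` at import time, so provider API keys in a local `.env` file are picked up automatically. The imports below follow directly from the `__all__` list; the environment-variable name in the comment is only an example.

```python
# These names are exactly what the new __init__.py exports.
# Importing lm_deluge also runs dotenv.load_dotenv(), so keys in a .env file
# (e.g. ANTHROPIC_API_KEY -- example variable name, not taken from this diff)
# are loaded into the environment.
from lm_deluge import (
    LLMClient,
    SamplingParams,
    APIResponse,
    Conversation,
    Message,
    Tool,
    File,
)
```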
File without changes (lm_deluge-0.0.14/src/lm_deluge/agent.py).
--- lm_deluge-0.0.12/src/lm_deluge/api_requests/anthropic.py
+++ lm_deluge-0.0.14/src/lm_deluge/api_requests/anthropic.py
@@ -1,9 +1,6 @@
-import asyncio
 from aiohttp import ClientResponse
 import json
 import os
-import warnings
-from tqdm import tqdm
 from typing import Callable
 
 from lm_deluge.prompt import (
@@ -14,12 +11,84 @@ from lm_deluge.prompt import (
     Thinking,
     CachePattern,
 )
+from lm_deluge.tool import Tool
 from lm_deluge.usage import Usage
 from .base import APIRequestBase, APIResponse
 
 from ..tracker import StatusTracker
-from ..
+from ..config import SamplingParams
 from ..models import APIModel
+from ..computer_use.anthropic_tools import get_anthropic_cu_tools
+
+
+def _build_anthropic_request(
+    model: APIModel,
+    prompt: Conversation,
+    tools: list[Tool] | None,
+    sampling_params: SamplingParams,
+    cache_pattern: CachePattern | None = None,
+    computer_use: bool = False,
+    display_width: int = 1024,
+    display_height: int = 768,
+):
+    system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
+    request_header = {
+        "x-api-key": os.getenv(model.api_key_env_var),
+        "anthropic-version": "2023-06-01",
+        "content-type": "application/json",
+    }
+
+    # Add beta header for Computer Use
+    if computer_use:
+        request_header["anthropic-beta"] = "computer-use-2025-01-24"
+
+    request_json = {
+        "model": model.name,
+        "messages": messages,
+        "temperature": sampling_params.temperature,
+        "top_p": sampling_params.top_p,
+        "max_tokens": sampling_params.max_new_tokens,
+    }
+
+    # handle thinking
+    if model.reasoning_model and sampling_params.reasoning_effort:
+        # translate reasoning effort of low, medium, high to budget tokens
+        budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+            sampling_params.reasoning_effort
+        )
+        request_json["thinking"] = {
+            "type": "enabled",
+            "budget_tokens": budget,
+        }
+        request_json.pop("top_p")
+        request_json["temperature"] = 1.0
+        request_json["max_tokens"] += budget
+    else:
+        request_json["thinking"] = {"type": "disabled"}
+        if sampling_params.reasoning_effort:
+            print("ignoring reasoning_effort for non-reasoning model")
+    if system_message is not None:
+        request_json["system"] = system_message
+    if tools or computer_use:
+        tool_definitions = []
+        if tools:
+            tool_definitions.extend([tool.dump_for("anthropic") for tool in tools])
+        # Add Computer Use tools
+        if computer_use:
+            cu_tools = get_anthropic_cu_tools(
+                model=model.id,
+                display_width=display_width,  # todo: set from ComputerUseParams
+                display_height=display_height,
+            )
+            tool_definitions.extend(cu_tools)
+
+        # Add cache control to last tool if tools_only caching is specified
+        if cache_pattern == "tools_only" and tool_definitions:
+            tool_definitions[-1]["cache_control"] = {"type": "ephemeral"}
+
+        request_json["tools"] = tool_definitions
+
+    return request_json, request_header
 
 
 class AnthropicRequest(APIRequestBase):
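To make the thinking logic in `_build_anthropic_request` concrete: when the model is a reasoning model and `reasoning_effort` is set, the builder maps the effort level to a token budget, drops `top_p`, pins `temperature` to 1.0, and adds the budget on top of `max_tokens`. A small standalone illustration of that arithmetic (values are examples, not taken from the diff):

```python
# Illustration of the reasoning_effort -> thinking budget mapping implemented above.
budgets = {"low": 1024, "medium": 4096, "high": 16384}

request_json = {"model": "claude-3-7-sonnet", "temperature": 0.7, "top_p": 0.9, "max_tokens": 2048}
effort = "medium"

budget = budgets[effort]
request_json["thinking"] = {"type": "enabled", "budget_tokens": budget}
request_json.pop("top_p")             # the builder removes top_p when thinking is enabled
request_json["temperature"] = 1.0     # and pins temperature to 1.0
request_json["max_tokens"] += budget  # the budget is added on top of the completion budget

assert request_json["max_tokens"] == 2048 + 4096
```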
@@ -32,18 +101,19 @@ class AnthropicRequest(APIRequestBase):
         prompt: Conversation,
         attempts_left: int,
         status_tracker: StatusTracker,
-        retry_queue: asyncio.Queue,
         results_arr: list,
         request_timeout: int = 30,
         sampling_params: SamplingParams = SamplingParams(),
-        pbar: tqdm | None = None,
         callback: Callable | None = None,
-        debug: bool = False,
         # for retries
         all_model_names: list[str] | None = None,
         all_sampling_params: list[SamplingParams] | None = None,
         tools: list | None = None,
         cache: CachePattern | None = None,
+        # Computer Use support
+        computer_use: bool = False,
+        display_width: int = 1024,
+        display_height: int = 768,
     ):
         super().__init__(
             task_id=task_id,
@@ -51,18 +121,18 @@
             prompt=prompt,
             attempts_left=attempts_left,
             status_tracker=status_tracker,
-            retry_queue=retry_queue,
             results_arr=results_arr,
             request_timeout=request_timeout,
             sampling_params=sampling_params,
-            pbar=pbar,
             callback=callback,
-            debug=debug,
             all_model_names=all_model_names,
             all_sampling_params=all_sampling_params,
             tools=tools,
             cache=cache,
         )
+        self.computer_use = computer_use
+        self.display_width = display_width
+        self.display_height = display_height
         self.model = APIModel.from_registry(model_name)
         self.url = f"{self.model.api_base}/messages"
@@ -70,52 +140,16 @@
         if cache is not None:
             prompt.lock_images_as_bytes()
 
-        self.
-
-
-
-
-
-
-
-
-
-            "temperature": self.sampling_params.temperature,
-            "top_p": self.sampling_params.top_p,
-            "max_tokens": self.sampling_params.max_new_tokens,
-        }
-        # handle thinking
-        if self.model.reasoning_model:
-            if sampling_params.reasoning_effort:
-                # translate reasoning effort of low, medium, high to budget tokens
-                budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
-                    sampling_params.reasoning_effort
-                )
-                self.request_json["thinking"] = {
-                    "type": "enabled",
-                    "budget_tokens": budget,
-                }
-                self.request_json.pop("top_p")
-                self.request_json["temperature"] = 1.0
-                self.request_json["max_tokens"] += (
-                    budget  # assume max tokens is max completion tokens
-                )
-            else:
-                # no thinking
-                self.request_json["thinking"] = {"type": "disabled"}
-        else:
-            if sampling_params.reasoning_effort:
-                warnings.warn(
-                    f"Ignoring reasoning_effort param for non-reasoning model: {model_name}"
-                )
-        if self.system_message is not None:
-            self.request_json["system"] = self.system_message
-        if tools:
-            tool_definitions = [tool.dump_for("anthropic") for tool in tools]
-            # Add cache control to last tool if tools_only caching is specified
-            if cache == "tools_only" and tool_definitions:
-                tool_definitions[-1]["cache_control"] = {"type": "ephemeral"}
-            self.request_json["tools"] = tool_definitions
+        self.request_json, self.request_header = _build_anthropic_request(
+            self.model,
+            prompt,
+            tools,
+            sampling_params,
+            cache,
+            computer_use,
+            display_width,
+            display_height,
+        )
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
@@ -135,8 +169,6 @@
                 "anthropic-ratelimit-tokens-reset",
             ]:
                 rate_limits[header] = http_response.headers.get(header, None)
-        if self.debug:
-            print(f"Rate limits: {rate_limits}")
         if status_code >= 200 and status_code < 300:
             try:
                 data = await http_response.json()