lm-deluge 0.0.12__tar.gz → 0.0.14__tar.gz


Files changed (83)
  1. {lm_deluge-0.0.12/src/lm_deluge.egg-info → lm_deluge-0.0.14}/PKG-INFO +8 -5
  2. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/README.md +6 -2
  3. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/pyproject.toml +2 -3
  4. lm_deluge-0.0.14/src/lm_deluge/__init__.py +17 -0
  5. lm_deluge-0.0.14/src/lm_deluge/agent.py +0 -0
  6. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/anthropic.py +90 -58
  7. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/base.py +63 -180
  8. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/bedrock.py +34 -10
  9. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/common.py +2 -1
  10. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/mistral.py +6 -15
  11. lm_deluge-0.0.14/src/lm_deluge/api_requests/openai.py +481 -0
  12. lm_deluge-0.0.14/src/lm_deluge/api_requests/response.py +153 -0
  13. lm_deluge-0.0.14/src/lm_deluge/batches.py +498 -0
  14. lm_deluge-0.0.14/src/lm_deluge/client.py +489 -0
  15. lm_deluge-0.0.14/src/lm_deluge/computer_use/anthropic_tools.py +75 -0
  16. lm_deluge-0.0.12/src/lm_deluge/sampling_params.py → lm_deluge-0.0.14/src/lm_deluge/config.py +12 -4
  17. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/embed.py +17 -11
  18. lm_deluge-0.0.14/src/lm_deluge/file.py +149 -0
  19. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/models.py +33 -0
  20. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/prompt.py +156 -15
  21. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/rerank.py +18 -12
  22. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/tool.py +11 -1
  23. lm_deluge-0.0.14/src/lm_deluge/tracker.py +255 -0
  24. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/json.py +18 -1
  25. {lm_deluge-0.0.12 → lm_deluge-0.0.14/src/lm_deluge.egg-info}/PKG-INFO +8 -5
  26. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/SOURCES.txt +21 -1
  27. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/requires.txt +1 -2
  28. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_all_models.py +24 -24
  29. lm_deluge-0.0.14/tests/test_batch_real.py +95 -0
  30. lm_deluge-0.0.14/tests/test_bedrock_computer_use.py +378 -0
  31. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_cache.py +5 -4
  32. lm_deluge-0.0.14/tests/test_client_tracker_integration.py +43 -0
  33. lm_deluge-0.0.14/tests/test_computer_use.py +103 -0
  34. lm_deluge-0.0.14/tests/test_computer_use_integration.py +277 -0
  35. lm_deluge-0.0.14/tests/test_debug_format.py +47 -0
  36. lm_deluge-0.0.14/tests/test_file_integration.py +156 -0
  37. lm_deluge-0.0.14/tests/test_file_support.py +210 -0
  38. lm_deluge-0.0.14/tests/test_logprobs_refactor.py +306 -0
  39. lm_deluge-0.0.14/tests/test_max_concurrent_requests.py +38 -0
  40. lm_deluge-0.0.14/tests/test_openai_responses.py +356 -0
  41. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_prompt_caching.py +9 -13
  42. lm_deluge-0.0.14/tests/test_retry_fix.py +67 -0
  43. lm_deluge-0.0.14/tests/test_rich_display.py +114 -0
  44. lm_deluge-0.0.14/tests/test_tool_validation.py +36 -0
  45. lm_deluge-0.0.14/tests/test_tracker_refactor.py +99 -0
  46. lm_deluge-0.0.12/src/lm_deluge/__init__.py +0 -7
  47. lm_deluge-0.0.12/src/lm_deluge/api_requests/openai.py +0 -189
  48. lm_deluge-0.0.12/src/lm_deluge/client.py +0 -771
  49. lm_deluge-0.0.12/src/lm_deluge/tracker.py +0 -43
  50. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/LICENSE +0 -0
  51. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/setup.cfg +0 -0
  52. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/__init__.py +0 -0
  53. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  54. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  55. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  56. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  57. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  58. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/cache.py +0 -0
  59. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/errors.py +0 -0
  60. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/gemini_limits.py +0 -0
  61. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/image.py +0 -0
  62. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/__init__.py +0 -0
  63. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/extract.py +0 -0
  64. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/score.py +0 -0
  65. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/llm_tools/translate.py +0 -0
  66. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/usage.py +0 -0
  67. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/logprobs.py +0 -0
  68. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/validation.py +0 -0
  69. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/util/xml.py +0 -0
  70. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  71. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge.egg-info/top_level.txt +0 -0
  72. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_bedrock_models.py +0 -0
  73. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_image_models.py +0 -0
  74. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_image_utils.py +0 -0
  75. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_json_utils.py +0 -0
  76. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_mcp_tools.py +0 -0
  77. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_real_caching.py +0 -0
  78. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_real_caching_bedrock.py +0 -0
  79. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_sampling_params.py +0 -0
  80. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_tool_calls.py +0 -0
  81. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_tool_from_function.py +0 -0
  82. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_translate.py +0 -0
  83. {lm_deluge-0.0.12 → lm_deluge-0.0.14}/tests/test_xml_utils.py +0 -0
{lm_deluge-0.0.12/src/lm_deluge.egg-info → lm_deluge-0.0.14}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.12
+ Version: 0.0.14
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -22,8 +22,7 @@ Requires-Dist: lxml
  Requires-Dist: pdf2image
  Requires-Dist: pillow
  Requires-Dist: fastmcp>=2.4
- Requires-Dist: fasttext-wheel
- Requires-Dist: fasttext-langdetect
+ Requires-Dist: rich
  Dynamic: license-file

  # lm-deluge
@@ -31,16 +30,20 @@ Dynamic: license-file
  `lm-deluge` is a lightweight helper library for maxing out your rate limits with LLM providers. It provides the following:

  - **Unified client** – Send prompts to all relevant models with a single client.
+ - **Files and Images** - Include images easily for multimodal models, and PDF files for models that support them (OpenAI and Anthropic).
  - **Massive concurrency with throttling** – Set `max_tokens_per_minute` and `max_requests_per_minute` and let it fly. The client will process as many requests as possible while respecting rate limits and retrying failures.
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
+ - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
  - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
  - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!

+ **Update 06/02/2025:** I lied, it supports (very basic) streaming now via client.stream(...). It will print tokens as they arrive, then return an APIResponse at the end. More sophisticated streaming may or may not be implemented later, don't count on it.
+
  ## Installation

  ```bash
@@ -233,11 +236,11 @@ asyncio.run(main())

  ## Available Models

- We support all models in `src/lm_deluge/models.py`. An older version of this client supported Bedrock and Vertex. We plan to re-implement Bedrock support (our previous support was spotty and we need to figure out cross-region inference in order to support the newest Claude models). Vertex support is not currently planned, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
+ We support all models in `src/lm_deluge/models.py`. Vertex support is not planned in the short term, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)

  ## Feature Support

- We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We don't support tool use yet, but support is planned (keep an eye out for a unified tool definition spec that works for all models!). We support logprobs for OpenAI models that return them via the `logprobs` argument to the `LLMClient`.
+ We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.

  ## Built‑in tools

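For orientation, the `SamplingParams` options named in the Feature Support paragraph above can be combined roughly as follows. This is a minimal sketch with illustrative values: the `temperature`, `top_p`, `max_new_tokens`, and `reasoning_effort` fields appear in the `anthropic.py` changes later in this diff, while `json_mode` is the flag the README names; exact defaults are not shown here.

```python
from lm_deluge import SamplingParams

# Illustrative values only; field names are taken from this diff and the
# README text above, not from the full SamplingParams definition.
params = SamplingParams(
    temperature=0.2,
    top_p=0.9,
    max_new_tokens=1024,
    reasoning_effort="medium",  # mapped to a thinking budget for Claude/Gemini
    json_mode=True,
)
```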
{lm_deluge-0.0.12 → lm_deluge-0.0.14}/README.md

@@ -3,16 +3,20 @@
  `lm-deluge` is a lightweight helper library for maxing out your rate limits with LLM providers. It provides the following:

  - **Unified client** – Send prompts to all relevant models with a single client.
+ - **Files and Images** - Include images easily for multimodal models, and PDF files for models that support them (OpenAI and Anthropic).
  - **Massive concurrency with throttling** – Set `max_tokens_per_minute` and `max_requests_per_minute` and let it fly. The client will process as many requests as possible while respecting rate limits and retrying failures.
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
+ - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
  - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
  - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!

+ **Update 06/02/2025:** I lied, it supports (very basic) streaming now via client.stream(...). It will print tokens as they arrive, then return an APIResponse at the end. More sophisticated streaming may or may not be implemented later, don't count on it.
+
  ## Installation

  ```bash
@@ -205,11 +209,11 @@ asyncio.run(main())

  ## Available Models

- We support all models in `src/lm_deluge/models.py`. An older version of this client supported Bedrock and Vertex. We plan to re-implement Bedrock support (our previous support was spotty and we need to figure out cross-region inference in order to support the newest Claude models). Vertex support is not currently planned, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)
+ We support all models in `src/lm_deluge/models.py`. Vertex support is not planned in the short term, since Google allows you to connect your Vertex account to AI Studio, and Vertex authentication is a huge pain (requires service account credentials, etc.)

  ## Feature Support

- We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We don't support tool use yet, but support is planned (keep an eye out for a unified tool definition spec that works for all models!). We support logprobs for OpenAI models that return them via the `logprobs` argument to the `LLMClient`.
+ We support structured outputs via `json_mode` parameter provided to `SamplingParams`. Structured outputs with a schema are planned. Reasoning models are supported via the `reasoning_effort` parameter, which is translated to a thinking budget for Claude/Gemini. Image models are supported. We support tool use as documented above. We support logprobs for OpenAI models that return them.

  ## Built‑in tools

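The **Update 06/02/2025** note in the README hunk above introduces `client.stream(...)`. The snippet below is a minimal sketch of what a call might look like; the constructor form and model id are assumptions, since the README's client-construction examples are not part of this diff.

```python
from lm_deluge import LLMClient

# Assumed constructor form and model id; adjust to however LLMClient is
# actually built in your version of the package.
client = LLMClient("gpt-4o-mini")

# Per the update note above: stream() prints tokens as they arrive, then
# returns an APIResponse once the stream finishes.
response = client.stream("Write a haiku about rate limits.")
print(response)
```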
{lm_deluge-0.0.12 → lm_deluge-0.0.14}/pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.12"
+ version = "0.0.14"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
@@ -28,6 +28,5 @@ dependencies = [
      "pdf2image",
      "pillow",
      "fastmcp>=2.4",
-     "fasttext-wheel",
-     "fasttext-langdetect",
+     "rich"
  ]
lm_deluge-0.0.14/src/lm_deluge/__init__.py

@@ -0,0 +1,17 @@
+ from .client import LLMClient, SamplingParams, APIResponse
+ from .prompt import Conversation, Message
+ from .tool import Tool
+ from .file import File
+ import dotenv
+
+ dotenv.load_dotenv()
+
+ __all__ = [
+     "LLMClient",
+     "SamplingParams",
+     "APIResponse",
+     "Conversation",
+     "Message",
+     "Tool",
+     "File",
+ ]
lm_deluge-0.0.14/src/lm_deluge/agent.py (file without changes)
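From the caller's side, the new `__init__.py` above means the public names are importable from the package root, and a local `.env` file is loaded automatically on import. A small sketch using only the names exported above:

```python
# Importing lm_deluge runs dotenv.load_dotenv(), so API keys kept in a local
# .env file (for example an Anthropic or OpenAI key) are picked up automatically.
from lm_deluge import (
    LLMClient,
    SamplingParams,
    APIResponse,
    Conversation,
    Message,
    Tool,
    File,
)

print([obj.__name__ for obj in (LLMClient, SamplingParams, APIResponse,
                                Conversation, Message, Tool, File)])
```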
{lm_deluge-0.0.12 → lm_deluge-0.0.14}/src/lm_deluge/api_requests/anthropic.py

@@ -1,9 +1,6 @@
- import asyncio
  from aiohttp import ClientResponse
  import json
  import os
- import warnings
- from tqdm import tqdm
  from typing import Callable

  from lm_deluge.prompt import (
@@ -14,12 +11,84 @@ from lm_deluge.prompt import (
      Thinking,
      CachePattern,
  )
+ from lm_deluge.tool import Tool
  from lm_deluge.usage import Usage
  from .base import APIRequestBase, APIResponse

  from ..tracker import StatusTracker
- from ..sampling_params import SamplingParams
+ from ..config import SamplingParams
  from ..models import APIModel
+ from ..computer_use.anthropic_tools import get_anthropic_cu_tools
+
+
+ def _build_anthropic_request(
+     model: APIModel,
+     prompt: Conversation,
+     tools: list[Tool] | None,
+     sampling_params: SamplingParams,
+     cache_pattern: CachePattern | None = None,
+     computer_use: bool = False,
+     display_width: int = 1024,
+     display_height: int = 768,
+ ):
+     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
+     request_header = {
+         "x-api-key": os.getenv(model.api_key_env_var),
+         "anthropic-version": "2023-06-01",
+         "content-type": "application/json",
+     }
+
+     # Add beta header for Computer Use
+     if computer_use:
+         request_header["anthropic-beta"] = "computer-use-2025-01-24"
+
+     request_json = {
+         "model": model.name,
+         "messages": messages,
+         "temperature": sampling_params.temperature,
+         "top_p": sampling_params.top_p,
+         "max_tokens": sampling_params.max_new_tokens,
+     }
+
+     # handle thinking
+     if model.reasoning_model and sampling_params.reasoning_effort:
+         # translate reasoning effort of low, medium, high to budget tokens
+         budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+             sampling_params.reasoning_effort
+         )
+         request_json["thinking"] = {
+             "type": "enabled",
+             "budget_tokens": budget,
+         }
+         request_json.pop("top_p")
+         request_json["temperature"] = 1.0
+         request_json["max_tokens"] += budget
+     else:
+         request_json["thinking"] = {"type": "disabled"}
+         if sampling_params.reasoning_effort:
+             print("ignoring reasoning_effort for non-reasoning model")
+     if system_message is not None:
+         request_json["system"] = system_message
+     if tools or computer_use:
+         tool_definitions = []
+         if tools:
+             tool_definitions.extend([tool.dump_for("anthropic") for tool in tools])
+         # Add Computer Use tools
+         if computer_use:
+             cu_tools = get_anthropic_cu_tools(
+                 model=model.id,
+                 display_width=display_width,  # todo: set from ComputerUseParams
+                 display_height=display_height,
+             )
+             tool_definitions.extend(cu_tools)
+
+         # Add cache control to last tool if tools_only caching is specified
+         if cache_pattern == "tools_only" and tool_definitions:
+             tool_definitions[-1]["cache_control"] = {"type": "ephemeral"}
+
+         request_json["tools"] = tool_definitions
+
+     return request_json, request_header


  class AnthropicRequest(APIRequestBase):
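To make the thinking-budget translation in `_build_anthropic_request` concrete, here is a standalone snippet that mirrors just that branch. It reproduces the logic shown above for illustration; it is not an import from the package.

```python
# Mirrors the reasoning_effort handling in _build_anthropic_request above.
BUDGETS = {"low": 1024, "medium": 4096, "high": 16384}

def apply_reasoning_effort(request_json: dict, effort: str) -> dict:
    budget = BUDGETS[effort]
    request_json["thinking"] = {"type": "enabled", "budget_tokens": budget}
    request_json.pop("top_p", None)       # top_p is dropped when thinking is on
    request_json["temperature"] = 1.0     # temperature is pinned to 1.0
    request_json["max_tokens"] += budget  # max_tokens covers thinking + output
    return request_json

req = {"temperature": 0.7, "top_p": 0.9, "max_tokens": 1000}
print(apply_reasoning_effort(req, "medium"))
# -> {'temperature': 1.0, 'max_tokens': 5096,
#     'thinking': {'type': 'enabled', 'budget_tokens': 4096}}
```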
@@ -32,18 +101,19 @@ class AnthropicRequest(APIRequestBase):
          prompt: Conversation,
          attempts_left: int,
          status_tracker: StatusTracker,
-         retry_queue: asyncio.Queue,
          results_arr: list,
          request_timeout: int = 30,
          sampling_params: SamplingParams = SamplingParams(),
-         pbar: tqdm | None = None,
          callback: Callable | None = None,
-         debug: bool = False,
          # for retries
          all_model_names: list[str] | None = None,
          all_sampling_params: list[SamplingParams] | None = None,
          tools: list | None = None,
          cache: CachePattern | None = None,
+         # Computer Use support
+         computer_use: bool = False,
+         display_width: int = 1024,
+         display_height: int = 768,
      ):
          super().__init__(
              task_id=task_id,
@@ -51,18 +121,18 @@ class AnthropicRequest(APIRequestBase):
              prompt=prompt,
              attempts_left=attempts_left,
              status_tracker=status_tracker,
-             retry_queue=retry_queue,
              results_arr=results_arr,
              request_timeout=request_timeout,
              sampling_params=sampling_params,
-             pbar=pbar,
              callback=callback,
-             debug=debug,
              all_model_names=all_model_names,
              all_sampling_params=all_sampling_params,
              tools=tools,
              cache=cache,
          )
+         self.computer_use = computer_use
+         self.display_width = display_width
+         self.display_height = display_height
          self.model = APIModel.from_registry(model_name)
          self.url = f"{self.model.api_base}/messages"

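The `computer_use`, `display_width`, and `display_height` parameters threaded through `AnthropicRequest` above back the client-level flag described in the README. Below is a hedged sketch of how that flag is used; the client construction, model id, and prompt format are assumptions, and only the `computer_use` argument to `process_prompts_sync` comes from this diff and the README.

```python
from lm_deluge import LLMClient

# Assumed constructor form and model id; the 1024x768 display defaults shown
# in the AnthropicRequest signature above are used unless overridden.
client = LLMClient("claude-3-7-sonnet")

# computer_use=True adds the computer-use beta header and tool definitions
# (see _build_anthropic_request above). Anthropic's API only, per the README.
responses = client.process_prompts_sync(
    ["Open the settings page and take a screenshot."],
    computer_use=True,
)
print(responses[0])
```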
@@ -70,52 +140,16 @@ class AnthropicRequest(APIRequestBase):
          if cache is not None:
              prompt.lock_images_as_bytes()

-         self.system_message, messages = prompt.to_anthropic(cache_pattern=cache)
-         self.request_header = {
-             "x-api-key": os.getenv(self.model.api_key_env_var),
-             "anthropic-version": "2023-06-01",
-             "content-type": "application/json",
-         }
-
-         self.request_json = {
-             "model": self.model.name,
-             "messages": messages,
-             "temperature": self.sampling_params.temperature,
-             "top_p": self.sampling_params.top_p,
-             "max_tokens": self.sampling_params.max_new_tokens,
-         }
-         # handle thinking
-         if self.model.reasoning_model:
-             if sampling_params.reasoning_effort:
-                 # translate reasoning effort of low, medium, high to budget tokens
-                 budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
-                     sampling_params.reasoning_effort
-                 )
-                 self.request_json["thinking"] = {
-                     "type": "enabled",
-                     "budget_tokens": budget,
-                 }
-                 self.request_json.pop("top_p")
-                 self.request_json["temperature"] = 1.0
-                 self.request_json["max_tokens"] += (
-                     budget  # assume max tokens is max completion tokens
-                 )
-             else:
-                 # no thinking
-                 self.request_json["thinking"] = {"type": "disabled"}
-         else:
-             if sampling_params.reasoning_effort:
-                 warnings.warn(
-                     f"Ignoring reasoning_effort param for non-reasoning model: {model_name}"
-                 )
-         if self.system_message is not None:
-             self.request_json["system"] = self.system_message
-         if tools:
-             tool_definitions = [tool.dump_for("anthropic") for tool in tools]
-             # Add cache control to last tool if tools_only caching is specified
-             if cache == "tools_only" and tool_definitions:
-                 tool_definitions[-1]["cache_control"] = {"type": "ephemeral"}
-             self.request_json["tools"] = tool_definitions
+         self.request_json, self.request_header = _build_anthropic_request(
+             self.model,
+             prompt,
+             tools,
+             sampling_params,
+             cache,
+             computer_use,
+             display_width,
+             display_height,
+         )

      async def handle_response(self, http_response: ClientResponse) -> APIResponse:
          is_error = False
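The `tools` list handed to `_build_anthropic_request` above is serialized with `tool.dump_for("anthropic")`. Below is a sketch of defining such a tool from a plain Python function; `Tool.from_function` is an assumption based on the README's "creating tools automatically from python functions" bullet and the `test_tool_from_function.py` test, not on code shown in this diff.

```python
from lm_deluge import Tool

def get_weather(city: str) -> str:
    """Return a short weather summary for a city."""
    return f"It is sunny in {city}."

# Assumed helper name (see lead-in); the README promises tools can be built
# automatically from Python functions.
weather_tool = Tool.from_function(get_weather)

# dump_for("anthropic") is the provider-specific serialization used in
# api_requests/anthropic.py; this is what lands in request_json["tools"].
print(weather_tool.dump_for("anthropic"))
```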
@@ -135,8 +169,6 @@ class AnthropicRequest(APIRequestBase):
              "anthropic-ratelimit-tokens-reset",
          ]:
              rate_limits[header] = http_response.headers.get(header, None)
-         if self.debug:
-             print(f"Rate limits: {rate_limits}")
          if status_code >= 200 and status_code < 300:
              try:
                  data = await http_response.json()