anthropic 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anthropic/_version.py +1 -1
- anthropic/lib/streaming/_beta_messages.py +1 -1
- anthropic/lib/tools/_beta_compaction_control.py +48 -0
- anthropic/lib/tools/_beta_functions.py +30 -8
- anthropic/lib/tools/_beta_runner.py +216 -34
- anthropic/resources/beta/messages/messages.py +76 -4
- anthropic/resources/messages/messages.py +2 -0
- anthropic/types/anthropic_beta_param.py +1 -0
- anthropic/types/beta/__init__.py +31 -0
- anthropic/types/beta/beta_code_execution_tool_20250522_param.py +9 -1
- anthropic/types/beta/beta_code_execution_tool_20250825_param.py +9 -1
- anthropic/types/beta/beta_content_block.py +2 -0
- anthropic/types/beta/beta_content_block_param.py +2 -0
- anthropic/types/beta/beta_direct_caller.py +11 -0
- anthropic/types/beta/beta_direct_caller_param.py +11 -0
- anthropic/types/beta/beta_mcp_tool_config_param.py +13 -0
- anthropic/types/beta/beta_mcp_tool_default_config_param.py +13 -0
- anthropic/types/beta/beta_mcp_toolset_param.py +28 -0
- anthropic/types/beta/beta_memory_tool_20250818_param.py +11 -1
- anthropic/types/beta/beta_output_config_param.py +13 -0
- anthropic/types/beta/beta_raw_content_block_start_event.py +2 -0
- anthropic/types/beta/beta_server_tool_caller.py +13 -0
- anthropic/types/beta/beta_server_tool_caller_param.py +13 -0
- anthropic/types/beta/beta_server_tool_use_block.py +20 -4
- anthropic/types/beta/beta_server_tool_use_block_param.py +19 -4
- anthropic/types/beta/beta_tool_bash_20241022_param.py +11 -1
- anthropic/types/beta/beta_tool_bash_20250124_param.py +11 -1
- anthropic/types/beta/beta_tool_computer_use_20241022_param.py +11 -1
- anthropic/types/beta/beta_tool_computer_use_20250124_param.py +11 -1
- anthropic/types/beta/beta_tool_computer_use_20251124_param.py +47 -0
- anthropic/types/beta/beta_tool_param.py +11 -1
- anthropic/types/beta/beta_tool_reference_block.py +13 -0
- anthropic/types/beta/beta_tool_reference_block_param.py +19 -0
- anthropic/types/beta/beta_tool_result_block_param.py +6 -1
- anthropic/types/beta/beta_tool_search_tool_bm25_20251119_param.py +33 -0
- anthropic/types/beta/beta_tool_search_tool_regex_20251119_param.py +33 -0
- anthropic/types/beta/beta_tool_search_tool_result_block.py +20 -0
- anthropic/types/beta/beta_tool_search_tool_result_block_param.py +25 -0
- anthropic/types/beta/beta_tool_search_tool_result_error.py +16 -0
- anthropic/types/beta/beta_tool_search_tool_result_error_param.py +13 -0
- anthropic/types/beta/beta_tool_search_tool_search_result_block.py +15 -0
- anthropic/types/beta/beta_tool_search_tool_search_result_block_param.py +16 -0
- anthropic/types/beta/beta_tool_text_editor_20241022_param.py +11 -1
- anthropic/types/beta/beta_tool_text_editor_20250124_param.py +11 -1
- anthropic/types/beta/beta_tool_text_editor_20250429_param.py +11 -1
- anthropic/types/beta/beta_tool_text_editor_20250728_param.py +11 -1
- anthropic/types/beta/beta_tool_union_param.py +8 -0
- anthropic/types/beta/beta_tool_use_block.py +11 -3
- anthropic/types/beta/beta_tool_use_block_param.py +10 -3
- anthropic/types/beta/beta_web_fetch_tool_20250910_param.py +9 -1
- anthropic/types/beta/beta_web_search_tool_20250305_param.py +9 -1
- anthropic/types/beta/message_count_tokens_params.py +15 -0
- anthropic/types/beta/message_create_params.py +7 -0
- anthropic/types/beta/messages/batch_create_params.py +4 -4
- anthropic/types/messages/batch_create_params.py +2 -2
- anthropic/types/model.py +2 -0
- anthropic/types/model_param.py +2 -0
- {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/METADATA +1 -1
- {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/RECORD +61 -41
- {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/WHEEL +0 -0
- {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/licenses/LICENSE +0 -0
anthropic/_version.py
CHANGED
|
@@ -468,7 +468,7 @@ def accumulate_event(
|
|
|
468
468
|
current_snapshot.content.append(
|
|
469
469
|
cast(
|
|
470
470
|
Any, # Pydantic does not support generic unions at runtime
|
|
471
|
-
construct_type(type_=ParsedBetaContentBlock, value=event.content_block.
|
|
471
|
+
construct_type(type_=ParsedBetaContentBlock, value=event.content_block.to_dict()),
|
|
472
472
|
),
|
|
473
473
|
)
|
|
474
474
|
elif event.type == "content_block_delta":
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from typing import TypedDict
|
|
2
|
+
from typing_extensions import Required
|
|
3
|
+
|
|
4
|
+
DEFAULT_SUMMARY_PROMPT = """You have been working on the task described above but have not yet completed it. Write a continuation summary that will allow you (or another instance of yourself) to resume work efficiently in a future context window where the conversation history will be replaced with this summary. Your summary should be structured, concise, and actionable. Include:
|
|
5
|
+
1. Task Overview
|
|
6
|
+
The user's core request and success criteria
|
|
7
|
+
Any clarifications or constraints they specified
|
|
8
|
+
2. Current State
|
|
9
|
+
What has been completed so far
|
|
10
|
+
Files created, modified, or analyzed (with paths if relevant)
|
|
11
|
+
Key outputs or artifacts produced
|
|
12
|
+
3. Important Discoveries
|
|
13
|
+
Technical constraints or requirements uncovered
|
|
14
|
+
Decisions made and their rationale
|
|
15
|
+
Errors encountered and how they were resolved
|
|
16
|
+
What approaches were tried that didn't work (and why)
|
|
17
|
+
4. Next Steps
|
|
18
|
+
Specific actions needed to complete the task
|
|
19
|
+
Any blockers or open questions to resolve
|
|
20
|
+
Priority order if multiple steps remain
|
|
21
|
+
5. Context to Preserve
|
|
22
|
+
User preferences or style requirements
|
|
23
|
+
Domain-specific details that aren't obvious
|
|
24
|
+
Any promises made to the user
|
|
25
|
+
Be concise but complete—err on the side of including information that would prevent duplicate work or repeated mistakes. Write in a way that enables immediate resumption of the task.
|
|
26
|
+
Wrap your summary in <summary></summary> tags."""
|
|
27
|
+
|
|
28
|
+
DEFAULT_THRESHOLD = 100_000
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CompactionControl(TypedDict, total=False):
|
|
32
|
+
context_token_threshold: int
|
|
33
|
+
"""The context token threshold at which to trigger compaction.
|
|
34
|
+
|
|
35
|
+
When the cumulative token count (input + output) across all messages exceeds this threshold,
|
|
36
|
+
the message history will be automatically summarized and compressed. Defaults to 150,000 tokens.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
model: str
|
|
40
|
+
"""
|
|
41
|
+
The model to use for generating the compaction summary.
|
|
42
|
+
If not specified, defaults to the same model used for the tool runner.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
summary_prompt: str
|
|
46
|
+
"""The prompt used to instruct the model on how to generate the summary."""
|
|
47
|
+
|
|
48
|
+
enabled: Required[bool]
|
|
@@ -14,9 +14,9 @@ from ... import _compat
|
|
|
14
14
|
from ..._utils import is_dict
|
|
15
15
|
from ..._compat import cached_property
|
|
16
16
|
from ..._models import TypeAdapter
|
|
17
|
-
from ...types.beta import BetaToolUnionParam
|
|
17
|
+
from ...types.beta import BetaToolParam, BetaToolUnionParam
|
|
18
18
|
from ..._utils._utils import CallableT
|
|
19
|
-
from ...types.tool_param import
|
|
19
|
+
from ...types.tool_param import InputSchema
|
|
20
20
|
from ...types.beta.beta_tool_result_block_param import Content as BetaContent
|
|
21
21
|
|
|
22
22
|
log = logging.getLogger(__name__)
|
|
@@ -39,7 +39,10 @@ class BetaBuiltinFunctionTool(ABC):
|
|
|
39
39
|
|
|
40
40
|
@property
|
|
41
41
|
def name(self) -> str:
|
|
42
|
-
|
|
42
|
+
raw = self.to_dict()
|
|
43
|
+
if "mcp_server_name" in raw:
|
|
44
|
+
return raw["mcp_server_name"]
|
|
45
|
+
return raw["name"]
|
|
43
46
|
|
|
44
47
|
|
|
45
48
|
class BetaAsyncBuiltinFunctionTool(ABC):
|
|
@@ -51,7 +54,10 @@ class BetaAsyncBuiltinFunctionTool(ABC):
|
|
|
51
54
|
|
|
52
55
|
@property
|
|
53
56
|
def name(self) -> str:
|
|
54
|
-
|
|
57
|
+
raw = self.to_dict()
|
|
58
|
+
if "mcp_server_name" in raw:
|
|
59
|
+
return raw["mcp_server_name"]
|
|
60
|
+
return raw["name"]
|
|
55
61
|
|
|
56
62
|
|
|
57
63
|
class BaseFunctionTool(Generic[CallableT]):
|
|
@@ -72,6 +78,7 @@ class BaseFunctionTool(Generic[CallableT]):
|
|
|
72
78
|
name: str | None = None,
|
|
73
79
|
description: str | None = None,
|
|
74
80
|
input_schema: InputSchema | type[BaseModel] | None = None,
|
|
81
|
+
defer_loading: bool | None = None,
|
|
75
82
|
) -> None:
|
|
76
83
|
if _compat.PYDANTIC_V1:
|
|
77
84
|
raise RuntimeError("Tool functions are only supported with Pydantic v2")
|
|
@@ -79,6 +86,7 @@ class BaseFunctionTool(Generic[CallableT]):
|
|
|
79
86
|
self.func = func
|
|
80
87
|
self._func_with_validate = pydantic.validate_call(func)
|
|
81
88
|
self.name = name or func.__name__
|
|
89
|
+
self._defer_loading = defer_loading
|
|
82
90
|
|
|
83
91
|
self.description = description or self._get_description_from_docstring()
|
|
84
92
|
|
|
@@ -94,12 +102,15 @@ class BaseFunctionTool(Generic[CallableT]):
|
|
|
94
102
|
def __call__(self) -> CallableT:
|
|
95
103
|
return self.func
|
|
96
104
|
|
|
97
|
-
def to_dict(self) ->
|
|
98
|
-
|
|
105
|
+
def to_dict(self) -> BetaToolParam:
|
|
106
|
+
defn: BetaToolParam = {
|
|
99
107
|
"name": self.name,
|
|
100
108
|
"description": self.description,
|
|
101
109
|
"input_schema": self.input_schema,
|
|
102
110
|
}
|
|
111
|
+
if self._defer_loading is not None:
|
|
112
|
+
defn["defer_loading"] = self._defer_loading
|
|
113
|
+
return defn
|
|
103
114
|
|
|
104
115
|
@cached_property
|
|
105
116
|
def _parsed_docstring(self) -> docstring_parser.Docstring:
|
|
@@ -211,6 +222,7 @@ def beta_tool(
|
|
|
211
222
|
name: str | None = None,
|
|
212
223
|
description: str | None = None,
|
|
213
224
|
input_schema: InputSchema | type[BaseModel] | None = None,
|
|
225
|
+
defer_loading: bool | None = None,
|
|
214
226
|
) -> Callable[[FunctionT], BetaFunctionTool[FunctionT]]: ...
|
|
215
227
|
|
|
216
228
|
|
|
@@ -220,6 +232,7 @@ def beta_tool(
|
|
|
220
232
|
name: str | None = None,
|
|
221
233
|
description: str | None = None,
|
|
222
234
|
input_schema: InputSchema | type[BaseModel] | None = None,
|
|
235
|
+
defer_loading: bool | None = None,
|
|
223
236
|
) -> BetaFunctionTool[FunctionT] | Callable[[FunctionT], BetaFunctionTool[FunctionT]]:
|
|
224
237
|
"""Create a FunctionTool from a function with automatic schema inference.
|
|
225
238
|
|
|
@@ -239,11 +252,15 @@ def beta_tool(
|
|
|
239
252
|
|
|
240
253
|
if func is not None:
|
|
241
254
|
# @beta_tool called without parentheses
|
|
242
|
-
return BetaFunctionTool(
|
|
255
|
+
return BetaFunctionTool(
|
|
256
|
+
func=func, name=name, description=description, input_schema=input_schema, defer_loading=defer_loading
|
|
257
|
+
)
|
|
243
258
|
|
|
244
259
|
# @beta_tool()
|
|
245
260
|
def decorator(func: FunctionT) -> BetaFunctionTool[FunctionT]:
|
|
246
|
-
return BetaFunctionTool(
|
|
261
|
+
return BetaFunctionTool(
|
|
262
|
+
func=func, name=name, description=description, input_schema=input_schema, defer_loading=defer_loading
|
|
263
|
+
)
|
|
247
264
|
|
|
248
265
|
return decorator
|
|
249
266
|
|
|
@@ -259,6 +276,7 @@ def beta_async_tool(
|
|
|
259
276
|
name: str | None = None,
|
|
260
277
|
description: str | None = None,
|
|
261
278
|
input_schema: InputSchema | type[BaseModel] | None = None,
|
|
279
|
+
defer_loading: bool | None = None,
|
|
262
280
|
) -> BetaAsyncFunctionTool[AsyncFunctionT]: ...
|
|
263
281
|
|
|
264
282
|
|
|
@@ -268,6 +286,7 @@ def beta_async_tool(
|
|
|
268
286
|
name: str | None = None,
|
|
269
287
|
description: str | None = None,
|
|
270
288
|
input_schema: InputSchema | type[BaseModel] | None = None,
|
|
289
|
+
defer_loading: bool | None = None,
|
|
271
290
|
) -> Callable[[AsyncFunctionT], BetaAsyncFunctionTool[AsyncFunctionT]]: ...
|
|
272
291
|
|
|
273
292
|
|
|
@@ -277,6 +296,7 @@ def beta_async_tool(
|
|
|
277
296
|
name: str | None = None,
|
|
278
297
|
description: str | None = None,
|
|
279
298
|
input_schema: InputSchema | type[BaseModel] | None = None,
|
|
299
|
+
defer_loading: bool | None = None,
|
|
280
300
|
) -> BetaAsyncFunctionTool[AsyncFunctionT] | Callable[[AsyncFunctionT], BetaAsyncFunctionTool[AsyncFunctionT]]:
|
|
281
301
|
"""Create an AsyncFunctionTool from a function with automatic schema inference.
|
|
282
302
|
|
|
@@ -301,6 +321,7 @@ def beta_async_tool(
|
|
|
301
321
|
name=name,
|
|
302
322
|
description=description,
|
|
303
323
|
input_schema=input_schema,
|
|
324
|
+
defer_loading=defer_loading,
|
|
304
325
|
)
|
|
305
326
|
|
|
306
327
|
# @beta_async_tool()
|
|
@@ -310,6 +331,7 @@ def beta_async_tool(
|
|
|
310
331
|
name=name,
|
|
311
332
|
description=description,
|
|
312
333
|
input_schema=input_schema,
|
|
334
|
+
defer_loading=defer_loading,
|
|
313
335
|
)
|
|
314
336
|
|
|
315
337
|
return decorator
|
|
@@ -31,6 +31,7 @@ from ._beta_functions import (
|
|
|
31
31
|
BetaBuiltinFunctionTool,
|
|
32
32
|
BetaAsyncBuiltinFunctionTool,
|
|
33
33
|
)
|
|
34
|
+
from ._beta_compaction_control import DEFAULT_THRESHOLD, DEFAULT_SUMMARY_PROMPT, CompactionControl
|
|
34
35
|
from ..streaming._beta_messages import BetaMessageStream, BetaAsyncMessageStream
|
|
35
36
|
from ...types.beta.parsed_beta_message import ResponseFormatT, ParsedBetaMessage, ParsedBetaContentBlock
|
|
36
37
|
from ...types.beta.message_create_params import ParseMessageCreateParamsBase
|
|
@@ -66,6 +67,7 @@ class BaseToolRunner(Generic[AnyFunctionToolT, ResponseFormatT]):
|
|
|
66
67
|
options: RequestOptions,
|
|
67
68
|
tools: Iterable[AnyFunctionToolT],
|
|
68
69
|
max_iterations: int | None = None,
|
|
70
|
+
compaction_control: CompactionControl | None = None,
|
|
69
71
|
) -> None:
|
|
70
72
|
self._tools_by_name = {tool.name: tool for tool in tools}
|
|
71
73
|
self._params: ParseMessageCreateParamsBase[ResponseFormatT] = {
|
|
@@ -77,6 +79,7 @@ class BaseToolRunner(Generic[AnyFunctionToolT, ResponseFormatT]):
|
|
|
77
79
|
self._cached_tool_call_response: BetaMessageParam | None = None
|
|
78
80
|
self._max_iterations = max_iterations
|
|
79
81
|
self._iteration_count = 0
|
|
82
|
+
self._compaction_control = compaction_control
|
|
80
83
|
|
|
81
84
|
def set_messages_params(
|
|
82
85
|
self,
|
|
@@ -122,9 +125,17 @@ class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Gene
|
|
|
122
125
|
tools: Iterable[BetaRunnableTool],
|
|
123
126
|
client: Anthropic,
|
|
124
127
|
max_iterations: int | None = None,
|
|
128
|
+
compaction_control: CompactionControl | None = None,
|
|
125
129
|
) -> None:
|
|
126
|
-
super().__init__(
|
|
130
|
+
super().__init__(
|
|
131
|
+
params=params,
|
|
132
|
+
options=options,
|
|
133
|
+
tools=tools,
|
|
134
|
+
max_iterations=max_iterations,
|
|
135
|
+
compaction_control=compaction_control,
|
|
136
|
+
)
|
|
127
137
|
self._client = client
|
|
138
|
+
|
|
128
139
|
self._iterator = self.__run__()
|
|
129
140
|
self._last_message: (
|
|
130
141
|
Callable[[], ParsedBetaMessage[ResponseFormatT]] | ParsedBetaMessage[ResponseFormatT] | None
|
|
@@ -143,31 +154,112 @@ class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Gene
|
|
|
143
154
|
raise NotImplementedError()
|
|
144
155
|
yield # type: ignore[unreachable]
|
|
145
156
|
|
|
146
|
-
def
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
self.
|
|
157
|
+
def _check_and_compact(self) -> bool:
|
|
158
|
+
"""
|
|
159
|
+
Check token usage and compact messages if threshold exceeded.
|
|
160
|
+
Returns True if compaction was performed, False otherwise.
|
|
161
|
+
"""
|
|
162
|
+
if self._compaction_control is None or not self._compaction_control["enabled"]:
|
|
163
|
+
return False
|
|
164
|
+
|
|
165
|
+
message = self._get_last_message()
|
|
166
|
+
tokens_used = 0
|
|
167
|
+
if message is not None:
|
|
168
|
+
total_input_tokens = (
|
|
169
|
+
message.usage.input_tokens
|
|
170
|
+
+ (message.usage.cache_creation_input_tokens or 0)
|
|
171
|
+
+ (message.usage.cache_read_input_tokens or 0)
|
|
172
|
+
)
|
|
173
|
+
tokens_used = total_input_tokens + message.usage.output_tokens
|
|
174
|
+
|
|
175
|
+
threshold = self._compaction_control.get("context_token_threshold", DEFAULT_THRESHOLD)
|
|
176
|
+
|
|
177
|
+
if tokens_used < threshold:
|
|
178
|
+
return False
|
|
179
|
+
|
|
180
|
+
# Perform compaction
|
|
181
|
+
log.info(f"Token usage {tokens_used} has exceeded the threshold of {threshold}. Performing compaction.")
|
|
182
|
+
|
|
183
|
+
model = self._compaction_control.get("model", self._params["model"])
|
|
184
|
+
|
|
185
|
+
messages = list(self._params["messages"])
|
|
186
|
+
|
|
187
|
+
if messages[-1]["role"] == "assistant":
|
|
188
|
+
# Remove tool_use blocks from the last message to avoid 400 error
|
|
189
|
+
# (tool_use requires tool_result, which we don't have yet)
|
|
190
|
+
non_tool_blocks = [
|
|
191
|
+
block
|
|
192
|
+
for block in messages[-1]["content"]
|
|
193
|
+
if isinstance(block, dict) and block.get("type") != "tool_use"
|
|
194
|
+
]
|
|
195
|
+
|
|
196
|
+
if non_tool_blocks:
|
|
197
|
+
messages[-1]["content"] = non_tool_blocks
|
|
198
|
+
else:
|
|
199
|
+
messages.pop()
|
|
200
|
+
|
|
201
|
+
messages = [
|
|
202
|
+
*messages,
|
|
203
|
+
BetaMessageParam(
|
|
204
|
+
role="user",
|
|
205
|
+
content=self._compaction_control.get("summary_prompt", DEFAULT_SUMMARY_PROMPT),
|
|
206
|
+
),
|
|
207
|
+
]
|
|
152
208
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
209
|
+
response = self._client.beta.messages.create(
|
|
210
|
+
model=model,
|
|
211
|
+
messages=messages,
|
|
212
|
+
max_tokens=self._params["max_tokens"],
|
|
213
|
+
extra_headers={"X-Stainless-Helper": "compaction"},
|
|
214
|
+
)
|
|
158
215
|
|
|
159
|
-
|
|
160
|
-
self.append_messages(message, response)
|
|
216
|
+
log.info(f"Compaction complete. New token usage: {response.usage.output_tokens}")
|
|
161
217
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
218
|
+
first_content = list(response.content)[0]
|
|
219
|
+
|
|
220
|
+
if first_content.type != "text":
|
|
221
|
+
raise ValueError("Compaction response content is not of type 'text'")
|
|
222
|
+
|
|
223
|
+
self.set_messages_params(
|
|
224
|
+
lambda params: {
|
|
225
|
+
**params,
|
|
226
|
+
"messages": [
|
|
227
|
+
{
|
|
228
|
+
"role": "user",
|
|
229
|
+
"content": [
|
|
230
|
+
{
|
|
231
|
+
"type": "text",
|
|
232
|
+
"text": first_content.text,
|
|
233
|
+
}
|
|
234
|
+
],
|
|
235
|
+
}
|
|
236
|
+
],
|
|
237
|
+
}
|
|
238
|
+
)
|
|
239
|
+
return True
|
|
165
240
|
|
|
241
|
+
def __run__(self) -> Iterator[RunnerItemT]:
|
|
242
|
+
while not self._should_stop():
|
|
166
243
|
with self._handle_request() as item:
|
|
167
244
|
yield item
|
|
168
245
|
message = self._get_last_message()
|
|
169
246
|
assert message is not None
|
|
170
247
|
|
|
248
|
+
self._iteration_count += 1
|
|
249
|
+
|
|
250
|
+
# If the compaction was performed, skip tool call generation this iteration
|
|
251
|
+
if not self._check_and_compact():
|
|
252
|
+
response = self.generate_tool_call_response()
|
|
253
|
+
if response is None:
|
|
254
|
+
log.debug("Tool call was not requested, exiting from tool runner loop.")
|
|
255
|
+
return
|
|
256
|
+
|
|
257
|
+
if not self._messages_modified:
|
|
258
|
+
self.append_messages(message, response)
|
|
259
|
+
|
|
260
|
+
self._messages_modified = False
|
|
261
|
+
self._cached_tool_call_response = None
|
|
262
|
+
|
|
171
263
|
def until_done(self) -> ParsedBetaMessage[ResponseFormatT]:
|
|
172
264
|
"""
|
|
173
265
|
Consumes the tool runner stream and returns the last message if it has not been consumed yet.
|
|
@@ -274,9 +366,17 @@ class BaseAsyncToolRunner(
|
|
|
274
366
|
tools: Iterable[BetaAsyncRunnableTool],
|
|
275
367
|
client: AsyncAnthropic,
|
|
276
368
|
max_iterations: int | None = None,
|
|
369
|
+
compaction_control: CompactionControl | None = None,
|
|
277
370
|
) -> None:
|
|
278
|
-
super().__init__(
|
|
371
|
+
super().__init__(
|
|
372
|
+
params=params,
|
|
373
|
+
options=options,
|
|
374
|
+
tools=tools,
|
|
375
|
+
max_iterations=max_iterations,
|
|
376
|
+
compaction_control=compaction_control,
|
|
377
|
+
)
|
|
279
378
|
self._client = client
|
|
379
|
+
|
|
280
380
|
self._iterator = self.__run__()
|
|
281
381
|
self._last_message: (
|
|
282
382
|
Callable[[], Coroutine[None, None, ParsedBetaMessage[ResponseFormatT]]]
|
|
@@ -297,30 +397,112 @@ class BaseAsyncToolRunner(
|
|
|
297
397
|
raise NotImplementedError()
|
|
298
398
|
yield # type: ignore[unreachable]
|
|
299
399
|
|
|
300
|
-
async def
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
self.
|
|
400
|
+
async def _check_and_compact(self) -> bool:
|
|
401
|
+
"""
|
|
402
|
+
Check token usage and compact messages if threshold exceeded.
|
|
403
|
+
Returns True if compaction was performed, False otherwise.
|
|
404
|
+
"""
|
|
405
|
+
if self._compaction_control is None or not self._compaction_control["enabled"]:
|
|
406
|
+
return False
|
|
407
|
+
|
|
408
|
+
message = await self._get_last_message()
|
|
409
|
+
tokens_used = 0
|
|
410
|
+
if message is not None:
|
|
411
|
+
total_input_tokens = (
|
|
412
|
+
message.usage.input_tokens
|
|
413
|
+
+ (message.usage.cache_creation_input_tokens or 0)
|
|
414
|
+
+ (message.usage.cache_read_input_tokens or 0)
|
|
415
|
+
)
|
|
416
|
+
tokens_used = total_input_tokens + message.usage.output_tokens
|
|
417
|
+
|
|
418
|
+
threshold = self._compaction_control.get("context_token_threshold", DEFAULT_THRESHOLD)
|
|
419
|
+
|
|
420
|
+
if tokens_used < threshold:
|
|
421
|
+
return False
|
|
422
|
+
|
|
423
|
+
# Perform compaction
|
|
424
|
+
log.info(f"Token usage {tokens_used} has exceeded the threshold of {threshold}. Performing compaction.")
|
|
425
|
+
|
|
426
|
+
model = self._compaction_control.get("model", self._params["model"])
|
|
427
|
+
|
|
428
|
+
messages = list(self._params["messages"])
|
|
429
|
+
|
|
430
|
+
if messages[-1]["role"] == "assistant":
|
|
431
|
+
# Remove tool_use blocks from the last message to avoid 400 error
|
|
432
|
+
# (tool_use requires tool_result, which we don't have yet)
|
|
433
|
+
non_tool_blocks = [
|
|
434
|
+
block
|
|
435
|
+
for block in messages[-1]["content"]
|
|
436
|
+
if isinstance(block, dict) and block.get("type") != "tool_use"
|
|
437
|
+
]
|
|
438
|
+
|
|
439
|
+
if non_tool_blocks:
|
|
440
|
+
messages[-1]["content"] = non_tool_blocks
|
|
441
|
+
else:
|
|
442
|
+
messages.pop()
|
|
443
|
+
|
|
444
|
+
messages = [
|
|
445
|
+
*self._params["messages"],
|
|
446
|
+
BetaMessageParam(
|
|
447
|
+
role="user",
|
|
448
|
+
content=self._compaction_control.get("summary_prompt", DEFAULT_SUMMARY_PROMPT),
|
|
449
|
+
),
|
|
450
|
+
]
|
|
306
451
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
452
|
+
response = await self._client.beta.messages.create(
|
|
453
|
+
model=model,
|
|
454
|
+
messages=messages,
|
|
455
|
+
max_tokens=self._params["max_tokens"],
|
|
456
|
+
extra_headers={"X-Stainless-Helper": "compaction"},
|
|
457
|
+
)
|
|
312
458
|
|
|
313
|
-
|
|
314
|
-
self.append_messages(message, response)
|
|
315
|
-
self._iteration_count += 1
|
|
316
|
-
self._messages_modified = False
|
|
317
|
-
self._cached_tool_call_response = None
|
|
459
|
+
log.info(f"Compaction complete. New token usage: {response.usage.output_tokens}")
|
|
318
460
|
|
|
461
|
+
first_content = list(response.content)[0]
|
|
462
|
+
|
|
463
|
+
if first_content.type != "text":
|
|
464
|
+
raise ValueError("Compaction response content is not of type 'text'")
|
|
465
|
+
|
|
466
|
+
self.set_messages_params(
|
|
467
|
+
lambda params: {
|
|
468
|
+
**params,
|
|
469
|
+
"messages": [
|
|
470
|
+
{
|
|
471
|
+
"role": "user",
|
|
472
|
+
"content": [
|
|
473
|
+
{
|
|
474
|
+
"type": "text",
|
|
475
|
+
"text": first_content.text,
|
|
476
|
+
}
|
|
477
|
+
],
|
|
478
|
+
}
|
|
479
|
+
],
|
|
480
|
+
}
|
|
481
|
+
)
|
|
482
|
+
return True
|
|
483
|
+
|
|
484
|
+
async def __run__(self) -> AsyncIterator[RunnerItemT]:
|
|
485
|
+
while not self._should_stop():
|
|
319
486
|
async with self._handle_request() as item:
|
|
320
487
|
yield item
|
|
321
488
|
message = await self._get_last_message()
|
|
322
489
|
assert message is not None
|
|
323
490
|
|
|
491
|
+
self._iteration_count += 1
|
|
492
|
+
|
|
493
|
+
# If the compaction was performed, skip tool call generation this iteration
|
|
494
|
+
if not await self._check_and_compact():
|
|
495
|
+
response = await self.generate_tool_call_response()
|
|
496
|
+
if response is None:
|
|
497
|
+
log.debug("Tool call was not requested, exiting from tool runner loop.")
|
|
498
|
+
return
|
|
499
|
+
|
|
500
|
+
if not self._messages_modified:
|
|
501
|
+
self.append_messages(message, response)
|
|
502
|
+
|
|
503
|
+
self._messages_modified = False
|
|
504
|
+
self._cached_tool_call_response = None
|
|
505
|
+
|
|
324
506
|
async def until_done(self) -> ParsedBetaMessage[ResponseFormatT]:
|
|
325
507
|
"""
|
|
326
508
|
Consumes the tool runner stream and returns the last message if it has not been consumed yet.
|