anthropic 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61):
  1. anthropic/_version.py +1 -1
  2. anthropic/lib/streaming/_beta_messages.py +1 -1
  3. anthropic/lib/tools/_beta_compaction_control.py +48 -0
  4. anthropic/lib/tools/_beta_functions.py +30 -8
  5. anthropic/lib/tools/_beta_runner.py +216 -34
  6. anthropic/resources/beta/messages/messages.py +76 -4
  7. anthropic/resources/messages/messages.py +2 -0
  8. anthropic/types/anthropic_beta_param.py +1 -0
  9. anthropic/types/beta/__init__.py +31 -0
  10. anthropic/types/beta/beta_code_execution_tool_20250522_param.py +9 -1
  11. anthropic/types/beta/beta_code_execution_tool_20250825_param.py +9 -1
  12. anthropic/types/beta/beta_content_block.py +2 -0
  13. anthropic/types/beta/beta_content_block_param.py +2 -0
  14. anthropic/types/beta/beta_direct_caller.py +11 -0
  15. anthropic/types/beta/beta_direct_caller_param.py +11 -0
  16. anthropic/types/beta/beta_mcp_tool_config_param.py +13 -0
  17. anthropic/types/beta/beta_mcp_tool_default_config_param.py +13 -0
  18. anthropic/types/beta/beta_mcp_toolset_param.py +28 -0
  19. anthropic/types/beta/beta_memory_tool_20250818_param.py +11 -1
  20. anthropic/types/beta/beta_output_config_param.py +13 -0
  21. anthropic/types/beta/beta_raw_content_block_start_event.py +2 -0
  22. anthropic/types/beta/beta_server_tool_caller.py +13 -0
  23. anthropic/types/beta/beta_server_tool_caller_param.py +13 -0
  24. anthropic/types/beta/beta_server_tool_use_block.py +20 -4
  25. anthropic/types/beta/beta_server_tool_use_block_param.py +19 -4
  26. anthropic/types/beta/beta_tool_bash_20241022_param.py +11 -1
  27. anthropic/types/beta/beta_tool_bash_20250124_param.py +11 -1
  28. anthropic/types/beta/beta_tool_computer_use_20241022_param.py +11 -1
  29. anthropic/types/beta/beta_tool_computer_use_20250124_param.py +11 -1
  30. anthropic/types/beta/beta_tool_computer_use_20251124_param.py +47 -0
  31. anthropic/types/beta/beta_tool_param.py +11 -1
  32. anthropic/types/beta/beta_tool_reference_block.py +13 -0
  33. anthropic/types/beta/beta_tool_reference_block_param.py +19 -0
  34. anthropic/types/beta/beta_tool_result_block_param.py +6 -1
  35. anthropic/types/beta/beta_tool_search_tool_bm25_20251119_param.py +33 -0
  36. anthropic/types/beta/beta_tool_search_tool_regex_20251119_param.py +33 -0
  37. anthropic/types/beta/beta_tool_search_tool_result_block.py +20 -0
  38. anthropic/types/beta/beta_tool_search_tool_result_block_param.py +25 -0
  39. anthropic/types/beta/beta_tool_search_tool_result_error.py +16 -0
  40. anthropic/types/beta/beta_tool_search_tool_result_error_param.py +13 -0
  41. anthropic/types/beta/beta_tool_search_tool_search_result_block.py +15 -0
  42. anthropic/types/beta/beta_tool_search_tool_search_result_block_param.py +16 -0
  43. anthropic/types/beta/beta_tool_text_editor_20241022_param.py +11 -1
  44. anthropic/types/beta/beta_tool_text_editor_20250124_param.py +11 -1
  45. anthropic/types/beta/beta_tool_text_editor_20250429_param.py +11 -1
  46. anthropic/types/beta/beta_tool_text_editor_20250728_param.py +11 -1
  47. anthropic/types/beta/beta_tool_union_param.py +8 -0
  48. anthropic/types/beta/beta_tool_use_block.py +11 -3
  49. anthropic/types/beta/beta_tool_use_block_param.py +10 -3
  50. anthropic/types/beta/beta_web_fetch_tool_20250910_param.py +9 -1
  51. anthropic/types/beta/beta_web_search_tool_20250305_param.py +9 -1
  52. anthropic/types/beta/message_count_tokens_params.py +15 -0
  53. anthropic/types/beta/message_create_params.py +7 -0
  54. anthropic/types/beta/messages/batch_create_params.py +4 -4
  55. anthropic/types/messages/batch_create_params.py +2 -2
  56. anthropic/types/model.py +2 -0
  57. anthropic/types/model_param.py +2 -0
  58. {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/METADATA +1 -1
  59. {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/RECORD +61 -41
  60. {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/WHEEL +0 -0
  61. {anthropic-0.74.0.dist-info → anthropic-0.75.0.dist-info}/licenses/LICENSE +0 -0
anthropic/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "anthropic"
4
- __version__ = "0.74.0" # x-release-please-version
4
+ __version__ = "0.75.0" # x-release-please-version
@@ -468,7 +468,7 @@ def accumulate_event(
468
468
  current_snapshot.content.append(
469
469
  cast(
470
470
  Any, # Pydantic does not support generic unions at runtime
471
- construct_type(type_=ParsedBetaContentBlock, value=event.content_block.model_dump()),
471
+ construct_type(type_=ParsedBetaContentBlock, value=event.content_block.to_dict()),
472
472
  ),
473
473
  )
474
474
  elif event.type == "content_block_delta":
@@ -0,0 +1,48 @@
1
+ from typing import TypedDict
2
+ from typing_extensions import Required
3
+
4
+ DEFAULT_SUMMARY_PROMPT = """You have been working on the task described above but have not yet completed it. Write a continuation summary that will allow you (or another instance of yourself) to resume work efficiently in a future context window where the conversation history will be replaced with this summary. Your summary should be structured, concise, and actionable. Include:
5
+ 1. Task Overview
6
+ The user's core request and success criteria
7
+ Any clarifications or constraints they specified
8
+ 2. Current State
9
+ What has been completed so far
10
+ Files created, modified, or analyzed (with paths if relevant)
11
+ Key outputs or artifacts produced
12
+ 3. Important Discoveries
13
+ Technical constraints or requirements uncovered
14
+ Decisions made and their rationale
15
+ Errors encountered and how they were resolved
16
+ What approaches were tried that didn't work (and why)
17
+ 4. Next Steps
18
+ Specific actions needed to complete the task
19
+ Any blockers or open questions to resolve
20
+ Priority order if multiple steps remain
21
+ 5. Context to Preserve
22
+ User preferences or style requirements
23
+ Domain-specific details that aren't obvious
24
+ Any promises made to the user
25
+ Be concise but complete—err on the side of including information that would prevent duplicate work or repeated mistakes. Write in a way that enables immediate resumption of the task.
26
+ Wrap your summary in <summary></summary> tags."""
27
+
28
+ DEFAULT_THRESHOLD = 100_000
29
+
30
+
31
+ class CompactionControl(TypedDict, total=False):
32
+ context_token_threshold: int
33
+ """The context token threshold at which to trigger compaction.
34
+
35
+ When the cumulative token count (input + output) across all messages exceeds this threshold,
36
+ the message history will be automatically summarized and compressed. Defaults to 150,000 tokens.
37
+ """
38
+
39
+ model: str
40
+ """
41
+ The model to use for generating the compaction summary.
42
+ If not specified, defaults to the same model used for the tool runner.
43
+ """
44
+
45
+ summary_prompt: str
46
+ """The prompt used to instruct the model on how to generate the summary."""
47
+
48
+ enabled: Required[bool]
@@ -14,9 +14,9 @@ from ... import _compat
14
14
  from ..._utils import is_dict
15
15
  from ..._compat import cached_property
16
16
  from ..._models import TypeAdapter
17
- from ...types.beta import BetaToolUnionParam
17
+ from ...types.beta import BetaToolParam, BetaToolUnionParam
18
18
  from ..._utils._utils import CallableT
19
- from ...types.tool_param import ToolParam, InputSchema
19
+ from ...types.tool_param import InputSchema
20
20
  from ...types.beta.beta_tool_result_block_param import Content as BetaContent
21
21
 
22
22
  log = logging.getLogger(__name__)
@@ -39,7 +39,10 @@ class BetaBuiltinFunctionTool(ABC):
39
39
 
40
40
  @property
41
41
  def name(self) -> str:
42
- return self.to_dict()["name"]
42
+ raw = self.to_dict()
43
+ if "mcp_server_name" in raw:
44
+ return raw["mcp_server_name"]
45
+ return raw["name"]
43
46
 
44
47
 
45
48
  class BetaAsyncBuiltinFunctionTool(ABC):
@@ -51,7 +54,10 @@ class BetaAsyncBuiltinFunctionTool(ABC):
51
54
 
52
55
  @property
53
56
  def name(self) -> str:
54
- return self.to_dict()["name"]
57
+ raw = self.to_dict()
58
+ if "mcp_server_name" in raw:
59
+ return raw["mcp_server_name"]
60
+ return raw["name"]
55
61
 
56
62
 
57
63
  class BaseFunctionTool(Generic[CallableT]):
@@ -72,6 +78,7 @@ class BaseFunctionTool(Generic[CallableT]):
72
78
  name: str | None = None,
73
79
  description: str | None = None,
74
80
  input_schema: InputSchema | type[BaseModel] | None = None,
81
+ defer_loading: bool | None = None,
75
82
  ) -> None:
76
83
  if _compat.PYDANTIC_V1:
77
84
  raise RuntimeError("Tool functions are only supported with Pydantic v2")
@@ -79,6 +86,7 @@ class BaseFunctionTool(Generic[CallableT]):
79
86
  self.func = func
80
87
  self._func_with_validate = pydantic.validate_call(func)
81
88
  self.name = name or func.__name__
89
+ self._defer_loading = defer_loading
82
90
 
83
91
  self.description = description or self._get_description_from_docstring()
84
92
 
@@ -94,12 +102,15 @@ class BaseFunctionTool(Generic[CallableT]):
94
102
  def __call__(self) -> CallableT:
95
103
  return self.func
96
104
 
97
def to_dict(self) -> BetaToolParam:
    """Build the tool definition dict for this function tool.

    The result always contains ``name``, ``description`` and ``input_schema``.
    ``defer_loading`` is included only when a value other than ``None`` was
    supplied at construction time.
    """
    tool_definition: BetaToolParam = {
        "name": self.name,
        "description": self.description,
        "input_schema": self.input_schema,
    }
    if self._defer_loading is None:
        return tool_definition
    tool_definition["defer_loading"] = self._defer_loading
    return tool_definition
103
114
 
104
115
  @cached_property
105
116
  def _parsed_docstring(self) -> docstring_parser.Docstring:
@@ -211,6 +222,7 @@ def beta_tool(
211
222
  name: str | None = None,
212
223
  description: str | None = None,
213
224
  input_schema: InputSchema | type[BaseModel] | None = None,
225
+ defer_loading: bool | None = None,
214
226
  ) -> Callable[[FunctionT], BetaFunctionTool[FunctionT]]: ...
215
227
 
216
228
 
@@ -220,6 +232,7 @@ def beta_tool(
220
232
  name: str | None = None,
221
233
  description: str | None = None,
222
234
  input_schema: InputSchema | type[BaseModel] | None = None,
235
+ defer_loading: bool | None = None,
223
236
  ) -> BetaFunctionTool[FunctionT] | Callable[[FunctionT], BetaFunctionTool[FunctionT]]:
224
237
  """Create a FunctionTool from a function with automatic schema inference.
225
238
 
@@ -239,11 +252,15 @@ def beta_tool(
239
252
 
240
253
  if func is not None:
241
254
  # @beta_tool called without parentheses
242
- return BetaFunctionTool(func=func, name=name, description=description, input_schema=input_schema)
255
+ return BetaFunctionTool(
256
+ func=func, name=name, description=description, input_schema=input_schema, defer_loading=defer_loading
257
+ )
243
258
 
244
259
  # @beta_tool()
245
260
  def decorator(func: FunctionT) -> BetaFunctionTool[FunctionT]:
246
- return BetaFunctionTool(func=func, name=name, description=description, input_schema=input_schema)
261
+ return BetaFunctionTool(
262
+ func=func, name=name, description=description, input_schema=input_schema, defer_loading=defer_loading
263
+ )
247
264
 
248
265
  return decorator
249
266
 
@@ -259,6 +276,7 @@ def beta_async_tool(
259
276
  name: str | None = None,
260
277
  description: str | None = None,
261
278
  input_schema: InputSchema | type[BaseModel] | None = None,
279
+ defer_loading: bool | None = None,
262
280
  ) -> BetaAsyncFunctionTool[AsyncFunctionT]: ...
263
281
 
264
282
 
@@ -268,6 +286,7 @@ def beta_async_tool(
268
286
  name: str | None = None,
269
287
  description: str | None = None,
270
288
  input_schema: InputSchema | type[BaseModel] | None = None,
289
+ defer_loading: bool | None = None,
271
290
  ) -> Callable[[AsyncFunctionT], BetaAsyncFunctionTool[AsyncFunctionT]]: ...
272
291
 
273
292
 
@@ -277,6 +296,7 @@ def beta_async_tool(
277
296
  name: str | None = None,
278
297
  description: str | None = None,
279
298
  input_schema: InputSchema | type[BaseModel] | None = None,
299
+ defer_loading: bool | None = None,
280
300
  ) -> BetaAsyncFunctionTool[AsyncFunctionT] | Callable[[AsyncFunctionT], BetaAsyncFunctionTool[AsyncFunctionT]]:
281
301
  """Create an AsyncFunctionTool from a function with automatic schema inference.
282
302
 
@@ -301,6 +321,7 @@ def beta_async_tool(
301
321
  name=name,
302
322
  description=description,
303
323
  input_schema=input_schema,
324
+ defer_loading=defer_loading,
304
325
  )
305
326
 
306
327
  # @beta_async_tool()
@@ -310,6 +331,7 @@ def beta_async_tool(
310
331
  name=name,
311
332
  description=description,
312
333
  input_schema=input_schema,
334
+ defer_loading=defer_loading,
313
335
  )
314
336
 
315
337
  return decorator
@@ -31,6 +31,7 @@ from ._beta_functions import (
31
31
  BetaBuiltinFunctionTool,
32
32
  BetaAsyncBuiltinFunctionTool,
33
33
  )
34
+ from ._beta_compaction_control import DEFAULT_THRESHOLD, DEFAULT_SUMMARY_PROMPT, CompactionControl
34
35
  from ..streaming._beta_messages import BetaMessageStream, BetaAsyncMessageStream
35
36
  from ...types.beta.parsed_beta_message import ResponseFormatT, ParsedBetaMessage, ParsedBetaContentBlock
36
37
  from ...types.beta.message_create_params import ParseMessageCreateParamsBase
@@ -66,6 +67,7 @@ class BaseToolRunner(Generic[AnyFunctionToolT, ResponseFormatT]):
66
67
  options: RequestOptions,
67
68
  tools: Iterable[AnyFunctionToolT],
68
69
  max_iterations: int | None = None,
70
+ compaction_control: CompactionControl | None = None,
69
71
  ) -> None:
70
72
  self._tools_by_name = {tool.name: tool for tool in tools}
71
73
  self._params: ParseMessageCreateParamsBase[ResponseFormatT] = {
@@ -77,6 +79,7 @@ class BaseToolRunner(Generic[AnyFunctionToolT, ResponseFormatT]):
77
79
  self._cached_tool_call_response: BetaMessageParam | None = None
78
80
  self._max_iterations = max_iterations
79
81
  self._iteration_count = 0
82
+ self._compaction_control = compaction_control
80
83
 
81
84
  def set_messages_params(
82
85
  self,
@@ -122,9 +125,17 @@ class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Gene
122
125
  tools: Iterable[BetaRunnableTool],
123
126
  client: Anthropic,
124
127
  max_iterations: int | None = None,
128
+ compaction_control: CompactionControl | None = None,
125
129
  ) -> None:
126
- super().__init__(params=params, options=options, tools=tools, max_iterations=max_iterations)
130
+ super().__init__(
131
+ params=params,
132
+ options=options,
133
+ tools=tools,
134
+ max_iterations=max_iterations,
135
+ compaction_control=compaction_control,
136
+ )
127
137
  self._client = client
138
+
128
139
  self._iterator = self.__run__()
129
140
  self._last_message: (
130
141
  Callable[[], ParsedBetaMessage[ResponseFormatT]] | ParsedBetaMessage[ResponseFormatT] | None
@@ -143,31 +154,112 @@ class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Gene
143
154
  raise NotImplementedError()
144
155
  yield # type: ignore[unreachable]
145
156
 
146
- def __run__(self) -> Iterator[RunnerItemT]:
147
- with self._handle_request() as item:
148
- yield item
149
- message = self._get_last_message()
150
- assert message is not None
151
- self._iteration_count += 1
157
def _check_and_compact(self) -> bool:
    """Check token usage and compact the message history if the threshold is reached.

    Nothing happens unless compaction is configured and ``enabled``. Token usage
    is taken from the last message: input tokens (including cache creation and
    cache read tokens) plus output tokens, or 0 when there is no last message.
    If usage is below ``context_token_threshold`` (default ``DEFAULT_THRESHOLD``)
    the method returns without calling the API. Otherwise the current messages
    plus the summary prompt are sent to the compaction model, and the runner's
    ``messages`` are replaced by a single user message holding the summary text.

    Returns:
        True if compaction was performed, False otherwise.

    Raises:
        ValueError: If the compaction response has no content, or its first
            content block is not of type ``text``.
    """
    control = self._compaction_control
    if control is None or not control["enabled"]:
        return False

    message = self._get_last_message()
    tokens_used = 0
    if message is not None:
        usage = message.usage
        total_input_tokens = (
            usage.input_tokens + (usage.cache_creation_input_tokens or 0) + (usage.cache_read_input_tokens or 0)
        )
        tokens_used = total_input_tokens + usage.output_tokens

    threshold = control.get("context_token_threshold", DEFAULT_THRESHOLD)
    if tokens_used < threshold:
        return False

    # Perform compaction
    log.info(f"Token usage {tokens_used} has exceeded the threshold of {threshold}. Performing compaction.")

    model = control.get("model", self._params["model"])

    # Shallow copy: the list is ours to edit, the message dicts inside are still the caller's.
    messages = list(self._params["messages"])

    if messages and messages[-1]["role"] == "assistant":
        # Remove tool_use blocks from the last message to avoid 400 error
        # (tool_use requires tool_result, which we don't have yet)
        non_tool_blocks = [
            block
            for block in messages[-1]["content"]
            if isinstance(block, dict) and block.get("type") != "tool_use"
        ]

        if non_tool_blocks:
            # Swap in a new dict instead of assigning to the existing one, so the
            # caller's message is left untouched if the summary request fails.
            messages[-1] = {**messages[-1], "content": non_tool_blocks}
        else:
            messages.pop()

    messages.append(
        BetaMessageParam(
            role="user",
            content=control.get("summary_prompt", DEFAULT_SUMMARY_PROMPT),
        )
    )

    response = self._client.beta.messages.create(
        model=model,
        messages=messages,
        max_tokens=self._params["max_tokens"],
        extra_headers={"X-Stainless-Helper": "compaction"},
    )

    log.info(f"Compaction complete. New token usage: {response.usage.output_tokens}")

    content_blocks = list(response.content)
    if not content_blocks:
        # Report an empty response explicitly rather than via a bare IndexError.
        raise ValueError("Compaction response did not contain any content")

    first_content = content_blocks[0]
    if first_content.type != "text":
        raise ValueError("Compaction response content is not of type 'text'")

    self.set_messages_params(
        lambda params: {
            **params,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": first_content.text,
                        }
                    ],
                }
            ],
        }
    )
    return True
165
240
 
241
# Generator driving the sync tool runner (rendered diff hunk; indentation is not shown).
# While `_should_stop()` is false it yields the item from `_handle_request()`, reads the
# last message, increments `_iteration_count`, then calls `_check_and_compact()`.
# `generate_tool_call_response()` is only called when that check returns a falsy value;
# a `None` response ends the generator.
# NOTE(review): the exact nesting of the trailing statements (inside the `if` vs. the
# `while` body) cannot be confirmed from this view - verify against the real file.
+ def __run__(self) -> Iterator[RunnerItemT]:
242
+ while not self._should_stop():
166
243
  with self._handle_request() as item:
167
244
  yield item
168
245
  message = self._get_last_message()
169
246
  assert message is not None
170
247
 
248
+ self._iteration_count += 1
249
+
250
+ # If the compaction was performed, skip tool call generation this iteration
251
+ if not self._check_and_compact():
252
+ response = self.generate_tool_call_response()
253
+ if response is None:
254
+ log.debug("Tool call was not requested, exiting from tool runner loop.")
255
+ return
256
+
257
+ if not self._messages_modified:
258
+ self.append_messages(message, response)
259
+
260
+ self._messages_modified = False
261
+ self._cached_tool_call_response = None
262
+
171
263
  def until_done(self) -> ParsedBetaMessage[ResponseFormatT]:
172
264
  """
173
265
  Consumes the tool runner stream and returns the last message if it has not been consumed yet.
@@ -274,9 +366,17 @@ class BaseAsyncToolRunner(
274
366
  tools: Iterable[BetaAsyncRunnableTool],
275
367
  client: AsyncAnthropic,
276
368
  max_iterations: int | None = None,
369
+ compaction_control: CompactionControl | None = None,
277
370
  ) -> None:
278
- super().__init__(params=params, options=options, tools=tools, max_iterations=max_iterations)
371
+ super().__init__(
372
+ params=params,
373
+ options=options,
374
+ tools=tools,
375
+ max_iterations=max_iterations,
376
+ compaction_control=compaction_control,
377
+ )
279
378
  self._client = client
379
+
280
380
  self._iterator = self.__run__()
281
381
  self._last_message: (
282
382
  Callable[[], Coroutine[None, None, ParsedBetaMessage[ResponseFormatT]]]
@@ -297,30 +397,112 @@ class BaseAsyncToolRunner(
297
397
  raise NotImplementedError()
298
398
  yield # type: ignore[unreachable]
299
399
 
300
- async def __run__(self) -> AsyncIterator[RunnerItemT]:
301
- async with self._handle_request() as item:
302
- yield item
303
- message = await self._get_last_message()
304
- assert message is not None
305
- self._iteration_count += 1
400
async def _check_and_compact(self) -> bool:
    """Check token usage and compact the message history if the threshold is reached.

    Nothing happens unless compaction is configured and ``enabled``. Token usage
    is taken from the last message: input tokens (including cache creation and
    cache read tokens) plus output tokens, or 0 when there is no last message.
    If usage is below ``context_token_threshold`` (default ``DEFAULT_THRESHOLD``)
    the method returns without calling the API. Otherwise the current messages
    plus the summary prompt are sent to the compaction model, and the runner's
    ``messages`` are replaced by a single user message holding the summary text.

    Returns:
        True if compaction was performed, False otherwise.

    Raises:
        ValueError: If the compaction response has no content, or its first
            content block is not of type ``text``.
    """
    control = self._compaction_control
    if control is None or not control["enabled"]:
        return False

    message = await self._get_last_message()
    tokens_used = 0
    if message is not None:
        usage = message.usage
        total_input_tokens = (
            usage.input_tokens + (usage.cache_creation_input_tokens or 0) + (usage.cache_read_input_tokens or 0)
        )
        tokens_used = total_input_tokens + usage.output_tokens

    threshold = control.get("context_token_threshold", DEFAULT_THRESHOLD)
    if tokens_used < threshold:
        return False

    # Perform compaction
    log.info(f"Token usage {tokens_used} has exceeded the threshold of {threshold}. Performing compaction.")

    model = control.get("model", self._params["model"])

    # Shallow copy: the list is ours to edit, the message dicts inside are still the caller's.
    messages = list(self._params["messages"])

    if messages and messages[-1]["role"] == "assistant":
        # Remove tool_use blocks from the last message to avoid 400 error
        # (tool_use requires tool_result, which we don't have yet)
        non_tool_blocks = [
            block
            for block in messages[-1]["content"]
            if isinstance(block, dict) and block.get("type") != "tool_use"
        ]

        if non_tool_blocks:
            # Swap in a new dict instead of assigning to the existing one, so the
            # caller's message is left untouched if the summary request fails.
            messages[-1] = {**messages[-1], "content": non_tool_blocks}
        else:
            messages.pop()

    # Append the prompt to the pruned local copy. Rebuilding the request from
    # self._params["messages"] would bring back a trailing assistant message
    # that was just removed above.
    messages.append(
        BetaMessageParam(
            role="user",
            content=control.get("summary_prompt", DEFAULT_SUMMARY_PROMPT),
        )
    )

    response = await self._client.beta.messages.create(
        model=model,
        messages=messages,
        max_tokens=self._params["max_tokens"],
        extra_headers={"X-Stainless-Helper": "compaction"},
    )

    log.info(f"Compaction complete. New token usage: {response.usage.output_tokens}")

    content_blocks = list(response.content)
    if not content_blocks:
        # Report an empty response explicitly rather than via a bare IndexError.
        raise ValueError("Compaction response did not contain any content")

    first_content = content_blocks[0]
    if first_content.type != "text":
        raise ValueError("Compaction response content is not of type 'text'")

    self.set_messages_params(
        lambda params: {
            **params,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": first_content.text,
                        }
                    ],
                }
            ],
        }
    )
    return True
483
+
484
# Async generator driving the async tool runner (rendered diff hunk; indentation is not shown).
# While `_should_stop()` is false it yields the item from `_handle_request()`, awaits the
# last message, increments `_iteration_count`, then awaits `_check_and_compact()`.
# `generate_tool_call_response()` is only awaited when that check returns a falsy value;
# a `None` response ends the generator.
# NOTE(review): the exact nesting of the trailing statements (inside the `if` vs. the
# `while` body) cannot be confirmed from this view - verify against the real file.
+ async def __run__(self) -> AsyncIterator[RunnerItemT]:
485
+ while not self._should_stop():
319
486
  async with self._handle_request() as item:
320
487
  yield item
321
488
  message = await self._get_last_message()
322
489
  assert message is not None
323
490
 
491
+ self._iteration_count += 1
492
+
493
+ # If the compaction was performed, skip tool call generation this iteration
494
+ if not await self._check_and_compact():
495
+ response = await self.generate_tool_call_response()
496
+ if response is None:
497
+ log.debug("Tool call was not requested, exiting from tool runner loop.")
498
+ return
499
+
500
+ if not self._messages_modified:
501
+ self.append_messages(message, response)
502
+
503
+ self._messages_modified = False
504
+ self._cached_tool_call_response = None
505
+
324
506
  async def until_done(self) -> ParsedBetaMessage[ResponseFormatT]:
325
507
  """
326
508
  Consumes the tool runner stream and returns the last message if it has not been consumed yet.