PyPI - anthropic - Versions diffs - 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl - Mend

anthropic 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

anthropic/_version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 __title__ = "anthropic"
-__version__ = "0.74.0"  # x-release-please-version
+__version__ = "0.75.0"  # x-release-please-version

anthropic/lib/streaming/_beta_messages.py CHANGED Viewed

@@ -468,7 +468,7 @@ def accumulate_event(
         current_snapshot.content.append(
             cast(
                 Any,  # Pydantic does not support generic unions at runtime
-                construct_type(type_=ParsedBetaContentBlock, value=event.content_block.model_dump()),
+                construct_type(type_=ParsedBetaContentBlock, value=event.content_block.to_dict()),
             ),
         )
     elif event.type == "content_block_delta":

anthropic/lib/tools/_beta_compaction_control.py ADDED Viewed

@@ -0,0 +1,48 @@
+from typing import TypedDict
+from typing_extensions import Required
+DEFAULT_SUMMARY_PROMPT = """You have been working on the task described above but have not yet completed it. Write a continuation summary that will allow you (or another instance of yourself) to resume work efficiently in a future context window where the conversation history will be replaced with this summary. Your summary should be structured, concise, and actionable. Include:
+1. Task Overview
+The user's core request and success criteria
+Any clarifications or constraints they specified
+2. Current State
+What has been completed so far
+Files created, modified, or analyzed (with paths if relevant)
+Key outputs or artifacts produced
+3. Important Discoveries
+Technical constraints or requirements uncovered
+Decisions made and their rationale
+Errors encountered and how they were resolved
+What approaches were tried that didn't work (and why)
+4. Next Steps
+Specific actions needed to complete the task
+Any blockers or open questions to resolve
+Priority order if multiple steps remain
+5. Context to Preserve
+User preferences or style requirements
+Domain-specific details that aren't obvious
+Any promises made to the user
+Be concise but complete—err on the side of including information that would prevent duplicate work or repeated mistakes. Write in a way that enables immediate resumption of the task.
+Wrap your summary in <summary></summary> tags."""
+DEFAULT_THRESHOLD = 100_000
+class CompactionControl(TypedDict, total=False):
+    """Options controlling automatic compaction of the tool runner's message history.
+    Only `enabled` is required; every other key is optional.
+    """
+    context_token_threshold: int
+    """The context token threshold at which to trigger compaction.
+    When the token usage reported for the latest response (input, including cache creation and
+    cache read tokens, plus output) reaches or exceeds this threshold, the message history will be
+    automatically summarized and compressed. Defaults to `DEFAULT_THRESHOLD` (100,000 tokens).
+    """
+    model: str
+    """
+    The model to use for generating the compaction summary.
+    If not specified, defaults to the same model used for the tool runner.
+    """
+    summary_prompt: str
+    """The prompt used to instruct the model on how to generate the summary.
+    If not specified, defaults to `DEFAULT_SUMMARY_PROMPT`.
+    """
+    enabled: Required[bool]
+    """Whether compaction is enabled. The tool runner never compacts when this is false."""

anthropic/lib/tools/_beta_functions.py CHANGED Viewed

@@ -14,9 +14,9 @@ from ... import _compat
 from ..._utils import is_dict
 from ..._compat import cached_property
 from ..._models import TypeAdapter
-from ...types.beta import BetaToolUnionParam
+from ...types.beta import BetaToolParam, BetaToolUnionParam
 from ..._utils._utils import CallableT
-from ...types.tool_param import ToolParam, InputSchema
+from ...types.tool_param import InputSchema
 from ...types.beta.beta_tool_result_block_param import Content as BetaContent
 log = logging.getLogger(__name__)
@@ -39,7 +39,10 @@ class BetaBuiltinFunctionTool(ABC):
     @property
     def name(self) -> str:
-        return self.to_dict()["name"]
+        """Return the name this tool is looked up by, derived from `to_dict()`."""
+        # Definitions that carry an "mcp_server_name" key are identified by that value;
+        # all other definitions are identified by their "name" key.
+        raw = self.to_dict()
+        if "mcp_server_name" in raw:
+            return raw["mcp_server_name"]
+        return raw["name"]
 class BetaAsyncBuiltinFunctionTool(ABC):
@@ -51,7 +54,10 @@ class BetaAsyncBuiltinFunctionTool(ABC):
     @property
     def name(self) -> str:
-        return self.to_dict()["name"]
+        """Return the name this tool is looked up by, derived from `to_dict()`."""
+        # Definitions that carry an "mcp_server_name" key are identified by that value;
+        # all other definitions are identified by their "name" key.
+        raw = self.to_dict()
+        if "mcp_server_name" in raw:
+            return raw["mcp_server_name"]
+        return raw["name"]
 class BaseFunctionTool(Generic[CallableT]):
@@ -72,6 +78,7 @@ class BaseFunctionTool(Generic[CallableT]):
         name: str | None = None,
         description: str | None = None,
         input_schema: InputSchema | type[BaseModel] | None = None,
+        defer_loading: bool | None = None,
     ) -> None:
         if _compat.PYDANTIC_V1:
             raise RuntimeError("Tool functions are only supported with Pydantic v2")
@@ -79,6 +86,7 @@ class BaseFunctionTool(Generic[CallableT]):
         self.func = func
         self._func_with_validate = pydantic.validate_call(func)
         self.name = name or func.__name__
+        self._defer_loading = defer_loading
         self.description = description or self._get_description_from_docstring()
@@ -94,12 +102,15 @@ class BaseFunctionTool(Generic[CallableT]):
     def __call__(self) -> CallableT:
         return self.func
-    def to_dict(self) -> ToolParam:
-        return {
+    def to_dict(self) -> BetaToolParam:
+        """Return the tool definition (name, description, input schema) as a `BetaToolParam` dict."""
+        defn: BetaToolParam = {
             "name": self.name,
             "description": self.description,
             "input_schema": self.input_schema,
         }
+        # "defer_loading" is only emitted when it was explicitly set, so the key is
+        # absent (rather than None) for tools created without it.
+        if self._defer_loading is not None:
+            defn["defer_loading"] = self._defer_loading
+        return defn
     @cached_property
     def _parsed_docstring(self) -> docstring_parser.Docstring:
@@ -211,6 +222,7 @@ def beta_tool(
     name: str | None = None,
     description: str | None = None,
     input_schema: InputSchema | type[BaseModel] | None = None,
+    defer_loading: bool | None = None,
 ) -> Callable[[FunctionT], BetaFunctionTool[FunctionT]]: ...
@@ -220,6 +232,7 @@ def beta_tool(
     name: str | None = None,
     description: str | None = None,
     input_schema: InputSchema | type[BaseModel] | None = None,
+    defer_loading: bool | None = None,
 ) -> BetaFunctionTool[FunctionT] | Callable[[FunctionT], BetaFunctionTool[FunctionT]]:
     """Create a FunctionTool from a function with automatic schema inference.
@@ -239,11 +252,15 @@ def beta_tool(
     if func is not None:
         # @beta_tool called without parentheses
-        return BetaFunctionTool(func=func, name=name, description=description, input_schema=input_schema)
+        return BetaFunctionTool(
+            func=func, name=name, description=description, input_schema=input_schema, defer_loading=defer_loading
+        )
     # @beta_tool()
     def decorator(func: FunctionT) -> BetaFunctionTool[FunctionT]:
-        return BetaFunctionTool(func=func, name=name, description=description, input_schema=input_schema)
+        return BetaFunctionTool(
+            func=func, name=name, description=description, input_schema=input_schema, defer_loading=defer_loading
+        )
     return decorator
@@ -259,6 +276,7 @@ def beta_async_tool(
     name: str | None = None,
     description: str | None = None,
     input_schema: InputSchema | type[BaseModel] | None = None,
+    defer_loading: bool | None = None,
 ) -> BetaAsyncFunctionTool[AsyncFunctionT]: ...
@@ -268,6 +286,7 @@ def beta_async_tool(
     name: str | None = None,
     description: str | None = None,
     input_schema: InputSchema | type[BaseModel] | None = None,
+    defer_loading: bool | None = None,
 ) -> Callable[[AsyncFunctionT], BetaAsyncFunctionTool[AsyncFunctionT]]: ...
@@ -277,6 +296,7 @@ def beta_async_tool(
     name: str | None = None,
     description: str | None = None,
     input_schema: InputSchema | type[BaseModel] | None = None,
+    defer_loading: bool | None = None,
 ) -> BetaAsyncFunctionTool[AsyncFunctionT] | Callable[[AsyncFunctionT], BetaAsyncFunctionTool[AsyncFunctionT]]:
     """Create an AsyncFunctionTool from a function with automatic schema inference.
@@ -301,6 +321,7 @@ def beta_async_tool(
             name=name,
             description=description,
             input_schema=input_schema,
+            defer_loading=defer_loading,
         )
     # @beta_async_tool()
@@ -310,6 +331,7 @@ def beta_async_tool(
             name=name,
             description=description,
             input_schema=input_schema,
+            defer_loading=defer_loading,
         )
     return decorator

anthropic/lib/tools/_beta_runner.py CHANGED Viewed

@@ -31,6 +31,7 @@ from ._beta_functions import (
     BetaBuiltinFunctionTool,
     BetaAsyncBuiltinFunctionTool,
 )
+from ._beta_compaction_control import DEFAULT_THRESHOLD, DEFAULT_SUMMARY_PROMPT, CompactionControl
 from ..streaming._beta_messages import BetaMessageStream, BetaAsyncMessageStream
 from ...types.beta.parsed_beta_message import ResponseFormatT, ParsedBetaMessage, ParsedBetaContentBlock
 from ...types.beta.message_create_params import ParseMessageCreateParamsBase
@@ -66,6 +67,7 @@ class BaseToolRunner(Generic[AnyFunctionToolT, ResponseFormatT]):
         options: RequestOptions,
         tools: Iterable[AnyFunctionToolT],
         max_iterations: int | None = None,
+        compaction_control: CompactionControl | None = None,
     ) -> None:
         self._tools_by_name = {tool.name: tool for tool in tools}
         self._params: ParseMessageCreateParamsBase[ResponseFormatT] = {
@@ -77,6 +79,7 @@ class BaseToolRunner(Generic[AnyFunctionToolT, ResponseFormatT]):
         self._cached_tool_call_response: BetaMessageParam | None = None
         self._max_iterations = max_iterations
         self._iteration_count = 0
+        self._compaction_control = compaction_control
     def set_messages_params(
         self,
@@ -122,9 +125,17 @@ class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Gene
         tools: Iterable[BetaRunnableTool],
         client: Anthropic,
         max_iterations: int | None = None,
+        compaction_control: CompactionControl | None = None,
     ) -> None:
-        super().__init__(params=params, options=options, tools=tools, max_iterations=max_iterations)
+        super().__init__(
+            params=params,
+            options=options,
+            tools=tools,
+            max_iterations=max_iterations,
+            compaction_control=compaction_control,
+        )
         self._client = client
         self._iterator = self.__run__()
         self._last_message: (
             Callable[[], ParsedBetaMessage[ResponseFormatT]] | ParsedBetaMessage[ResponseFormatT] | None
@@ -143,31 +154,112 @@ class BaseSyncToolRunner(BaseToolRunner[BetaRunnableTool, ResponseFormatT], Gene
         raise NotImplementedError()
         yield  # type: ignore[unreachable]
-    def __run__(self) -> Iterator[RunnerItemT]:
-        with self._handle_request() as item:
-            yield item
-            message = self._get_last_message()
-            assert message is not None
-        self._iteration_count += 1
+    def _check_and_compact(self) -> bool:
+        """
+        Check token usage and compact messages if threshold exceeded.
+        Returns True if compaction was performed, False otherwise.
+
+        Token usage is read from the last message's `usage`: input tokens (including cache
+        creation and cache read tokens) plus output tokens. When it is at or above the
+        configured `context_token_threshold` (default `DEFAULT_THRESHOLD`), a summary is
+        requested from the model and `set_messages_params` is called with params whose
+        `messages` hold a single user message containing the summary text.
+
+        Raises:
+            ValueError: if the first content block of the summary response is not text.
+        """
+        if self._compaction_control is None or not self._compaction_control["enabled"]:
+            return False
+        message = self._get_last_message()
+        # With no last message the usage counts as 0.
+        tokens_used = 0
+        if message is not None:
+            total_input_tokens = (
+                message.usage.input_tokens
+                + (message.usage.cache_creation_input_tokens or 0)
+                + (message.usage.cache_read_input_tokens or 0)
+            )
+            tokens_used = total_input_tokens + message.usage.output_tokens
+        threshold = self._compaction_control.get("context_token_threshold", DEFAULT_THRESHOLD)
+        # Compaction triggers when usage is at or above the threshold.
+        if tokens_used < threshold:
+            return False
+        # Perform compaction
+        log.info(f"Token usage {tokens_used} has exceeded the threshold of {threshold}. Performing compaction.")
+        model = self._compaction_control.get("model", self._params["model"])
+        # Shallow copy: the list is new, but the message dicts are shared with self._params.
+        messages = list(self._params["messages"])
+        if messages[-1]["role"] == "assistant":
+            # Remove tool_use blocks from the last message to avoid 400 error
+            # (tool_use requires tool_result, which we don't have yet)
+            non_tool_blocks = [
+                block
+                for block in messages[-1]["content"]
+                if isinstance(block, dict) and block.get("type") != "tool_use"
+            ]
+            if non_tool_blocks:
+                # NOTE(review): assigns into the shared message dict, so the message stored
+                # in self._params is modified as well.
+                messages[-1]["content"] = non_tool_blocks
+            else:
+                messages.pop()
+        # Append the summary prompt as the final user turn of the compaction request.
+        messages = [
+            *messages,
+            BetaMessageParam(
+                role="user",
+                content=self._compaction_control.get("summary_prompt", DEFAULT_SUMMARY_PROMPT),
+            ),
+        ]
-        while not self._should_stop():
-            response = self.generate_tool_call_response()
-            if response is None:
-                log.debug("Tool call was not requested, exiting from tool runner loop.")
-                return
+        # The extra header presumably tags the request as coming from the compaction helper.
+        response = self._client.beta.messages.create(
+            model=model,
+            messages=messages,
+            max_tokens=self._params["max_tokens"],
+            extra_headers={"X-Stainless-Helper": "compaction"},
+        )
-            if not self._messages_modified:
-                self.append_messages(message, response)
+        log.info(f"Compaction complete. New token usage: {response.usage.output_tokens}")
-            self._iteration_count += 1
-            self._messages_modified = False
-            self._cached_tool_call_response = None
+        # NOTE(review): raises IndexError if the response has no content blocks.
+        first_content = list(response.content)[0]
+        if first_content.type != "text":
+            raise ValueError("Compaction response content is not of type 'text'")
+        self.set_messages_params(
+            lambda params: {
+                **params,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": first_content.text,
+                            }
+                        ],
+                    }
+                ],
+            }
+        )
+        return True
+    def __run__(self) -> Iterator[RunnerItemT]:
+        """
+        Drive the tool runner loop, yielding one item per request until `_should_stop()` is true.
+
+        After each request the history is either compacted (see `_check_and_compact`) or the
+        tool call response is generated and appended. The generator returns as soon as no
+        tool call was requested.
+        """
+        while not self._should_stop():
             with self._handle_request() as item:
                 yield item
                 message = self._get_last_message()
                 assert message is not None
+            self._iteration_count += 1
+            # If the compaction was performed, skip tool call generation this iteration
+            if not self._check_and_compact():
+                response = self.generate_tool_call_response()
+                if response is None:
+                    log.debug("Tool call was not requested, exiting from tool runner loop.")
+                    return
+                # Skip the append when the messages were already modified during this iteration.
+                if not self._messages_modified:
+                    self.append_messages(message, response)
+            # Reset per-iteration state before the next request.
+            self._messages_modified = False
+            self._cached_tool_call_response = None
     def until_done(self) -> ParsedBetaMessage[ResponseFormatT]:
         """
         Consumes the tool runner stream and returns the last message if it has not been consumed yet.
@@ -274,9 +366,17 @@ class BaseAsyncToolRunner(
         tools: Iterable[BetaAsyncRunnableTool],
         client: AsyncAnthropic,
         max_iterations: int | None = None,
+        compaction_control: CompactionControl | None = None,
     ) -> None:
-        super().__init__(params=params, options=options, tools=tools, max_iterations=max_iterations)
+        super().__init__(
+            params=params,
+            options=options,
+            tools=tools,
+            max_iterations=max_iterations,
+            compaction_control=compaction_control,
+        )
         self._client = client
         self._iterator = self.__run__()
         self._last_message: (
             Callable[[], Coroutine[None, None, ParsedBetaMessage[ResponseFormatT]]]
@@ -297,30 +397,112 @@ class BaseAsyncToolRunner(
         raise NotImplementedError()
         yield  # type: ignore[unreachable]
-    async def __run__(self) -> AsyncIterator[RunnerItemT]:
-        async with self._handle_request() as item:
-            yield item
-            message = await self._get_last_message()
-            assert message is not None
-        self._iteration_count += 1
+    async def _check_and_compact(self) -> bool:
+        """
+        Check token usage and compact messages if threshold exceeded.
+        Returns True if compaction was performed, False otherwise.
+
+        Token usage is read from the last message's `usage`: input tokens (including cache
+        creation and cache read tokens) plus output tokens. When it is at or above the
+        configured `context_token_threshold` (default `DEFAULT_THRESHOLD`), a summary is
+        requested from the model and `set_messages_params` is called with params whose
+        `messages` hold a single user message containing the summary text.
+
+        Raises:
+            ValueError: if the first content block of the summary response is not text.
+        """
+        if self._compaction_control is None or not self._compaction_control["enabled"]:
+            return False
+        message = await self._get_last_message()
+        # With no last message the usage counts as 0.
+        tokens_used = 0
+        if message is not None:
+            total_input_tokens = (
+                message.usage.input_tokens
+                + (message.usage.cache_creation_input_tokens or 0)
+                + (message.usage.cache_read_input_tokens or 0)
+            )
+            tokens_used = total_input_tokens + message.usage.output_tokens
+        threshold = self._compaction_control.get("context_token_threshold", DEFAULT_THRESHOLD)
+        # Compaction triggers when usage is at or above the threshold.
+        if tokens_used < threshold:
+            return False
+        # Perform compaction
+        log.info(f"Token usage {tokens_used} has exceeded the threshold of {threshold}. Performing compaction.")
+        model = self._compaction_control.get("model", self._params["model"])
+        # Shallow copy: the list is new, but the message dicts are shared with self._params.
+        messages = list(self._params["messages"])
+        if messages[-1]["role"] == "assistant":
+            # Remove tool_use blocks from the last message to avoid 400 error
+            # (tool_use requires tool_result, which we don't have yet)
+            non_tool_blocks = [
+                block
+                for block in messages[-1]["content"]
+                if isinstance(block, dict) and block.get("type") != "tool_use"
+            ]
+            if non_tool_blocks:
+                messages[-1]["content"] = non_tool_blocks
+            else:
+                messages.pop()
+        # Build the request from the filtered copy (not self._params["messages"]), so an
+        # assistant message that was popped above stays out of the compaction request.
+        messages = [
+            *messages,
+            BetaMessageParam(
+                role="user",
+                content=self._compaction_control.get("summary_prompt", DEFAULT_SUMMARY_PROMPT),
+            ),
+        ]
-        while not self._should_stop():
-            response = await self.generate_tool_call_response()
-            if response is None:
-                log.debug("Tool call was not requested, exiting from tool runner loop.")
-                return
+        # The extra header presumably tags the request as coming from the compaction helper.
+        response = await self._client.beta.messages.create(
+            model=model,
+            messages=messages,
+            max_tokens=self._params["max_tokens"],
+            extra_headers={"X-Stainless-Helper": "compaction"},
+        )
-            if not self._messages_modified:
-                self.append_messages(message, response)
-            self._iteration_count += 1
-            self._messages_modified = False
-            self._cached_tool_call_response = None
+        log.info(f"Compaction complete. New token usage: {response.usage.output_tokens}")
+        # NOTE(review): raises IndexError if the response has no content blocks.
+        first_content = list(response.content)[0]
+        if first_content.type != "text":
+            raise ValueError("Compaction response content is not of type 'text'")
+        self.set_messages_params(
+            lambda params: {
+                **params,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": first_content.text,
+                            }
+                        ],
+                    }
+                ],
+            }
+        )
+        return True
+    async def __run__(self) -> AsyncIterator[RunnerItemT]:
+        """
+        Drive the tool runner loop, yielding one item per request until `_should_stop()` is true.
+
+        After each request the history is either compacted (see `_check_and_compact`) or the
+        tool call response is generated and appended. The generator returns as soon as no
+        tool call was requested.
+        """
+        while not self._should_stop():
             async with self._handle_request() as item:
                 yield item
                 message = await self._get_last_message()
                 assert message is not None
+            self._iteration_count += 1
+            # If the compaction was performed, skip tool call generation this iteration
+            if not await self._check_and_compact():
+                response = await self.generate_tool_call_response()
+                if response is None:
+                    log.debug("Tool call was not requested, exiting from tool runner loop.")
+                    return
+                # Skip the append when the messages were already modified during this iteration.
+                if not self._messages_modified:
+                    self.append_messages(message, response)
+            # Reset per-iteration state before the next request.
+            self._messages_modified = False
+            self._cached_tool_call_response = None
     async def until_done(self) -> ParsedBetaMessage[ResponseFormatT]:
         """
         Consumes the tool runner stream and returns the last message if it has not been consumed yet.

anthropic 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl

anthropic 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl