mirascope 2.0.0a3__py3-none-any.whl → 2.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirascope/api/_generated/__init__.py +62 -6
- mirascope/api/_generated/client.py +8 -0
- mirascope/api/_generated/errors/__init__.py +11 -1
- mirascope/api/_generated/errors/conflict_error.py +15 -0
- mirascope/api/_generated/errors/forbidden_error.py +15 -0
- mirascope/api/_generated/errors/internal_server_error.py +15 -0
- mirascope/api/_generated/errors/not_found_error.py +15 -0
- mirascope/api/_generated/organizations/__init__.py +25 -0
- mirascope/api/_generated/organizations/client.py +380 -0
- mirascope/api/_generated/organizations/raw_client.py +876 -0
- mirascope/api/_generated/organizations/types/__init__.py +23 -0
- mirascope/api/_generated/organizations/types/organizations_create_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_get_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_update_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
- mirascope/api/_generated/projects/__init__.py +17 -0
- mirascope/api/_generated/projects/client.py +458 -0
- mirascope/api/_generated/projects/raw_client.py +1016 -0
- mirascope/api/_generated/projects/types/__init__.py +15 -0
- mirascope/api/_generated/projects/types/projects_create_response.py +30 -0
- mirascope/api/_generated/projects/types/projects_get_response.py +30 -0
- mirascope/api/_generated/projects/types/projects_list_response_item.py +30 -0
- mirascope/api/_generated/projects/types/projects_update_response.py +30 -0
- mirascope/api/_generated/reference.md +586 -0
- mirascope/api/_generated/types/__init__.py +20 -4
- mirascope/api/_generated/types/already_exists_error.py +24 -0
- mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
- mirascope/api/_generated/types/database_error.py +24 -0
- mirascope/api/_generated/types/database_error_tag.py +5 -0
- mirascope/api/_generated/types/http_api_decode_error.py +1 -3
- mirascope/api/_generated/types/issue.py +1 -5
- mirascope/api/_generated/types/not_found_error_body.py +24 -0
- mirascope/api/_generated/types/not_found_error_tag.py +5 -0
- mirascope/api/_generated/types/permission_denied_error.py +24 -0
- mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
- mirascope/api/_generated/types/property_key.py +2 -2
- mirascope/api/_generated/types/{property_key_tag.py → property_key_key.py} +3 -5
- mirascope/api/_generated/types/{property_key_tag_tag.py → property_key_key_tag.py} +1 -1
- mirascope/llm/__init__.py +4 -0
- mirascope/llm/providers/__init__.py +6 -0
- mirascope/llm/providers/anthropic/__init__.py +6 -1
- mirascope/llm/providers/anthropic/_utils/__init__.py +15 -5
- mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
- mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
- mirascope/llm/providers/anthropic/_utils/decode.py +39 -7
- mirascope/llm/providers/anthropic/_utils/encode.py +156 -64
- mirascope/llm/providers/anthropic/beta_provider.py +322 -0
- mirascope/llm/providers/anthropic/model_id.py +10 -27
- mirascope/llm/providers/anthropic/model_info.py +87 -0
- mirascope/llm/providers/anthropic/provider.py +127 -145
- mirascope/llm/providers/base/_utils.py +15 -1
- mirascope/llm/providers/google/_utils/decode.py +55 -3
- mirascope/llm/providers/google/_utils/encode.py +14 -6
- mirascope/llm/providers/google/model_id.py +7 -13
- mirascope/llm/providers/google/model_info.py +62 -0
- mirascope/llm/providers/google/provider.py +8 -4
- mirascope/llm/providers/load_provider.py +8 -2
- mirascope/llm/providers/mlx/_utils.py +23 -1
- mirascope/llm/providers/mlx/encoding/transformers.py +17 -1
- mirascope/llm/providers/mlx/provider.py +4 -0
- mirascope/llm/providers/ollama/__init__.py +19 -0
- mirascope/llm/providers/ollama/provider.py +71 -0
- mirascope/llm/providers/openai/completions/__init__.py +6 -1
- mirascope/llm/providers/openai/completions/_utils/decode.py +57 -5
- mirascope/llm/providers/openai/completions/_utils/encode.py +9 -8
- mirascope/llm/providers/openai/completions/base_provider.py +513 -0
- mirascope/llm/providers/openai/completions/provider.py +13 -447
- mirascope/llm/providers/openai/model_info.py +57 -0
- mirascope/llm/providers/openai/provider.py +16 -4
- mirascope/llm/providers/openai/responses/_utils/decode.py +55 -4
- mirascope/llm/providers/openai/responses/_utils/encode.py +9 -9
- mirascope/llm/providers/openai/responses/provider.py +20 -21
- mirascope/llm/providers/provider_id.py +11 -1
- mirascope/llm/providers/provider_registry.py +3 -1
- mirascope/llm/providers/together/__init__.py +19 -0
- mirascope/llm/providers/together/provider.py +40 -0
- mirascope/llm/responses/__init__.py +3 -0
- mirascope/llm/responses/base_response.py +4 -0
- mirascope/llm/responses/base_stream_response.py +25 -1
- mirascope/llm/responses/finish_reason.py +1 -0
- mirascope/llm/responses/response.py +9 -0
- mirascope/llm/responses/root_response.py +5 -1
- mirascope/llm/responses/usage.py +95 -0
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/METADATA +3 -3
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/RECORD +91 -50
- mirascope/llm/providers/openai/shared/__init__.py +0 -7
- mirascope/llm/providers/openai/shared/_utils.py +0 -59
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/WHEEL +0 -0
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/licenses/LICENSE +0 -0
mirascope/llm/providers/anthropic/_utils/beta_encode.py

@@ -0,0 +1,216 @@
+"""Beta Anthropic message encoding and request preparation."""
+
+from collections.abc import Sequence
+from typing import Any, TypedDict, cast
+from typing_extensions import Required
+
+from anthropic import Omit
+from anthropic.types.anthropic_beta_param import AnthropicBetaParam
+from anthropic.types.beta import (
+    BetaContentBlockParam,
+    BetaMessageParam,
+    BetaTextBlockParam,
+    BetaThinkingConfigParam,
+    BetaToolChoiceParam,
+    BetaToolParam,
+)
+from pydantic import BaseModel
+
+from ....content import ContentPart
+from ....exceptions import FormattingModeNotSupportedError
+from ....formatting import (
+    Format,
+    FormattableT,
+    _utils as _formatting_utils,
+    resolve_format,
+)
+from ....messages import AssistantMessage, Message, UserMessage
+from ....tools import AnyToolSchema, BaseToolkit
+from ...base import Params, _utils as _base_utils
+from ..model_id import model_name
+from ..model_info import MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS
+from .encode import (
+    DEFAULT_MAX_TOKENS,
+    FORMAT_TOOL_NAME,
+    convert_tool_to_tool_param,
+    encode_content,
+    process_params,
+)
+
+DEFAULT_FORMAT_MODE = "strict"
+
+
+class BetaParseKwargs(TypedDict, total=False):
+    """Kwargs for Anthropic beta.messages.parse method."""
+
+    model: Required[str]
+    max_tokens: Required[int]
+    messages: Sequence[BetaMessageParam]
+    system: Sequence[BetaTextBlockParam] | Omit
+    tools: Sequence[BetaToolParam] | Omit
+    tool_choice: BetaToolChoiceParam | Omit
+    temperature: float | Omit
+    top_p: float | Omit
+    top_k: int | Omit
+    stop_sequences: list[str] | Omit
+    thinking: BetaThinkingConfigParam | Omit
+    betas: list[AnthropicBetaParam]
+    output_format: type[BaseModel]
+
+
+def _beta_encode_content(
+    content: Sequence[ContentPart],
+    encode_thoughts: bool,
+    add_cache_control: bool = False,
+) -> str | Sequence[BetaContentBlockParam]:
+    """Convert mirascope content to Beta Anthropic content format."""
+    result = encode_content(content, encode_thoughts, add_cache_control)
+    if isinstance(result, str):
+        return result
+    return cast(Sequence[BetaContentBlockParam], result)
+
+
+def _beta_encode_message(
+    message: UserMessage | AssistantMessage,
+    model_id: str,
+    encode_thoughts: bool,
+    add_cache_control: bool = False,
+) -> BetaMessageParam:
+    """Convert user or assistant Message to Beta MessageParam format.
+
+    Args:
+        message: The message to encode
+        model_id: The Anthropic model ID
+        encode_thoughts: Whether to encode thought blocks as text
+        add_cache_control: Whether to add cache_control to the last content block
+    """
+    if (
+        message.role == "assistant"
+        and message.provider_id == "anthropic"
+        and message.model_id == model_id
+        and message.raw_message
+        and not encode_thoughts
+        and not add_cache_control
+    ):
+        raw = cast(dict[str, Any], message.raw_message)
+        return BetaMessageParam(
+            role=raw["role"],
+            content=raw["content"],
+        )
+
+    content = _beta_encode_content(message.content, encode_thoughts, add_cache_control)
+
+    return BetaMessageParam(
+        role=message.role,
+        content=content,
+    )
+
+
+def _beta_encode_messages(
+    messages: Sequence[UserMessage | AssistantMessage],
+    model_id: str,
+    encode_thoughts: bool,
+) -> Sequence[BetaMessageParam]:
+    """Encode messages and add cache control for multi-turn conversations.
+
+    If the conversation contains assistant messages (indicating multi-turn),
+    adds cache_control to the last content block of the last message.
+    """
+    # Detect multi-turn conversations by checking for assistant messages
+    has_assistant_message = any(msg.role == "assistant" for msg in messages)
+
+    # Encode messages, adding cache_control to the last message if multi-turn
+    encoded_messages: list[BetaMessageParam] = []
+    for i, message in enumerate(messages):
+        is_last = i == len(messages) - 1
+        add_cache = has_assistant_message and is_last
+        encoded_messages.append(
+            _beta_encode_message(message, model_id, encode_thoughts, add_cache)
+        )
+    return encoded_messages
+
+
+def _beta_convert_tool_to_tool_param(tool: AnyToolSchema) -> BetaToolParam:
+    """Convert a single Mirascope tool to Beta Anthropic tool format."""
+    return cast(BetaToolParam, convert_tool_to_tool_param(tool))
+
+
+def beta_encode_request(
+    *,
+    model_id: str,
+    messages: Sequence[Message],
+    tools: Sequence[AnyToolSchema] | BaseToolkit[AnyToolSchema] | None,
+    format: type[FormattableT] | Format[FormattableT] | None,
+    params: Params,
+) -> tuple[Sequence[Message], Format[FormattableT] | None, BetaParseKwargs]:
+    """Prepares a request for the Anthropic beta.messages.parse method."""
+
+    processed = process_params(params, DEFAULT_MAX_TOKENS)
+    encode_thoughts = processed.pop("encode_thoughts", False)
+    max_tokens = processed.pop("max_tokens", DEFAULT_MAX_TOKENS)
+
+    kwargs: BetaParseKwargs = BetaParseKwargs(
+        {
+            "model": model_name(model_id),
+            "max_tokens": max_tokens,
+            "betas": ["structured-outputs-2025-11-13"],
+            **processed,
+        }
+    )
+
+    tools = tools.tools if isinstance(tools, BaseToolkit) else tools or []
+    anthropic_tools = [_beta_convert_tool_to_tool_param(tool) for tool in tools]
+    format = resolve_format(format, default_mode=DEFAULT_FORMAT_MODE)
+
+    if format is not None:
+        if format.mode == "strict":
+            if model_name(model_id) in MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS:
+                raise FormattingModeNotSupportedError(
+                    formatting_mode=format.mode,
+                    provider_id="anthropic",
+                    model_id=model_id,
+                )
+            else:
+                kwargs["output_format"] = cast(type[BaseModel], format.formattable)
+
+        if format.mode == "tool":
+            format_tool_schema = _formatting_utils.create_tool_schema(format)
+            anthropic_tools.append(_beta_convert_tool_to_tool_param(format_tool_schema))
+            if tools:
+                kwargs["tool_choice"] = {"type": "any"}
+            else:
+                kwargs["tool_choice"] = {
+                    "type": "tool",
+                    "name": FORMAT_TOOL_NAME,
+                    "disable_parallel_tool_use": True,
+                }
+
+        if format.formatting_instructions:
+            messages = _base_utils.add_system_instructions(
+                messages, format.formatting_instructions
+            )
+
+    if anthropic_tools:
+        # Add cache control to the last tool for prompt caching
+        last_tool = anthropic_tools[-1]
+        last_tool["cache_control"] = {"type": "ephemeral"}
+        kwargs["tools"] = anthropic_tools
+
+    system_message_content, remaining_messages = _base_utils.extract_system_message(
+        messages
+    )
+
+    kwargs["messages"] = _beta_encode_messages(
+        remaining_messages, model_id, encode_thoughts
+    )
+
+    if system_message_content:
+        kwargs["system"] = [
+            BetaTextBlockParam(
+                type="text",
+                text=system_message_content,
+                cache_control={"type": "ephemeral"},
+            )
+        ]
+
+    return messages, format, kwargs
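
The kwargs TypedDicts in this diff (BetaParseKwargs above, MessageCreateKwargs below) combine total=False with Required[...] so that only model and max_tokens are mandatory. A minimal, self-contained illustration of that pattern (the demo class and values are invented, not part of the package):

# Illustration only -- mirrors the Required/total=False pattern of
# BetaParseKwargs with an invented, smaller TypedDict.
from typing import TypedDict
from typing_extensions import Required


class ParseKwargsDemo(TypedDict, total=False):
    model: Required[str]       # must always be present
    max_tokens: Required[int]  # must always be present
    temperature: float         # optional; may be omitted entirely


ok: ParseKwargsDemo = {"model": "claude-demo", "max_tokens": 1024}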
mirascope/llm/providers/anthropic/_utils/decode.py

@@ -1,10 +1,11 @@
-"""Anthropic response decoding."""
+"""Standard Anthropic response decoding."""
 
 import json
 from typing import Any, TypeAlias, cast
 
 from anthropic import types as anthropic_types
 from anthropic.lib.streaming import AsyncMessageStreamManager, MessageStreamManager
+from anthropic.types.beta import BetaUsage
 
 from ....content import (
     AssistantContentPart,
@@ -29,6 +30,8 @@ from ....responses import (
     FinishReasonChunk,
     RawMessageChunk,
     RawStreamEventChunk,
+    Usage,
+    UsageDeltaChunk,
 )
 from ..model_id import AnthropicModelId, model_name
 
@@ -58,11 +61,30 @@ def _decode_assistant_content(
     )
 
 
+def decode_usage(
+    usage: anthropic_types.Usage | BetaUsage,
+) -> Usage:
+    """Convert Anthropic Usage (or BetaUsage) to Mirascope Usage."""
+
+    cache_read_tokens = usage.cache_read_input_tokens or 0
+    cache_write_tokens = usage.cache_creation_input_tokens or 0
+    input_tokens = usage.input_tokens + cache_read_tokens + cache_write_tokens
+    output_tokens = usage.output_tokens
+    return Usage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        cache_write_tokens=cache_write_tokens,
+        reasoning_tokens=0,
+        raw=usage,
+    )
+
+
 def decode_response(
     response: anthropic_types.Message,
     model_id: AnthropicModelId,
-) -> tuple[AssistantMessage, FinishReason | None]:
-    """Convert Anthropic message to mirascope AssistantMessage."""
+) -> tuple[AssistantMessage, FinishReason | None, Usage]:
+    """Convert Anthropic message to mirascope AssistantMessage and usage."""
     assistant_message = AssistantMessage(
         content=[_decode_assistant_content(part) for part in response.content],
         provider_id="anthropic",
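
Note the arithmetic in decode_usage above: it treats Anthropic's input_tokens as the uncached portion of the prompt and re-adds cache reads and writes to produce the total input count. A worked example with invented values:

# Invented values illustrating decode_usage's token arithmetic.
input_tokens = 120   # usage.input_tokens (uncached portion)
cache_read = 2048    # usage.cache_read_input_tokens
cache_write = 512    # usage.cache_creation_input_tokens
assert input_tokens + cache_read + cache_write == 2680  # Usage.input_tokens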
@@ -78,14 +100,14 @@ def decode_response(
         if response.stop_reason
         else None
     )
-    return assistant_message, finish_reason
+    usage = decode_usage(response.usage)
+    return assistant_message, finish_reason, usage
 
 
 ContentBlock: TypeAlias = (
     anthropic_types.TextBlockParam
     | anthropic_types.ThinkingBlockParam
     | anthropic_types.ToolUseBlockParam
-    | anthropic_types.ThinkingBlockParam
     | anthropic_types.RedactedThinkingBlockParam
 )
 
@@ -210,6 +232,16 @@ class _AnthropicChunkProcessor:
         if finish_reason is not None:
             yield FinishReasonChunk(finish_reason=finish_reason)
 
+        # Emit usage delta
+        usage = event.usage
+        yield UsageDeltaChunk(
+            input_tokens=usage.input_tokens or 0,
+            output_tokens=usage.output_tokens,
+            cache_read_tokens=usage.cache_read_input_tokens or 0,
+            cache_write_tokens=usage.cache_creation_input_tokens or 0,
+            reasoning_tokens=0,
+        )
+
     def raw_message_chunk(self) -> RawMessageChunk:
         return RawMessageChunk(
             raw_message=cast(
@@ -225,7 +257,7 @@
 def decode_stream(
     anthropic_stream_manager: MessageStreamManager,
 ) -> ChunkIterator:
-    """Returns a ChunkIterator converted from an Anthropic MessageStreamManager"""
+    """Returns a ChunkIterator converted from an Anthropic MessageStreamManager."""
     processor = _AnthropicChunkProcessor()
     with anthropic_stream_manager as stream:
         for event in stream._raw_stream:  # pyright: ignore[reportPrivateUsage]
@@ -236,7 +268,7 @@ def decode_stream(
 async def decode_async_stream(
     anthropic_stream_manager: AsyncMessageStreamManager,
 ) -> AsyncChunkIterator:
-    """Returns an AsyncChunkIterator converted from an Anthropic MessageStreamManager"""
+    """Returns an AsyncChunkIterator converted from an Anthropic MessageStreamManager."""
    processor = _AnthropicChunkProcessor()
    async with anthropic_stream_manager as stream:
        async for event in stream._raw_stream:  # pyright: ignore[reportPrivateUsage]
mirascope/llm/providers/anthropic/_utils/encode.py

@@ -1,9 +1,9 @@
-"""Anthropic
+"""Shared Anthropic encoding utilities."""
 
 import json
 from collections.abc import Sequence
 from functools import lru_cache
-from typing import Literal, TypedDict, cast
+from typing import Any, Literal, TypedDict, cast
 from typing_extensions import Required
 
 from anthropic import Omit, types as anthropic_types
@@ -22,14 +22,14 @@ from ...base import Params, _utils as _base_utils
 from ..model_id import AnthropicModelId, model_name
 
 DEFAULT_MAX_TOKENS = 16000
+# TODO: Change DEFAULT_FORMAT_MODE to strict when strict is no longer a beta feature.
+DEFAULT_FORMAT_MODE = "tool"
 
 AnthropicImageMimeType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
 
 
-def encode_image_mime_type(
-    mime_type: ImageMimeType,
-) -> AnthropicImageMimeType:
-    """Convert an ImageMimeType into anthropic supported mime type"""
+def encode_image_mime_type(mime_type: ImageMimeType) -> AnthropicImageMimeType:
+    """Convert an ImageMimeType into anthropic supported mime type."""
     if mime_type in ("image/jpeg", "image/png", "image/gif", "image/webp"):
         return mime_type
     raise FeatureNotSupportedError(
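
Per the hunk above, encode_image_mime_type narrows an arbitrary mime type to the four Anthropic-supported image types and raises for anything else. A behavior sketch (the error's constructor arguments fall outside this hunk, so only the outcome is shown):

# Behavior sketch based on the diff above -- not part of the diff.
encode_image_mime_type("image/png")   # returns "image/png"
encode_image_mime_type("image/tiff")  # raises FeatureNotSupportedError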
@@ -37,13 +37,60 @@ def encode_image_mime_type(
     )  # pragma: no cover
 
 
+class ProcessedParams(TypedDict, total=False):
+    """Common parameters processed from Params."""
+
+    temperature: float
+    max_tokens: int
+    top_p: float
+    top_k: int
+    stop_sequences: list[str]
+    thinking: dict[str, Any]
+    encode_thoughts: bool
+
+
+def process_params(params: Params, default_max_tokens: int) -> ProcessedParams:
+    """Process common Anthropic parameters from Params.
+
+    Returns a dict with processed parameters that can be merged into kwargs.
+    """
+    result: ProcessedParams = {
+        "max_tokens": default_max_tokens,
+        "encode_thoughts": False,
+    }
+
+    with _base_utils.ensure_all_params_accessed(
+        params=params, provider_id="anthropic", unsupported_params=["seed"]
+    ) as param_accessor:
+        if param_accessor.temperature is not None:
+            result["temperature"] = param_accessor.temperature
+        if param_accessor.max_tokens is not None:
+            result["max_tokens"] = param_accessor.max_tokens
+        if param_accessor.top_p is not None:
+            result["top_p"] = param_accessor.top_p
+        if param_accessor.top_k is not None:
+            result["top_k"] = param_accessor.top_k
+        if param_accessor.stop_sequences is not None:
+            result["stop_sequences"] = param_accessor.stop_sequences
+        if param_accessor.thinking is not None:
+            if param_accessor.thinking:
+                budget_tokens = max(1024, result["max_tokens"] // 2)
+                result["thinking"] = {"type": "enabled", "budget_tokens": budget_tokens}
+            else:
+                result["thinking"] = {"type": "disabled"}
+        if param_accessor.encode_thoughts_as_text:
+            result["encode_thoughts"] = True
+
+    return result
+
+
 class MessageCreateKwargs(TypedDict, total=False):
     """Kwargs for Anthropic Message.create method."""
 
     model: Required[str]
     max_tokens: Required[int]
     messages: Sequence[anthropic_types.MessageParam]
-    system:
+    system: Sequence[anthropic_types.TextBlockParam] | Omit
     tools: Sequence[anthropic_types.ToolParam] | Omit
     tool_choice: anthropic_types.ToolChoiceParam | Omit
     temperature: float | Omit
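
When thinking is enabled, process_params sets the budget to half of the resolved max_tokens, with a floor of 1024 tokens:

# The budget rule from process_params, evaluated for two example values.
assert max(1024, 16000 // 2) == 8000  # at DEFAULT_MAX_TOKENS
assert max(1024, 1500 // 2) == 1024   # small budgets hit the floor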
@@ -53,8 +100,10 @@ class MessageCreateKwargs(TypedDict, total=False):
     thinking: anthropic_types.ThinkingConfigParam | Omit
 
 
-def _encode_content(
-    content: Sequence[ContentPart],
+def encode_content(
+    content: Sequence[ContentPart],
+    encode_thoughts: bool,
+    add_cache_control: bool,
 ) -> str | Sequence[anthropic_types.ContentBlockParam]:
     """Convert mirascope content to Anthropic content format."""
 
@@ -65,15 +114,42 @@ def _encode_content(
             "anthropic",
             message="Anthropic does not support empty message content.",
         )
+        if add_cache_control:
+            return [
+                anthropic_types.TextBlockParam(
+                    type="text",
+                    text=content[0].text,
+                    cache_control={"type": "ephemeral"},
+                )
+            ]
         return content[0].text
 
     blocks: list[anthropic_types.ContentBlockParam] = []
 
-    for part in content:
+    # Find the last cacheable content part (text, image, tool_result, or tool_call)
+    last_cacheable_index = -1
+    if add_cache_control:
+        for i in range(len(content) - 1, -1, -1):
+            part = content[i]
+            if part.type in ("text", "image", "tool_output", "tool_call"):
+                if part.type == "text" and not part.text:  # pragma: no cover
+                    continue  # Skip empty text
+                last_cacheable_index = i
+                break
+
+    for i, part in enumerate(content):
+        should_add_cache = add_cache_control and i == last_cacheable_index
+
         if part.type == "text":
             if part.text:
                 blocks.append(
-                    anthropic_types.TextBlockParam(
+                    anthropic_types.TextBlockParam(
+                        type="text",
+                        text=part.text,
+                        cache_control={"type": "ephemeral"}
+                        if should_add_cache
+                        else None,
+                    )
                 )
@@ -91,7 +167,13 @@ def _encode_content(
                     type="url",
                     url=part.source.url,
                 )
-            blocks.append(
+            blocks.append(
+                anthropic_types.ImageBlockParam(
+                    type="image",
+                    source=source,
+                    cache_control={"type": "ephemeral"} if should_add_cache else None,
+                )
+            )
         elif part.type == "audio":
             raise FeatureNotSupportedError(
                 "audio input",
@@ -104,6 +186,7 @@ def _encode_content(
                     type="tool_result",
                     tool_use_id=part.id,
                     content=str(part.value),
+                    cache_control={"type": "ephemeral"} if should_add_cache else None,
                 )
             )
         elif part.type == "tool_call":
@@ -113,6 +196,7 @@ def _encode_content(
                     id=part.id,
                     name=part.name,
                     input=json.loads(part.args),
+                    cache_control={"type": "ephemeral"} if should_add_cache else None,
                 )
             )
         elif part.type == "thought":
@@ -139,33 +223,60 @@ def _encode_message(
     message: UserMessage | AssistantMessage,
     model_id: AnthropicModelId,
     encode_thoughts: bool,
+    add_cache_control: bool = False,
 ) -> anthropic_types.MessageParam:
-    """Convert user or assistant
+    """Convert user or assistant Message to Anthropic MessageParam format.
 
     Args:
-
-        model_id: The Anthropic model ID
-
-
-        A Sequence of converted Anthropic `MessageParam`
+        message: The message to encode
+        model_id: The Anthropic model ID
+        encode_thoughts: Whether to encode thought blocks as text
+        add_cache_control: Whether to add cache_control to the last content block
     """
-
     if (
         message.role == "assistant"
         and message.provider_id == "anthropic"
        and message.model_id == model_id
        and message.raw_message
        and not encode_thoughts
+        and not add_cache_control
     ):
         return cast(anthropic_types.MessageParam, message.raw_message)
+
+    content = encode_content(message.content, encode_thoughts, add_cache_control)
+
     return {
         "role": message.role,
-        "content":
+        "content": content,
     }
 
 
+def _encode_messages(
+    messages: Sequence[UserMessage | AssistantMessage],
+    model_id: AnthropicModelId,
+    encode_thoughts: bool,
+) -> Sequence[anthropic_types.MessageParam]:
+    """Encode messages and add cache control for multi-turn conversations.
+
+    If the conversation contains assistant messages (indicating multi-turn),
+    adds cache_control to the last content block of the last message.
+    """
+    # Detect multi-turn conversations by checking for assistant messages
+    has_assistant_message = any(msg.role == "assistant" for msg in messages)
+
+    # Encode messages, adding cache_control to the last message if multi-turn
+    encoded_messages: list[anthropic_types.MessageParam] = []
+    for i, message in enumerate(messages):
+        is_last = i == len(messages) - 1
+        add_cache = has_assistant_message and is_last
+        encoded_messages.append(
+            _encode_message(message, model_id, encode_thoughts, add_cache)
+        )
+    return encoded_messages
+
+
 @lru_cache(maxsize=128)
-def
+def convert_tool_to_tool_param(tool: AnyToolSchema) -> anthropic_types.ToolParam:
     """Convert a single Mirascope tool to Anthropic tool format with caching."""
     schema_dict = tool.parameters.model_dump(by_alias=True, exclude_none=True)
     schema_dict["type"] = "object"
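
The multi-turn caching rule in _encode_messages marks only the last message, and only when an assistant message is present. A self-contained check using stand-in dicts (not real Mirascope message objects):

# Stand-in messages illustrating which message would get cache_control.
msgs = [{"role": "user"}, {"role": "assistant"}, {"role": "user"}]
has_assistant = any(m["role"] == "assistant" for m in msgs)
flags = [has_assistant and i == len(msgs) - 1 for i in range(len(msgs))]
assert flags == [False, False, True]  # only the final message is cached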
@@ -184,54 +295,29 @@ def encode_request(
     format: type[FormattableT] | Format[FormattableT] | None,
     params: Params,
 ) -> tuple[Sequence[Message], Format[FormattableT] | None, MessageCreateKwargs]:
-    """Prepares a request for the
-
-
-
-
+    """Prepares a request for the Anthropic messages.create method."""
+
+    processed = process_params(params, DEFAULT_MAX_TOKENS)
+    encode_thoughts = processed.pop("encode_thoughts", False)
+    max_tokens = processed.pop("max_tokens", DEFAULT_MAX_TOKENS)
 
     kwargs: MessageCreateKwargs = MessageCreateKwargs(
-        {
-            "model": model_name(model_id),
-            "max_tokens": DEFAULT_MAX_TOKENS,
-        }
+        {"model": model_name(model_id), "max_tokens": max_tokens, **processed}
     )
-    encode_thoughts = False
-
-    with _base_utils.ensure_all_params_accessed(
-        params=params, provider_id="anthropic", unsupported_params=["seed"]
-    ) as param_accessor:
-        if param_accessor.temperature is not None:
-            kwargs["temperature"] = param_accessor.temperature
-        if param_accessor.max_tokens is not None:
-            kwargs["max_tokens"] = param_accessor.max_tokens
-        if param_accessor.top_p is not None:
-            kwargs["top_p"] = param_accessor.top_p
-        if param_accessor.top_k is not None:
-            kwargs["top_k"] = param_accessor.top_k
-        if param_accessor.stop_sequences is not None:
-            kwargs["stop_sequences"] = param_accessor.stop_sequences
-        if param_accessor.thinking is not None:
-            if param_accessor.thinking:
-                # Set budget to 50% of max_tokens with minimum of 1024
-                budget_tokens = max(1024, kwargs["max_tokens"] // 2)
-                kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget_tokens}
-            else:
-                kwargs["thinking"] = {"type": "disabled"}
-        if param_accessor.encode_thoughts_as_text:
-            encode_thoughts = True
 
     tools = tools.tools if isinstance(tools, BaseToolkit) else tools or []
-    anthropic_tools = [
-    format = resolve_format(format, default_mode=
+    anthropic_tools = [convert_tool_to_tool_param(tool) for tool in tools]
+    format = resolve_format(format, default_mode=DEFAULT_FORMAT_MODE)
     if format is not None:
         if format.mode == "strict":
             raise FormattingModeNotSupportedError(
-                formatting_mode="strict",
+                formatting_mode="strict",
+                provider_id="anthropic",
+                model_id=model_id,
             )
-
+        if format.mode == "tool":
             format_tool_schema = _formatting_utils.create_tool_schema(format)
-            anthropic_tools.append(
+            anthropic_tools.append(convert_tool_to_tool_param(format_tool_schema))
             if tools:
                 kwargs["tool_choice"] = {"type": "any"}
             else:
@@ -247,18 +333,24 @@ def encode_request(
     )
 
     if anthropic_tools:
+        # Add cache control to the last tool for prompt caching
+        last_tool = anthropic_tools[-1]
+        last_tool["cache_control"] = {"type": "ephemeral"}
         kwargs["tools"] = anthropic_tools
 
     system_message_content, remaining_messages = _base_utils.extract_system_message(
         messages
     )
 
-    kwargs["messages"] = [
-        _encode_message(remaining_message, model_id, encode_thoughts)
-        for remaining_message in remaining_messages
-    ]
+    kwargs["messages"] = _encode_messages(remaining_messages, model_id, encode_thoughts)
 
     if system_message_content:
-        kwargs["system"] =
+        kwargs["system"] = [
+            anthropic_types.TextBlockParam(
+                type="text",
+                text=system_message_content,
+                cache_control={"type": "ephemeral"},
+            )
+        ]
 
     return messages, format, kwargs
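
For reference, when a system message is present, encode_request now sends it as a single text block with ephemeral cache control. Since TextBlockParam is a TypedDict, the resulting kwargs["system"] entry is a plain dict of this shape (the prompt text here is invented):

# Shape of kwargs["system"] produced by encode_request -- illustrative only.
system = [
    {
        "type": "text",
        "text": "You are a helpful assistant.",  # invented system prompt
        "cache_control": {"type": "ephemeral"},
    }
]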