fast-agent-mcp 0.2.33__py3-none-any.whl → 0.2.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.35.dist-info}/METADATA +1 -1
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.35.dist-info}/RECORD +28 -25
- mcp_agent/agents/base_agent.py +13 -0
- mcp_agent/config.py +8 -0
- mcp_agent/context.py +3 -2
- mcp_agent/core/agent_app.py +41 -1
- mcp_agent/core/enhanced_prompt.py +9 -0
- mcp_agent/core/fastagent.py +14 -2
- mcp_agent/core/interactive_prompt.py +59 -13
- mcp_agent/core/usage_display.py +193 -0
- mcp_agent/event_progress.py +22 -4
- mcp_agent/llm/augmented_llm.py +42 -9
- mcp_agent/llm/augmented_llm_passthrough.py +66 -4
- mcp_agent/llm/augmented_llm_playback.py +19 -0
- mcp_agent/llm/augmented_llm_slow.py +12 -1
- mcp_agent/llm/memory.py +120 -0
- mcp_agent/llm/model_database.py +236 -0
- mcp_agent/llm/model_factory.py +1 -0
- mcp_agent/llm/providers/augmented_llm_anthropic.py +211 -30
- mcp_agent/llm/providers/augmented_llm_google_native.py +18 -1
- mcp_agent/llm/providers/augmented_llm_openai.py +20 -7
- mcp_agent/llm/usage_tracking.py +402 -0
- mcp_agent/logging/events.py +24 -0
- mcp_agent/logging/rich_progress.py +9 -1
- mcp_agent/mcp/interfaces.py +6 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.35.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.35.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.35.dist-info}/licenses/LICENSE +0 -0
mcp_agent/core/usage_display.py
@@ -0,0 +1,193 @@
+"""
+Utility module for displaying usage statistics in a consistent format.
+Consolidates the usage display logic that was duplicated between fastagent.py and interactive_prompt.py.
+"""
+
+from typing import Any, Dict, Optional
+
+from rich.console import Console
+
+
+def display_usage_report(
+    agents: Dict[str, Any], show_if_progress_disabled: bool = False, subdued_colors: bool = False
+) -> None:
+    """
+    Display a formatted table of token usage for all agents.
+
+    Args:
+        agents: Dictionary of agent name -> agent object
+        show_if_progress_disabled: If True, show even when progress display is disabled
+        subdued_colors: If True, use dim styling for a more subdued appearance
+    """
+    # Check if progress display is enabled (only relevant for fastagent context)
+    if not show_if_progress_disabled:
+        try:
+            from mcp_agent import config
+
+            settings = config.get_settings()
+            if not settings.logger.progress_display:
+                return
+        except (ImportError, AttributeError):
+            # If we can't check settings, assume we should display
+            pass
+
+    # Collect usage data from all agents
+    usage_data = []
+    total_input = 0
+    total_output = 0
+    total_tokens = 0
+
+    for agent_name, agent in agents.items():
+        if agent.usage_accumulator:
+            summary = agent.usage_accumulator.get_summary()
+            if summary["turn_count"] > 0:
+                input_tokens = summary["cumulative_input_tokens"]
+                output_tokens = summary["cumulative_output_tokens"]
+                billing_tokens = summary["cumulative_billing_tokens"]
+                turns = summary["turn_count"]
+
+                # Get context percentage for this agent
+                context_percentage = agent.usage_accumulator.context_usage_percentage
+
+                # Get model name from LLM's default_request_params
+                model = "unknown"
+                if hasattr(agent, "_llm") and agent._llm:
+                    llm = agent._llm
+                    if (
+                        hasattr(llm, "default_request_params")
+                        and llm.default_request_params
+                        and hasattr(llm.default_request_params, "model")
+                    ):
+                        model = llm.default_request_params.model or "unknown"
+
+                # Standardize model name truncation - use consistent 25 char width with 22+... truncation
+                if len(model) > 25:
+                    model = model[:22] + "..."
+
+                usage_data.append(
+                    {
+                        "name": agent_name,
+                        "model": model,
+                        "input": input_tokens,
+                        "output": output_tokens,
+                        "total": billing_tokens,
+                        "turns": turns,
+                        "context": context_percentage,
+                    }
+                )
+
+                total_input += input_tokens
+                total_output += output_tokens
+                total_tokens += billing_tokens
+
+    if not usage_data:
+        return
+
+    # Calculate dynamic agent column width (max 15)
+    max_agent_width = min(15, max(len(data["name"]) for data in usage_data) if usage_data else 8)
+    agent_width = max(max_agent_width, 5)  # Minimum of 5 for "Agent" header
+
+    # Display the table
+    console = Console()
+    console.print()
+    console.print("[dim]Usage Summary (Cumulative)[/dim]")
+
+    # Print header with proper spacing
+    console.print(
+        f"[dim]{'Agent':<{agent_width}} {'Input':>9} {'Output':>9} {'Total':>9} {'Turns':>6} {'Context%':>9} {'Model':<25}[/dim]"
+    )
+
+    # Print agent rows - use styling based on subdued_colors flag
+    for data in usage_data:
+        input_str = f"{data['input']:,}"
+        output_str = f"{data['output']:,}"
+        total_str = f"{data['total']:,}"
+        turns_str = str(data["turns"])
+        context_str = f"{data['context']:.1f}%" if data["context"] is not None else "-"
+
+        # Truncate agent name if needed
+        agent_name = data["name"]
+        if len(agent_name) > agent_width:
+            agent_name = agent_name[: agent_width - 3] + "..."
+
+        if subdued_colors:
+            # Original fastagent.py style with dim wrapper
+            console.print(
+                f"[dim]{agent_name:<{agent_width}} "
+                f"{input_str:>9} "
+                f"{output_str:>9} "
+                f"[bold]{total_str:>9}[/bold] "
+                f"{turns_str:>6} "
+                f"{context_str:>9} "
+                f"{data['model']:<25}[/dim]"
+            )
+        else:
+            # Original interactive_prompt.py style
+            console.print(
+                f"{agent_name:<{agent_width}} "
+                f"{input_str:>9} "
+                f"{output_str:>9} "
+                f"[bold]{total_str:>9}[/bold] "
+                f"{turns_str:>6} "
+                f"{context_str:>9} "
+                f"[dim]{data['model']:<25}[/dim]"
+            )
+
+    # Add total row if multiple agents
+    if len(usage_data) > 1:
+        console.print()
+        total_input_str = f"{total_input:,}"
+        total_output_str = f"{total_output:,}"
+        total_tokens_str = f"{total_tokens:,}"
+
+        if subdued_colors:
+            # Original fastagent.py style with dim wrapper on bold
+            console.print(
+                f"[bold dim]{'TOTAL':<{agent_width}} "
+                f"{total_input_str:>9} "
+                f"{total_output_str:>9} "
+                f"[bold]{total_tokens_str:>9}[/bold] "
+                f"{'':<6} "
+                f"{'':<9} "
+                f"{'':<25}[/bold dim]"
+            )
+        else:
+            # Original interactive_prompt.py style
+            console.print(
+                f"[bold]{'TOTAL':<{agent_width}}[/bold] "
+                f"[bold]{total_input_str:>9}[/bold] "
+                f"[bold]{total_output_str:>9}[/bold] "
+                f"[bold]{total_tokens_str:>9}[/bold] "
+                f"{'':<6} "
+                f"{'':<9} "
+                f"{'':<25}"
+            )
+
+    console.print()
+
+
+def collect_agents_from_provider(
+    prompt_provider: Any, agent_name: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Collect agents from a prompt provider for usage display.
+
+    Args:
+        prompt_provider: Provider that has access to agents
+        agent_name: Name of the current agent (for context)
+
+    Returns:
+        Dictionary of agent name -> agent object
+    """
+    agents_to_show = {}
+
+    if hasattr(prompt_provider, "_agents"):
+        # Multi-agent app - show all agents
+        agents_to_show = prompt_provider._agents
+    elif hasattr(prompt_provider, "agent"):
+        # Single agent
+        agent = prompt_provider.agent
+        if hasattr(agent, "name"):
+            agents_to_show = {agent.name: agent}
+
+    return agents_to_show
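As a quick orientation for the new module, the sketch below drives `display_usage_report` with hand-rolled stand-ins for the objects it inspects. `FakeAccumulator` and `FakeAgent` are hypothetical stubs (not part of the package) that expose only the attributes the function reads above; with fast-agent-mcp 0.2.35 and rich installed, the snippet should print the usage table directly.

```python
from dataclasses import dataclass, field

from mcp_agent.core.usage_display import display_usage_report


@dataclass
class FakeAccumulator:
    # Mirrors the attributes display_usage_report() reads; values are illustrative.
    context_usage_percentage: float = 12.5

    def get_summary(self) -> dict:
        return {
            "turn_count": 3,
            "cumulative_input_tokens": 1200,
            "cumulative_output_tokens": 450,
            "cumulative_billing_tokens": 1650,
        }


@dataclass
class FakeAgent:
    usage_accumulator: FakeAccumulator = field(default_factory=FakeAccumulator)


# show_if_progress_disabled=True skips the settings lookup shown in the module above.
display_usage_report({"default": FakeAgent()}, show_if_progress_disabled=True)
```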
mcp_agent/event_progress.py
CHANGED
@@ -15,6 +15,7 @@ class ProgressAction(str, Enum):
     LOADED = "Loaded"
     INITIALIZED = "Initialized"
     CHATTING = "Chatting"
+    STREAMING = "Streaming"  # Special action for real-time streaming updates
     ROUTING = "Routing"
     PLANNING = "Planning"
     READY = "Ready"
@@ -33,12 +34,22 @@ class ProgressEvent(BaseModel):
     target: str
     details: Optional[str] = None
     agent_name: Optional[str] = None
+    streaming_tokens: Optional[str] = None  # Special field for streaming token count

     def __str__(self) -> str:
         """Format the progress event for display."""
-
-        if self.
-
+        # Special handling for streaming - show token count in action position
+        if self.action == ProgressAction.STREAMING and self.streaming_tokens:
+            # For streaming, show just the token count instead of "Streaming"
+            action_display = self.streaming_tokens.ljust(11)
+            base = f"{action_display}. {self.target}"
+            if self.details:
+                base += f" - {self.details}"
+        else:
+            base = f"{self.action.ljust(11)}. {self.target}"
+            if self.details:
+                base += f" - {self.details}"
+
         if self.agent_name:
             base = f"[{self.agent_name}] {base}"
         return base
@@ -78,7 +89,8 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:

     elif "augmented_llm" in namespace:
         model = event_data.get("model", "")
-
+
+        # For all augmented_llm events, put model info in details column
         details = f"{model}"
         chat_turn = event_data.get("chat_turn")
         if chat_turn is not None:
@@ -87,9 +99,15 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:
         if not target:
             target = event_data.get("target", "unknown")

+        # Extract streaming token count for STREAMING actions
+        streaming_tokens = None
+        if progress_action == ProgressAction.STREAMING:
+            streaming_tokens = event_data.get("details", "")
+
         return ProgressEvent(
             action=ProgressAction(progress_action),
             target=target or "unknown",
             details=details,
             agent_name=event_data.get("agent_name"),
+            streaming_tokens=streaming_tokens,
         )
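A small illustration of how the new `STREAMING` action changes `__str__` formatting, using only the fields shown in the hunks above; the token-count string and the exact spacing of the rendered output are illustrative, not prescribed by the package.

```python
from mcp_agent.event_progress import ProgressAction, ProgressEvent

# Ordinary event: the action name fills the left-hand column.
chatting = ProgressEvent(
    action=ProgressAction.CHATTING,
    target="gpt-4o",
    details="turn 2",
    agent_name="researcher",
)
print(chatting)  # e.g. "[researcher] Chatting   . gpt-4o - turn 2"

# Streaming event: streaming_tokens replaces the action name in that column.
streaming = ProgressEvent(
    action=ProgressAction.STREAMING,
    target="gpt-4o",
    streaming_tokens="1234",  # illustrative token-count string
    agent_name="researcher",
)
print(streaming)  # e.g. "[researcher] 1234       . gpt-4o"
```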
mcp_agent/llm/augmented_llm.py
CHANGED
@@ -30,11 +30,13 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.core.request_params import RequestParams
 from mcp_agent.event_progress import ProgressAction
 from mcp_agent.llm.memory import Memory, SimpleMemory
+from mcp_agent.llm.model_database import ModelDatabase
 from mcp_agent.llm.provider_types import Provider
 from mcp_agent.llm.sampling_format_converter import (
     BasicFormatConverter,
     ProviderFormatConverter,
 )
+from mcp_agent.llm.usage_tracking import UsageAccumulator
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.helpers.content_helpers import get_text
 from mcp_agent.mcp.interfaces import (
@@ -95,6 +97,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
     PARAM_USE_HISTORY = "use_history"
     PARAM_MAX_ITERATIONS = "max_iterations"
     PARAM_TEMPLATE_VARS = "template_vars"
+
     # Base set of fields that should always be excluded
     BASE_EXCLUDE_FIELDS = {PARAM_METADATA}

@@ -155,12 +158,11 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # Initialize the display component
         self.display = ConsoleDisplay(config=self.context.config)

-        # Initialize default parameters
-
-
-        # Apply model override if provided
+        # Initialize default parameters, passing model info
+        model_kwargs = kwargs.copy()
         if model:
-
+            model_kwargs["model"] = model
+        self.default_request_params = self._initialize_default_params(model_kwargs)

         # Merge with provided params if any
         if self._init_request_params:
@@ -171,13 +173,22 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         self.type_converter = type_converter
         self.verb = kwargs.get("verb")

+        # Initialize usage tracking
+        self.usage_accumulator = UsageAccumulator()
+
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize default parameters for the LLM.
         Should be overridden by provider implementations to set provider-specific defaults."""
+        # Get model-aware default max tokens
+        model = kwargs.get("model")
+        max_tokens = ModelDatabase.get_default_max_tokens(model)
+
         return RequestParams(
+            model=model,
+            maxTokens=max_tokens,
             systemPrompt=self.instruction,
             parallel_tool_calls=True,
-            max_iterations=
+            max_iterations=20,
             use_history=True,
         )

@@ -361,16 +372,28 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # Start with base arguments
         arguments = base_args.copy()

-        #
-
+        # Combine base exclusions with provider-specific exclusions
+        final_exclude_fields = self.BASE_EXCLUDE_FIELDS.copy()
+        if exclude_fields:
+            final_exclude_fields.update(exclude_fields)

         # Add all fields from params that aren't explicitly excluded
-
+        # Ensure model_dump only includes set fields if that's the desired behavior,
+        # or adjust exclude_unset=True/False as needed.
+        # Default Pydantic v2 model_dump is exclude_unset=False
+        params_dict = request_params.model_dump(exclude=final_exclude_fields)
+
         for key, value in params_dict.items():
+            # Only add if not None and not already in base_args (base_args take precedence)
+            # or if None is a valid value for the provider, this logic might need adjustment.
             if value is not None and key not in arguments:
                 arguments[key] = value
+            elif value is not None and key in arguments and arguments[key] is None:
+                # Allow overriding a None in base_args with a set value from params
+                arguments[key] = value

         # Finally, add any metadata fields as a last layer of overrides
+        # This ensures metadata can override anything previously set if keys conflict.
         if request_params.metadata:
             arguments.update(request_params.metadata)

@@ -642,3 +665,13 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT

         assert self.provider
         return ProviderKeyManager.get_api_key(self.provider.value, self.context.config)
+
+    def get_usage_summary(self) -> dict:
+        """
+        Get a summary of usage statistics for this LLM instance.
+
+        Returns:
+            Dictionary containing usage statistics including tokens, cache metrics,
+            and context window utilization.
+        """
+        return self.usage_accumulator.get_summary()
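Since `get_usage_summary` is new in this release, here is a minimal sketch of reading it from an already-constructed LLM. The helper below relies only on the summary keys that `usage_display.py` (above) also reads; how the `llm` instance is obtained is outside the scope of this diff.

```python
from mcp_agent.llm.augmented_llm import AugmentedLLM


def print_usage(llm: AugmentedLLM) -> None:
    """Print cumulative usage for any constructed AugmentedLLM subclass."""
    summary = llm.get_usage_summary()  # added in 0.2.35
    print(f"turns:          {summary['turn_count']}")
    print(f"billing tokens: {summary['cumulative_billing_tokens']}")
    print(f"context used:   {llm.usage_accumulator.context_usage_percentage}")
```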
mcp_agent/llm/augmented_llm_passthrough.py
CHANGED
@@ -10,6 +10,7 @@ from mcp_agent.llm.augmented_llm import (
     RequestParams,
 )
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart

@@ -48,13 +49,34 @@ class PassthroughLLM(AugmentedLLM):
         await self.show_assistant_message(message, title="ASSISTANT/PASSTHROUGH")

         # Handle PromptMessage by concatenating all parts
+        result = ""
         if isinstance(message, PromptMessage):
             parts_text = []
             for part in message.content:
                 parts_text.append(str(part))
-
+            result = "\n".join(parts_text)
+        else:
+            result = str(message)

-
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = str(message)
+            output_content = result
+            tool_calls = 1 if input_content.startswith("***CALL_TOOL") else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result

     async def initialize(self) -> None:
         pass
@@ -146,6 +168,25 @@ class PassthroughLLM(AugmentedLLM):
         if self.is_tool_call(last_message):
             result = Prompt.assistant(await self.generate_str(last_message.first_text()))
             await self.show_assistant_message(result.first_text())
+
+            # Track usage for this tool call "turn"
+            try:
+                input_content = "\n".join(message.all_text() for message in multipart_messages)
+                output_content = result.first_text()
+
+                turn_usage = create_turn_usage_from_messages(
+                    input_content=input_content,
+                    output_content=output_content,
+                    model="passthrough",
+                    model_type="passthrough",
+                    tool_calls=1,  # This is definitely a tool call
+                    delay_seconds=0.0,
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
             return result

         if last_message.first_text().startswith(FIXED_RESPONSE_INDICATOR):
@@ -155,12 +196,33 @@ class PassthroughLLM(AugmentedLLM):

         if self._fixed_response:
             await self.show_assistant_message(self._fixed_response)
-
+            result = Prompt.assistant(self._fixed_response)
         else:
             # TODO -- improve when we support Audio/Multimodal gen models e.g. gemini . This should really just return the input as "assistant"...
             concatenated: str = "\n".join(message.all_text() for message in multipart_messages)
             await self.show_assistant_message(concatenated)
-
+            result = Prompt.assistant(concatenated)
+
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = "\n".join(message.all_text() for message in multipart_messages)
+            output_content = result.first_text()
+            tool_calls = 1 if self.is_tool_call(last_message) else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result

     def is_tool_call(self, message: PromptMessageMultipart) -> bool:
         return message.first_text().startswith(CALL_TOOL_INDICATOR)
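The passthrough changes above pair `create_turn_usage_from_messages` with the `UsageAccumulator` initialised in `augmented_llm.py`. Below is a minimal sketch of that pairing in isolation; the function names and keyword arguments are taken verbatim from the hunks in this diff, while the message strings are made up.

```python
from mcp_agent.llm.usage_tracking import UsageAccumulator, create_turn_usage_from_messages

accumulator = UsageAccumulator()

turn = create_turn_usage_from_messages(
    input_content="***CALL_TOOL fetch {}",  # illustrative passthrough input
    output_content="tool call result",
    model="passthrough",
    model_type="passthrough",
    tool_calls=1,
    delay_seconds=0.0,
)
accumulator.add_turn(turn)

summary = accumulator.get_summary()
print(summary["turn_count"])                 # -> 1
print(summary["cumulative_billing_tokens"])  # totals as computed by usage_tracking
```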
mcp_agent/llm/augmented_llm_playback.py
CHANGED
@@ -5,6 +5,7 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.llm.augmented_llm import RequestParams
 from mcp_agent.llm.augmented_llm_passthrough import PassthroughLLM
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 from mcp_agent.mcp.prompts.prompt_helpers import MessageContent
@@ -83,6 +84,24 @@ class PlaybackLLM(PassthroughLLM):
             message_text=MessageContent.get_first_text(response), title="ASSISTANT/PLAYBACK"
         )

+        # Track usage for this playback "turn"
+        try:
+            input_content = str(multipart_messages) if multipart_messages else ""
+            output_content = MessageContent.get_first_text(response)
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="playback",
+                model_type="playback",
+                tool_calls=0,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
         return response

     async def structured(
mcp_agent/llm/augmented_llm_slow.py
CHANGED
@@ -30,7 +30,18 @@ class SlowLLM(PassthroughLLM):
     ) -> str:
         """Sleep for 3 seconds then return the input message as a string."""
         await asyncio.sleep(3)
-
+        result = await super().generate_str(message, request_params)
+
+        # Override the last turn to include the 3-second delay
+        if self.usage_accumulator.turns:
+            last_turn = self.usage_accumulator.turns[-1]
+            # Update the raw usage to include delay
+            if hasattr(last_turn.raw_usage, 'delay_seconds'):
+                last_turn.raw_usage.delay_seconds = 3.0
+                # Print updated debug info
+                print("SlowLLM: Added 3.0s delay to turn usage")
+
+        return result

     async def _apply_prompt_provider_specific(
         self,
mcp_agent/llm/memory.py
CHANGED
@@ -35,6 +35,9 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
     def __init__(self) -> None:
         self.history: List[MessageParamT] = []
         self.prompt_messages: List[MessageParamT] = []  # Always included
+        self.conversation_cache_positions: List[int] = []  # Track active conversation cache positions
+        self.cache_walk_distance: int = 6  # Messages between cache blocks
+        self.max_conversation_cache_blocks: int = 2  # Maximum conversation cache blocks

     def extend(self, messages: List[MessageParamT], is_prompt: bool = False) -> None:
         """
@@ -99,5 +102,122 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
             clear_prompts: If True, also clear prompt messages
         """
         self.history = []
+        self.conversation_cache_positions = []  # Reset cache positions
         if clear_prompts:
             self.prompt_messages = []
+
+    def should_apply_conversation_cache(self) -> bool:
+        """
+        Determine if conversation caching should be applied based on walking algorithm.
+
+        Returns:
+            True if we should add or update cache blocks
+        """
+        total_messages = len(self.history)
+
+        # Need at least cache_walk_distance messages to start caching
+        if total_messages < self.cache_walk_distance:
+            return False
+
+        # Check if we need to add a new cache block
+        return len(self._calculate_cache_positions(total_messages)) != len(self.conversation_cache_positions)
+
+    def _calculate_cache_positions(self, total_conversation_messages: int) -> List[int]:
+        """
+        Calculate where cache blocks should be placed using walking algorithm.
+
+        Args:
+            total_conversation_messages: Number of conversation messages (not including prompts)
+
+        Returns:
+            List of positions (relative to conversation start) where cache should be placed
+        """
+        positions = []
+
+        # Place cache blocks every cache_walk_distance messages
+        for i in range(self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance):
+            positions.append(i)
+            if len(positions) >= self.max_conversation_cache_blocks:
+                break
+
+        # Keep only the most recent cache blocks (walking behavior)
+        if len(positions) > self.max_conversation_cache_blocks:
+            positions = positions[-self.max_conversation_cache_blocks:]
+
+        return positions
+
+    def get_conversation_cache_updates(self) -> dict:
+        """
+        Get cache position updates needed for the walking algorithm.
+
+        Returns:
+            Dict with 'add', 'remove', and 'active' position lists (relative to full message array)
+        """
+        total_conversation_messages = len(self.history)
+        new_positions = self._calculate_cache_positions(total_conversation_messages)
+
+        # Convert to absolute positions (including prompt messages)
+        prompt_offset = len(self.prompt_messages)
+        new_absolute_positions = [pos + prompt_offset for pos in new_positions]
+
+        old_positions_set = set(self.conversation_cache_positions)
+        new_positions_set = set(new_absolute_positions)
+
+        return {
+            'add': sorted(new_positions_set - old_positions_set),
+            'remove': sorted(old_positions_set - new_positions_set),
+            'active': sorted(new_absolute_positions)
+        }
+
+    def apply_conversation_cache_updates(self, updates: dict) -> None:
+        """
+        Apply cache position updates.
+
+        Args:
+            updates: Dict from get_conversation_cache_updates()
+        """
+        self.conversation_cache_positions = updates['active'].copy()
+
+    def remove_cache_control_from_messages(self, messages: List[MessageParamT], positions: List[int]) -> None:
+        """
+        Remove cache control from specified message positions.
+
+        Args:
+            messages: The message array to modify
+            positions: List of positions to remove cache control from
+        """
+        for pos in positions:
+            if pos < len(messages):
+                message = messages[pos]
+                if isinstance(message, dict) and "content" in message:
+                    content_list = message["content"]
+                    if isinstance(content_list, list):
+                        for content_block in content_list:
+                            if isinstance(content_block, dict) and "cache_control" in content_block:
+                                del content_block["cache_control"]
+
+    def add_cache_control_to_messages(self, messages: List[MessageParamT], positions: List[int]) -> int:
+        """
+        Add cache control to specified message positions.
+
+        Args:
+            messages: The message array to modify
+            positions: List of positions to add cache control to
+
+        Returns:
+            Number of cache blocks successfully applied
+        """
+        applied_count = 0
+        for pos in positions:
+            if pos < len(messages):
+                message = messages[pos]
+                if isinstance(message, dict) and "content" in message:
+                    content_list = message["content"]
+                    if isinstance(content_list, list) and content_list:
+                        # Apply cache control to the last content block
+                        for content_block in reversed(content_list):
+                            if isinstance(content_block, dict):
+                                content_block["cache_control"] = {"type": "ephemeral"}
+                                applied_count += 1
+                                break
+        return applied_count
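To close, a minimal sketch of the walking-cache helpers added to `SimpleMemory`, exercised directly against a list of Anthropic-style message dicts. The payloads are illustrative, it assumes `extend` with the default `is_prompt=False` lands the messages in the conversation history, and the wiring into a real provider (see `augmented_llm_anthropic.py` in the file list) is not shown here.

```python
from mcp_agent.llm.memory import SimpleMemory

# Provider-format messages: dicts whose "content" is a list of blocks.
messages = [
    {
        "role": "user" if i % 2 == 0 else "assistant",
        "content": [{"type": "text", "text": f"message {i}"}],
    }
    for i in range(8)
]

memory: SimpleMemory[dict] = SimpleMemory()
memory.extend(messages)  # conversation history, not prompt messages

if memory.should_apply_conversation_cache():
    updates = memory.get_conversation_cache_updates()
    memory.remove_cache_control_from_messages(messages, updates["remove"])
    applied = memory.add_cache_control_to_messages(messages, updates["add"])
    if applied:
        memory.apply_conversation_cache_updates(updates)

# With the defaults above (walk distance 6, max 2 blocks), the sixth message
# should now carry cache_control on its text block.
print(messages[5]["content"][0].get("cache_control"))  # {'type': 'ephemeral'}
```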
|