fast-agent-mcp 0.2.34__py3-none-any.whl → 0.2.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: fast-agent-mcp
- Version: 0.2.34
+ Version: 0.2.35
  Summary: Define, Prompt and Test MCP enabled Agents and Workflows
  Author-email: Shaun Smith <fastagent@llmindset.co.uk>
  License: Apache License
@@ -1,10 +1,10 @@
  mcp_agent/__init__.py,sha256=18T0AG0W9sJhTY38O9GFFOzliDhxx9p87CvRyti9zbw,1620
  mcp_agent/app.py,sha256=3mtHP1nRQcRaKhhxgTmCOv00alh70nT7UxNA8bN47QE,5560
- mcp_agent/config.py,sha256=9GDvMugKIeT9SKRGGEv2gN3lsC78hQ_Oy-HSpItuqo0,15841
+ mcp_agent/config.py,sha256=ZIGFCSWrhMqhlHhapQf3QXo9N6EuTVy5iZIFiiqwE2M,16289
  mcp_agent/console.py,sha256=Gjf2QLFumwG1Lav__c07X_kZxxEUSkzV-1_-YbAwcwo,813
- mcp_agent/context.py,sha256=H7JbaZ_8SzzTagLmIgUPUPxX5370C5qjQAsasFPZG2Y,7510
+ mcp_agent/context.py,sha256=f729LJcW4YoFXb0Rg_kEU-5FlrOnFgqplI6W0fVqomg,7631
  mcp_agent/context_dependent.py,sha256=QXfhw3RaQCKfscEEBRGuZ3sdMWqkgShz2jJ1ivGGX1I,1455
- mcp_agent/event_progress.py,sha256=040lrCCclcOuryi07YGSej25kTQF5_JMXY12Yj-3u1U,2773
+ mcp_agent/event_progress.py,sha256=d7T1hQ1D289MYh2Z5bMPB4JqjGqTOzveJuOHE03B_Xo,3720
  mcp_agent/mcp_server_registry.py,sha256=b3iSb-0ULYc5yUG2KHav41WGwSYWiJCGQsOwWHWByxo,12346
  mcp_agent/progress_display.py,sha256=GeJU9VUt6qKsFVymG688hCMVCsAygG9ifiiEb5IcbN4,361
  mcp_agent/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,7 +28,7 @@ mcp_agent/cli/commands/quickstart.py,sha256=SM3CHMzDgvTxIpKjFuX9BrS_N1vRoXNBDaO9
  mcp_agent/cli/commands/setup.py,sha256=eOEd4TL-b0DaDeSJMGOfNOsTEItoZ67W88eTP4aP-bo,6482
  mcp_agent/cli/commands/url_parser.py,sha256=5VdtcHRHzi67YignStVbz7u-rcvNNErw9oJLAUFOtEY,5855
  mcp_agent/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mcp_agent/core/agent_app.py,sha256=7DmOn55j9fQ-ngoGcJP82R_Z_y93FonuEkORTlP8X-w,12111
+ mcp_agent/core/agent_app.py,sha256=KJdx0Qbh7Gb4wA8_LwKriogc27SraRIrvMqHsOCVVt0,12119
  mcp_agent/core/agent_types.py,sha256=DogMcOoRwk70CFSetZ09madRcPDlhPn1iXZVeOcLV8Q,1507
  mcp_agent/core/direct_decorators.py,sha256=HY_7S7OtfZPqAeqC3_hPYa1d6zTnEyiOeI7JxvnWqTM,16786
  mcp_agent/core/direct_factory.py,sha256=UNAjHHFRLrQ3D934RMsKsh0Oas7LXLIVslgrzcetM6A,19090
@@ -50,23 +50,23 @@ mcp_agent/human_input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
  mcp_agent/human_input/handler.py,sha256=s712Z5ssTCwjL9-VKoIdP5CtgMh43YvepynYisiWTTA,3144
  mcp_agent/human_input/types.py,sha256=RtWBOVzy8vnYoQrc36jRLn8z8N3C4pDPMBN5vF6qM5Y,1476
  mcp_agent/llm/__init__.py,sha256=d8zgwG-bRFuwiMNMYkywg_qytk4P8lawyld_meuUmHI,68
- mcp_agent/llm/augmented_llm.py,sha256=vthXYuEIDunXITr1zkrIg8sBypnuQ5pcCheKRaCR-kw,24943
+ mcp_agent/llm/augmented_llm.py,sha256=ekVZQla3oOyWSysJif-2ZutklYB8HTK99I7HzSye6ag,25705
  mcp_agent/llm/augmented_llm_passthrough.py,sha256=F8KifmTwoQ7zyncjmoRek8SBfGdgc9yc5LRXwMQH-bg,8640
  mcp_agent/llm/augmented_llm_playback.py,sha256=BQeBXRpO-xGAY9wIJxyde6xpHmZEdQPLd32frF8t3QQ,4916
  mcp_agent/llm/augmented_llm_slow.py,sha256=DDSD8bL2flmQrVHZm-UDs7sR8aHRWkDOcOW-mX_GPok,2067
- mcp_agent/llm/memory.py,sha256=HQ_c1QemOUjrkY6Z2omE6BG5fXga7y4jN7KCMOuGjPs,3345
- mcp_agent/llm/model_database.py,sha256=cBhKFnofA_9M8qAcr0Kvzui5fSJkWMMGFWOOvqkU-DQ,8485
+ mcp_agent/llm/memory.py,sha256=pTOaTDV3EA3X68yKwEtUAu7s0xGIQQ_cKBhfYUnfR0w,8614
+ mcp_agent/llm/model_database.py,sha256=mfy039QZP_8-f0aHWR0Fpj2qnlys5430haSzrA86aXw,8485
  mcp_agent/llm/model_factory.py,sha256=u60O4SWe22wN6CpmIfaF4C5aUziJs8O3N0Jo7erPjp8,10753
  mcp_agent/llm/prompt_utils.py,sha256=yWQHykoK13QRF7evHUKxVF0SpVLN-Bsft0Yixzvn0g0,4825
  mcp_agent/llm/provider_key_manager.py,sha256=usMWozSMhek_FIlM1MeVDwAbs-P96SrEVPGd3YwF9E4,2833
  mcp_agent/llm/provider_types.py,sha256=AkQl1r67wZ0gSIY6CXsiZiS3uw5DBF9E5yhIn3THayk,633
  mcp_agent/llm/sampling_converter.py,sha256=C7wPBlmT0eD90XWabC22zkxsrVHKCrjwIwg6cG628cI,2926
  mcp_agent/llm/sampling_format_converter.py,sha256=xGz4odHpOcP7--eFaJaFtUR8eR9jxZS7MnLH6J7n0EU,1263
- mcp_agent/llm/usage_tracking.py,sha256=JOCmywn7f0-aJHUIG9DaTbFVxTqwMM-0hc4-lEhNkBM,14201
+ mcp_agent/llm/usage_tracking.py,sha256=HdBehPMt0bZzEgRmTnbMdgpLVuTp6L_VJTQx5Z25zCM,15321
  mcp_agent/llm/providers/__init__.py,sha256=heVxtmuqFJOnjjxHz4bWSqTAxXoN1E8twC_gQ_yJpHk,265
  mcp_agent/llm/providers/anthropic_utils.py,sha256=vYDN5G5jKMhD2CQg8veJYab7tvvzYkDMq8M1g_hUAQg,3275
  mcp_agent/llm/providers/augmented_llm_aliyun.py,sha256=XylkJKZ9theSVUxJKOZkf1244hgzng4Ng4Dr209Qb-w,1101
- mcp_agent/llm/providers/augmented_llm_anthropic.py,sha256=uvPANaseesS7K3gu4uM1E7OwurjVvM-e6tuRezP5IIY,17651
+ mcp_agent/llm/providers/augmented_llm_anthropic.py,sha256=xCLqLi2HeBHPSvN_xD9Gl26ENTGT0E_1KLnN38BVXiE,24624
  mcp_agent/llm/providers/augmented_llm_azure.py,sha256=VPrD6lNrEw6EdYUTa9MDvHDNIPjJU5CG5xnKCM3JYdA,5878
  mcp_agent/llm/providers/augmented_llm_deepseek.py,sha256=zI9a90dwT4r6E1f_xp4K50Cj9sD7y7kNRgjo0s1pd5w,3804
  mcp_agent/llm/providers/augmented_llm_generic.py,sha256=5Uq8ZBhcFuQTt7koP_5ykolREh2iWu8zKhNbh3pM9lQ,1210
@@ -84,11 +84,11 @@ mcp_agent/llm/providers/openai_utils.py,sha256=T4bTCL9f7DsoS_zoKgQKv_FUv_4n98vgb
  mcp_agent/llm/providers/sampling_converter_anthropic.py,sha256=35WzBWkPklnuMlu5S6XsQIq0YL58NOy8Ja6A_l4m6eM,1612
  mcp_agent/llm/providers/sampling_converter_openai.py,sha256=GA-LfTJzOwH9Vwk0Q4K37nG6zxpzqS-JGaM7cTH-Epc,841
  mcp_agent/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mcp_agent/logging/events.py,sha256=iHTSgrxK3BWnRoej6NhxVL5899MIHr-ktsA7hxMoo9k,3437
+ mcp_agent/logging/events.py,sha256=dSJJfuCd59-ZyYTVcf0M4HQd6iXb5k50PSAeoq1CpH0,4278
  mcp_agent/logging/json_serializer.py,sha256=qkfxnR9ka6OgvwSpM2CggELbEtzzkApm0s_KYz11RDY,5791
  mcp_agent/logging/listeners.py,sha256=_S4Jp5_KWp0kUfrx4BxDdNCeQK3MNT3Zi9AaolPri7A,6648
  mcp_agent/logging/logger.py,sha256=l02OGX_c5FOyH0rspd4ZvnkJcbb0FahhUhlh2KI8mqE,10724
- mcp_agent/logging/rich_progress.py,sha256=oY9fjb4Tyw6887v8sgO6EGIK4lnmIoR3NNxhA_-Ln_M,4893
+ mcp_agent/logging/rich_progress.py,sha256=NQbW010VxfzgJw8BRaqKVTIFlTNvDfmMcoOt7pxGvzQ,5362
  mcp_agent/logging/transport.py,sha256=m8YsLLu5T8eof_ndpLQs4gHOzqqEL98xsVwBwDsBfxI,17335
  mcp_agent/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  mcp_agent/mcp/common.py,sha256=MpSC0fLO21RcDz4VApah4C8_LisVGz7OXkR17Xw-9mY,431
@@ -158,8 +158,8 @@ mcp_agent/resources/examples/workflows/router.py,sha256=E4x_-c3l4YW9w1i4ARcDtkde
  mcp_agent/resources/examples/workflows/short_story.txt,sha256=X3y_1AyhLFN2AKzCKvucJtDgAFIJfnlbsbGZO5bBWu0,1187
  mcp_agent/tools/tool_definition.py,sha256=L3Pxl-uLEXqlVoo-bYuFTFALeI-2pIU44YgFhsTKEtM,398
  mcp_agent/ui/console_display.py,sha256=UKqax5V2TC0hkZZORmmd6UqUk0DGX7A25E3h1k9f42k,10982
- fast_agent_mcp-0.2.34.dist-info/METADATA,sha256=Vl4WmcpJIKaOYiFHKMy0XYzw5Jmt65zXS0iEYegAJLQ,30799
- fast_agent_mcp-0.2.34.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- fast_agent_mcp-0.2.34.dist-info/entry_points.txt,sha256=oKQeSUVn87pJv8_k1NQ7Ak8cXaaXHCnPAOJRCV_uUVg,230
- fast_agent_mcp-0.2.34.dist-info/licenses/LICENSE,sha256=cN3FxDURL9XuzE5mhK9L2paZo82LTfjwCYVT7e3j0e4,10939
- fast_agent_mcp-0.2.34.dist-info/RECORD,,
+ fast_agent_mcp-0.2.35.dist-info/METADATA,sha256=O4DhiiD77uzvQeJvBbJxWMHwycs3M2fCilt7lJmPzUE,30799
+ fast_agent_mcp-0.2.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ fast_agent_mcp-0.2.35.dist-info/entry_points.txt,sha256=oKQeSUVn87pJv8_k1NQ7Ak8cXaaXHCnPAOJRCV_uUVg,230
+ fast_agent_mcp-0.2.35.dist-info/licenses/LICENSE,sha256=cN3FxDURL9XuzE5mhK9L2paZo82LTfjwCYVT7e3j0e4,10939
+ fast_agent_mcp-0.2.35.dist-info/RECORD,,
mcp_agent/config.py CHANGED
@@ -115,6 +115,14 @@ class AnthropicSettings(BaseModel):

  base_url: str | None = None

+ cache_mode: Literal["off", "prompt", "auto"] = "auto"
+ """
+ Controls how caching is applied for Anthropic models when prompt_caching is enabled globally.
+ - "off": No caching, even if global prompt_caching is true.
+ - "prompt": Caches tools+system prompt (1 block) and template content. Useful for large, static prompts.
+ - "auto": Currently same as "prompt" - caches tools+system prompt (1 block) and template content.
+ """
+
  model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

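The new `cache_mode` field controls Anthropic prompt caching. A minimal sketch of exercising it, assuming `AnthropicSettings` can be constructed directly (it is a Pydantic model, so the `Literal` is validated at construction time, and other fields such as the API key are assumed optional here):

    from mcp_agent.config import AnthropicSettings

    # "off" disables caching entirely, "prompt" caches tools + system prompt and
    # template content, and "auto" (the default) currently behaves like "prompt".
    settings = AnthropicSettings(cache_mode="prompt")
    assert settings.cache_mode == "prompt"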
mcp_agent/context.py CHANGED
@@ -25,7 +25,7 @@ from pydantic import BaseModel, ConfigDict
  from mcp_agent.config import Settings, get_settings
  from mcp_agent.executor.executor import AsyncioExecutor, Executor
  from mcp_agent.executor.task_registry import ActivityRegistry
- from mcp_agent.logging.events import EventFilter
+ from mcp_agent.logging.events import EventFilter, StreamingExclusionFilter
  from mcp_agent.logging.logger import LoggingConfig, get_logger
  from mcp_agent.logging.transport import create_transport
  from mcp_agent.mcp_server_registry import ServerRegistry
@@ -124,7 +124,8 @@ async def configure_logger(config: "Settings") -> None:
  """
  Configure logging and tracing based on the application config.
  """
- event_filter: EventFilter = EventFilter(min_level=config.logger.level)
+ # Use StreamingExclusionFilter to prevent streaming events from flooding logs
+ event_filter: EventFilter = StreamingExclusionFilter(min_level=config.logger.level)
  logger.info(f"Configuring logger with level: {config.logger.level}")
  transport = create_transport(settings=config.logger, event_filter=event_filter)
  await LoggingConfig.configure(
mcp_agent/core/agent_app.py CHANGED
@@ -302,7 +302,7 @@ class AgentApp:
  return

  last_turn = turns[-1]
- input_tokens = last_turn.input_tokens
+ input_tokens = last_turn.display_input_tokens
  output_tokens = last_turn.output_tokens

  # Build cache indicators with bright colors
mcp_agent/event_progress.py CHANGED
@@ -15,6 +15,7 @@ class ProgressAction(str, Enum):
  LOADED = "Loaded"
  INITIALIZED = "Initialized"
  CHATTING = "Chatting"
+ STREAMING = "Streaming" # Special action for real-time streaming updates
  ROUTING = "Routing"
  PLANNING = "Planning"
  READY = "Ready"
@@ -33,12 +34,22 @@ class ProgressEvent(BaseModel):
  target: str
  details: Optional[str] = None
  agent_name: Optional[str] = None
+ streaming_tokens: Optional[str] = None # Special field for streaming token count

  def __str__(self) -> str:
  """Format the progress event for display."""
- base = f"{self.action.ljust(11)}. {self.target}"
- if self.details:
- base += f" - {self.details}"
+ # Special handling for streaming - show token count in action position
+ if self.action == ProgressAction.STREAMING and self.streaming_tokens:
+ # For streaming, show just the token count instead of "Streaming"
+ action_display = self.streaming_tokens.ljust(11)
+ base = f"{action_display}. {self.target}"
+ if self.details:
+ base += f" - {self.details}"
+ else:
+ base = f"{self.action.ljust(11)}. {self.target}"
+ if self.details:
+ base += f" - {self.details}"
+
  if self.agent_name:
  base = f"[{self.agent_name}] {base}"
  return base
@@ -78,7 +89,8 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:

  elif "augmented_llm" in namespace:
  model = event_data.get("model", "")
-
+
+ # For all augmented_llm events, put model info in details column
  details = f"{model}"
  chat_turn = event_data.get("chat_turn")
  if chat_turn is not None:
@@ -87,9 +99,15 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:
  if not target:
  target = event_data.get("target", "unknown")

+ # Extract streaming token count for STREAMING actions
+ streaming_tokens = None
+ if progress_action == ProgressAction.STREAMING:
+ streaming_tokens = event_data.get("details", "")
+
  return ProgressEvent(
  action=ProgressAction(progress_action),
  target=target or "unknown",
  details=details,
  agent_name=event_data.get("agent_name"),
+ streaming_tokens=streaming_tokens,
  )
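A small sketch of the new formatting, assuming `ProgressAction` and `ProgressEvent` are imported from `mcp_agent.event_progress` as above; for STREAMING events the token count replaces the action label in the rendered line:

    from mcp_agent.event_progress import ProgressAction, ProgressEvent

    event = ProgressEvent(
        action=ProgressAction.STREAMING,
        target="claude-sonnet-4-0",
        streaming_tokens="  123",
        agent_name="writer",
    )
    # Prints something like: [writer]   123      . claude-sonnet-4-0
    print(str(event))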
mcp_agent/llm/augmented_llm.py CHANGED
@@ -97,6 +97,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
  PARAM_USE_HISTORY = "use_history"
  PARAM_MAX_ITERATIONS = "max_iterations"
  PARAM_TEMPLATE_VARS = "template_vars"
+
  # Base set of fields that should always be excluded
  BASE_EXCLUDE_FIELDS = {PARAM_METADATA}

@@ -371,16 +372,28 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
  # Start with base arguments
  arguments = base_args.copy()

- # Use provided exclude_fields or fall back to base exclusions
- exclude_fields = exclude_fields or self.BASE_EXCLUDE_FIELDS.copy()
+ # Combine base exclusions with provider-specific exclusions
+ final_exclude_fields = self.BASE_EXCLUDE_FIELDS.copy()
+ if exclude_fields:
+ final_exclude_fields.update(exclude_fields)

  # Add all fields from params that aren't explicitly excluded
- params_dict = request_params.model_dump(exclude=exclude_fields)
+ # Ensure model_dump only includes set fields if that's the desired behavior,
+ # or adjust exclude_unset=True/False as needed.
+ # Default Pydantic v2 model_dump is exclude_unset=False
+ params_dict = request_params.model_dump(exclude=final_exclude_fields)
+
  for key, value in params_dict.items():
+ # Only add if not None and not already in base_args (base_args take precedence)
+ # or if None is a valid value for the provider, this logic might need adjustment.
  if value is not None and key not in arguments:
  arguments[key] = value
+ elif value is not None and key in arguments and arguments[key] is None:
+ # Allow overriding a None in base_args with a set value from params
+ arguments[key] = value

  # Finally, add any metadata fields as a last layer of overrides
+ # This ensures metadata can override anything previously set if keys conflict.
  if request_params.metadata:
  arguments.update(request_params.metadata)

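In plain terms: explicit base arguments win, a `None` placeholder in the base can be filled by a set request parameter, and `metadata` is applied last. A standalone distillation of those rules (hypothetical helper, not part of the package):

    def merge_arguments(base_args: dict, params: dict, metadata: dict | None = None) -> dict:
        arguments = base_args.copy()
        for key, value in params.items():
            if value is not None and key not in arguments:
                arguments[key] = value          # new key from request params
            elif value is not None and key in arguments and arguments[key] is None:
                arguments[key] = value          # fill a None placeholder in base args
        if metadata:
            arguments.update(metadata)          # metadata overrides everything above
        return arguments

    print(merge_arguments({"model": "x", "max_tokens": None}, {"model": "y", "max_tokens": 1024}, {"top_p": 0.9}))
    # {'model': 'x', 'max_tokens': 1024, 'top_p': 0.9}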
mcp_agent/llm/memory.py CHANGED
@@ -35,6 +35,9 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
  def __init__(self) -> None:
  self.history: List[MessageParamT] = []
  self.prompt_messages: List[MessageParamT] = [] # Always included
+ self.conversation_cache_positions: List[int] = [] # Track active conversation cache positions
+ self.cache_walk_distance: int = 6 # Messages between cache blocks
+ self.max_conversation_cache_blocks: int = 2 # Maximum conversation cache blocks

  def extend(self, messages: List[MessageParamT], is_prompt: bool = False) -> None:
  """
@@ -99,5 +102,122 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
  clear_prompts: If True, also clear prompt messages
  """
  self.history = []
+ self.conversation_cache_positions = [] # Reset cache positions
  if clear_prompts:
  self.prompt_messages = []
+
+ def should_apply_conversation_cache(self) -> bool:
+ """
+ Determine if conversation caching should be applied based on walking algorithm.
+
+ Returns:
+ True if we should add or update cache blocks
+ """
+ total_messages = len(self.history)
+
+ # Need at least cache_walk_distance messages to start caching
+ if total_messages < self.cache_walk_distance:
+ return False
+
+ # Check if we need to add a new cache block
+ return len(self._calculate_cache_positions(total_messages)) != len(self.conversation_cache_positions)
+
+ def _calculate_cache_positions(self, total_conversation_messages: int) -> List[int]:
+ """
+ Calculate where cache blocks should be placed using walking algorithm.
+
+ Args:
+ total_conversation_messages: Number of conversation messages (not including prompts)
+
+ Returns:
+ List of positions (relative to conversation start) where cache should be placed
+ """
+ positions = []
+
+ # Place cache blocks every cache_walk_distance messages
+ for i in range(self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance):
+ positions.append(i)
+ if len(positions) >= self.max_conversation_cache_blocks:
+ break
+
+ # Keep only the most recent cache blocks (walking behavior)
+ if len(positions) > self.max_conversation_cache_blocks:
+ positions = positions[-self.max_conversation_cache_blocks:]
+
+ return positions
+
+ def get_conversation_cache_updates(self) -> dict:
+ """
+ Get cache position updates needed for the walking algorithm.
+
+ Returns:
+ Dict with 'add', 'remove', and 'active' position lists (relative to full message array)
+ """
+ total_conversation_messages = len(self.history)
+ new_positions = self._calculate_cache_positions(total_conversation_messages)
+
+ # Convert to absolute positions (including prompt messages)
+ prompt_offset = len(self.prompt_messages)
+ new_absolute_positions = [pos + prompt_offset for pos in new_positions]
+
+ old_positions_set = set(self.conversation_cache_positions)
+ new_positions_set = set(new_absolute_positions)
+
+ return {
+ 'add': sorted(new_positions_set - old_positions_set),
+ 'remove': sorted(old_positions_set - new_positions_set),
+ 'active': sorted(new_absolute_positions)
+ }
+
+ def apply_conversation_cache_updates(self, updates: dict) -> None:
+ """
+ Apply cache position updates.
+
+ Args:
+ updates: Dict from get_conversation_cache_updates()
+ """
+ self.conversation_cache_positions = updates['active'].copy()
+
+ def remove_cache_control_from_messages(self, messages: List[MessageParamT], positions: List[int]) -> None:
+ """
+ Remove cache control from specified message positions.
+
+ Args:
+ messages: The message array to modify
+ positions: List of positions to remove cache control from
+ """
+ for pos in positions:
+ if pos < len(messages):
+ message = messages[pos]
+ if isinstance(message, dict) and "content" in message:
+ content_list = message["content"]
+ if isinstance(content_list, list):
+ for content_block in content_list:
+ if isinstance(content_block, dict) and "cache_control" in content_block:
+ del content_block["cache_control"]
+
+ def add_cache_control_to_messages(self, messages: List[MessageParamT], positions: List[int]) -> int:
+ """
+ Add cache control to specified message positions.
+
+ Args:
+ messages: The message array to modify
+ positions: List of positions to add cache control to
+
+ Returns:
+ Number of cache blocks successfully applied
+ """
+ applied_count = 0
+ for pos in positions:
+ if pos < len(messages):
+ message = messages[pos]
+ if isinstance(message, dict) and "content" in message:
+ content_list = message["content"]
+ if isinstance(content_list, list) and content_list:
+ # Apply cache control to the last content block
+ for content_block in reversed(content_list):
+ if isinstance(content_block, dict):
+ content_block["cache_control"] = {"type": "ephemeral"}
+ applied_count += 1
+ break
+ return applied_count
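The walking algorithm places a cache marker every `cache_walk_distance` conversation messages and keeps at most `max_conversation_cache_blocks` of them. A brief sketch, assuming `SimpleMemory` is importable from `mcp_agent.llm.memory` and that messages are plain Anthropic-style dicts:

    from mcp_agent.llm.memory import SimpleMemory

    memory = SimpleMemory()
    messages = [
        {"role": "user" if i % 2 == 0 else "assistant",
         "content": [{"type": "text", "text": f"turn {i}"}]}
        for i in range(8)
    ]
    memory.extend(messages)  # conversation history only, no prompt messages

    # With cache_walk_distance=6 and no prompt offset, the first block lands on index 5.
    updates = memory.get_conversation_cache_updates()
    print(updates)  # {'add': [5], 'remove': [], 'active': [5]}

    applied = memory.add_cache_control_to_messages(messages, updates["add"])
    memory.apply_conversation_cache_updates(updates)
    print(applied, messages[5]["content"][-1]["cache_control"])  # 1 {'type': 'ephemeral'}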
mcp_agent/llm/model_database.py CHANGED
@@ -109,11 +109,11 @@ class ModelDatabase:

  # TODO update to 32000
  ANTHROPIC_OPUS_4_VERSIONED = ModelParameters(
- context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+ context_window=200000, max_output_tokens=32000, tokenizes=ANTHROPIC_MULTIMODAL
  )
  # TODO update to 64000
  ANTHROPIC_SONNET_4_VERSIONED = ModelParameters(
- context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+ context_window=200000, max_output_tokens=64000, tokenizes=ANTHROPIC_MULTIMODAL
  )

  DEEPSEEK_CHAT_STANDARD = ModelParameters(
mcp_agent/llm/providers/augmented_llm_anthropic.py CHANGED
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, List, Tuple, Type
  from mcp.types import EmbeddedResource, ImageContent, TextContent

  from mcp_agent.core.prompt import Prompt
+ from mcp_agent.event_progress import ProgressAction
  from mcp_agent.llm.provider_types import Provider
  from mcp_agent.llm.providers.multipart_converter_anthropic import (
  AnthropicConverter,
@@ -18,7 +19,8 @@ if TYPE_CHECKING:
  from mcp import ListToolsResult


- from anthropic import Anthropic, AuthenticationError
+ from anthropic import AsyncAnthropic, AuthenticationError
+ from anthropic.lib.streaming import AsyncMessageStream
  from anthropic.types import (
  Message,
  MessageParam,
@@ -78,17 +80,81 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  """Initialize Anthropic-specific default parameters"""
  # Get base defaults from parent (includes ModelDatabase lookup)
  base_params = super()._initialize_default_params(kwargs)
-
+
  # Override with Anthropic-specific settings
  chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
  base_params.model = chosen_model
-
+
  return base_params

  def _base_url(self) -> str | None:
  assert self.context.config
  return self.context.config.anthropic.base_url if self.context.config.anthropic else None

+ def _get_cache_mode(self) -> str:
+ """Get the cache mode configuration."""
+ cache_mode = "auto" # Default to auto
+ if self.context.config and self.context.config.anthropic:
+ cache_mode = self.context.config.anthropic.cache_mode
+ return cache_mode
+
+ async def _process_stream(self, stream: AsyncMessageStream, model: str) -> Message:
+ """Process the streaming response and display real-time token usage."""
+ # Track estimated output tokens by counting text chunks
+ estimated_tokens = 0
+
+ # Process the raw event stream to get token counts
+ async for event in stream:
+ # Count tokens in real-time from content_block_delta events
+ if (
+ event.type == "content_block_delta"
+ and hasattr(event, "delta")
+ and event.delta.type == "text_delta"
+ ):
+ # Rough estimate: 1 token per 4 characters (OpenAI's typical ratio)
+ text_length = len(event.delta.text)
+ estimated_tokens += max(1, text_length // 4)
+
+ # Update progress on every token for real-time display
+ token_str = str(estimated_tokens).rjust(5)
+ # print(f"DEBUG: Streaming tokens: {token_str}")
+ self._emit_streaming_progress(model, token_str)
+
+ # Also check for final message_delta events with actual usage info
+ elif (
+ event.type == "message_delta"
+ and hasattr(event, "usage")
+ and event.usage.output_tokens
+ ):
+ actual_tokens = event.usage.output_tokens
+ token_str = str(actual_tokens).rjust(5)
+ # print(f"DEBUG: Final actual tokens: {token_str}")
+ self._emit_streaming_progress(model, token_str)
+
+ # Get the final message with complete usage data
+ message = await stream.get_final_message()
+
+ # Log final usage information
+ if hasattr(message, "usage") and message.usage:
+ self.logger.info(
+ f"Streaming complete - Model: {model}, Input tokens: {message.usage.input_tokens}, Output tokens: {message.usage.output_tokens}"
+ )
+
+ return message
+
+ def _emit_streaming_progress(self, model: str, token_str: str) -> None:
+ """Emit a streaming progress event that goes directly to progress display."""
+ data = {
+ "progress_action": ProgressAction.STREAMING,
+ "model": model,
+ "agent_name": self.name,
+ "chat_turn": self.chat_turn(),
+ "details": token_str.strip(), # Token count goes in details for STREAMING action
+ }
+ # print(f"DEBUG: Emitting streaming progress event with data: {data}")
+ # Use a special logger level or namespace to avoid polluting regular logs
+ self.logger.info("Streaming progress", data=data)
+
  async def _anthropic_completion(
  self,
  message_param,
@@ -105,7 +171,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  base_url = base_url.rstrip("/v1")

  try:
- anthropic = Anthropic(api_key=api_key, base_url=base_url)
+ anthropic = AsyncAnthropic(api_key=api_key, base_url=base_url)
  messages: List[MessageParam] = []
  params = self.get_request_params(request_params)
  except AuthenticationError as e:
@@ -118,7 +184,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  # if use_history is True
  messages.extend(self.history.get(include_completion_history=params.use_history))

- messages.append(message_param)
+ messages.append(message_param) # message_param is the current user turn
+
+ # Get cache mode configuration
+ cache_mode = self._get_cache_mode()
+ self.logger.debug(f"Anthropic cache_mode: {cache_mode}")

  tool_list: ListToolsResult = await self.aggregator.list_tools()
  available_tools: List[ToolParam] = [
@@ -134,8 +204,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  model = self.default_request_params.model

+ # Note: We'll cache tools+system together by putting cache_control only on system prompt
+
  for i in range(params.max_iterations):
  self._log_chat_progress(self.chat_turn(), model=model)
+
  # Create base arguments dictionary
  base_args = {
  "model": model,
@@ -145,6 +218,60 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  "tools": available_tools,
  }

+ # Apply cache_control to system prompt if cache_mode is not "off"
+ # This caches both tools and system prompt together in one cache block
+ if cache_mode != "off" and base_args["system"]:
+ if isinstance(base_args["system"], str):
+ base_args["system"] = [
+ {
+ "type": "text",
+ "text": base_args["system"],
+ "cache_control": {"type": "ephemeral"},
+ }
+ ]
+ self.logger.debug(
+ "Applied cache_control to system prompt (caches tools+system in one block)"
+ )
+ else:
+ self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+
+ # Apply conversation caching using walking algorithm if in auto mode
+ if cache_mode == "auto" and self.history.should_apply_conversation_cache():
+ cache_updates = self.history.get_conversation_cache_updates()
+
+ # Remove cache control from old positions
+ if cache_updates["remove"]:
+ self.history.remove_cache_control_from_messages(
+ messages, cache_updates["remove"]
+ )
+ self.logger.debug(
+ f"Removed conversation cache_control from positions {cache_updates['remove']}"
+ )
+
+ # Add cache control to new positions
+ if cache_updates["add"]:
+ applied_count = self.history.add_cache_control_to_messages(
+ messages, cache_updates["add"]
+ )
+ if applied_count > 0:
+ self.history.apply_conversation_cache_updates(cache_updates)
+ self.logger.debug(
+ f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
+ )
+
+ # Verify we don't exceed Anthropic's 4 cache block limit
+ total_cache_blocks = applied_count
+ if cache_mode != "off" and base_args["system"]:
+ total_cache_blocks += 1 # tools+system cache block
+ if total_cache_blocks > 4:
+ self.logger.warning(
+ f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
+ )
+ else:
+ self.logger.debug(
+ f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
+ )
+
  if params.maxTokens is not None:
  base_args["max_tokens"] = params.maxTokens

@@ -155,9 +282,10 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  self.logger.debug(f"{arguments}")

- executor_result = await self.executor.execute(anthropic.messages.create, **arguments)
-
- response = executor_result[0]
+ # Use streaming API with helper
+ async with anthropic.messages.stream(**arguments) as stream:
+ # Process the stream
+ response = await self._process_stream(stream, model)

  # Track usage if response is valid and has usage data
  if (
@@ -170,27 +298,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  response.usage, model or DEFAULT_ANTHROPIC_MODEL
  )
  self.usage_accumulator.add_turn(turn_usage)
-
- # # Print raw usage for debugging
- # print(f"\n=== USAGE DEBUG ({model}) ===")
- # print(f"Raw usage: {response.usage}")
- # print(
- # f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
- # )
- # print(
- # f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
- # )
- # print(f"Effective input: {turn_usage.effective_input_tokens}")
- # print(
- # f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
- # )
- # if self.usage_accumulator.context_usage_percentage:
- # print(
- # f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
- # )
- # if self.usage_accumulator.cache_hit_rate:
- # print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
- # print("===========================\n")
+ # self._show_usage(response.usage, turn_usage)
  except Exception as e:
  self.logger.warning(f"Failed to track usage: {e}")

@@ -201,7 +309,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  ) from response
  elif isinstance(response, BaseException):
  error_details = str(response)
- self.logger.error(f"Error: {error_details}", data=executor_result)
+ self.logger.error(f"Error: {error_details}", data=BaseException)

  # Try to extract more useful information for API errors
  if hasattr(response, "status_code") and hasattr(response, "response"):
@@ -214,13 +322,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  # Convert other errors to text response
  error_message = f"Error during generation: {error_details}"
  response = Message(
- id="error", # Required field
- model="error", # Required field
+ id="error",
+ model="error",
  role="assistant",
  type="message",
  content=[TextBlock(type="text", text=error_message)],
- stop_reason="end_turn", # Must be one of the allowed values
- usage=Usage(input_tokens=0, output_tokens=0), # Required field
+ stop_reason="end_turn",
+ usage=Usage(input_tokens=0, output_tokens=0),
  )

  self.logger.debug(
@@ -230,7 +338,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  response_as_message = self.convert_message_to_message_param(response)
  messages.append(response_as_message)
- if response.content[0].type == "text":
+ if response.content and response.content[0].type == "text":
  responses.append(TextContent(type="text", text=response.content[0].text))

  if response.stop_reason == "end_turn":
@@ -290,12 +398,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  # Process all tool calls and collect results
  tool_results = []
- for i, content in enumerate(tool_uses):
- tool_name = content.name
- tool_args = content.input
- tool_use_id = content.id
+ # Use a different loop variable for tool enumeration if 'i' is outer loop counter
+ for tool_idx, content_block in enumerate(tool_uses):
+ tool_name = content_block.name
+ tool_args = content_block.input
+ tool_use_id = content_block.id

- if i == 0: # Only show message for first tool use
+ if tool_idx == 0: # Only show message for first tool use
  await self.show_assistant_message(message_text, tool_name)

  self.show_tool_call(available_tools, tool_name, tool_args)
@@ -320,11 +429,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  if params.use_history:
  # Get current prompt messages
  prompt_messages = self.history.get(include_completion_history=False)
-
- # Calculate new conversation messages (excluding prompts)
  new_messages = messages[len(prompt_messages) :]
-
- # Update conversation history
  self.history.set(new_messages)

  self._log_chat_finished(model=model)
@@ -362,8 +467,26 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  multipart_messages[:-1] if last_message.role == "user" else multipart_messages
  )
  converted = []
+
+ # Get cache mode configuration
+ cache_mode = self._get_cache_mode()
+
  for msg in messages_to_add:
- converted.append(AnthropicConverter.convert_to_anthropic(msg))
+ anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
+
+ # Apply caching to template messages if cache_mode is "prompt" or "auto"
+ if is_template and cache_mode in ["prompt", "auto"] and anthropic_msg.get("content"):
+ content_list = anthropic_msg["content"]
+ if isinstance(content_list, list) and content_list:
+ # Apply cache control to the last content block
+ last_block = content_list[-1]
+ if isinstance(last_block, dict):
+ last_block["cache_control"] = {"type": "ephemeral"}
+ self.logger.debug(
+ f"Applied cache_control to template message with role {anthropic_msg.get('role')}"
+ )
+
+ converted.append(anthropic_msg)

  self.history.extend(converted, is_prompt=is_template)

@@ -398,6 +521,28 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  )
  return self._structured_from_multipart(result, model)

+ def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
+ # Print raw usage for debugging
+ print(f"\n=== USAGE DEBUG ({turn_usage.model}) ===")
+ print(f"Raw usage: {raw_usage}")
+ print(
+ f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+ )
+ print(
+ f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+ )
+ print(f"Effective input: {turn_usage.effective_input_tokens}")
+ print(
+ f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+ )
+ if self.usage_accumulator.context_usage_percentage:
+ print(
+ f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+ )
+ if self.usage_accumulator.cache_hit_rate:
+ print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+ print("===========================\n")
+
  @classmethod
  def convert_message_to_message_param(cls, message: Message, **kwargs) -> MessageParam:
  """Convert a response object to an input parameter object to allow LLM calls to be chained."""
mcp_agent/llm/usage_tracking.py CHANGED
@@ -84,19 +84,32 @@ class TurnUsage(BaseModel):
  @computed_field
  @property
  def current_context_tokens(self) -> int:
- """Current context size after this turn (input + output)"""
- return self.input_tokens + self.output_tokens
+ """Current context size after this turn (total input including cache + output)"""
+ # For Anthropic: input_tokens + cache_read_tokens represents total input context
+ total_input = self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+ return total_input + self.output_tokens

  @computed_field
  @property
  def effective_input_tokens(self) -> int:
- """Input tokens excluding cache reads (tokens actually processed)"""
- return max(
- 0,
- self.input_tokens
- - self.cache_usage.cache_read_tokens
- - self.cache_usage.cache_hit_tokens,
- )
+ """Input tokens actually processed (new tokens, not from cache)"""
+ # For Anthropic: input_tokens already excludes cached content
+ # For other providers: subtract cache hits from input_tokens
+ if self.provider == Provider.ANTHROPIC:
+ return self.input_tokens
+ else:
+ return max(0, self.input_tokens - self.cache_usage.cache_hit_tokens)
+
+ @computed_field
+ @property
+ def display_input_tokens(self) -> int:
+ """Input tokens to display for 'Last turn' (total submitted tokens)"""
+ # For Anthropic: input_tokens excludes cache, so add cache tokens
+ if self.provider == Provider.ANTHROPIC:
+ return self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+ else:
+ # For OpenAI/Google: input_tokens already includes cached tokens
+ return self.input_tokens

  @classmethod
  def from_anthropic(cls, usage: AnthropicUsage, model: str) -> "TurnUsage":
@@ -204,8 +217,11 @@ class UsageAccumulator(BaseModel):
  @computed_field
  @property
  def cumulative_input_tokens(self) -> int:
- """Total input tokens charged across all turns"""
- return sum(turn.input_tokens for turn in self.turns)
+ """Total input tokens charged across all turns (including cache tokens)"""
+ return sum(
+ turn.input_tokens + turn.cache_usage.cache_read_tokens + turn.cache_usage.cache_write_tokens
+ for turn in self.turns
+ )

  @computed_field
  @property
@@ -216,8 +232,8 @@
  @computed_field
  @property
  def cumulative_billing_tokens(self) -> int:
- """Total tokens charged across all turns"""
- return sum(turn.total_tokens for turn in self.turns)
+ """Total tokens charged across all turns (including cache tokens)"""
+ return self.cumulative_input_tokens + self.cumulative_output_tokens

  @computed_field
  @property
@@ -258,11 +274,12 @@
  @computed_field
  @property
  def cache_hit_rate(self) -> Optional[float]:
- """Percentage of input tokens served from cache"""
- if self.cumulative_input_tokens == 0:
- return None
+ """Percentage of total input context served from cache"""
  cache_tokens = self.cumulative_cache_read_tokens + self.cumulative_cache_hit_tokens
- return (cache_tokens / self.cumulative_input_tokens) * 100
+ total_input_context = self.cumulative_input_tokens + cache_tokens
+ if total_input_context == 0:
+ return None
+ return (cache_tokens / total_input_context) * 100

  @computed_field
  @property
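A worked example of the revised per-turn accounting, with illustrative numbers: for an Anthropic turn, `input_tokens` excludes cached content, so cache reads and writes are added back for display and context-size purposes.

    input_tokens = 400          # non-cached input processed this turn
    cache_read_tokens = 3_000   # prompt prefix served from cache
    cache_write_tokens = 0
    output_tokens = 250

    display_input_tokens = input_tokens + cache_read_tokens + cache_write_tokens  # 3400
    current_context_tokens = display_input_tokens + output_tokens                 # 3650
    effective_input_tokens = input_tokens                                         # 400 (Anthropic)
    print(display_input_tokens, current_context_tokens, effective_input_tokens)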
mcp_agent/logging/events.py CHANGED
@@ -117,3 +117,27 @@ class SamplingFilter(EventFilter):
  if not super().matches(event):
  return False
  return random.random() < self.sample_rate
+
+
+ class StreamingExclusionFilter(EventFilter):
+ """
+ Event filter that excludes streaming progress events from logs.
+ This prevents token count updates from flooding the logs when info level is enabled.
+ """
+
+ def matches(self, event: Event) -> bool:
+ # First check if it passes the base filter
+ if not super().matches(event):
+ return False
+
+ # Exclude events with "Streaming progress" message
+ if event.message == "Streaming progress":
+ return False
+
+ # Also check for events with progress_action = STREAMING in data
+ if event.data and isinstance(event.data.get("data"), dict):
+ event_data = event.data["data"]
+ if event_data.get("progress_action") == "Streaming":
+ return False
+
+ return True
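As wired up in `configure_logger` earlier in this diff, the new filter is a drop-in replacement for `EventFilter`. A sketch of constructing it, assuming a string level such as "info" (the value passed from `config.logger.level`):

    from mcp_agent.logging.events import StreamingExclusionFilter

    # Passes whatever the base EventFilter passes, except "Streaming progress"
    # events and events whose data carries progress_action == "Streaming".
    event_filter = StreamingExclusionFilter(min_level="info")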
mcp_agent/logging/rich_progress.py CHANGED
@@ -73,6 +73,7 @@ class RichProgressDisplay:
  ProgressAction.LOADED: "dim green",
  ProgressAction.INITIALIZED: "dim green",
  ProgressAction.CHATTING: "bold blue",
+ ProgressAction.STREAMING: "bold blue", # Same color as chatting
  ProgressAction.ROUTING: "bold blue",
  ProgressAction.PLANNING: "bold blue",
  ProgressAction.READY: "dim green",
@@ -100,9 +101,16 @@ class RichProgressDisplay:
  task_id = self._taskmap[task_name]

  # Ensure no None values in the update
+ # For streaming, use custom description immediately to avoid flashing
+ if event.action == ProgressAction.STREAMING and event.streaming_tokens:
+ formatted_tokens = f"↓ {event.streaming_tokens.strip()}".ljust(15)
+ description = f"[{self._get_action_style(event.action)}]{formatted_tokens}"
+ else:
+ description = f"[{self._get_action_style(event.action)}]{event.action.value:<15}"
+
  self._progress.update(
  task_id,
- description=f"[{self._get_action_style(event.action)}]{event.action.value:<15}",
+ description=description,
  target=event.target or task_name, # Use task_name as fallback for target
  details=event.details or "",
  task_name=task_name,