fast-agent-mcp 0.2.34__py3-none-any.whl → 0.2.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: fast-agent-mcp
- Version: 0.2.34
+ Version: 0.2.35
  Summary: Define, Prompt and Test MCP enabled Agents and Workflows
  Author-email: Shaun Smith <fastagent@llmindset.co.uk>
  License: Apache License
@@ -1,10 +1,10 @@
  mcp_agent/__init__.py,sha256=18T0AG0W9sJhTY38O9GFFOzliDhxx9p87CvRyti9zbw,1620
  mcp_agent/app.py,sha256=3mtHP1nRQcRaKhhxgTmCOv00alh70nT7UxNA8bN47QE,5560
- mcp_agent/config.py,sha256=9GDvMugKIeT9SKRGGEv2gN3lsC78hQ_Oy-HSpItuqo0,15841
+ mcp_agent/config.py,sha256=ZIGFCSWrhMqhlHhapQf3QXo9N6EuTVy5iZIFiiqwE2M,16289
  mcp_agent/console.py,sha256=Gjf2QLFumwG1Lav__c07X_kZxxEUSkzV-1_-YbAwcwo,813
- mcp_agent/context.py,sha256=H7JbaZ_8SzzTagLmIgUPUPxX5370C5qjQAsasFPZG2Y,7510
+ mcp_agent/context.py,sha256=f729LJcW4YoFXb0Rg_kEU-5FlrOnFgqplI6W0fVqomg,7631
  mcp_agent/context_dependent.py,sha256=QXfhw3RaQCKfscEEBRGuZ3sdMWqkgShz2jJ1ivGGX1I,1455
- mcp_agent/event_progress.py,sha256=040lrCCclcOuryi07YGSej25kTQF5_JMXY12Yj-3u1U,2773
+ mcp_agent/event_progress.py,sha256=d7T1hQ1D289MYh2Z5bMPB4JqjGqTOzveJuOHE03B_Xo,3720
  mcp_agent/mcp_server_registry.py,sha256=b3iSb-0ULYc5yUG2KHav41WGwSYWiJCGQsOwWHWByxo,12346
  mcp_agent/progress_display.py,sha256=GeJU9VUt6qKsFVymG688hCMVCsAygG9ifiiEb5IcbN4,361
  mcp_agent/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,7 +28,7 @@ mcp_agent/cli/commands/quickstart.py,sha256=SM3CHMzDgvTxIpKjFuX9BrS_N1vRoXNBDaO9
  mcp_agent/cli/commands/setup.py,sha256=eOEd4TL-b0DaDeSJMGOfNOsTEItoZ67W88eTP4aP-bo,6482
  mcp_agent/cli/commands/url_parser.py,sha256=5VdtcHRHzi67YignStVbz7u-rcvNNErw9oJLAUFOtEY,5855
  mcp_agent/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mcp_agent/core/agent_app.py,sha256=7DmOn55j9fQ-ngoGcJP82R_Z_y93FonuEkORTlP8X-w,12111
+ mcp_agent/core/agent_app.py,sha256=KJdx0Qbh7Gb4wA8_LwKriogc27SraRIrvMqHsOCVVt0,12119
  mcp_agent/core/agent_types.py,sha256=DogMcOoRwk70CFSetZ09madRcPDlhPn1iXZVeOcLV8Q,1507
  mcp_agent/core/direct_decorators.py,sha256=HY_7S7OtfZPqAeqC3_hPYa1d6zTnEyiOeI7JxvnWqTM,16786
  mcp_agent/core/direct_factory.py,sha256=UNAjHHFRLrQ3D934RMsKsh0Oas7LXLIVslgrzcetM6A,19090
@@ -50,23 +50,23 @@ mcp_agent/human_input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
  mcp_agent/human_input/handler.py,sha256=s712Z5ssTCwjL9-VKoIdP5CtgMh43YvepynYisiWTTA,3144
  mcp_agent/human_input/types.py,sha256=RtWBOVzy8vnYoQrc36jRLn8z8N3C4pDPMBN5vF6qM5Y,1476
  mcp_agent/llm/__init__.py,sha256=d8zgwG-bRFuwiMNMYkywg_qytk4P8lawyld_meuUmHI,68
- mcp_agent/llm/augmented_llm.py,sha256=vthXYuEIDunXITr1zkrIg8sBypnuQ5pcCheKRaCR-kw,24943
+ mcp_agent/llm/augmented_llm.py,sha256=ekVZQla3oOyWSysJif-2ZutklYB8HTK99I7HzSye6ag,25705
  mcp_agent/llm/augmented_llm_passthrough.py,sha256=F8KifmTwoQ7zyncjmoRek8SBfGdgc9yc5LRXwMQH-bg,8640
  mcp_agent/llm/augmented_llm_playback.py,sha256=BQeBXRpO-xGAY9wIJxyde6xpHmZEdQPLd32frF8t3QQ,4916
  mcp_agent/llm/augmented_llm_slow.py,sha256=DDSD8bL2flmQrVHZm-UDs7sR8aHRWkDOcOW-mX_GPok,2067
- mcp_agent/llm/memory.py,sha256=HQ_c1QemOUjrkY6Z2omE6BG5fXga7y4jN7KCMOuGjPs,3345
- mcp_agent/llm/model_database.py,sha256=cBhKFnofA_9M8qAcr0Kvzui5fSJkWMMGFWOOvqkU-DQ,8485
+ mcp_agent/llm/memory.py,sha256=pTOaTDV3EA3X68yKwEtUAu7s0xGIQQ_cKBhfYUnfR0w,8614
+ mcp_agent/llm/model_database.py,sha256=mfy039QZP_8-f0aHWR0Fpj2qnlys5430haSzrA86aXw,8485
  mcp_agent/llm/model_factory.py,sha256=u60O4SWe22wN6CpmIfaF4C5aUziJs8O3N0Jo7erPjp8,10753
  mcp_agent/llm/prompt_utils.py,sha256=yWQHykoK13QRF7evHUKxVF0SpVLN-Bsft0Yixzvn0g0,4825
  mcp_agent/llm/provider_key_manager.py,sha256=usMWozSMhek_FIlM1MeVDwAbs-P96SrEVPGd3YwF9E4,2833
  mcp_agent/llm/provider_types.py,sha256=AkQl1r67wZ0gSIY6CXsiZiS3uw5DBF9E5yhIn3THayk,633
  mcp_agent/llm/sampling_converter.py,sha256=C7wPBlmT0eD90XWabC22zkxsrVHKCrjwIwg6cG628cI,2926
  mcp_agent/llm/sampling_format_converter.py,sha256=xGz4odHpOcP7--eFaJaFtUR8eR9jxZS7MnLH6J7n0EU,1263
- mcp_agent/llm/usage_tracking.py,sha256=JOCmywn7f0-aJHUIG9DaTbFVxTqwMM-0hc4-lEhNkBM,14201
+ mcp_agent/llm/usage_tracking.py,sha256=HdBehPMt0bZzEgRmTnbMdgpLVuTp6L_VJTQx5Z25zCM,15321
  mcp_agent/llm/providers/__init__.py,sha256=heVxtmuqFJOnjjxHz4bWSqTAxXoN1E8twC_gQ_yJpHk,265
  mcp_agent/llm/providers/anthropic_utils.py,sha256=vYDN5G5jKMhD2CQg8veJYab7tvvzYkDMq8M1g_hUAQg,3275
  mcp_agent/llm/providers/augmented_llm_aliyun.py,sha256=XylkJKZ9theSVUxJKOZkf1244hgzng4Ng4Dr209Qb-w,1101
- mcp_agent/llm/providers/augmented_llm_anthropic.py,sha256=uvPANaseesS7K3gu4uM1E7OwurjVvM-e6tuRezP5IIY,17651
+ mcp_agent/llm/providers/augmented_llm_anthropic.py,sha256=xCLqLi2HeBHPSvN_xD9Gl26ENTGT0E_1KLnN38BVXiE,24624
  mcp_agent/llm/providers/augmented_llm_azure.py,sha256=VPrD6lNrEw6EdYUTa9MDvHDNIPjJU5CG5xnKCM3JYdA,5878
  mcp_agent/llm/providers/augmented_llm_deepseek.py,sha256=zI9a90dwT4r6E1f_xp4K50Cj9sD7y7kNRgjo0s1pd5w,3804
  mcp_agent/llm/providers/augmented_llm_generic.py,sha256=5Uq8ZBhcFuQTt7koP_5ykolREh2iWu8zKhNbh3pM9lQ,1210
@@ -84,11 +84,11 @@ mcp_agent/llm/providers/openai_utils.py,sha256=T4bTCL9f7DsoS_zoKgQKv_FUv_4n98vgb
  mcp_agent/llm/providers/sampling_converter_anthropic.py,sha256=35WzBWkPklnuMlu5S6XsQIq0YL58NOy8Ja6A_l4m6eM,1612
  mcp_agent/llm/providers/sampling_converter_openai.py,sha256=GA-LfTJzOwH9Vwk0Q4K37nG6zxpzqS-JGaM7cTH-Epc,841
  mcp_agent/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mcp_agent/logging/events.py,sha256=iHTSgrxK3BWnRoej6NhxVL5899MIHr-ktsA7hxMoo9k,3437
+ mcp_agent/logging/events.py,sha256=dSJJfuCd59-ZyYTVcf0M4HQd6iXb5k50PSAeoq1CpH0,4278
  mcp_agent/logging/json_serializer.py,sha256=qkfxnR9ka6OgvwSpM2CggELbEtzzkApm0s_KYz11RDY,5791
  mcp_agent/logging/listeners.py,sha256=_S4Jp5_KWp0kUfrx4BxDdNCeQK3MNT3Zi9AaolPri7A,6648
  mcp_agent/logging/logger.py,sha256=l02OGX_c5FOyH0rspd4ZvnkJcbb0FahhUhlh2KI8mqE,10724
- mcp_agent/logging/rich_progress.py,sha256=oY9fjb4Tyw6887v8sgO6EGIK4lnmIoR3NNxhA_-Ln_M,4893
+ mcp_agent/logging/rich_progress.py,sha256=NQbW010VxfzgJw8BRaqKVTIFlTNvDfmMcoOt7pxGvzQ,5362
  mcp_agent/logging/transport.py,sha256=m8YsLLu5T8eof_ndpLQs4gHOzqqEL98xsVwBwDsBfxI,17335
  mcp_agent/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  mcp_agent/mcp/common.py,sha256=MpSC0fLO21RcDz4VApah4C8_LisVGz7OXkR17Xw-9mY,431
@@ -158,8 +158,8 @@ mcp_agent/resources/examples/workflows/router.py,sha256=E4x_-c3l4YW9w1i4ARcDtkde
  mcp_agent/resources/examples/workflows/short_story.txt,sha256=X3y_1AyhLFN2AKzCKvucJtDgAFIJfnlbsbGZO5bBWu0,1187
  mcp_agent/tools/tool_definition.py,sha256=L3Pxl-uLEXqlVoo-bYuFTFALeI-2pIU44YgFhsTKEtM,398
  mcp_agent/ui/console_display.py,sha256=UKqax5V2TC0hkZZORmmd6UqUk0DGX7A25E3h1k9f42k,10982
- fast_agent_mcp-0.2.34.dist-info/METADATA,sha256=Vl4WmcpJIKaOYiFHKMy0XYzw5Jmt65zXS0iEYegAJLQ,30799
- fast_agent_mcp-0.2.34.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- fast_agent_mcp-0.2.34.dist-info/entry_points.txt,sha256=oKQeSUVn87pJv8_k1NQ7Ak8cXaaXHCnPAOJRCV_uUVg,230
- fast_agent_mcp-0.2.34.dist-info/licenses/LICENSE,sha256=cN3FxDURL9XuzE5mhK9L2paZo82LTfjwCYVT7e3j0e4,10939
- fast_agent_mcp-0.2.34.dist-info/RECORD,,
+ fast_agent_mcp-0.2.35.dist-info/METADATA,sha256=O4DhiiD77uzvQeJvBbJxWMHwycs3M2fCilt7lJmPzUE,30799
+ fast_agent_mcp-0.2.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ fast_agent_mcp-0.2.35.dist-info/entry_points.txt,sha256=oKQeSUVn87pJv8_k1NQ7Ak8cXaaXHCnPAOJRCV_uUVg,230
+ fast_agent_mcp-0.2.35.dist-info/licenses/LICENSE,sha256=cN3FxDURL9XuzE5mhK9L2paZo82LTfjwCYVT7e3j0e4,10939
+ fast_agent_mcp-0.2.35.dist-info/RECORD,,
mcp_agent/config.py CHANGED
@@ -115,6 +115,14 @@ class AnthropicSettings(BaseModel):

  base_url: str | None = None

+ cache_mode: Literal["off", "prompt", "auto"] = "auto"
+ """
+ Controls how caching is applied for Anthropic models when prompt_caching is enabled globally.
+ - "off": No caching, even if global prompt_caching is true.
+ - "prompt": Caches tools+system prompt (1 block) and template content. Useful for large, static prompts.
+ - "auto": Currently same as "prompt" - caches tools+system prompt (1 block) and template content.
+ """
+
  model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

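The new `cache_mode` field controls Anthropic prompt caching. A minimal sketch of exercising it, assuming `AnthropicSettings` can be constructed directly (it is a Pydantic model, so the `Literal` is validated at construction time, and other fields such as the API key are assumed optional here):

    from mcp_agent.config import AnthropicSettings

    # "off" disables caching entirely, "prompt" caches tools + system prompt and
    # template content, and "auto" (the default) currently behaves like "prompt".
    settings = AnthropicSettings(cache_mode="prompt")
    assert settings.cache_mode == "prompt"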
mcp_agent/context.py CHANGED
@@ -25,7 +25,7 @@ from pydantic import BaseModel, ConfigDict
  from mcp_agent.config import Settings, get_settings
  from mcp_agent.executor.executor import AsyncioExecutor, Executor
  from mcp_agent.executor.task_registry import ActivityRegistry
- from mcp_agent.logging.events import EventFilter
+ from mcp_agent.logging.events import EventFilter, StreamingExclusionFilter
  from mcp_agent.logging.logger import LoggingConfig, get_logger
  from mcp_agent.logging.transport import create_transport
  from mcp_agent.mcp_server_registry import ServerRegistry
@@ -124,7 +124,8 @@ async def configure_logger(config: "Settings") -> None:
  """
  Configure logging and tracing based on the application config.
  """
- event_filter: EventFilter = EventFilter(min_level=config.logger.level)
+ # Use StreamingExclusionFilter to prevent streaming events from flooding logs
+ event_filter: EventFilter = StreamingExclusionFilter(min_level=config.logger.level)
  logger.info(f"Configuring logger with level: {config.logger.level}")
  transport = create_transport(settings=config.logger, event_filter=event_filter)
  await LoggingConfig.configure(
mcp_agent/core/agent_app.py CHANGED
@@ -302,7 +302,7 @@ class AgentApp:
  return

  last_turn = turns[-1]
- input_tokens = last_turn.input_tokens
+ input_tokens = last_turn.display_input_tokens
  output_tokens = last_turn.output_tokens

  # Build cache indicators with bright colors
mcp_agent/event_progress.py CHANGED
@@ -15,6 +15,7 @@ class ProgressAction(str, Enum):
  LOADED = "Loaded"
  INITIALIZED = "Initialized"
  CHATTING = "Chatting"
+ STREAMING = "Streaming" # Special action for real-time streaming updates
  ROUTING = "Routing"
  PLANNING = "Planning"
  READY = "Ready"
@@ -33,12 +34,22 @@ class ProgressEvent(BaseModel):
  target: str
  details: Optional[str] = None
  agent_name: Optional[str] = None
+ streaming_tokens: Optional[str] = None # Special field for streaming token count

  def __str__(self) -> str:
  """Format the progress event for display."""
- base = f"{self.action.ljust(11)}. {self.target}"
- if self.details:
- base += f" - {self.details}"
+ # Special handling for streaming - show token count in action position
+ if self.action == ProgressAction.STREAMING and self.streaming_tokens:
+ # For streaming, show just the token count instead of "Streaming"
+ action_display = self.streaming_tokens.ljust(11)
+ base = f"{action_display}. {self.target}"
+ if self.details:
+ base += f" - {self.details}"
+ else:
+ base = f"{self.action.ljust(11)}. {self.target}"
+ if self.details:
+ base += f" - {self.details}"
+
  if self.agent_name:
  base = f"[{self.agent_name}] {base}"
  return base
@@ -78,7 +89,8 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:

  elif "augmented_llm" in namespace:
  model = event_data.get("model", "")
-
+
+ # For all augmented_llm events, put model info in details column
  details = f"{model}"
  chat_turn = event_data.get("chat_turn")
  if chat_turn is not None:
@@ -87,9 +99,15 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:
  if not target:
  target = event_data.get("target", "unknown")

+ # Extract streaming token count for STREAMING actions
+ streaming_tokens = None
+ if progress_action == ProgressAction.STREAMING:
+ streaming_tokens = event_data.get("details", "")
+
  return ProgressEvent(
  action=ProgressAction(progress_action),
  target=target or "unknown",
  details=details,
  agent_name=event_data.get("agent_name"),
+ streaming_tokens=streaming_tokens,
  )
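A small sketch of the new formatting, assuming `ProgressAction` and `ProgressEvent` are imported from `mcp_agent.event_progress` as above; for STREAMING events the token count replaces the action label in the rendered line:

    from mcp_agent.event_progress import ProgressAction, ProgressEvent

    event = ProgressEvent(
        action=ProgressAction.STREAMING,
        target="claude-sonnet-4-0",
        streaming_tokens="  123",
        agent_name="writer",
    )
    # Prints something like: [writer]   123      . claude-sonnet-4-0
    print(str(event))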
mcp_agent/llm/augmented_llm.py CHANGED
@@ -97,6 +97,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
  PARAM_USE_HISTORY = "use_history"
  PARAM_MAX_ITERATIONS = "max_iterations"
  PARAM_TEMPLATE_VARS = "template_vars"
+
  # Base set of fields that should always be excluded
  BASE_EXCLUDE_FIELDS = {PARAM_METADATA}

@@ -371,16 +372,28 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
  # Start with base arguments
  arguments = base_args.copy()

- # Use provided exclude_fields or fall back to base exclusions
- exclude_fields = exclude_fields or self.BASE_EXCLUDE_FIELDS.copy()
+ # Combine base exclusions with provider-specific exclusions
+ final_exclude_fields = self.BASE_EXCLUDE_FIELDS.copy()
+ if exclude_fields:
+ final_exclude_fields.update(exclude_fields)

  # Add all fields from params that aren't explicitly excluded
- params_dict = request_params.model_dump(exclude=exclude_fields)
+ # Ensure model_dump only includes set fields if that's the desired behavior,
+ # or adjust exclude_unset=True/False as needed.
+ # Default Pydantic v2 model_dump is exclude_unset=False
+ params_dict = request_params.model_dump(exclude=final_exclude_fields)
+
  for key, value in params_dict.items():
+ # Only add if not None and not already in base_args (base_args take precedence)
+ # or if None is a valid value for the provider, this logic might need adjustment.
  if value is not None and key not in arguments:
  arguments[key] = value
+ elif value is not None and key in arguments and arguments[key] is None:
+ # Allow overriding a None in base_args with a set value from params
+ arguments[key] = value

  # Finally, add any metadata fields as a last layer of overrides
+ # This ensures metadata can override anything previously set if keys conflict.
  if request_params.metadata:
  arguments.update(request_params.metadata)

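In plain terms: explicit base arguments win, a `None` placeholder in the base can be filled by a set request parameter, and `metadata` is applied last. A standalone distillation of those rules (hypothetical helper, not part of the package):

    def merge_arguments(base_args: dict, params: dict, metadata: dict | None = None) -> dict:
        arguments = base_args.copy()
        for key, value in params.items():
            if value is not None and key not in arguments:
                arguments[key] = value          # new key from request params
            elif value is not None and key in arguments and arguments[key] is None:
                arguments[key] = value          # fill a None placeholder in base args
        if metadata:
            arguments.update(metadata)          # metadata overrides everything above
        return arguments

    print(merge_arguments({"model": "x", "max_tokens": None}, {"model": "y", "max_tokens": 1024}, {"top_p": 0.9}))
    # {'model': 'x', 'max_tokens': 1024, 'top_p': 0.9}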
mcp_agent/llm/memory.py CHANGED
@@ -35,6 +35,9 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
  def __init__(self) -> None:
  self.history: List[MessageParamT] = []
  self.prompt_messages: List[MessageParamT] = [] # Always included
+ self.conversation_cache_positions: List[int] = [] # Track active conversation cache positions
+ self.cache_walk_distance: int = 6 # Messages between cache blocks
+ self.max_conversation_cache_blocks: int = 2 # Maximum conversation cache blocks

  def extend(self, messages: List[MessageParamT], is_prompt: bool = False) -> None:
  """
@@ -99,5 +102,122 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
  clear_prompts: If True, also clear prompt messages
  """
  self.history = []
+ self.conversation_cache_positions = [] # Reset cache positions
  if clear_prompts:
  self.prompt_messages = []
+
+ def should_apply_conversation_cache(self) -> bool:
+ """
+ Determine if conversation caching should be applied based on walking algorithm.
+
+ Returns:
+ True if we should add or update cache blocks
+ """
+ total_messages = len(self.history)
+
+ # Need at least cache_walk_distance messages to start caching
+ if total_messages < self.cache_walk_distance:
+ return False
+
+ # Check if we need to add a new cache block
+ return len(self._calculate_cache_positions(total_messages)) != len(self.conversation_cache_positions)
+
+ def _calculate_cache_positions(self, total_conversation_messages: int) -> List[int]:
+ """
+ Calculate where cache blocks should be placed using walking algorithm.
+
+ Args:
+ total_conversation_messages: Number of conversation messages (not including prompts)
+
+ Returns:
+ List of positions (relative to conversation start) where cache should be placed
+ """
+ positions = []
+
+ # Place cache blocks every cache_walk_distance messages
+ for i in range(self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance):
+ positions.append(i)
+ if len(positions) >= self.max_conversation_cache_blocks:
+ break
+
+ # Keep only the most recent cache blocks (walking behavior)
+ if len(positions) > self.max_conversation_cache_blocks:
+ positions = positions[-self.max_conversation_cache_blocks:]
+
+ return positions
+
+ def get_conversation_cache_updates(self) -> dict:
+ """
+ Get cache position updates needed for the walking algorithm.
+
+ Returns:
+ Dict with 'add', 'remove', and 'active' position lists (relative to full message array)
+ """
+ total_conversation_messages = len(self.history)
+ new_positions = self._calculate_cache_positions(total_conversation_messages)
+
+ # Convert to absolute positions (including prompt messages)
+ prompt_offset = len(self.prompt_messages)
+ new_absolute_positions = [pos + prompt_offset for pos in new_positions]
+
+ old_positions_set = set(self.conversation_cache_positions)
+ new_positions_set = set(new_absolute_positions)
+
+ return {
+ 'add': sorted(new_positions_set - old_positions_set),
+ 'remove': sorted(old_positions_set - new_positions_set),
+ 'active': sorted(new_absolute_positions)
+ }
+
+ def apply_conversation_cache_updates(self, updates: dict) -> None:
+ """
+ Apply cache position updates.
+
+ Args:
+ updates: Dict from get_conversation_cache_updates()
+ """
+ self.conversation_cache_positions = updates['active'].copy()
+
+ def remove_cache_control_from_messages(self, messages: List[MessageParamT], positions: List[int]) -> None:
+ """
+ Remove cache control from specified message positions.
+
+ Args:
+ messages: The message array to modify
+ positions: List of positions to remove cache control from
+ """
+ for pos in positions:
+ if pos < len(messages):
+ message = messages[pos]
+ if isinstance(message, dict) and "content" in message:
+ content_list = message["content"]
+ if isinstance(content_list, list):
+ for content_block in content_list:
+ if isinstance(content_block, dict) and "cache_control" in content_block:
+ del content_block["cache_control"]
+
+ def add_cache_control_to_messages(self, messages: List[MessageParamT], positions: List[int]) -> int:
+ """
+ Add cache control to specified message positions.
+
+ Args:
+ messages: The message array to modify
+ positions: List of positions to add cache control to
+
+ Returns:
+ Number of cache blocks successfully applied
+ """
+ applied_count = 0
+ for pos in positions:
+ if pos < len(messages):
+ message = messages[pos]
+ if isinstance(message, dict) and "content" in message:
+ content_list = message["content"]
+ if isinstance(content_list, list) and content_list:
+ # Apply cache control to the last content block
+ for content_block in reversed(content_list):
+ if isinstance(content_block, dict):
+ content_block["cache_control"] = {"type": "ephemeral"}
+ applied_count += 1
+ break
+ return applied_count
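The walking algorithm places a cache marker every `cache_walk_distance` conversation messages and keeps at most `max_conversation_cache_blocks` of them. A brief sketch, assuming `SimpleMemory` is importable from `mcp_agent.llm.memory` and that messages are plain Anthropic-style dicts:

    from mcp_agent.llm.memory import SimpleMemory

    memory = SimpleMemory()
    messages = [
        {"role": "user" if i % 2 == 0 else "assistant",
         "content": [{"type": "text", "text": f"turn {i}"}]}
        for i in range(8)
    ]
    memory.extend(messages)  # conversation history only, no prompt messages

    # With cache_walk_distance=6 and no prompt offset, the first block lands on index 5.
    updates = memory.get_conversation_cache_updates()
    print(updates)  # {'add': [5], 'remove': [], 'active': [5]}

    applied = memory.add_cache_control_to_messages(messages, updates["add"])
    memory.apply_conversation_cache_updates(updates)
    print(applied, messages[5]["content"][-1]["cache_control"])  # 1 {'type': 'ephemeral'}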
mcp_agent/llm/model_database.py CHANGED
@@ -109,11 +109,11 @@ class ModelDatabase:

  # TODO update to 32000
  ANTHROPIC_OPUS_4_VERSIONED = ModelParameters(
- context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+ context_window=200000, max_output_tokens=32000, tokenizes=ANTHROPIC_MULTIMODAL
  )
  # TODO update to 64000
  ANTHROPIC_SONNET_4_VERSIONED = ModelParameters(
- context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+ context_window=200000, max_output_tokens=64000, tokenizes=ANTHROPIC_MULTIMODAL
  )

  DEEPSEEK_CHAT_STANDARD = ModelParameters(
mcp_agent/llm/providers/augmented_llm_anthropic.py CHANGED
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, List, Tuple, Type
  from mcp.types import EmbeddedResource, ImageContent, TextContent

  from mcp_agent.core.prompt import Prompt
+ from mcp_agent.event_progress import ProgressAction
  from mcp_agent.llm.provider_types import Provider
  from mcp_agent.llm.providers.multipart_converter_anthropic import (
  AnthropicConverter,
@@ -18,7 +19,8 @@ if TYPE_CHECKING:
  from mcp import ListToolsResult


- from anthropic import Anthropic, AuthenticationError
+ from anthropic import AsyncAnthropic, AuthenticationError
+ from anthropic.lib.streaming import AsyncMessageStream
  from anthropic.types import (
  Message,
  MessageParam,
@@ -78,17 +80,81 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  """Initialize Anthropic-specific default parameters"""
  # Get base defaults from parent (includes ModelDatabase lookup)
  base_params = super()._initialize_default_params(kwargs)
-
+
  # Override with Anthropic-specific settings
  chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
  base_params.model = chosen_model
-
+
  return base_params

  def _base_url(self) -> str | None:
  assert self.context.config
  return self.context.config.anthropic.base_url if self.context.config.anthropic else None

+ def _get_cache_mode(self) -> str:
+ """Get the cache mode configuration."""
+ cache_mode = "auto" # Default to auto
+ if self.context.config and self.context.config.anthropic:
+ cache_mode = self.context.config.anthropic.cache_mode
+ return cache_mode
+
+ async def _process_stream(self, stream: AsyncMessageStream, model: str) -> Message:
+ """Process the streaming response and display real-time token usage."""
+ # Track estimated output tokens by counting text chunks
+ estimated_tokens = 0
+
+ # Process the raw event stream to get token counts
+ async for event in stream:
+ # Count tokens in real-time from content_block_delta events
+ if (
+ event.type == "content_block_delta"
+ and hasattr(event, "delta")
+ and event.delta.type == "text_delta"
+ ):
+ # Rough estimate: 1 token per 4 characters (OpenAI's typical ratio)
+ text_length = len(event.delta.text)
+ estimated_tokens += max(1, text_length // 4)
+
+ # Update progress on every token for real-time display
+ token_str = str(estimated_tokens).rjust(5)
+ # print(f"DEBUG: Streaming tokens: {token_str}")
+ self._emit_streaming_progress(model, token_str)
+
+ # Also check for final message_delta events with actual usage info
+ elif (
+ event.type == "message_delta"
+ and hasattr(event, "usage")
+ and event.usage.output_tokens
+ ):
+ actual_tokens = event.usage.output_tokens
+ token_str = str(actual_tokens).rjust(5)
+ # print(f"DEBUG: Final actual tokens: {token_str}")
+ self._emit_streaming_progress(model, token_str)
+
+ # Get the final message with complete usage data
+ message = await stream.get_final_message()
+
+ # Log final usage information
+ if hasattr(message, "usage") and message.usage:
+ self.logger.info(
+ f"Streaming complete - Model: {model}, Input tokens: {message.usage.input_tokens}, Output tokens: {message.usage.output_tokens}"
+ )
+
+ return message
+
+ def _emit_streaming_progress(self, model: str, token_str: str) -> None:
+ """Emit a streaming progress event that goes directly to progress display."""
+ data = {
+ "progress_action": ProgressAction.STREAMING,
+ "model": model,
+ "agent_name": self.name,
+ "chat_turn": self.chat_turn(),
+ "details": token_str.strip(), # Token count goes in details for STREAMING action
+ }
+ # print(f"DEBUG: Emitting streaming progress event with data: {data}")
+ # Use a special logger level or namespace to avoid polluting regular logs
+ self.logger.info("Streaming progress", data=data)
+
  async def _anthropic_completion(
  self,
  message_param,
@@ -105,7 +171,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  base_url = base_url.rstrip("/v1")

  try:
- anthropic = Anthropic(api_key=api_key, base_url=base_url)
+ anthropic = AsyncAnthropic(api_key=api_key, base_url=base_url)
  messages: List[MessageParam] = []
  params = self.get_request_params(request_params)
  except AuthenticationError as e:
@@ -118,7 +184,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  # if use_history is True
  messages.extend(self.history.get(include_completion_history=params.use_history))

- messages.append(message_param)
+ messages.append(message_param) # message_param is the current user turn
+
+ # Get cache mode configuration
+ cache_mode = self._get_cache_mode()
+ self.logger.debug(f"Anthropic cache_mode: {cache_mode}")

  tool_list: ListToolsResult = await self.aggregator.list_tools()
  available_tools: List[ToolParam] = [
@@ -134,8 +204,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  model = self.default_request_params.model

+ # Note: We'll cache tools+system together by putting cache_control only on system prompt
+
  for i in range(params.max_iterations):
  self._log_chat_progress(self.chat_turn(), model=model)
+
  # Create base arguments dictionary
  base_args = {
  "model": model,
@@ -145,6 +218,60 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  "tools": available_tools,
  }

+ # Apply cache_control to system prompt if cache_mode is not "off"
+ # This caches both tools and system prompt together in one cache block
+ if cache_mode != "off" and base_args["system"]:
+ if isinstance(base_args["system"], str):
+ base_args["system"] = [
+ {
+ "type": "text",
+ "text": base_args["system"],
+ "cache_control": {"type": "ephemeral"},
+ }
+ ]
+ self.logger.debug(
+ "Applied cache_control to system prompt (caches tools+system in one block)"
+ )
+ else:
+ self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+
+ # Apply conversation caching using walking algorithm if in auto mode
+ if cache_mode == "auto" and self.history.should_apply_conversation_cache():
+ cache_updates = self.history.get_conversation_cache_updates()
+
+ # Remove cache control from old positions
+ if cache_updates["remove"]:
+ self.history.remove_cache_control_from_messages(
+ messages, cache_updates["remove"]
+ )
+ self.logger.debug(
+ f"Removed conversation cache_control from positions {cache_updates['remove']}"
+ )
+
+ # Add cache control to new positions
+ if cache_updates["add"]:
+ applied_count = self.history.add_cache_control_to_messages(
+ messages, cache_updates["add"]
+ )
+ if applied_count > 0:
+ self.history.apply_conversation_cache_updates(cache_updates)
+ self.logger.debug(
+ f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
+ )
+
+ # Verify we don't exceed Anthropic's 4 cache block limit
+ total_cache_blocks = applied_count
+ if cache_mode != "off" and base_args["system"]:
+ total_cache_blocks += 1 # tools+system cache block
+ if total_cache_blocks > 4:
+ self.logger.warning(
+ f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
+ )
+ else:
+ self.logger.debug(
+ f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
+ )
+
  if params.maxTokens is not None:
  base_args["max_tokens"] = params.maxTokens

@@ -155,9 +282,10 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  self.logger.debug(f"{arguments}")

- executor_result = await self.executor.execute(anthropic.messages.create, **arguments)
-
- response = executor_result[0]
+ # Use streaming API with helper
+ async with anthropic.messages.stream(**arguments) as stream:
+ # Process the stream
+ response = await self._process_stream(stream, model)

  # Track usage if response is valid and has usage data
  if (
@@ -170,27 +298,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  response.usage, model or DEFAULT_ANTHROPIC_MODEL
  )
  self.usage_accumulator.add_turn(turn_usage)
-
- # # Print raw usage for debugging
- # print(f"\n=== USAGE DEBUG ({model}) ===")
- # print(f"Raw usage: {response.usage}")
- # print(
- # f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
- # )
- # print(
- # f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
- # )
- # print(f"Effective input: {turn_usage.effective_input_tokens}")
- # print(
- # f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
- # )
- # if self.usage_accumulator.context_usage_percentage:
- # print(
- # f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
- # )
- # if self.usage_accumulator.cache_hit_rate:
- # print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
- # print("===========================\n")
+ # self._show_usage(response.usage, turn_usage)
  except Exception as e:
  self.logger.warning(f"Failed to track usage: {e}")

@@ -201,7 +309,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  ) from response
  elif isinstance(response, BaseException):
  error_details = str(response)
- self.logger.error(f"Error: {error_details}", data=executor_result)
+ self.logger.error(f"Error: {error_details}", data=BaseException)

  # Try to extract more useful information for API errors
  if hasattr(response, "status_code") and hasattr(response, "response"):
@@ -214,13 +322,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  # Convert other errors to text response
  error_message = f"Error during generation: {error_details}"
  response = Message(
- id="error", # Required field
- model="error", # Required field
+ id="error",
+ model="error",
  role="assistant",
  type="message",
  content=[TextBlock(type="text", text=error_message)],
- stop_reason="end_turn", # Must be one of the allowed values
- usage=Usage(input_tokens=0, output_tokens=0), # Required field
+ stop_reason="end_turn",
+ usage=Usage(input_tokens=0, output_tokens=0),
  )

  self.logger.debug(
@@ -230,7 +338,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  response_as_message = self.convert_message_to_message_param(response)
  messages.append(response_as_message)
- if response.content[0].type == "text":
+ if response.content and response.content[0].type == "text":
  responses.append(TextContent(type="text", text=response.content[0].text))

  if response.stop_reason == "end_turn":
@@ -290,12 +398,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

  # Process all tool calls and collect results
  tool_results = []
- for i, content in enumerate(tool_uses):
- tool_name = content.name
- tool_args = content.input
- tool_use_id = content.id
+ # Use a different loop variable for tool enumeration if 'i' is outer loop counter
+ for tool_idx, content_block in enumerate(tool_uses):
+ tool_name = content_block.name
+ tool_args = content_block.input
+ tool_use_id = content_block.id

- if i == 0: # Only show message for first tool use
+ if tool_idx == 0: # Only show message for first tool use
  await self.show_assistant_message(message_text, tool_name)

  self.show_tool_call(available_tools, tool_name, tool_args)
@@ -320,11 +429,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  if params.use_history:
  # Get current prompt messages
  prompt_messages = self.history.get(include_completion_history=False)
-
- # Calculate new conversation messages (excluding prompts)
  new_messages = messages[len(prompt_messages) :]
-
- # Update conversation history
  self.history.set(new_messages)

  self._log_chat_finished(model=model)
@@ -362,8 +467,26 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  multipart_messages[:-1] if last_message.role == "user" else multipart_messages
  )
  converted = []
+
+ # Get cache mode configuration
+ cache_mode = self._get_cache_mode()
+
  for msg in messages_to_add:
- converted.append(AnthropicConverter.convert_to_anthropic(msg))
+ anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
+
+ # Apply caching to template messages if cache_mode is "prompt" or "auto"
+ if is_template and cache_mode in ["prompt", "auto"] and anthropic_msg.get("content"):
+ content_list = anthropic_msg["content"]
+ if isinstance(content_list, list) and content_list:
+ # Apply cache control to the last content block
+ last_block = content_list[-1]
+ if isinstance(last_block, dict):
+ last_block["cache_control"] = {"type": "ephemeral"}
+ self.logger.debug(
+ f"Applied cache_control to template message with role {anthropic_msg.get('role')}"
+ )
+
+ converted.append(anthropic_msg)

  self.history.extend(converted, is_prompt=is_template)

@@ -398,6 +521,28 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
  )
  return self._structured_from_multipart(result, model)

+ def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
+ # Print raw usage for debugging
+ print(f"\n=== USAGE DEBUG ({turn_usage.model}) ===")
+ print(f"Raw usage: {raw_usage}")
+ print(
+ f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+ )
+ print(
+ f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+ )
+ print(f"Effective input: {turn_usage.effective_input_tokens}")
+ print(
+ f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+ )
+ if self.usage_accumulator.context_usage_percentage:
+ print(
+ f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+ )
+ if self.usage_accumulator.cache_hit_rate:
+ print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+ print("===========================\n")
+
  @classmethod
  def convert_message_to_message_param(cls, message: Message, **kwargs) -> MessageParam:
  """Convert a response object to an input parameter object to allow LLM calls to be chained."""
mcp_agent/llm/usage_tracking.py CHANGED
@@ -84,19 +84,32 @@ class TurnUsage(BaseModel):
  @computed_field
  @property
  def current_context_tokens(self) -> int:
- """Current context size after this turn (input + output)"""
- return self.input_tokens + self.output_tokens
+ """Current context size after this turn (total input including cache + output)"""
+ # For Anthropic: input_tokens + cache_read_tokens represents total input context
+ total_input = self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+ return total_input + self.output_tokens

  @computed_field
  @property
  def effective_input_tokens(self) -> int:
- """Input tokens excluding cache reads (tokens actually processed)"""
- return max(
- 0,
- self.input_tokens
- - self.cache_usage.cache_read_tokens
- - self.cache_usage.cache_hit_tokens,
- )
+ """Input tokens actually processed (new tokens, not from cache)"""
+ # For Anthropic: input_tokens already excludes cached content
+ # For other providers: subtract cache hits from input_tokens
+ if self.provider == Provider.ANTHROPIC:
+ return self.input_tokens
+ else:
+ return max(0, self.input_tokens - self.cache_usage.cache_hit_tokens)
+
+ @computed_field
+ @property
+ def display_input_tokens(self) -> int:
+ """Input tokens to display for 'Last turn' (total submitted tokens)"""
+ # For Anthropic: input_tokens excludes cache, so add cache tokens
+ if self.provider == Provider.ANTHROPIC:
+ return self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+ else:
+ # For OpenAI/Google: input_tokens already includes cached tokens
+ return self.input_tokens

  @classmethod
  def from_anthropic(cls, usage: AnthropicUsage, model: str) -> "TurnUsage":
@@ -204,8 +217,11 @@ class UsageAccumulator(BaseModel):
  @computed_field
  @property
  def cumulative_input_tokens(self) -> int:
- """Total input tokens charged across all turns"""
- return sum(turn.input_tokens for turn in self.turns)
+ """Total input tokens charged across all turns (including cache tokens)"""
+ return sum(
+ turn.input_tokens + turn.cache_usage.cache_read_tokens + turn.cache_usage.cache_write_tokens
+ for turn in self.turns
+ )

  @computed_field
  @property
@@ -216,8 +232,8 @@
  @computed_field
  @property
  def cumulative_billing_tokens(self) -> int:
- """Total tokens charged across all turns"""
- return sum(turn.total_tokens for turn in self.turns)
+ """Total tokens charged across all turns (including cache tokens)"""
+ return self.cumulative_input_tokens + self.cumulative_output_tokens

  @computed_field
  @property
@@ -258,11 +274,12 @@
  @computed_field
  @property
  def cache_hit_rate(self) -> Optional[float]:
- """Percentage of input tokens served from cache"""
- if self.cumulative_input_tokens == 0:
- return None
+ """Percentage of total input context served from cache"""
  cache_tokens = self.cumulative_cache_read_tokens + self.cumulative_cache_hit_tokens
- return (cache_tokens / self.cumulative_input_tokens) * 100
+ total_input_context = self.cumulative_input_tokens + cache_tokens
+ if total_input_context == 0:
+ return None
+ return (cache_tokens / total_input_context) * 100

  @computed_field
  @property
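A worked example of the revised per-turn accounting, with illustrative numbers: for an Anthropic turn, `input_tokens` excludes cached content, so cache reads and writes are added back for display and context-size purposes.

    input_tokens = 400          # non-cached input processed this turn
    cache_read_tokens = 3_000   # prompt prefix served from cache
    cache_write_tokens = 0
    output_tokens = 250

    display_input_tokens = input_tokens + cache_read_tokens + cache_write_tokens  # 3400
    current_context_tokens = display_input_tokens + output_tokens                 # 3650
    effective_input_tokens = input_tokens                                         # 400 (Anthropic)
    print(display_input_tokens, current_context_tokens, effective_input_tokens)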
mcp_agent/logging/events.py CHANGED
@@ -117,3 +117,27 @@ class SamplingFilter(EventFilter):
  if not super().matches(event):
  return False
  return random.random() < self.sample_rate
+
+
+ class StreamingExclusionFilter(EventFilter):
+ """
+ Event filter that excludes streaming progress events from logs.
+ This prevents token count updates from flooding the logs when info level is enabled.
+ """
+
+ def matches(self, event: Event) -> bool:
+ # First check if it passes the base filter
+ if not super().matches(event):
+ return False
+
+ # Exclude events with "Streaming progress" message
+ if event.message == "Streaming progress":
+ return False
+
+ # Also check for events with progress_action = STREAMING in data
+ if event.data and isinstance(event.data.get("data"), dict):
+ event_data = event.data["data"]
+ if event_data.get("progress_action") == "Streaming":
+ return False
+
+ return True
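As wired up in `configure_logger` earlier in this diff, the new filter is a drop-in replacement for `EventFilter`. A sketch of constructing it, assuming a string level such as "info" (the value passed from `config.logger.level`):

    from mcp_agent.logging.events import StreamingExclusionFilter

    # Passes whatever the base EventFilter passes, except "Streaming progress"
    # events and events whose data carries progress_action == "Streaming".
    event_filter = StreamingExclusionFilter(min_level="info")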
mcp_agent/logging/rich_progress.py CHANGED
@@ -73,6 +73,7 @@ class RichProgressDisplay:
  ProgressAction.LOADED: "dim green",
  ProgressAction.INITIALIZED: "dim green",
  ProgressAction.CHATTING: "bold blue",
+ ProgressAction.STREAMING: "bold blue", # Same color as chatting
  ProgressAction.ROUTING: "bold blue",
  ProgressAction.PLANNING: "bold blue",
  ProgressAction.READY: "dim green",
@@ -100,9 +101,16 @@ class RichProgressDisplay:
  task_id = self._taskmap[task_name]

  # Ensure no None values in the update
+ # For streaming, use custom description immediately to avoid flashing
+ if event.action == ProgressAction.STREAMING and event.streaming_tokens:
+ formatted_tokens = f"↓ {event.streaming_tokens.strip()}".ljust(15)
+ description = f"[{self._get_action_style(event.action)}]{formatted_tokens}"
+ else:
+ description = f"[{self._get_action_style(event.action)}]{event.action.value:<15}"
+
  self._progress.update(
  task_id,
- description=f"[{self._get_action_style(event.action)}]{event.action.value:<15}",
+ description=description,
  target=event.target or task_name, # Use task_name as fallback for target
  details=event.details or "",
  task_name=task_name,