fast-agent-mcp 0.2.34__py3-none-any.whl → 0.2.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/METADATA +1 -1
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/RECORD +16 -16
- mcp_agent/config.py +8 -0
- mcp_agent/context.py +3 -2
- mcp_agent/core/agent_app.py +1 -1
- mcp_agent/event_progress.py +22 -4
- mcp_agent/llm/augmented_llm.py +16 -3
- mcp_agent/llm/memory.py +120 -0
- mcp_agent/llm/model_database.py +2 -2
- mcp_agent/llm/providers/augmented_llm_anthropic.py +190 -45
- mcp_agent/llm/usage_tracking.py +34 -17
- mcp_agent/logging/events.py +24 -0
- mcp_agent/logging/rich_progress.py +9 -1
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/licenses/LICENSE +0 -0
{fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
 mcp_agent/__init__.py,sha256=18T0AG0W9sJhTY38O9GFFOzliDhxx9p87CvRyti9zbw,1620
 mcp_agent/app.py,sha256=3mtHP1nRQcRaKhhxgTmCOv00alh70nT7UxNA8bN47QE,5560
-mcp_agent/config.py,sha256=
+mcp_agent/config.py,sha256=ZIGFCSWrhMqhlHhapQf3QXo9N6EuTVy5iZIFiiqwE2M,16289
 mcp_agent/console.py,sha256=Gjf2QLFumwG1Lav__c07X_kZxxEUSkzV-1_-YbAwcwo,813
-mcp_agent/context.py,sha256=
+mcp_agent/context.py,sha256=f729LJcW4YoFXb0Rg_kEU-5FlrOnFgqplI6W0fVqomg,7631
 mcp_agent/context_dependent.py,sha256=QXfhw3RaQCKfscEEBRGuZ3sdMWqkgShz2jJ1ivGGX1I,1455
-mcp_agent/event_progress.py,sha256=
+mcp_agent/event_progress.py,sha256=d7T1hQ1D289MYh2Z5bMPB4JqjGqTOzveJuOHE03B_Xo,3720
 mcp_agent/mcp_server_registry.py,sha256=b3iSb-0ULYc5yUG2KHav41WGwSYWiJCGQsOwWHWByxo,12346
 mcp_agent/progress_display.py,sha256=GeJU9VUt6qKsFVymG688hCMVCsAygG9ifiiEb5IcbN4,361
 mcp_agent/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,7 +28,7 @@ mcp_agent/cli/commands/quickstart.py,sha256=SM3CHMzDgvTxIpKjFuX9BrS_N1vRoXNBDaO9
 mcp_agent/cli/commands/setup.py,sha256=eOEd4TL-b0DaDeSJMGOfNOsTEItoZ67W88eTP4aP-bo,6482
 mcp_agent/cli/commands/url_parser.py,sha256=5VdtcHRHzi67YignStVbz7u-rcvNNErw9oJLAUFOtEY,5855
 mcp_agent/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mcp_agent/core/agent_app.py,sha256=
+mcp_agent/core/agent_app.py,sha256=KJdx0Qbh7Gb4wA8_LwKriogc27SraRIrvMqHsOCVVt0,12119
 mcp_agent/core/agent_types.py,sha256=DogMcOoRwk70CFSetZ09madRcPDlhPn1iXZVeOcLV8Q,1507
 mcp_agent/core/direct_decorators.py,sha256=HY_7S7OtfZPqAeqC3_hPYa1d6zTnEyiOeI7JxvnWqTM,16786
 mcp_agent/core/direct_factory.py,sha256=UNAjHHFRLrQ3D934RMsKsh0Oas7LXLIVslgrzcetM6A,19090
@@ -50,23 +50,23 @@ mcp_agent/human_input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
 mcp_agent/human_input/handler.py,sha256=s712Z5ssTCwjL9-VKoIdP5CtgMh43YvepynYisiWTTA,3144
 mcp_agent/human_input/types.py,sha256=RtWBOVzy8vnYoQrc36jRLn8z8N3C4pDPMBN5vF6qM5Y,1476
 mcp_agent/llm/__init__.py,sha256=d8zgwG-bRFuwiMNMYkywg_qytk4P8lawyld_meuUmHI,68
-mcp_agent/llm/augmented_llm.py,sha256=
+mcp_agent/llm/augmented_llm.py,sha256=ekVZQla3oOyWSysJif-2ZutklYB8HTK99I7HzSye6ag,25705
 mcp_agent/llm/augmented_llm_passthrough.py,sha256=F8KifmTwoQ7zyncjmoRek8SBfGdgc9yc5LRXwMQH-bg,8640
 mcp_agent/llm/augmented_llm_playback.py,sha256=BQeBXRpO-xGAY9wIJxyde6xpHmZEdQPLd32frF8t3QQ,4916
 mcp_agent/llm/augmented_llm_slow.py,sha256=DDSD8bL2flmQrVHZm-UDs7sR8aHRWkDOcOW-mX_GPok,2067
-mcp_agent/llm/memory.py,sha256=
-mcp_agent/llm/model_database.py,sha256=
+mcp_agent/llm/memory.py,sha256=pTOaTDV3EA3X68yKwEtUAu7s0xGIQQ_cKBhfYUnfR0w,8614
+mcp_agent/llm/model_database.py,sha256=mfy039QZP_8-f0aHWR0Fpj2qnlys5430haSzrA86aXw,8485
 mcp_agent/llm/model_factory.py,sha256=u60O4SWe22wN6CpmIfaF4C5aUziJs8O3N0Jo7erPjp8,10753
 mcp_agent/llm/prompt_utils.py,sha256=yWQHykoK13QRF7evHUKxVF0SpVLN-Bsft0Yixzvn0g0,4825
 mcp_agent/llm/provider_key_manager.py,sha256=usMWozSMhek_FIlM1MeVDwAbs-P96SrEVPGd3YwF9E4,2833
 mcp_agent/llm/provider_types.py,sha256=AkQl1r67wZ0gSIY6CXsiZiS3uw5DBF9E5yhIn3THayk,633
 mcp_agent/llm/sampling_converter.py,sha256=C7wPBlmT0eD90XWabC22zkxsrVHKCrjwIwg6cG628cI,2926
 mcp_agent/llm/sampling_format_converter.py,sha256=xGz4odHpOcP7--eFaJaFtUR8eR9jxZS7MnLH6J7n0EU,1263
-mcp_agent/llm/usage_tracking.py,sha256=
+mcp_agent/llm/usage_tracking.py,sha256=HdBehPMt0bZzEgRmTnbMdgpLVuTp6L_VJTQx5Z25zCM,15321
 mcp_agent/llm/providers/__init__.py,sha256=heVxtmuqFJOnjjxHz4bWSqTAxXoN1E8twC_gQ_yJpHk,265
 mcp_agent/llm/providers/anthropic_utils.py,sha256=vYDN5G5jKMhD2CQg8veJYab7tvvzYkDMq8M1g_hUAQg,3275
 mcp_agent/llm/providers/augmented_llm_aliyun.py,sha256=XylkJKZ9theSVUxJKOZkf1244hgzng4Ng4Dr209Qb-w,1101
-mcp_agent/llm/providers/augmented_llm_anthropic.py,sha256=
+mcp_agent/llm/providers/augmented_llm_anthropic.py,sha256=xCLqLi2HeBHPSvN_xD9Gl26ENTGT0E_1KLnN38BVXiE,24624
 mcp_agent/llm/providers/augmented_llm_azure.py,sha256=VPrD6lNrEw6EdYUTa9MDvHDNIPjJU5CG5xnKCM3JYdA,5878
 mcp_agent/llm/providers/augmented_llm_deepseek.py,sha256=zI9a90dwT4r6E1f_xp4K50Cj9sD7y7kNRgjo0s1pd5w,3804
 mcp_agent/llm/providers/augmented_llm_generic.py,sha256=5Uq8ZBhcFuQTt7koP_5ykolREh2iWu8zKhNbh3pM9lQ,1210
@@ -84,11 +84,11 @@ mcp_agent/llm/providers/openai_utils.py,sha256=T4bTCL9f7DsoS_zoKgQKv_FUv_4n98vgb
 mcp_agent/llm/providers/sampling_converter_anthropic.py,sha256=35WzBWkPklnuMlu5S6XsQIq0YL58NOy8Ja6A_l4m6eM,1612
 mcp_agent/llm/providers/sampling_converter_openai.py,sha256=GA-LfTJzOwH9Vwk0Q4K37nG6zxpzqS-JGaM7cTH-Epc,841
 mcp_agent/logging/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mcp_agent/logging/events.py,sha256=
+mcp_agent/logging/events.py,sha256=dSJJfuCd59-ZyYTVcf0M4HQd6iXb5k50PSAeoq1CpH0,4278
 mcp_agent/logging/json_serializer.py,sha256=qkfxnR9ka6OgvwSpM2CggELbEtzzkApm0s_KYz11RDY,5791
 mcp_agent/logging/listeners.py,sha256=_S4Jp5_KWp0kUfrx4BxDdNCeQK3MNT3Zi9AaolPri7A,6648
 mcp_agent/logging/logger.py,sha256=l02OGX_c5FOyH0rspd4ZvnkJcbb0FahhUhlh2KI8mqE,10724
-mcp_agent/logging/rich_progress.py,sha256=
+mcp_agent/logging/rich_progress.py,sha256=NQbW010VxfzgJw8BRaqKVTIFlTNvDfmMcoOt7pxGvzQ,5362
 mcp_agent/logging/transport.py,sha256=m8YsLLu5T8eof_ndpLQs4gHOzqqEL98xsVwBwDsBfxI,17335
 mcp_agent/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 mcp_agent/mcp/common.py,sha256=MpSC0fLO21RcDz4VApah4C8_LisVGz7OXkR17Xw-9mY,431
@@ -158,8 +158,8 @@ mcp_agent/resources/examples/workflows/router.py,sha256=E4x_-c3l4YW9w1i4ARcDtkde
 mcp_agent/resources/examples/workflows/short_story.txt,sha256=X3y_1AyhLFN2AKzCKvucJtDgAFIJfnlbsbGZO5bBWu0,1187
 mcp_agent/tools/tool_definition.py,sha256=L3Pxl-uLEXqlVoo-bYuFTFALeI-2pIU44YgFhsTKEtM,398
 mcp_agent/ui/console_display.py,sha256=UKqax5V2TC0hkZZORmmd6UqUk0DGX7A25E3h1k9f42k,10982
-fast_agent_mcp-0.2.
-fast_agent_mcp-0.2.
-fast_agent_mcp-0.2.
-fast_agent_mcp-0.2.
-fast_agent_mcp-0.2.
+fast_agent_mcp-0.2.35.dist-info/METADATA,sha256=O4DhiiD77uzvQeJvBbJxWMHwycs3M2fCilt7lJmPzUE,30799
+fast_agent_mcp-0.2.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+fast_agent_mcp-0.2.35.dist-info/entry_points.txt,sha256=oKQeSUVn87pJv8_k1NQ7Ak8cXaaXHCnPAOJRCV_uUVg,230
+fast_agent_mcp-0.2.35.dist-info/licenses/LICENSE,sha256=cN3FxDURL9XuzE5mhK9L2paZo82LTfjwCYVT7e3j0e4,10939
+fast_agent_mcp-0.2.35.dist-info/RECORD,,
mcp_agent/config.py
CHANGED
@@ -115,6 +115,14 @@ class AnthropicSettings(BaseModel):

     base_url: str | None = None

+    cache_mode: Literal["off", "prompt", "auto"] = "auto"
+    """
+    Controls how caching is applied for Anthropic models when prompt_caching is enabled globally.
+    - "off": No caching, even if global prompt_caching is true.
+    - "prompt": Caches tools+system prompt (1 block) and template content. Useful for large, static prompts.
+    - "auto": Currently same as "prompt" - caches tools+system prompt (1 block) and template content.
+    """
+
     model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

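The new cache_mode switch is an ordinary validated Pydantic field on AnthropicSettings, so it can be exercised directly. A minimal sketch, assuming the class is importable from mcp_agent.config as the hunk header suggests and that its other fields are optional (as base_url is); values are illustrative:

    from mcp_agent.config import AnthropicSettings

    # "off" disables caching, "prompt" caches tools + system prompt and template
    # content, and "auto" (the default) currently behaves like "prompt".
    settings = AnthropicSettings(cache_mode="prompt")
    assert settings.cache_mode == "prompt"
    # An invalid value such as cache_mode="always" would fail Literal validation.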
mcp_agent/context.py
CHANGED
@@ -25,7 +25,7 @@ from pydantic import BaseModel, ConfigDict
 from mcp_agent.config import Settings, get_settings
 from mcp_agent.executor.executor import AsyncioExecutor, Executor
 from mcp_agent.executor.task_registry import ActivityRegistry
-from mcp_agent.logging.events import EventFilter
+from mcp_agent.logging.events import EventFilter, StreamingExclusionFilter
 from mcp_agent.logging.logger import LoggingConfig, get_logger
 from mcp_agent.logging.transport import create_transport
 from mcp_agent.mcp_server_registry import ServerRegistry
@@ -124,7 +124,8 @@ async def configure_logger(config: "Settings") -> None:
     """
     Configure logging and tracing based on the application config.
     """
-
+    # Use StreamingExclusionFilter to prevent streaming events from flooding logs
+    event_filter: EventFilter = StreamingExclusionFilter(min_level=config.logger.level)
     logger.info(f"Configuring logger with level: {config.logger.level}")
     transport = create_transport(settings=config.logger, event_filter=event_filter)
     await LoggingConfig.configure(
mcp_agent/core/agent_app.py
CHANGED
mcp_agent/event_progress.py
CHANGED
@@ -15,6 +15,7 @@ class ProgressAction(str, Enum):
     LOADED = "Loaded"
     INITIALIZED = "Initialized"
     CHATTING = "Chatting"
+    STREAMING = "Streaming"  # Special action for real-time streaming updates
     ROUTING = "Routing"
     PLANNING = "Planning"
     READY = "Ready"
@@ -33,12 +34,22 @@ class ProgressEvent(BaseModel):
     target: str
     details: Optional[str] = None
     agent_name: Optional[str] = None
+    streaming_tokens: Optional[str] = None  # Special field for streaming token count

     def __str__(self) -> str:
         """Format the progress event for display."""
-
-        if self.
-
+        # Special handling for streaming - show token count in action position
+        if self.action == ProgressAction.STREAMING and self.streaming_tokens:
+            # For streaming, show just the token count instead of "Streaming"
+            action_display = self.streaming_tokens.ljust(11)
+            base = f"{action_display}. {self.target}"
+            if self.details:
+                base += f" - {self.details}"
+        else:
+            base = f"{self.action.ljust(11)}. {self.target}"
+            if self.details:
+                base += f" - {self.details}"
+
         if self.agent_name:
             base = f"[{self.agent_name}] {base}"
         return base
@@ -78,7 +89,8 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:

     elif "augmented_llm" in namespace:
         model = event_data.get("model", "")
-
+
+        # For all augmented_llm events, put model info in details column
         details = f"{model}"
         chat_turn = event_data.get("chat_turn")
         if chat_turn is not None:
@@ -87,9 +99,15 @@ def convert_log_event(event: Event) -> Optional[ProgressEvent]:
     if not target:
         target = event_data.get("target", "unknown")

+    # Extract streaming token count for STREAMING actions
+    streaming_tokens = None
+    if progress_action == ProgressAction.STREAMING:
+        streaming_tokens = event_data.get("details", "")
+
     return ProgressEvent(
         action=ProgressAction(progress_action),
         target=target or "unknown",
         details=details,
         agent_name=event_data.get("agent_name"),
+        streaming_tokens=streaming_tokens,
     )
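For reference, the new formatting path can be exercised on its own. A small sketch using the classes from the diff above (model and agent names are illustrative); the token count takes the place of the action label, left-justified to 11 characters:

    from mcp_agent.event_progress import ProgressAction, ProgressEvent

    event = ProgressEvent(
        action=ProgressAction.STREAMING,
        target="claude-sonnet-4-0",
        agent_name="assistant",
        streaming_tokens="492",
    )
    print(event)  # -> "[assistant] 492        . claude-sonnet-4-0"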
mcp_agent/llm/augmented_llm.py
CHANGED
@@ -97,6 +97,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
     PARAM_USE_HISTORY = "use_history"
     PARAM_MAX_ITERATIONS = "max_iterations"
     PARAM_TEMPLATE_VARS = "template_vars"
+
     # Base set of fields that should always be excluded
     BASE_EXCLUDE_FIELDS = {PARAM_METADATA}

@@ -371,16 +372,28 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # Start with base arguments
         arguments = base_args.copy()

-        #
-
+        # Combine base exclusions with provider-specific exclusions
+        final_exclude_fields = self.BASE_EXCLUDE_FIELDS.copy()
+        if exclude_fields:
+            final_exclude_fields.update(exclude_fields)

         # Add all fields from params that aren't explicitly excluded
-
+        # Ensure model_dump only includes set fields if that's the desired behavior,
+        # or adjust exclude_unset=True/False as needed.
+        # Default Pydantic v2 model_dump is exclude_unset=False
+        params_dict = request_params.model_dump(exclude=final_exclude_fields)
+
         for key, value in params_dict.items():
+            # Only add if not None and not already in base_args (base_args take precedence)
+            # or if None is a valid value for the provider, this logic might need adjustment.
             if value is not None and key not in arguments:
                 arguments[key] = value
+            elif value is not None and key in arguments and arguments[key] is None:
+                # Allow overriding a None in base_args with a set value from params
+                arguments[key] = value

         # Finally, add any metadata fields as a last layer of overrides
+        # This ensures metadata can override anything previously set if keys conflict.
         if request_params.metadata:
             arguments.update(request_params.metadata)

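The precedence implemented above is: base args win over request params, except that a None placeholder in base args can be filled by a set param value, and metadata is applied last and overrides everything. A standalone sketch of the same merge order (function and variable names here are illustrative, not the library API):

    def merge_arguments(base_args: dict, params: dict, metadata: dict | None) -> dict:
        arguments = dict(base_args)
        for key, value in params.items():
            if value is not None and key not in arguments:
                arguments[key] = value      # new key from request params
            elif value is not None and key in arguments and arguments[key] is None:
                arguments[key] = value      # fill a None placeholder in base args
        if metadata:
            arguments.update(metadata)      # metadata overrides everything
        return arguments

    merged = merge_arguments(
        {"model": "claude-sonnet-4-0", "max_tokens": None},
        {"max_tokens": 1024, "temperature": 0.2},
        {"temperature": 0.0},
    )
    # -> {"model": "claude-sonnet-4-0", "max_tokens": 1024, "temperature": 0.0}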
mcp_agent/llm/memory.py
CHANGED
@@ -35,6 +35,9 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
     def __init__(self) -> None:
         self.history: List[MessageParamT] = []
         self.prompt_messages: List[MessageParamT] = []  # Always included
+        self.conversation_cache_positions: List[int] = []  # Track active conversation cache positions
+        self.cache_walk_distance: int = 6  # Messages between cache blocks
+        self.max_conversation_cache_blocks: int = 2  # Maximum conversation cache blocks

     def extend(self, messages: List[MessageParamT], is_prompt: bool = False) -> None:
         """
@@ -99,5 +102,122 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
            clear_prompts: If True, also clear prompt messages
        """
        self.history = []
+        self.conversation_cache_positions = []  # Reset cache positions
        if clear_prompts:
            self.prompt_messages = []
+
+    def should_apply_conversation_cache(self) -> bool:
+        """
+        Determine if conversation caching should be applied based on walking algorithm.
+
+        Returns:
+            True if we should add or update cache blocks
+        """
+        total_messages = len(self.history)
+
+        # Need at least cache_walk_distance messages to start caching
+        if total_messages < self.cache_walk_distance:
+            return False
+
+        # Check if we need to add a new cache block
+        return len(self._calculate_cache_positions(total_messages)) != len(self.conversation_cache_positions)
+
+    def _calculate_cache_positions(self, total_conversation_messages: int) -> List[int]:
+        """
+        Calculate where cache blocks should be placed using walking algorithm.
+
+        Args:
+            total_conversation_messages: Number of conversation messages (not including prompts)
+
+        Returns:
+            List of positions (relative to conversation start) where cache should be placed
+        """
+        positions = []
+
+        # Place cache blocks every cache_walk_distance messages
+        for i in range(self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance):
+            positions.append(i)
+            if len(positions) >= self.max_conversation_cache_blocks:
+                break
+
+        # Keep only the most recent cache blocks (walking behavior)
+        if len(positions) > self.max_conversation_cache_blocks:
+            positions = positions[-self.max_conversation_cache_blocks:]
+
+        return positions
+
+    def get_conversation_cache_updates(self) -> dict:
+        """
+        Get cache position updates needed for the walking algorithm.
+
+        Returns:
+            Dict with 'add', 'remove', and 'active' position lists (relative to full message array)
+        """
+        total_conversation_messages = len(self.history)
+        new_positions = self._calculate_cache_positions(total_conversation_messages)
+
+        # Convert to absolute positions (including prompt messages)
+        prompt_offset = len(self.prompt_messages)
+        new_absolute_positions = [pos + prompt_offset for pos in new_positions]
+
+        old_positions_set = set(self.conversation_cache_positions)
+        new_positions_set = set(new_absolute_positions)
+
+        return {
+            'add': sorted(new_positions_set - old_positions_set),
+            'remove': sorted(old_positions_set - new_positions_set),
+            'active': sorted(new_absolute_positions)
+        }
+
+    def apply_conversation_cache_updates(self, updates: dict) -> None:
+        """
+        Apply cache position updates.
+
+        Args:
+            updates: Dict from get_conversation_cache_updates()
+        """
+        self.conversation_cache_positions = updates['active'].copy()
+
+    def remove_cache_control_from_messages(self, messages: List[MessageParamT], positions: List[int]) -> None:
+        """
+        Remove cache control from specified message positions.
+
+        Args:
+            messages: The message array to modify
+            positions: List of positions to remove cache control from
+        """
+        for pos in positions:
+            if pos < len(messages):
+                message = messages[pos]
+                if isinstance(message, dict) and "content" in message:
+                    content_list = message["content"]
+                    if isinstance(content_list, list):
+                        for content_block in content_list:
+                            if isinstance(content_block, dict) and "cache_control" in content_block:
+                                del content_block["cache_control"]
+
+    def add_cache_control_to_messages(self, messages: List[MessageParamT], positions: List[int]) -> int:
+        """
+        Add cache control to specified message positions.
+
+        Args:
+            messages: The message array to modify
+            positions: List of positions to add cache control to
+
+        Returns:
+            Number of cache blocks successfully applied
+        """
+        applied_count = 0
+        for pos in positions:
+            if pos < len(messages):
+                message = messages[pos]
+                if isinstance(message, dict) and "content" in message:
+                    content_list = message["content"]
+                    if isinstance(content_list, list) and content_list:
+                        # Apply cache control to the last content block
+                        for content_block in reversed(content_list):
+                            if isinstance(content_block, dict):
+                                content_block["cache_control"] = {"type": "ephemeral"}
+                                applied_count += 1
+                                break
+        return applied_count
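With the defaults above (cache_walk_distance=6, max_conversation_cache_blocks=2), the walking algorithm marks conversation positions 5 and 11, shifted by however many prompt messages precede them. A quick sketch of that behaviour, assuming plain dict messages in the Anthropic MessageParam shape (contents are illustrative):

    from mcp_agent.llm.memory import SimpleMemory

    memory = SimpleMemory()
    memory.prompt_messages = [
        {"role": "user", "content": [{"type": "text", "text": "template"}]},
        {"role": "assistant", "content": [{"type": "text", "text": "ack"}]},
    ]
    memory.history = [
        {"role": "user", "content": [{"type": "text", "text": f"turn {n}"}]} for n in range(12)
    ]

    updates = memory.get_conversation_cache_updates()
    print(updates["add"])     # [7, 13]  (conversation positions 5 and 11 + 2 prompt messages)
    print(updates["remove"])  # []

    messages = memory.prompt_messages + memory.history
    applied = memory.add_cache_control_to_messages(messages, updates["add"])
    memory.apply_conversation_cache_updates(updates)
    print(applied)  # 2 blocks marked {"type": "ephemeral"}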
mcp_agent/llm/model_database.py
CHANGED
@@ -109,11 +109,11 @@ class ModelDatabase:

     # TODO update to 32000
     ANTHROPIC_OPUS_4_VERSIONED = ModelParameters(
-        context_window=200000, max_output_tokens=
+        context_window=200000, max_output_tokens=32000, tokenizes=ANTHROPIC_MULTIMODAL
     )
     # TODO update to 64000
     ANTHROPIC_SONNET_4_VERSIONED = ModelParameters(
-        context_window=200000, max_output_tokens=
+        context_window=200000, max_output_tokens=64000, tokenizes=ANTHROPIC_MULTIMODAL
     )

     DEEPSEEK_CHAT_STANDARD = ModelParameters(
mcp_agent/llm/providers/augmented_llm_anthropic.py
CHANGED
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, List, Tuple, Type
 from mcp.types import EmbeddedResource, ImageContent, TextContent

 from mcp_agent.core.prompt import Prompt
+from mcp_agent.event_progress import ProgressAction
 from mcp_agent.llm.provider_types import Provider
 from mcp_agent.llm.providers.multipart_converter_anthropic import (
     AnthropicConverter,
@@ -18,7 +19,8 @@ if TYPE_CHECKING:
     from mcp import ListToolsResult


-from anthropic import
+from anthropic import AsyncAnthropic, AuthenticationError
+from anthropic.lib.streaming import AsyncMessageStream
 from anthropic.types import (
     Message,
     MessageParam,
@@ -78,17 +80,81 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         """Initialize Anthropic-specific default parameters"""
         # Get base defaults from parent (includes ModelDatabase lookup)
         base_params = super()._initialize_default_params(kwargs)
-
+
         # Override with Anthropic-specific settings
         chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
         base_params.model = chosen_model
-
+
         return base_params

     def _base_url(self) -> str | None:
         assert self.context.config
         return self.context.config.anthropic.base_url if self.context.config.anthropic else None

+    def _get_cache_mode(self) -> str:
+        """Get the cache mode configuration."""
+        cache_mode = "auto"  # Default to auto
+        if self.context.config and self.context.config.anthropic:
+            cache_mode = self.context.config.anthropic.cache_mode
+        return cache_mode
+
+    async def _process_stream(self, stream: AsyncMessageStream, model: str) -> Message:
+        """Process the streaming response and display real-time token usage."""
+        # Track estimated output tokens by counting text chunks
+        estimated_tokens = 0
+
+        # Process the raw event stream to get token counts
+        async for event in stream:
+            # Count tokens in real-time from content_block_delta events
+            if (
+                event.type == "content_block_delta"
+                and hasattr(event, "delta")
+                and event.delta.type == "text_delta"
+            ):
+                # Rough estimate: 1 token per 4 characters (OpenAI's typical ratio)
+                text_length = len(event.delta.text)
+                estimated_tokens += max(1, text_length // 4)
+
+                # Update progress on every token for real-time display
+                token_str = str(estimated_tokens).rjust(5)
+                # print(f"DEBUG: Streaming tokens: {token_str}")
+                self._emit_streaming_progress(model, token_str)
+
+            # Also check for final message_delta events with actual usage info
+            elif (
+                event.type == "message_delta"
+                and hasattr(event, "usage")
+                and event.usage.output_tokens
+            ):
+                actual_tokens = event.usage.output_tokens
+                token_str = str(actual_tokens).rjust(5)
+                # print(f"DEBUG: Final actual tokens: {token_str}")
+                self._emit_streaming_progress(model, token_str)
+
+        # Get the final message with complete usage data
+        message = await stream.get_final_message()
+
+        # Log final usage information
+        if hasattr(message, "usage") and message.usage:
+            self.logger.info(
+                f"Streaming complete - Model: {model}, Input tokens: {message.usage.input_tokens}, Output tokens: {message.usage.output_tokens}"
+            )
+
+        return message
+
+    def _emit_streaming_progress(self, model: str, token_str: str) -> None:
+        """Emit a streaming progress event that goes directly to progress display."""
+        data = {
+            "progress_action": ProgressAction.STREAMING,
+            "model": model,
+            "agent_name": self.name,
+            "chat_turn": self.chat_turn(),
+            "details": token_str.strip(),  # Token count goes in details for STREAMING action
+        }
+        # print(f"DEBUG: Emitting streaming progress event with data: {data}")
+        # Use a special logger level or namespace to avoid polluting regular logs
+        self.logger.info("Streaming progress", data=data)
+
     async def _anthropic_completion(
         self,
         message_param,
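The live count shown while streaming is only an estimate until the final message_delta reports real usage: roughly one token per four characters of streamed text, with a floor of one token per chunk. A tiny illustration of that arithmetic (chunk contents are made up):

    chunks = ["Hello, ", "here is a fairly long answer...", "!"]
    estimated_tokens = 0
    for chunk in chunks:
        estimated_tokens += max(1, len(chunk) // 4)  # ~4 chars per token, minimum 1
    print(estimated_tokens)  # 1 + 7 + 1 = 9 estimated tokens so far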
@@ -105,7 +171,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
             base_url = base_url.rstrip("/v1")

         try:
-            anthropic =
+            anthropic = AsyncAnthropic(api_key=api_key, base_url=base_url)
             messages: List[MessageParam] = []
             params = self.get_request_params(request_params)
         except AuthenticationError as e:
@@ -118,7 +184,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         # if use_history is True
         messages.extend(self.history.get(include_completion_history=params.use_history))

-        messages.append(message_param)
+        messages.append(message_param)  # message_param is the current user turn
+
+        # Get cache mode configuration
+        cache_mode = self._get_cache_mode()
+        self.logger.debug(f"Anthropic cache_mode: {cache_mode}")

         tool_list: ListToolsResult = await self.aggregator.list_tools()
         available_tools: List[ToolParam] = [
@@ -134,8 +204,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

         model = self.default_request_params.model

+        # Note: We'll cache tools+system together by putting cache_control only on system prompt
+
         for i in range(params.max_iterations):
             self._log_chat_progress(self.chat_turn(), model=model)
+
             # Create base arguments dictionary
             base_args = {
                 "model": model,
@@ -145,6 +218,60 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                 "tools": available_tools,
             }

+            # Apply cache_control to system prompt if cache_mode is not "off"
+            # This caches both tools and system prompt together in one cache block
+            if cache_mode != "off" and base_args["system"]:
+                if isinstance(base_args["system"], str):
+                    base_args["system"] = [
+                        {
+                            "type": "text",
+                            "text": base_args["system"],
+                            "cache_control": {"type": "ephemeral"},
+                        }
+                    ]
+                    self.logger.debug(
+                        "Applied cache_control to system prompt (caches tools+system in one block)"
+                    )
+                else:
+                    self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+
+            # Apply conversation caching using walking algorithm if in auto mode
+            if cache_mode == "auto" and self.history.should_apply_conversation_cache():
+                cache_updates = self.history.get_conversation_cache_updates()
+
+                # Remove cache control from old positions
+                if cache_updates["remove"]:
+                    self.history.remove_cache_control_from_messages(
+                        messages, cache_updates["remove"]
+                    )
+                    self.logger.debug(
+                        f"Removed conversation cache_control from positions {cache_updates['remove']}"
+                    )
+
+                # Add cache control to new positions
+                if cache_updates["add"]:
+                    applied_count = self.history.add_cache_control_to_messages(
+                        messages, cache_updates["add"]
+                    )
+                    if applied_count > 0:
+                        self.history.apply_conversation_cache_updates(cache_updates)
+                        self.logger.debug(
+                            f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
+                        )
+
+                        # Verify we don't exceed Anthropic's 4 cache block limit
+                        total_cache_blocks = applied_count
+                        if cache_mode != "off" and base_args["system"]:
+                            total_cache_blocks += 1  # tools+system cache block
+                        if total_cache_blocks > 4:
+                            self.logger.warning(
+                                f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
+                            )
+                    else:
+                        self.logger.debug(
+                            f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
+                        )
+
             if params.maxTokens is not None:
                 base_args["max_tokens"] = params.maxTokens

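The warning above exists because Anthropic currently allows at most four cache_control breakpoints per request. Under this scheme the budget is one block for tools plus the system prompt, up to two walking conversation blocks, and any blocks already attached to cached template messages. A back-of-the-envelope check (counts are illustrative):

    system_blocks = 1          # tools + system prompt cached together
    conversation_blocks = 2    # max_conversation_cache_blocks default
    template_blocks = 1        # e.g. one cached template message
    total = system_blocks + conversation_blocks + template_blocks
    print(total <= 4)  # True: within Anthropic's per-request limit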
@@ -155,9 +282,10 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

             self.logger.debug(f"{arguments}")

-
-
-
+            # Use streaming API with helper
+            async with anthropic.messages.stream(**arguments) as stream:
+                # Process the stream
+                response = await self._process_stream(stream, model)

             # Track usage if response is valid and has usage data
             if (
@@ -170,27 +298,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                         response.usage, model or DEFAULT_ANTHROPIC_MODEL
                     )
                     self.usage_accumulator.add_turn(turn_usage)
-
-                    # # Print raw usage for debugging
-                    # print(f"\n=== USAGE DEBUG ({model}) ===")
-                    # print(f"Raw usage: {response.usage}")
-                    # print(
-                    #     f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
-                    # )
-                    # print(
-                    #     f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
-                    # )
-                    # print(f"Effective input: {turn_usage.effective_input_tokens}")
-                    # print(
-                    #     f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
-                    # )
-                    # if self.usage_accumulator.context_usage_percentage:
-                    #     print(
-                    #         f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
-                    #     )
-                    # if self.usage_accumulator.cache_hit_rate:
-                    #     print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
-                    # print("===========================\n")
+                    # self._show_usage(response.usage, turn_usage)
                 except Exception as e:
                     self.logger.warning(f"Failed to track usage: {e}")

@@ -201,7 +309,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                 ) from response
             elif isinstance(response, BaseException):
                 error_details = str(response)
-                self.logger.error(f"Error: {error_details}", data=
+                self.logger.error(f"Error: {error_details}", data=BaseException)

                 # Try to extract more useful information for API errors
                 if hasattr(response, "status_code") and hasattr(response, "response"):
@@ -214,13 +322,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                 # Convert other errors to text response
                 error_message = f"Error during generation: {error_details}"
                 response = Message(
-                    id="error",
-                    model="error",
+                    id="error",
+                    model="error",
                     role="assistant",
                     type="message",
                     content=[TextBlock(type="text", text=error_message)],
-                    stop_reason="end_turn",
-                    usage=Usage(input_tokens=0, output_tokens=0),
+                    stop_reason="end_turn",
+                    usage=Usage(input_tokens=0, output_tokens=0),
                 )

                 self.logger.debug(
@@ -230,7 +338,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

             response_as_message = self.convert_message_to_message_param(response)
             messages.append(response_as_message)
-            if response.content[0].type == "text":
+            if response.content and response.content[0].type == "text":
                 responses.append(TextContent(type="text", text=response.content[0].text))

             if response.stop_reason == "end_turn":
@@ -290,12 +398,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

             # Process all tool calls and collect results
             tool_results = []
-            for i
-
-
-
+            # Use a different loop variable for tool enumeration if 'i' is outer loop counter
+            for tool_idx, content_block in enumerate(tool_uses):
+                tool_name = content_block.name
+                tool_args = content_block.input
+                tool_use_id = content_block.id

-                if
+                if tool_idx == 0:  # Only show message for first tool use
                     await self.show_assistant_message(message_text, tool_name)

                 self.show_tool_call(available_tools, tool_name, tool_args)
@@ -320,11 +429,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         if params.use_history:
             # Get current prompt messages
             prompt_messages = self.history.get(include_completion_history=False)
-
-            # Calculate new conversation messages (excluding prompts)
             new_messages = messages[len(prompt_messages) :]
-
-            # Update conversation history
             self.history.set(new_messages)

         self._log_chat_finished(model=model)
@@ -362,8 +467,26 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
             multipart_messages[:-1] if last_message.role == "user" else multipart_messages
         )
         converted = []
+
+        # Get cache mode configuration
+        cache_mode = self._get_cache_mode()
+
         for msg in messages_to_add:
-
+            anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
+
+            # Apply caching to template messages if cache_mode is "prompt" or "auto"
+            if is_template and cache_mode in ["prompt", "auto"] and anthropic_msg.get("content"):
+                content_list = anthropic_msg["content"]
+                if isinstance(content_list, list) and content_list:
+                    # Apply cache control to the last content block
+                    last_block = content_list[-1]
+                    if isinstance(last_block, dict):
+                        last_block["cache_control"] = {"type": "ephemeral"}
+                        self.logger.debug(
+                            f"Applied cache_control to template message with role {anthropic_msg.get('role')}"
+                        )
+
+            converted.append(anthropic_msg)

         self.history.extend(converted, is_prompt=is_template)

@@ -398,6 +521,28 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         )
         return self._structured_from_multipart(result, model)

+    def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
+        # Print raw usage for debugging
+        print(f"\n=== USAGE DEBUG ({turn_usage.model}) ===")
+        print(f"Raw usage: {raw_usage}")
+        print(
+            f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+        )
+        print(
+            f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+        )
+        print(f"Effective input: {turn_usage.effective_input_tokens}")
+        print(
+            f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+        )
+        if self.usage_accumulator.context_usage_percentage:
+            print(
+                f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+            )
+        if self.usage_accumulator.cache_hit_rate:
+            print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+        print("===========================\n")
+
     @classmethod
     def convert_message_to_message_param(cls, message: Message, **kwargs) -> MessageParam:
         """Convert a response object to an input parameter object to allow LLM calls to be chained."""
mcp_agent/llm/usage_tracking.py
CHANGED
@@ -84,19 +84,32 @@ class TurnUsage(BaseModel):
     @computed_field
     @property
     def current_context_tokens(self) -> int:
-        """Current context size after this turn (input + output)"""
-
+        """Current context size after this turn (total input including cache + output)"""
+        # For Anthropic: input_tokens + cache_read_tokens represents total input context
+        total_input = self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+        return total_input + self.output_tokens

     @computed_field
     @property
     def effective_input_tokens(self) -> int:
-        """Input tokens
-
-
-
-
-
-
+        """Input tokens actually processed (new tokens, not from cache)"""
+        # For Anthropic: input_tokens already excludes cached content
+        # For other providers: subtract cache hits from input_tokens
+        if self.provider == Provider.ANTHROPIC:
+            return self.input_tokens
+        else:
+            return max(0, self.input_tokens - self.cache_usage.cache_hit_tokens)
+
+    @computed_field
+    @property
+    def display_input_tokens(self) -> int:
+        """Input tokens to display for 'Last turn' (total submitted tokens)"""
+        # For Anthropic: input_tokens excludes cache, so add cache tokens
+        if self.provider == Provider.ANTHROPIC:
+            return self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+        else:
+            # For OpenAI/Google: input_tokens already includes cached tokens
+            return self.input_tokens

     @classmethod
     def from_anthropic(cls, usage: AnthropicUsage, model: str) -> "TurnUsage":
@@ -204,8 +217,11 @@ class UsageAccumulator(BaseModel):
     @computed_field
     @property
     def cumulative_input_tokens(self) -> int:
-        """Total input tokens charged across all turns"""
-        return sum(
+        """Total input tokens charged across all turns (including cache tokens)"""
+        return sum(
+            turn.input_tokens + turn.cache_usage.cache_read_tokens + turn.cache_usage.cache_write_tokens
+            for turn in self.turns
+        )

     @computed_field
     @property
@@ -216,8 +232,8 @@ class UsageAccumulator(BaseModel):
     @computed_field
     @property
     def cumulative_billing_tokens(self) -> int:
-        """Total tokens charged across all turns"""
-        return
+        """Total tokens charged across all turns (including cache tokens)"""
+        return self.cumulative_input_tokens + self.cumulative_output_tokens

     @computed_field
     @property
@@ -258,11 +274,12 @@ class UsageAccumulator(BaseModel):
     @computed_field
     @property
     def cache_hit_rate(self) -> Optional[float]:
-        """Percentage of input
-        if self.cumulative_input_tokens == 0:
-            return None
+        """Percentage of total input context served from cache"""
         cache_tokens = self.cumulative_cache_read_tokens + self.cumulative_cache_hit_tokens
-
+        total_input_context = self.cumulative_input_tokens + cache_tokens
+        if total_input_context == 0:
+            return None
+        return (cache_tokens / total_input_context) * 100

     @computed_field
     @property
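As a worked example of the new accounting, take a single Anthropic turn with input_tokens=200 (newly processed), cache_read_tokens=1800, cache_write_tokens=0 and output_tokens=500 (numbers are made up). In plain arithmetic, not the library API:

    input_tokens, cache_read, cache_write, output_tokens = 200, 1800, 0, 500

    display_input_tokens = input_tokens + cache_read + cache_write   # 2000 tokens submitted
    effective_input_tokens = input_tokens                            # 200 newly processed (Anthropic)
    current_context_tokens = display_input_tokens + output_tokens    # 2500 in context after the turn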
mcp_agent/logging/events.py
CHANGED
@@ -117,3 +117,27 @@ class SamplingFilter(EventFilter):
         if not super().matches(event):
             return False
         return random.random() < self.sample_rate
+
+
+class StreamingExclusionFilter(EventFilter):
+    """
+    Event filter that excludes streaming progress events from logs.
+    This prevents token count updates from flooding the logs when info level is enabled.
+    """
+
+    def matches(self, event: Event) -> bool:
+        # First check if it passes the base filter
+        if not super().matches(event):
+            return False
+
+        # Exclude events with "Streaming progress" message
+        if event.message == "Streaming progress":
+            return False
+
+        # Also check for events with progress_action = STREAMING in data
+        if event.data and isinstance(event.data.get("data"), dict):
+            event_data = event.data["data"]
+            if event_data.get("progress_action") == "Streaming":
+                return False
+
+        return True
mcp_agent/logging/rich_progress.py
CHANGED
@@ -73,6 +73,7 @@ class RichProgressDisplay:
             ProgressAction.LOADED: "dim green",
             ProgressAction.INITIALIZED: "dim green",
             ProgressAction.CHATTING: "bold blue",
+            ProgressAction.STREAMING: "bold blue",  # Same color as chatting
             ProgressAction.ROUTING: "bold blue",
             ProgressAction.PLANNING: "bold blue",
             ProgressAction.READY: "dim green",
@@ -100,9 +101,16 @@ class RichProgressDisplay:
         task_id = self._taskmap[task_name]

         # Ensure no None values in the update
+        # For streaming, use custom description immediately to avoid flashing
+        if event.action == ProgressAction.STREAMING and event.streaming_tokens:
+            formatted_tokens = f"↓ {event.streaming_tokens.strip()}".ljust(15)
+            description = f"[{self._get_action_style(event.action)}]{formatted_tokens}"
+        else:
+            description = f"[{self._get_action_style(event.action)}]{event.action.value:<15}"
+
         self._progress.update(
             task_id,
-            description=
+            description=description,
             target=event.target or task_name,  # Use task_name as fallback for target
             details=event.details or "",
             task_name=task_name,
{fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/WHEEL: File without changes
{fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/entry_points.txt: File without changes
{fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.35.dist-info}/licenses/LICENSE: File without changes