fast-agent-mcp 0.2.34__py3-none-any.whl → 0.2.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/METADATA +6 -6
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/RECORD +24 -24
- mcp_agent/agents/base_agent.py +2 -2
- mcp_agent/agents/workflow/router_agent.py +1 -1
- mcp_agent/config.py +8 -0
- mcp_agent/context.py +3 -2
- mcp_agent/core/agent_app.py +1 -1
- mcp_agent/core/enhanced_prompt.py +73 -13
- mcp_agent/core/interactive_prompt.py +118 -8
- mcp_agent/event_progress.py +22 -4
- mcp_agent/llm/augmented_llm.py +47 -3
- mcp_agent/llm/memory.py +120 -0
- mcp_agent/llm/model_database.py +2 -2
- mcp_agent/llm/providers/augmented_llm_anthropic.py +178 -45
- mcp_agent/llm/providers/augmented_llm_azure.py +4 -4
- mcp_agent/llm/providers/augmented_llm_openai.py +195 -12
- mcp_agent/llm/providers/multipart_converter_openai.py +4 -3
- mcp_agent/llm/usage_tracking.py +34 -17
- mcp_agent/logging/events.py +24 -0
- mcp_agent/logging/rich_progress.py +9 -1
- mcp_agent/mcp/interfaces.py +1 -1
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/licenses/LICENSE +0 -0
mcp_agent/llm/augmented_llm.py
CHANGED
@@ -97,6 +97,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
     PARAM_USE_HISTORY = "use_history"
     PARAM_MAX_ITERATIONS = "max_iterations"
     PARAM_TEMPLATE_VARS = "template_vars"
+
     # Base set of fields that should always be excluded
     BASE_EXCLUDE_FIELDS = {PARAM_METADATA}

@@ -371,16 +372,28 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # Start with base arguments
         arguments = base_args.copy()

-        #
-
+        # Combine base exclusions with provider-specific exclusions
+        final_exclude_fields = self.BASE_EXCLUDE_FIELDS.copy()
+        if exclude_fields:
+            final_exclude_fields.update(exclude_fields)

         # Add all fields from params that aren't explicitly excluded
-
+        # Ensure model_dump only includes set fields if that's the desired behavior,
+        # or adjust exclude_unset=True/False as needed.
+        # Default Pydantic v2 model_dump is exclude_unset=False
+        params_dict = request_params.model_dump(exclude=final_exclude_fields)
+
         for key, value in params_dict.items():
+            # Only add if not None and not already in base_args (base_args take precedence)
+            # or if None is a valid value for the provider, this logic might need adjustment.
             if value is not None and key not in arguments:
                 arguments[key] = value
+            elif value is not None and key in arguments and arguments[key] is None:
+                # Allow overriding a None in base_args with a set value from params
+                arguments[key] = value

         # Finally, add any metadata fields as a last layer of overrides
+        # This ensures metadata can override anything previously set if keys conflict.
         if request_params.metadata:
             arguments.update(request_params.metadata)

@@ -541,6 +554,37 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         }
         self.logger.debug("Chat in progress", data=data)

+    def _update_streaming_progress(self, content: str, model: str, estimated_tokens: int) -> int:
+        """Update streaming progress with token estimation and formatting.
+
+        Args:
+            content: The text content from the streaming event
+            model: The model name
+            estimated_tokens: Current token count to update
+
+        Returns:
+            Updated estimated token count
+        """
+        # Rough estimate: 1 token per 4 characters (OpenAI's typical ratio)
+        text_length = len(content)
+        additional_tokens = max(1, text_length // 4)
+        new_total = estimated_tokens + additional_tokens
+
+        # Format token count for display
+        token_str = str(new_total).rjust(5)
+
+        # Emit progress event
+        data = {
+            "progress_action": ProgressAction.STREAMING,
+            "model": model,
+            "agent_name": self.name,
+            "chat_turn": self.chat_turn(),
+            "details": token_str.strip(),  # Token count goes in details for STREAMING action
+        }
+        self.logger.info("Streaming progress", data=data)
+
+        return new_total
+
     def _log_chat_finished(self, model: Optional[str] = None) -> None:
         """Log a chat finished event"""
         data = {
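The new `_update_streaming_progress` helper estimates output tokens at roughly one token per four characters of streamed text and reports the running total as a STREAMING progress event. A minimal standalone sketch of that estimate (the `estimate_stream_tokens` name and the sample chunks are illustrative, not part of the package):

```python
# Rough token estimate used by _update_streaming_progress: max(1, len(text) // 4) per chunk.
def estimate_stream_tokens(chunks: list[str]) -> int:
    total = 0
    for chunk in chunks:
        # Each text delta contributes at least one token
        total += max(1, len(chunk) // 4)
    return total


print(estimate_stream_tokens(["Hello, ", "world! This is a streamed reply."]))  # 1 + 8 = 9
```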
mcp_agent/llm/memory.py
CHANGED
@@ -35,6 +35,9 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
     def __init__(self) -> None:
         self.history: List[MessageParamT] = []
         self.prompt_messages: List[MessageParamT] = []  # Always included
+        self.conversation_cache_positions: List[int] = []  # Track active conversation cache positions
+        self.cache_walk_distance: int = 6  # Messages between cache blocks
+        self.max_conversation_cache_blocks: int = 2  # Maximum conversation cache blocks

     def extend(self, messages: List[MessageParamT], is_prompt: bool = False) -> None:
         """
@@ -99,5 +102,122 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
             clear_prompts: If True, also clear prompt messages
         """
         self.history = []
+        self.conversation_cache_positions = []  # Reset cache positions
         if clear_prompts:
             self.prompt_messages = []
+
+    def should_apply_conversation_cache(self) -> bool:
+        """
+        Determine if conversation caching should be applied based on walking algorithm.
+
+        Returns:
+            True if we should add or update cache blocks
+        """
+        total_messages = len(self.history)
+
+        # Need at least cache_walk_distance messages to start caching
+        if total_messages < self.cache_walk_distance:
+            return False
+
+        # Check if we need to add a new cache block
+        return len(self._calculate_cache_positions(total_messages)) != len(self.conversation_cache_positions)
+
+    def _calculate_cache_positions(self, total_conversation_messages: int) -> List[int]:
+        """
+        Calculate where cache blocks should be placed using walking algorithm.
+
+        Args:
+            total_conversation_messages: Number of conversation messages (not including prompts)
+
+        Returns:
+            List of positions (relative to conversation start) where cache should be placed
+        """
+        positions = []
+
+        # Place cache blocks every cache_walk_distance messages
+        for i in range(self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance):
+            positions.append(i)
+            if len(positions) >= self.max_conversation_cache_blocks:
+                break
+
+        # Keep only the most recent cache blocks (walking behavior)
+        if len(positions) > self.max_conversation_cache_blocks:
+            positions = positions[-self.max_conversation_cache_blocks:]
+
+        return positions
+
+    def get_conversation_cache_updates(self) -> dict:
+        """
+        Get cache position updates needed for the walking algorithm.
+
+        Returns:
+            Dict with 'add', 'remove', and 'active' position lists (relative to full message array)
+        """
+        total_conversation_messages = len(self.history)
+        new_positions = self._calculate_cache_positions(total_conversation_messages)
+
+        # Convert to absolute positions (including prompt messages)
+        prompt_offset = len(self.prompt_messages)
+        new_absolute_positions = [pos + prompt_offset for pos in new_positions]
+
+        old_positions_set = set(self.conversation_cache_positions)
+        new_positions_set = set(new_absolute_positions)
+
+        return {
+            'add': sorted(new_positions_set - old_positions_set),
+            'remove': sorted(old_positions_set - new_positions_set),
+            'active': sorted(new_absolute_positions)
+        }
+
+    def apply_conversation_cache_updates(self, updates: dict) -> None:
+        """
+        Apply cache position updates.
+
+        Args:
+            updates: Dict from get_conversation_cache_updates()
+        """
+        self.conversation_cache_positions = updates['active'].copy()
+
+    def remove_cache_control_from_messages(self, messages: List[MessageParamT], positions: List[int]) -> None:
+        """
+        Remove cache control from specified message positions.
+
+        Args:
+            messages: The message array to modify
+            positions: List of positions to remove cache control from
+        """
+        for pos in positions:
+            if pos < len(messages):
+                message = messages[pos]
+                if isinstance(message, dict) and "content" in message:
+                    content_list = message["content"]
+                    if isinstance(content_list, list):
+                        for content_block in content_list:
+                            if isinstance(content_block, dict) and "cache_control" in content_block:
+                                del content_block["cache_control"]
+
+    def add_cache_control_to_messages(self, messages: List[MessageParamT], positions: List[int]) -> int:
+        """
+        Add cache control to specified message positions.
+
+        Args:
+            messages: The message array to modify
+            positions: List of positions to add cache control to
+
+        Returns:
+            Number of cache blocks successfully applied
+        """
+        applied_count = 0
+        for pos in positions:
+            if pos < len(messages):
+                message = messages[pos]
+                if isinstance(message, dict) and "content" in message:
+                    content_list = message["content"]
+                    if isinstance(content_list, list) and content_list:
+                        # Apply cache control to the last content block
+                        for content_block in reversed(content_list):
+                            if isinstance(content_block, dict):
+                                content_block["cache_control"] = {"type": "ephemeral"}
+                                applied_count += 1
+                                break
+        return applied_count
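With the defaults above (a cache marker every 6 conversation messages, at most 2 conversation cache blocks), the walking algorithm places markers at conversation indices 5, 11, ... and then shifts them past the prompt messages. A minimal sketch of how those positions come out, assuming messages are plain dicts as the cache helpers expect:

```python
from mcp_agent.llm.memory import SimpleMemory

# Sketch: 3 prompt messages plus 14 conversation messages, default walk settings.
memory = SimpleMemory()
memory.prompt_messages = [{"role": "user", "content": [{"type": "text", "text": "template"}]}] * 3
memory.history = [{"role": "user", "content": [{"type": "text", "text": "turn"}]}] * 14

relative = memory._calculate_cache_positions(len(memory.history))
updates = memory.get_conversation_cache_updates()

print(relative)           # [5, 11] -> every 6th conversation message, capped at 2 blocks
print(updates["active"])  # [8, 14] -> same positions shifted past the 3 prompt messages
print(updates["add"])     # [8, 14] on the first pass, since nothing was cached before
```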
mcp_agent/llm/model_database.py
CHANGED
@@ -109,11 +109,11 @@ class ModelDatabase:

     # TODO update to 32000
     ANTHROPIC_OPUS_4_VERSIONED = ModelParameters(
-        context_window=200000, max_output_tokens=
+        context_window=200000, max_output_tokens=32000, tokenizes=ANTHROPIC_MULTIMODAL
     )
     # TODO update to 64000
     ANTHROPIC_SONNET_4_VERSIONED = ModelParameters(
-        context_window=200000, max_output_tokens=
+        context_window=200000, max_output_tokens=64000, tokenizes=ANTHROPIC_MULTIMODAL
     )

     DEEPSEEK_CHAT_STANDARD = ModelParameters(
mcp_agent/llm/providers/augmented_llm_anthropic.py
CHANGED
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, List, Tuple, Type
 from mcp.types import EmbeddedResource, ImageContent, TextContent

 from mcp_agent.core.prompt import Prompt
+from mcp_agent.event_progress import ProgressAction
 from mcp_agent.llm.provider_types import Provider
 from mcp_agent.llm.providers.multipart_converter_anthropic import (
     AnthropicConverter,
@@ -18,7 +19,8 @@ if TYPE_CHECKING:
     from mcp import ListToolsResult


-from anthropic import
+from anthropic import AsyncAnthropic, AuthenticationError
+from anthropic.lib.streaming import AsyncMessageStream
 from anthropic.types import (
     Message,
     MessageParam,
@@ -78,17 +80,69 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         """Initialize Anthropic-specific default parameters"""
         # Get base defaults from parent (includes ModelDatabase lookup)
         base_params = super()._initialize_default_params(kwargs)
-
+
         # Override with Anthropic-specific settings
         chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
         base_params.model = chosen_model
-
+
         return base_params

     def _base_url(self) -> str | None:
         assert self.context.config
         return self.context.config.anthropic.base_url if self.context.config.anthropic else None

+    def _get_cache_mode(self) -> str:
+        """Get the cache mode configuration."""
+        cache_mode = "auto"  # Default to auto
+        if self.context.config and self.context.config.anthropic:
+            cache_mode = self.context.config.anthropic.cache_mode
+        return cache_mode
+
+    async def _process_stream(self, stream: AsyncMessageStream, model: str) -> Message:
+        """Process the streaming response and display real-time token usage."""
+        # Track estimated output tokens by counting text chunks
+        estimated_tokens = 0
+
+        # Process the raw event stream to get token counts
+        async for event in stream:
+            # Count tokens in real-time from content_block_delta events
+            if (
+                event.type == "content_block_delta"
+                and hasattr(event, "delta")
+                and event.delta.type == "text_delta"
+            ):
+                # Use base class method for token estimation and progress emission
+                estimated_tokens = self._update_streaming_progress(event.delta.text, model, estimated_tokens)
+
+            # Also check for final message_delta events with actual usage info
+            elif (
+                event.type == "message_delta"
+                and hasattr(event, "usage")
+                and event.usage.output_tokens
+            ):
+                actual_tokens = event.usage.output_tokens
+                # Emit final progress with actual token count
+                token_str = str(actual_tokens).rjust(5)
+                data = {
+                    "progress_action": ProgressAction.STREAMING,
+                    "model": model,
+                    "agent_name": self.name,
+                    "chat_turn": self.chat_turn(),
+                    "details": token_str.strip(),
+                }
+                self.logger.info("Streaming progress", data=data)
+
+        # Get the final message with complete usage data
+        message = await stream.get_final_message()
+
+        # Log final usage information
+        if hasattr(message, "usage") and message.usage:
+            self.logger.info(
+                f"Streaming complete - Model: {model}, Input tokens: {message.usage.input_tokens}, Output tokens: {message.usage.output_tokens}"
+            )
+
+        return message
+
     async def _anthropic_completion(
         self,
         message_param,
@@ -105,7 +159,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
             base_url = base_url.rstrip("/v1")

         try:
-            anthropic =
+            anthropic = AsyncAnthropic(api_key=api_key, base_url=base_url)
             messages: List[MessageParam] = []
             params = self.get_request_params(request_params)
         except AuthenticationError as e:
@@ -118,7 +172,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         # if use_history is True
         messages.extend(self.history.get(include_completion_history=params.use_history))

-        messages.append(message_param)
+        messages.append(message_param)  # message_param is the current user turn
+
+        # Get cache mode configuration
+        cache_mode = self._get_cache_mode()
+        self.logger.debug(f"Anthropic cache_mode: {cache_mode}")

         tool_list: ListToolsResult = await self.aggregator.list_tools()
         available_tools: List[ToolParam] = [
@@ -134,8 +192,11 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

         model = self.default_request_params.model

+        # Note: We'll cache tools+system together by putting cache_control only on system prompt
+
         for i in range(params.max_iterations):
             self._log_chat_progress(self.chat_turn(), model=model)
+
             # Create base arguments dictionary
             base_args = {
                 "model": model,
@@ -145,6 +206,60 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                 "tools": available_tools,
             }

+            # Apply cache_control to system prompt if cache_mode is not "off"
+            # This caches both tools and system prompt together in one cache block
+            if cache_mode != "off" and base_args["system"]:
+                if isinstance(base_args["system"], str):
+                    base_args["system"] = [
+                        {
+                            "type": "text",
+                            "text": base_args["system"],
+                            "cache_control": {"type": "ephemeral"},
+                        }
+                    ]
+                    self.logger.debug(
+                        "Applied cache_control to system prompt (caches tools+system in one block)"
+                    )
+                else:
+                    self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+
+            # Apply conversation caching using walking algorithm if in auto mode
+            if cache_mode == "auto" and self.history.should_apply_conversation_cache():
+                cache_updates = self.history.get_conversation_cache_updates()
+
+                # Remove cache control from old positions
+                if cache_updates["remove"]:
+                    self.history.remove_cache_control_from_messages(
+                        messages, cache_updates["remove"]
+                    )
+                    self.logger.debug(
+                        f"Removed conversation cache_control from positions {cache_updates['remove']}"
+                    )
+
+                # Add cache control to new positions
+                if cache_updates["add"]:
+                    applied_count = self.history.add_cache_control_to_messages(
+                        messages, cache_updates["add"]
+                    )
+                    if applied_count > 0:
+                        self.history.apply_conversation_cache_updates(cache_updates)
+                        self.logger.debug(
+                            f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
+                        )
+
+                        # Verify we don't exceed Anthropic's 4 cache block limit
+                        total_cache_blocks = applied_count
+                        if cache_mode != "off" and base_args["system"]:
+                            total_cache_blocks += 1  # tools+system cache block
+                        if total_cache_blocks > 4:
+                            self.logger.warning(
+                                f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
+                            )
+                    else:
+                        self.logger.debug(
+                            f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
+                        )
+
             if params.maxTokens is not None:
                 base_args["max_tokens"] = params.maxTokens

@@ -155,9 +270,10 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

             self.logger.debug(f"{arguments}")

-
-
-
+            # Use streaming API with helper
+            async with anthropic.messages.stream(**arguments) as stream:
+                # Process the stream
+                response = await self._process_stream(stream, model)

             # Track usage if response is valid and has usage data
             if (
@@ -170,27 +286,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                         response.usage, model or DEFAULT_ANTHROPIC_MODEL
                     )
                     self.usage_accumulator.add_turn(turn_usage)
-
-                    # # Print raw usage for debugging
-                    # print(f"\n=== USAGE DEBUG ({model}) ===")
-                    # print(f"Raw usage: {response.usage}")
-                    # print(
-                    #     f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
-                    # )
-                    # print(
-                    #     f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
-                    # )
-                    # print(f"Effective input: {turn_usage.effective_input_tokens}")
-                    # print(
-                    #     f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
-                    # )
-                    # if self.usage_accumulator.context_usage_percentage:
-                    #     print(
-                    #         f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
-                    #     )
-                    # if self.usage_accumulator.cache_hit_rate:
-                    #     print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
-                    # print("===========================\n")
+                    # self._show_usage(response.usage, turn_usage)
                 except Exception as e:
                     self.logger.warning(f"Failed to track usage: {e}")

@@ -201,7 +297,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                 ) from response
             elif isinstance(response, BaseException):
                 error_details = str(response)
-                self.logger.error(f"Error: {error_details}", data=
+                self.logger.error(f"Error: {error_details}", data=BaseException)

                 # Try to extract more useful information for API errors
                 if hasattr(response, "status_code") and hasattr(response, "response"):
@@ -214,13 +310,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                 # Convert other errors to text response
                 error_message = f"Error during generation: {error_details}"
                 response = Message(
-                    id="error",
-                    model="error",
+                    id="error",
+                    model="error",
                     role="assistant",
                     type="message",
                     content=[TextBlock(type="text", text=error_message)],
-                    stop_reason="end_turn",
-                    usage=Usage(input_tokens=0, output_tokens=0),
+                    stop_reason="end_turn",
+                    usage=Usage(input_tokens=0, output_tokens=0),
                 )

             self.logger.debug(
@@ -230,7 +326,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

             response_as_message = self.convert_message_to_message_param(response)
             messages.append(response_as_message)
-            if response.content[0].type == "text":
+            if response.content and response.content[0].type == "text":
                 responses.append(TextContent(type="text", text=response.content[0].text))

             if response.stop_reason == "end_turn":
@@ -290,12 +386,13 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):

                 # Process all tool calls and collect results
                 tool_results = []
-                for i
-
-
-
+                # Use a different loop variable for tool enumeration if 'i' is outer loop counter
+                for tool_idx, content_block in enumerate(tool_uses):
+                    tool_name = content_block.name
+                    tool_args = content_block.input
+                    tool_use_id = content_block.id

-                    if
+                    if tool_idx == 0:  # Only show message for first tool use
                         await self.show_assistant_message(message_text, tool_name)

                     self.show_tool_call(available_tools, tool_name, tool_args)
@@ -320,11 +417,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         if params.use_history:
             # Get current prompt messages
             prompt_messages = self.history.get(include_completion_history=False)
-
-            # Calculate new conversation messages (excluding prompts)
             new_messages = messages[len(prompt_messages) :]
-
-            # Update conversation history
             self.history.set(new_messages)

         self._log_chat_finished(model=model)
@@ -362,8 +455,26 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
             multipart_messages[:-1] if last_message.role == "user" else multipart_messages
         )
         converted = []
+
+        # Get cache mode configuration
+        cache_mode = self._get_cache_mode()
+
         for msg in messages_to_add:
-
+            anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
+
+            # Apply caching to template messages if cache_mode is "prompt" or "auto"
+            if is_template and cache_mode in ["prompt", "auto"] and anthropic_msg.get("content"):
+                content_list = anthropic_msg["content"]
+                if isinstance(content_list, list) and content_list:
+                    # Apply cache control to the last content block
+                    last_block = content_list[-1]
+                    if isinstance(last_block, dict):
+                        last_block["cache_control"] = {"type": "ephemeral"}
+                        self.logger.debug(
+                            f"Applied cache_control to template message with role {anthropic_msg.get('role')}"
+                        )
+
+            converted.append(anthropic_msg)

         self.history.extend(converted, is_prompt=is_template)

@@ -398,6 +509,28 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         )
         return self._structured_from_multipart(result, model)

+    def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
+        # Print raw usage for debugging
+        print(f"\n=== USAGE DEBUG ({turn_usage.model}) ===")
+        print(f"Raw usage: {raw_usage}")
+        print(
+            f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+        )
+        print(
+            f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+        )
+        print(f"Effective input: {turn_usage.effective_input_tokens}")
+        print(
+            f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+        )
+        if self.usage_accumulator.context_usage_percentage:
+            print(
+                f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+            )
+        if self.usage_accumulator.cache_hit_rate:
+            print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+        print("===========================\n")
+
     @classmethod
     def convert_message_to_message_param(cls, message: Message, **kwargs) -> MessageParam:
         """Convert a response object to an input parameter object to allow LLM calls to be chained."""
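The caching logic above rewrites a plain string system prompt into a single text block carrying `cache_control`, and puts the same marker on the last content block of conversation messages chosen by the walking algorithm; Anthropic accepts at most four such blocks per request, which the warning above guards against. A sketch of the resulting request shapes (prompt and message text are illustrative):

```python
# Shape of a cached system prompt (tools + system share this one cache block):
system_blocks = [
    {
        "type": "text",
        "text": "You are a helpful research agent.",
        "cache_control": {"type": "ephemeral"},
    }
]

# Shape of a conversation message selected by the walking algorithm:
cached_message = {
    "role": "user",
    "content": [
        {
            "type": "text",
            "text": "Summarise the attached report.",
            "cache_control": {"type": "ephemeral"},
        }
    ],
}
```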
mcp_agent/llm/providers/augmented_llm_azure.py
CHANGED
@@ -1,4 +1,4 @@
-from openai import
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AuthenticationError

 from mcp_agent.core.exceptions import ProviderKeyError
 from mcp_agent.llm.provider_types import Provider
@@ -93,7 +93,7 @@ class AzureOpenAIAugmentedLLM(OpenAIAugmentedLLM):
         if not self.resource_name and self.base_url:
             self.resource_name = _extract_resource_name(self.base_url)

-    def _openai_client(self) ->
+    def _openai_client(self) -> AsyncOpenAI:
        """
        Returns an AzureOpenAI client, handling both API Key and DefaultAzureCredential.
        """
@@ -104,7 +104,7 @@ class AzureOpenAIAugmentedLLM(OpenAIAugmentedLLM):
                    "Missing Azure endpoint",
                    "azure_endpoint (base_url) is None at client creation time.",
                )
-            return
+            return AsyncAzureOpenAI(
                azure_ad_token_provider=self.get_azure_token,
                azure_endpoint=self.base_url,
                api_version=self.api_version,
@@ -116,7 +116,7 @@ class AzureOpenAIAugmentedLLM(OpenAIAugmentedLLM):
                    "Missing Azure endpoint",
                    "azure_endpoint (base_url) is None at client creation time.",
                )
-            return
+            return AsyncAzureOpenAI(
                api_key=self.api_key,
                azure_endpoint=self.base_url,
                api_version=self.api_version,
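The Azure provider now returns async clients to match the streaming changes in the other providers. A minimal sketch of the two construction paths shown above (the `build_azure_client` helper and its placeholder arguments are illustrative, not part of the package):

```python
from openai import AsyncAzureOpenAI


def build_azure_client(base_url: str, api_version: str, api_key: str | None = None, token_provider=None) -> AsyncAzureOpenAI:
    if token_provider is not None:
        # DefaultAzureCredential / Entra ID path
        return AsyncAzureOpenAI(
            azure_ad_token_provider=token_provider,
            azure_endpoint=base_url,
            api_version=api_version,
        )
    # API-key path
    return AsyncAzureOpenAI(
        api_key=api_key,
        azure_endpoint=base_url,
        api_version=api_version,
    )
```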