fast-agent-mcp 0.2.34__py3-none-any.whl → 0.2.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/METADATA +6 -6
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/RECORD +24 -24
- mcp_agent/agents/base_agent.py +2 -2
- mcp_agent/agents/workflow/router_agent.py +1 -1
- mcp_agent/config.py +8 -0
- mcp_agent/context.py +3 -2
- mcp_agent/core/agent_app.py +1 -1
- mcp_agent/core/enhanced_prompt.py +73 -13
- mcp_agent/core/interactive_prompt.py +118 -8
- mcp_agent/event_progress.py +22 -4
- mcp_agent/llm/augmented_llm.py +47 -3
- mcp_agent/llm/memory.py +120 -0
- mcp_agent/llm/model_database.py +2 -2
- mcp_agent/llm/providers/augmented_llm_anthropic.py +178 -45
- mcp_agent/llm/providers/augmented_llm_azure.py +4 -4
- mcp_agent/llm/providers/augmented_llm_openai.py +195 -12
- mcp_agent/llm/providers/multipart_converter_openai.py +4 -3
- mcp_agent/llm/usage_tracking.py +34 -17
- mcp_agent/logging/events.py +24 -0
- mcp_agent/logging/rich_progress.py +9 -1
- mcp_agent/mcp/interfaces.py +1 -1
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/licenses/LICENSE +0 -0
mcp_agent/llm/providers/augmented_llm_openai.py
CHANGED
@@ -8,7 +8,8 @@ from mcp.types import (
     ImageContent,
     TextContent,
 )
-from openai import
+from openai import AsyncOpenAI, AuthenticationError
+from openai.lib.streaming.chat import ChatCompletionStreamState

 # from openai.types.beta.chat import
 from openai.types.chat import (
@@ -22,6 +23,7 @@ from rich.text import Text

 from mcp_agent.core.exceptions import ProviderKeyError
 from mcp_agent.core.prompt import Prompt
+from mcp_agent.event_progress import ProgressAction
 from mcp_agent.llm.augmented_llm import (
     AugmentedLLM,
     RequestParams,
@@ -103,9 +105,9 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
     def _base_url(self) -> str:
         return self.context.config.openai.base_url if self.context.config.openai else None

-    def _openai_client(self) ->
+    def _openai_client(self) -> AsyncOpenAI:
         try:
-            return
+            return AsyncOpenAI(api_key=self._api_key(), base_url=self._base_url())
         except AuthenticationError as e:
             raise ProviderKeyError(
                 "Invalid OpenAI API key",
@@ -113,6 +115,182 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
                 "Please check that your API key is valid and not expired.",
             ) from e

+    async def _process_stream(self, stream, model: str):
+        """Process the streaming response and display real-time token usage."""
+        # Track estimated output tokens by counting text chunks
+        estimated_tokens = 0
+
+        # For non-OpenAI providers (like Ollama), ChatCompletionStreamState might not work correctly
+        # Fall back to manual accumulation if needed
+        # TODO -- consider this and whether to subclass instead
+        if self.provider in [Provider.GENERIC, Provider.OPENROUTER]:
+            return await self._process_stream_manual(stream, model)
+
+        # Use ChatCompletionStreamState helper for accumulation (OpenAI only)
+        state = ChatCompletionStreamState()
+
+        # Process the stream chunks
+        async for chunk in stream:
+            # Handle chunk accumulation
+            state.handle_chunk(chunk)
+
+            # Count tokens in real-time from content deltas
+            if chunk.choices and chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                # Use base class method for token estimation and progress emission
+                estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+
+        # Get the final completion with usage data
+        final_completion = state.get_final_completion()
+
+        # Log final usage information
+        if hasattr(final_completion, "usage") and final_completion.usage:
+            actual_tokens = final_completion.usage.completion_tokens
+            # Emit final progress with actual token count
+            token_str = str(actual_tokens).rjust(5)
+            data = {
+                "progress_action": ProgressAction.STREAMING,
+                "model": model,
+                "agent_name": self.name,
+                "chat_turn": self.chat_turn(),
+                "details": token_str.strip(),
+            }
+            self.logger.info("Streaming progress", data=data)
+
+            self.logger.info(
+                f"Streaming complete - Model: {model}, Input tokens: {final_completion.usage.prompt_tokens}, Output tokens: {final_completion.usage.completion_tokens}"
+            )
+
+        return final_completion
+
+    # TODO - as per other comment this needs to go in another class. There are a number of "special" cases dealt with
+    # here to deal with OpenRouter idiosyncrasies between e.g. Anthropic and Gemini models.
+    async def _process_stream_manual(self, stream, model: str):
+        """Manual stream processing for providers like Ollama that may not work with ChatCompletionStreamState."""
+        from openai.types.chat import ChatCompletionMessageToolCall
+        from openai.types.chat.chat_completion_message_tool_call import Function
+
+        # Track estimated output tokens by counting text chunks
+        estimated_tokens = 0
+
+        # Manual accumulation of response data
+        accumulated_content = ""
+        role = "assistant"
+        tool_calls_map = {}  # Use a map to accumulate tool calls by index
+        function_call = None
+        finish_reason = None
+        usage_data = None
+
+        # Process the stream chunks manually
+        async for chunk in stream:
+            # Count tokens in real-time from content deltas
+            if chunk.choices and chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                accumulated_content += content
+                # Use base class method for token estimation and progress emission
+                estimated_tokens = self._update_streaming_progress(content, model, estimated_tokens)
+
+            # Extract other fields from the chunk
+            if chunk.choices:
+                choice = chunk.choices[0]
+                if choice.delta.role:
+                    role = choice.delta.role
+                if choice.delta.tool_calls:
+                    # Accumulate tool call deltas
+                    for delta_tool_call in choice.delta.tool_calls:
+                        if delta_tool_call.index is not None:
+                            if delta_tool_call.index not in tool_calls_map:
+                                tool_calls_map[delta_tool_call.index] = {
+                                    "id": delta_tool_call.id,
+                                    "type": delta_tool_call.type or "function",
+                                    "function": {
+                                        "name": delta_tool_call.function.name
+                                        if delta_tool_call.function
+                                        else None,
+                                        "arguments": "",
+                                    },
+                                }
+
+                            # Always update if we have new data (needed for OpenRouter Gemini)
+                            if delta_tool_call.id:
+                                tool_calls_map[delta_tool_call.index]["id"] = delta_tool_call.id
+                            if delta_tool_call.function:
+                                if delta_tool_call.function.name:
+                                    tool_calls_map[delta_tool_call.index]["function"]["name"] = (
+                                        delta_tool_call.function.name
+                                    )
+                                # Handle arguments - they might come as None, empty string, or actual content
+                                if delta_tool_call.function.arguments is not None:
+                                    tool_calls_map[delta_tool_call.index]["function"][
+                                        "arguments"
+                                    ] += delta_tool_call.function.arguments
+
+                if choice.delta.function_call:
+                    function_call = choice.delta.function_call
+                if choice.finish_reason:
+                    finish_reason = choice.finish_reason
+
+            # Extract usage data if available
+            if hasattr(chunk, "usage") and chunk.usage:
+                usage_data = chunk.usage
+
+        # Convert accumulated tool calls to proper format
+        tool_calls = None
+        if tool_calls_map:
+            tool_calls = []
+            for idx in sorted(tool_calls_map.keys()):
+                tool_call_data = tool_calls_map[idx]
+                # Only add tool calls that have valid data
+                if tool_call_data["id"] and tool_call_data["function"]["name"]:
+                    tool_calls.append(
+                        ChatCompletionMessageToolCall(
+                            id=tool_call_data["id"],
+                            type=tool_call_data["type"],
+                            function=Function(
+                                name=tool_call_data["function"]["name"],
+                                arguments=tool_call_data["function"]["arguments"],
+                            ),
+                        )
+                    )
+
+        # Create a ChatCompletionMessage manually
+        message = ChatCompletionMessage(
+            content=accumulated_content,
+            role=role,
+            tool_calls=tool_calls if tool_calls else None,
+            function_call=function_call,
+            refusal=None,
+            annotations=None,
+            audio=None,
+        )
+
+        from types import SimpleNamespace
+
+        final_completion = SimpleNamespace()
+        final_completion.choices = [SimpleNamespace()]
+        final_completion.choices[0].message = message
+        final_completion.choices[0].finish_reason = finish_reason
+        final_completion.usage = usage_data
+
+        # Log final usage information
+        if usage_data:
+            actual_tokens = getattr(usage_data, "completion_tokens", estimated_tokens)
+            token_str = str(actual_tokens).rjust(5)
+            data = {
+                "progress_action": ProgressAction.STREAMING,
+                "model": model,
+                "agent_name": self.name,
+                "chat_turn": self.chat_turn(),
+                "details": token_str.strip(),
+            }
+            self.logger.info("Streaming progress", data=data)
+
+            self.logger.info(
+                f"Streaming complete - Model: {model}, Input tokens: {getattr(usage_data, 'prompt_tokens', 0)}, Output tokens: {actual_tokens}"
+            )
+
+        return final_completion
+
     async def _openai_completion(
         self,
         message: OpenAIMessage,
@@ -151,7 +329,10 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
         ]

         if not available_tools:
-
+            if self.provider == Provider.DEEPSEEK:
+                available_tools = None  # deepseek does not allow empty array
+            else:
+                available_tools = []

         # we do NOT send "stop sequences" as this causes errors with mutlimodal processing
         for i in range(request_params.max_iterations):
@@ -160,11 +341,10 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion

             self._log_chat_progress(self.chat_turn(), model=self.default_request_params.model)

-
-
-
-
-            response = executor_result[0]
+            # Use basic streaming API
+            stream = await self._openai_client().chat.completions.create(**arguments)
+            # Process the stream
+            response = await self._process_stream(stream, self.default_request_params.model)

         # Track usage if response is valid and has usage data
         if (
@@ -204,10 +384,11 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
             if message.content:
                 responses.append(TextContent(type="text", text=message.content))

-
-
+            # ParsedChatCompletionMessage is compatible with ChatCompletionMessage
+            # since it inherits from it, so we can use it directly
+            messages.append(message)

-            message_text =
+            message_text = message.content
             if choice.finish_reason in ["tool_calls", "function_call"] and message.tool_calls:
                 if message_text:
                     await self.show_assistant_message(
@@ -347,6 +528,8 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
             "model": self.default_request_params.model,
             "messages": messages,
             "tools": tools,
+            "stream": True,  # Enable basic streaming
+            "stream_options": {"include_usage": True},  # Required for usage data in streaming
         }

         if self._reasoning:
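The new `_openai_completion` path enables streaming by sending `stream=True` with `stream_options={"include_usage": True}`, then folding the chunks back into a regular completion via `ChatCompletionStreamState`. A minimal standalone sketch of that pattern follows; the model name, prompt, and reliance on the `OPENAI_API_KEY` environment variable are assumptions for illustration, not values from fast-agent itself:

```python
# Minimal sketch of the streaming pattern used above (assumes OPENAI_API_KEY is set;
# model and prompt are placeholders).
import asyncio

from openai import AsyncOpenAI
from openai.lib.streaming.chat import ChatCompletionStreamState


async def stream_once() -> None:
    client = AsyncOpenAI()
    state = ChatCompletionStreamState()

    stream = await client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
        stream_options={"include_usage": True},  # usage arrives on the final chunk
    )

    async for chunk in stream:
        state.handle_chunk(chunk)  # accumulate deltas
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)

    completion = state.get_final_completion()  # rebuilt ChatCompletion with usage
    if completion.usage:
        print(f"\noutput tokens: {completion.usage.completion_tokens}")


if __name__ == "__main__":
    asyncio.run(stream_once())
```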
mcp_agent/llm/providers/multipart_converter_openai.py
CHANGED
@@ -360,7 +360,7 @@ class OpenAIConverter:
             return {
                 "role": "tool",
                 "tool_call_id": tool_call_id,
-                "content": "[
+                "content": "[Tool completed successfully]",
             }

         # Separate text and non-text content
@@ -387,8 +387,9 @@ class OpenAIConverter:
                 converted.get("content", "")
             )

-
-
+            # Ensure we always have non-empty content for compatibility
+            if not tool_message_content or tool_message_content.strip() == "":
+                tool_message_content = "[Tool completed successfully]"

         # Create the tool message with just the text
         tool_message = {
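Both converter changes guard against empty tool-result content, which some OpenAI-compatible backends reject. A small sketch of the same fallback rule; the helper name is illustrative rather than the converter's actual API:

```python
# Hypothetical helper mirroring the fallback above: empty or whitespace-only
# tool output is replaced with a short placeholder string.
def safe_tool_content(text: str | None) -> str:
    if not text or text.strip() == "":
        return "[Tool completed successfully]"
    return text


assert safe_tool_content(None) == "[Tool completed successfully]"
assert safe_tool_content("   ") == "[Tool completed successfully]"
assert safe_tool_content("42") == "42"
```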
mcp_agent/llm/usage_tracking.py
CHANGED
@@ -84,19 +84,32 @@ class TurnUsage(BaseModel):
     @computed_field
     @property
     def current_context_tokens(self) -> int:
-        """Current context size after this turn (input + output)"""
-
+        """Current context size after this turn (total input including cache + output)"""
+        # For Anthropic: input_tokens + cache_read_tokens represents total input context
+        total_input = self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+        return total_input + self.output_tokens

     @computed_field
     @property
     def effective_input_tokens(self) -> int:
-        """Input tokens
-
-
-
-
-
+        """Input tokens actually processed (new tokens, not from cache)"""
+        # For Anthropic: input_tokens already excludes cached content
+        # For other providers: subtract cache hits from input_tokens
+        if self.provider == Provider.ANTHROPIC:
+            return self.input_tokens
+        else:
+            return max(0, self.input_tokens - self.cache_usage.cache_hit_tokens)
+
+    @computed_field
+    @property
+    def display_input_tokens(self) -> int:
+        """Input tokens to display for 'Last turn' (total submitted tokens)"""
+        # For Anthropic: input_tokens excludes cache, so add cache tokens
+        if self.provider == Provider.ANTHROPIC:
+            return self.input_tokens + self.cache_usage.cache_read_tokens + self.cache_usage.cache_write_tokens
+        else:
+            # For OpenAI/Google: input_tokens already includes cached tokens
+            return self.input_tokens

     @classmethod
     def from_anthropic(cls, usage: AnthropicUsage, model: str) -> "TurnUsage":
@@ -204,8 +217,11 @@ class UsageAccumulator(BaseModel):
     @computed_field
     @property
     def cumulative_input_tokens(self) -> int:
-        """Total input tokens charged across all turns"""
-        return sum(
+        """Total input tokens charged across all turns (including cache tokens)"""
+        return sum(
+            turn.input_tokens + turn.cache_usage.cache_read_tokens + turn.cache_usage.cache_write_tokens
+            for turn in self.turns
+        )

     @computed_field
     @property
@@ -216,8 +232,8 @@ class UsageAccumulator(BaseModel):
     @computed_field
     @property
     def cumulative_billing_tokens(self) -> int:
-        """Total tokens charged across all turns"""
-        return
+        """Total tokens charged across all turns (including cache tokens)"""
+        return self.cumulative_input_tokens + self.cumulative_output_tokens

     @computed_field
     @property
@@ -258,11 +274,12 @@ class UsageAccumulator(BaseModel):
     @computed_field
     @property
     def cache_hit_rate(self) -> Optional[float]:
-        """Percentage of input
-        if self.cumulative_input_tokens == 0:
-            return None
+        """Percentage of total input context served from cache"""
         cache_tokens = self.cumulative_cache_read_tokens + self.cumulative_cache_hit_tokens
-
+        total_input_context = self.cumulative_input_tokens + cache_tokens
+        if total_input_context == 0:
+            return None
+        return (cache_tokens / total_input_context) * 100

     @computed_field
     @property
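The reworked properties distinguish tokens actually processed from tokens served out of cache. A worked example of the arithmetic with made-up Anthropic-style numbers (not taken from a real turn):

```python
# Illustrative numbers only: an Anthropic-style turn where most of the prompt
# is read back from cache rather than re-processed.
input_tokens = 200          # new (uncached) input tokens
cache_read_tokens = 1_000   # prompt tokens served from cache
cache_write_tokens = 300    # prompt tokens written to cache this turn
output_tokens = 150

# current_context_tokens: total input (including cache) plus output
current_context_tokens = (input_tokens + cache_read_tokens + cache_write_tokens) + output_tokens  # 1650

# effective_input_tokens (Anthropic): input_tokens already excludes cached content
effective_input_tokens = input_tokens  # 200

# display_input_tokens (Anthropic): add cache tokens back for the "Last turn" display
display_input_tokens = input_tokens + cache_read_tokens + cache_write_tokens  # 1500

print(current_context_tokens, effective_input_tokens, display_input_tokens)
```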
mcp_agent/logging/events.py
CHANGED
@@ -117,3 +117,27 @@ class SamplingFilter(EventFilter):
         if not super().matches(event):
             return False
         return random.random() < self.sample_rate
+
+
+class StreamingExclusionFilter(EventFilter):
+    """
+    Event filter that excludes streaming progress events from logs.
+    This prevents token count updates from flooding the logs when info level is enabled.
+    """
+
+    def matches(self, event: Event) -> bool:
+        # First check if it passes the base filter
+        if not super().matches(event):
+            return False
+
+        # Exclude events with "Streaming progress" message
+        if event.message == "Streaming progress":
+            return False
+
+        # Also check for events with progress_action = STREAMING in data
+        if event.data and isinstance(event.data.get("data"), dict):
+            event_data = event.data["data"]
+            if event_data.get("progress_action") == "Streaming":
+                return False
+
+        return True
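The new filter drops per-chunk streaming events in two ways: by the literal "Streaming progress" message and by the nested `progress_action` value. A rough standalone sketch of that decision rule, using plain stand-in event objects rather than fast-agent's real `Event` type:

```python
# Stand-in events for illustration; fast-agent's Event objects carry more fields.
from types import SimpleNamespace


def is_streaming_event(event) -> bool:
    """Mirror of the exclusion rule above: True means the event would be dropped."""
    if event.message == "Streaming progress":
        return True
    if event.data and isinstance(event.data.get("data"), dict):
        if event.data["data"].get("progress_action") == "Streaming":
            return True
    return False


chunk_event = SimpleNamespace(message="Streaming progress", data={})
progress_event = SimpleNamespace(message="progress", data={"data": {"progress_action": "Streaming"}})
normal_event = SimpleNamespace(message="Tool call finished", data={})

assert is_streaming_event(chunk_event)
assert is_streaming_event(progress_event)
assert not is_streaming_event(normal_event)
```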
mcp_agent/logging/rich_progress.py
CHANGED
@@ -73,6 +73,7 @@ class RichProgressDisplay:
             ProgressAction.LOADED: "dim green",
             ProgressAction.INITIALIZED: "dim green",
             ProgressAction.CHATTING: "bold blue",
+            ProgressAction.STREAMING: "bold blue",  # Same color as chatting
             ProgressAction.ROUTING: "bold blue",
             ProgressAction.PLANNING: "bold blue",
             ProgressAction.READY: "dim green",
@@ -100,9 +101,16 @@ class RichProgressDisplay:
         task_id = self._taskmap[task_name]

         # Ensure no None values in the update
+        # For streaming, use custom description immediately to avoid flashing
+        if event.action == ProgressAction.STREAMING and event.streaming_tokens:
+            formatted_tokens = f"↓ {event.streaming_tokens.strip()}".ljust(15)
+            description = f"[{self._get_action_style(event.action)}]{formatted_tokens}"
+        else:
+            description = f"[{self._get_action_style(event.action)}]{event.action.value:<15}"
+
         self._progress.update(
             task_id,
-            description=
+            description=description,
             target=event.target or task_name,  # Use task_name as fallback for target
             details=event.details or "",
             task_name=task_name,
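For streaming events the display swaps the static action label for a live token counter. A quick sketch of just the string formatting; the style and token values are placeholders standing in for `_get_action_style()` and `event.streaming_tokens`:

```python
# Placeholder values; the real display pulls these from the progress event.
style = "bold blue"
streaming_tokens = "  1234"

formatted_tokens = f"↓ {streaming_tokens.strip()}".ljust(15)
description = f"[{style}]{formatted_tokens}"
print(repr(description))  # Rich markup string with the count left-justified to 15 chars
```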
mcp_agent/mcp/interfaces.py
CHANGED
@@ -21,7 +21,7 @@ from typing import (
     runtime_checkable,
 )

-from
+from a2a.types import AgentCard
 from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
 from deprecated import deprecated
 from mcp import ClientSession
{fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/WHEEL
File without changes
{fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/entry_points.txt
File without changes
{fast_agent_mcp-0.2.34.dist-info → fast_agent_mcp-0.2.36.dist-info}/licenses/LICENSE
File without changes