fast-agent-mcp 0.2.32__py3-none-any.whl → 0.2.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/METADATA +1 -1
- {fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/RECORD +23 -20
- mcp_agent/agents/base_agent.py +13 -0
- mcp_agent/config.py +40 -4
- mcp_agent/core/agent_app.py +41 -1
- mcp_agent/core/enhanced_prompt.py +9 -0
- mcp_agent/core/fastagent.py +14 -2
- mcp_agent/core/interactive_prompt.py +59 -13
- mcp_agent/core/usage_display.py +193 -0
- mcp_agent/llm/augmented_llm.py +26 -6
- mcp_agent/llm/augmented_llm_passthrough.py +66 -4
- mcp_agent/llm/augmented_llm_playback.py +19 -0
- mcp_agent/llm/augmented_llm_slow.py +12 -1
- mcp_agent/llm/model_database.py +236 -0
- mcp_agent/llm/model_factory.py +1 -0
- mcp_agent/llm/providers/augmented_llm_anthropic.py +44 -8
- mcp_agent/llm/providers/augmented_llm_google_native.py +18 -1
- mcp_agent/llm/providers/augmented_llm_openai.py +20 -7
- mcp_agent/llm/usage_tracking.py +385 -0
- mcp_agent/mcp/interfaces.py +6 -0
- {fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/licenses/LICENSE +0 -0
mcp_agent/llm/providers/augmented_llm_anthropic.py
CHANGED
@@ -10,6 +10,7 @@ from mcp_agent.llm.providers.multipart_converter_anthropic import (
 from mcp_agent.llm.providers.sampling_converter_anthropic import (
     AnthropicSamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -75,14 +76,14 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize Anthropic-specific default parameters"""
-
-
-
-
-
-
-
-
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with Anthropic-specific settings
+        chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
+        base_params.model = chosen_model
+
+        return base_params
 
     def _base_url(self) -> str | None:
         assert self.context.config
@@ -158,6 +159,41 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
         response = executor_result[0]
 
+        # Track usage if response is valid and has usage data
+        if (
+            hasattr(response, "usage")
+            and response.usage
+            and not isinstance(response, BaseException)
+        ):
+            try:
+                turn_usage = TurnUsage.from_anthropic(
+                    response.usage, model or DEFAULT_ANTHROPIC_MODEL
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+
+                # # Print raw usage for debugging
+                # print(f"\n=== USAGE DEBUG ({model}) ===")
+                # print(f"Raw usage: {response.usage}")
+                # print(
+                #     f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+                # )
+                # print(
+                #     f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+                # )
+                # print(f"Effective input: {turn_usage.effective_input_tokens}")
+                # print(
+                #     f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+                # )
+                # if self.usage_accumulator.context_usage_percentage:
+                #     print(
+                #         f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+                #     )
+                # if self.usage_accumulator.cache_hit_rate:
+                #     print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+                # print("===========================\n")
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
         if isinstance(response, AuthenticationError):
             raise ProviderKeyError(
                 "Invalid Anthropic API key",
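For reference, the conversion performed in the hunk above can be exercised in isolation. The sketch below is illustrative only: the anthropic.types.Usage values and the model string are invented, and the cache_read_input_tokens / cache_creation_input_tokens fields assume a recent anthropic SDK that exposes them.

from anthropic.types import Usage

from mcp_agent.llm.usage_tracking import TurnUsage, UsageAccumulator

# Hypothetical usage payload shaped like what the Anthropic SDK returns
usage = Usage(
    input_tokens=1200,
    output_tokens=350,
    cache_read_input_tokens=800,    # served from the prompt cache
    cache_creation_input_tokens=0,  # nothing newly written to cache
)

turn = TurnUsage.from_anthropic(usage, "claude-sonnet-4-0")
accumulator = UsageAccumulator()
accumulator.add_turn(turn)

print(turn.effective_input_tokens)            # 1200 - 800 = 400 tokens actually processed
print(accumulator.cumulative_billing_tokens)  # 1200 + 350 = 1550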
mcp_agent/llm/providers/augmented_llm_google_native.py
CHANGED
@@ -24,6 +24,7 @@ from mcp_agent.llm.provider_types import Provider
 
 # Import the new converter class
 from mcp_agent.llm.providers.google_converter import GoogleConverter
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
 # Define default model and potentially other Google-specific defaults
@@ -220,6 +221,7 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             parallel_tool_calls=True,  # Assume parallel tool calls are supported by default with native API
             max_iterations=20,
             use_history=True,
+            maxTokens=65536,  # Default max tokens for Google models
             # Include other relevant default parameters
         )
 
@@ -281,10 +283,25 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             )
             self.logger.debug("Google generate_content response:", data=api_response)
 
+            # Track usage if response is valid and has usage data
+            if (
+                hasattr(api_response, "usage_metadata")
+                and api_response.usage_metadata
+                and not isinstance(api_response, BaseException)
+            ):
+                try:
+                    turn_usage = TurnUsage.from_google(
+                        api_response.usage_metadata, request_params.model
+                    )
+                    self.usage_accumulator.add_turn(turn_usage)
+
+                except Exception as e:
+                    self.logger.warning(f"Failed to track usage: {e}")
+
         except errors.APIError as e:
             # Handle specific Google API errors
             self.logger.error(f"Google API Error: {e.code} - {e.message}")
-            raise ProviderKeyError(f"Google API Error: {e.code}", e.message) from e
+            raise ProviderKeyError(f"Google API Error: {e.code}", e.message or "") from e
         except Exception as e:
             self.logger.error(f"Error during Google generate_content call: {e}")
             # Decide how to handle other exceptions - potentially re-raise or return an error message
mcp_agent/llm/providers/augmented_llm_openai.py
CHANGED
@@ -31,6 +31,7 @@ from mcp_agent.llm.providers.multipart_converter_openai import OpenAIConverter,
 from mcp_agent.llm.providers.sampling_converter_openai import (
     OpenAISamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -90,15 +91,14 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize OpenAI-specific default parameters"""
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with OpenAI-specific settings
         chosen_model = kwargs.get("model", DEFAULT_OPENAI_MODEL)
+        base_params.model = chosen_model
 
-        return RequestParams(
-            model=chosen_model,
-            systemPrompt=self.instruction,
-            parallel_tool_calls=True,
-            max_iterations=20,
-            use_history=True,
-        )
+        return base_params
 
     def _base_url(self) -> str:
         return self.context.config.openai.base_url if self.context.config.openai else None
@@ -166,6 +166,19 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
         response = executor_result[0]
 
+        # Track usage if response is valid and has usage data
+        if (
+            hasattr(response, "usage")
+            and response.usage
+            and not isinstance(response, BaseException)
+        ):
+            try:
+                model_name = self.default_request_params.model or DEFAULT_OPENAI_MODEL
+                turn_usage = TurnUsage.from_openai(response.usage, model_name)
+                self.usage_accumulator.add_turn(turn_usage)
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
         self.logger.debug(
             "OpenAI completion response:",
             data=response,
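Unlike the Anthropic path, the OpenAI hunk takes cached tokens from prompt_tokens_details rather than separate read/write counters. A minimal sketch of that path, assuming an openai SDK version that ships PromptTokensDetails; the numbers and model name are invented.

from openai.types.completion_usage import CompletionUsage, PromptTokensDetails

from mcp_agent.llm.usage_tracking import TurnUsage

# Hypothetical payload: 600 of 1000 prompt tokens were served from cache
usage = CompletionUsage(
    prompt_tokens=1000,
    completion_tokens=200,
    total_tokens=1200,
    prompt_tokens_details=PromptTokensDetails(cached_tokens=600),
)

turn = TurnUsage.from_openai(usage, "gpt-4.1")
print(turn.cache_usage.cache_hit_tokens)  # 600
print(turn.effective_input_tokens)        # 1000 - 600 = 400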
mcp_agent/llm/usage_tracking.py
ADDED
@@ -0,0 +1,385 @@
+"""
+Usage tracking system for LLM providers with comprehensive cache support.
+
+This module provides unified usage tracking across Anthropic, OpenAI, and Google providers,
+including detailed cache metrics and context window management.
+"""
+
+import time
+from typing import List, Optional, Union
+
+# Proper type imports for each provider
+from anthropic.types import Usage as AnthropicUsage
+from google.genai.types import GenerateContentResponseUsageMetadata as GoogleUsage
+from openai.types.completion_usage import CompletionUsage as OpenAIUsage
+from pydantic import BaseModel, Field, computed_field
+
+from mcp_agent.llm.model_database import ModelDatabase
+from mcp_agent.llm.provider_types import Provider
+
+
+# Fast-agent specific usage type for synthetic providers
+class FastAgentUsage(BaseModel):
+    """Usage data for fast-agent providers (passthrough, playback, slow)"""
+
+    input_chars: int = Field(description="Characters in input messages")
+    output_chars: int = Field(description="Characters in output messages")
+    model_type: str = Field(description="Type of fast-agent model (passthrough/playbook/slow)")
+    tool_calls: int = Field(default=0, description="Number of tool calls made")
+    delay_seconds: float = Field(default=0.0, description="Artificial delays added")
+
+
+# Union type for raw usage data from any provider
+ProviderUsage = Union[AnthropicUsage, OpenAIUsage, GoogleUsage, FastAgentUsage]
+
+
+class ModelContextWindows:
+    """Context window sizes and cache configurations for various models"""
+
+    @classmethod
+    def get_context_window(cls, model: str) -> Optional[int]:
+        return ModelDatabase.get_context_window(model)
+
+
+class CacheUsage(BaseModel):
+    """Cache-specific usage metrics"""
+
+    cache_read_tokens: int = Field(default=0, description="Tokens read from cache")
+    cache_write_tokens: int = Field(default=0, description="Tokens written to cache")
+    cache_hit_tokens: int = Field(default=0, description="Total tokens served from cache")
+
+    @computed_field
+    @property
+    def total_cache_tokens(self) -> int:
+        """Total cache-related tokens"""
+        return self.cache_read_tokens + self.cache_write_tokens + self.cache_hit_tokens
+
+    @computed_field
+    @property
+    def has_cache_activity(self) -> bool:
+        """Whether any cache activity occurred"""
+        return self.total_cache_tokens > 0
+
+
+class TurnUsage(BaseModel):
+    """Usage data for a single turn/completion with cache support"""
+
+    provider: Provider
+    model: str
+    input_tokens: int
+    output_tokens: int
+    total_tokens: int
+    timestamp: float = Field(default_factory=time.time)
+
+    # Cache-specific metrics
+    cache_usage: CacheUsage = Field(default_factory=CacheUsage)
+
+    # Provider-specific token types
+    tool_use_tokens: int = Field(default=0, description="Tokens used for tool calling prompts")
+    reasoning_tokens: int = Field(default=0, description="Tokens used for reasoning/thinking")
+
+    # Raw usage data from provider (preserves all original data)
+    raw_usage: ProviderUsage
+
+    @computed_field
+    @property
+    def current_context_tokens(self) -> int:
+        """Current context size after this turn (input + output)"""
+        return self.input_tokens + self.output_tokens
+
+    @computed_field
+    @property
+    def effective_input_tokens(self) -> int:
+        """Input tokens excluding cache reads (tokens actually processed)"""
+        return max(
+            0,
+            self.input_tokens
+            - self.cache_usage.cache_read_tokens
+            - self.cache_usage.cache_hit_tokens,
+        )
+
+    @classmethod
+    def from_anthropic(cls, usage: AnthropicUsage, model: str) -> "TurnUsage":
+        # Extract cache tokens with proper null handling
+        cache_creation_tokens = getattr(usage, "cache_creation_input_tokens", 0) or 0
+        cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0
+
+        cache_usage = CacheUsage(
+            cache_read_tokens=cache_read_tokens,  # Tokens read from cache (90% discount)
+            cache_write_tokens=cache_creation_tokens,  # Tokens written to cache (25% surcharge)
+        )
+
+        return cls(
+            provider=Provider.ANTHROPIC,
+            model=model,
+            input_tokens=usage.input_tokens,
+            output_tokens=usage.output_tokens,
+            total_tokens=usage.input_tokens + usage.output_tokens,
+            cache_usage=cache_usage,
+            raw_usage=usage,  # Store the original Anthropic usage object
+        )
+
+    @classmethod
+    def from_openai(cls, usage: OpenAIUsage, model: str) -> "TurnUsage":
+        # Extract cache tokens with proper null handling
+        cached_tokens = 0
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            cached_tokens = getattr(usage.prompt_tokens_details, "cached_tokens", 0) or 0
+
+        cache_usage = CacheUsage(
+            cache_hit_tokens=cached_tokens  # These are tokens served from cache (50% discount)
+        )
+
+        return cls(
+            provider=Provider.OPENAI,
+            model=model,
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            total_tokens=usage.total_tokens,
+            cache_usage=cache_usage,
+            raw_usage=usage,  # Store the original OpenAI usage object
+        )
+
+    @classmethod
+    def from_google(cls, usage: GoogleUsage, model: str) -> "TurnUsage":
+        # Extract token counts with proper null handling
+        prompt_tokens = getattr(usage, "prompt_token_count", 0) or 0
+        candidates_tokens = getattr(usage, "candidates_token_count", 0) or 0
+        total_tokens = getattr(usage, "total_token_count", 0) or 0
+        cached_content_tokens = getattr(usage, "cached_content_token_count", 0) or 0
+
+        # Extract additional Google-specific token types
+        tool_use_tokens = getattr(usage, "tool_use_prompt_token_count", 0) or 0
+        thinking_tokens = getattr(usage, "thoughts_token_count", 0) or 0
+
+        # Google cache tokens are read hits (75% discount on Gemini 2.5)
+        cache_usage = CacheUsage(cache_hit_tokens=cached_content_tokens)
+
+        return cls(
+            provider=Provider.GOOGLE,
+            model=model,
+            input_tokens=prompt_tokens,
+            output_tokens=candidates_tokens,
+            total_tokens=total_tokens,
+            cache_usage=cache_usage,
+            tool_use_tokens=tool_use_tokens,
+            reasoning_tokens=thinking_tokens,
+            raw_usage=usage,  # Store the original Google usage object
+        )
+
+    @classmethod
+    def from_fast_agent(cls, usage: FastAgentUsage, model: str) -> "TurnUsage":
+        # For fast-agent providers, we use characters as "tokens"
+        # This provides a consistent unit of measurement across all providers
+        input_tokens = usage.input_chars
+        output_tokens = usage.output_chars
+        total_tokens = input_tokens + output_tokens
+
+        # Fast-agent providers don't have cache functionality
+        cache_usage = CacheUsage()
+
+        return cls(
+            provider=Provider.FAST_AGENT,
+            model=model,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=total_tokens,
+            cache_usage=cache_usage,
+            raw_usage=usage,  # Store the original FastAgentUsage object
+        )
+
+
+class UsageAccumulator(BaseModel):
+    """Accumulates usage data across multiple turns with cache analytics"""
+
+    turns: List[TurnUsage] = Field(default_factory=list)
+    model: Optional[str] = None
+
+    def add_turn(self, turn: TurnUsage) -> None:
+        """Add a new turn to the accumulator"""
+        self.turns.append(turn)
+        if self.model is None:
+            self.model = turn.model
+
+    @computed_field
+    @property
+    def cumulative_input_tokens(self) -> int:
+        """Total input tokens charged across all turns"""
+        return sum(turn.input_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_output_tokens(self) -> int:
+        """Total output tokens charged across all turns"""
+        return sum(turn.output_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_billing_tokens(self) -> int:
+        """Total tokens charged across all turns"""
+        return sum(turn.total_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_cache_read_tokens(self) -> int:
+        """Total tokens read from cache across all turns"""
+        return sum(turn.cache_usage.cache_read_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_cache_write_tokens(self) -> int:
+        """Total tokens written to cache across all turns"""
+        return sum(turn.cache_usage.cache_write_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_cache_hit_tokens(self) -> int:
+        """Total tokens served from cache across all turns"""
+        return sum(turn.cache_usage.cache_hit_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_effective_input_tokens(self) -> int:
+        """Total input tokens excluding cache reads across all turns"""
+        return sum(turn.effective_input_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_tool_use_tokens(self) -> int:
+        """Total tokens used for tool calling prompts across all turns"""
+        return sum(turn.tool_use_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_reasoning_tokens(self) -> int:
+        """Total tokens used for reasoning/thinking across all turns"""
+        return sum(turn.reasoning_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cache_hit_rate(self) -> Optional[float]:
+        """Percentage of input tokens served from cache"""
+        if self.cumulative_input_tokens == 0:
+            return None
+        cache_tokens = self.cumulative_cache_read_tokens + self.cumulative_cache_hit_tokens
+        return (cache_tokens / self.cumulative_input_tokens) * 100
+
+    @computed_field
+    @property
+    def current_context_tokens(self) -> int:
+        """Current context usage (last turn's context tokens)"""
+        if not self.turns:
+            return 0
+        return self.turns[-1].current_context_tokens
+
+    @computed_field
+    @property
+    def context_window_size(self) -> Optional[int]:
+        """Get context window size for current model"""
+        if self.model:
+            return ModelContextWindows.get_context_window(self.model)
+        return None
+
+    @computed_field
+    @property
+    def context_usage_percentage(self) -> Optional[float]:
+        """Percentage of context window used"""
+        window_size = self.context_window_size
+        if window_size and window_size > 0:
+            return (self.current_context_tokens / window_size) * 100
+        return None
+
+    @computed_field
+    @property
+    def turn_count(self) -> int:
+        """Number of turns accumulated"""
+        return len(self.turns)
+
+    def get_cache_summary(self) -> dict[str, Union[int, float, None]]:
+        """Get cache-specific metrics summary"""
+        return {
+            "cumulative_cache_read_tokens": self.cumulative_cache_read_tokens,
+            "cumulative_cache_write_tokens": self.cumulative_cache_write_tokens,
+            "cumulative_cache_hit_tokens": self.cumulative_cache_hit_tokens,
+            "cache_hit_rate_percent": self.cache_hit_rate,
+            "cumulative_effective_input_tokens": self.cumulative_effective_input_tokens,
+        }
+
+    def get_summary(self) -> dict[str, Union[int, float, str, None]]:
+        """Get comprehensive usage statistics"""
+        cache_summary = self.get_cache_summary()
+        return {
+            "model": self.model,
+            "turn_count": self.turn_count,
+            "cumulative_input_tokens": self.cumulative_input_tokens,
+            "cumulative_output_tokens": self.cumulative_output_tokens,
+            "cumulative_billing_tokens": self.cumulative_billing_tokens,
+            "cumulative_tool_use_tokens": self.cumulative_tool_use_tokens,
+            "cumulative_reasoning_tokens": self.cumulative_reasoning_tokens,
+            "current_context_tokens": self.current_context_tokens,
+            "context_window_size": self.context_window_size,
+            "context_usage_percentage": self.context_usage_percentage,
+            **cache_summary,
+        }
+
+
+# Utility functions for fast-agent integration
+def create_fast_agent_usage(
+    input_content: str,
+    output_content: str,
+    model_type: str,
+    tool_calls: int = 0,
+    delay_seconds: float = 0.0,
+) -> FastAgentUsage:
+    """
+    Create FastAgentUsage from message content.
+
+    Args:
+        input_content: Input message content
+        output_content: Output message content
+        model_type: Type of fast-agent model (passthrough/playback/slow)
+        tool_calls: Number of tool calls made
+        delay_seconds: Artificial delays added
+
+    Returns:
+        FastAgentUsage object with character counts
+    """
+    return FastAgentUsage(
+        input_chars=len(input_content),
+        output_chars=len(output_content),
+        model_type=model_type,
+        tool_calls=tool_calls,
+        delay_seconds=delay_seconds,
+    )
+
+
+def create_turn_usage_from_messages(
+    input_content: str,
+    output_content: str,
+    model: str,
+    model_type: str,
+    tool_calls: int = 0,
+    delay_seconds: float = 0.0,
+) -> TurnUsage:
+    """
+    Create TurnUsage directly from message content for fast-agent providers.
+
+    Args:
+        input_content: Input message content
+        output_content: Output message content
+        model: Model name (e.g., "passthrough", "playback", "slow")
+        model_type: Type for internal tracking
+        tool_calls: Number of tool calls made
+        delay_seconds: Artificial delays added
+
+    Returns:
+        TurnUsage object ready for accumulation
+    """
+    usage = create_fast_agent_usage(
+        input_content=input_content,
+        output_content=output_content,
+        model_type=model_type,
+        tool_calls=tool_calls,
+        delay_seconds=delay_seconds,
+    )
+    return TurnUsage.from_fast_agent(usage, model)
mcp_agent/mcp/interfaces.py
CHANGED
@@ -5,6 +5,7 @@ This module defines protocols (interfaces) that can be used to break circular de
 
 from datetime import timedelta
 from typing import (
+    TYPE_CHECKING,
     Any,
     AsyncContextManager,
     Callable,
@@ -31,6 +32,9 @@ from mcp_agent.core.agent_types import AgentType
 from mcp_agent.core.request_params import RequestParams
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
+if TYPE_CHECKING:
+    from mcp_agent.llm.usage_tracking import UsageAccumulator
+
 
 @runtime_checkable
 class MCPConnectionManagerProtocol(Protocol):
@@ -132,6 +136,8 @@ class AugmentedLLMProtocol(Protocol):
         """
         ...
 
+    usage_accumulator: "UsageAccumulator"
+
 
 class AgentProtocol(AugmentedLLMProtocol, Protocol):
     """Protocol defining the standard agent interface"""
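Because AugmentedLLMProtocol now declares usage_accumulator, callers can report usage against the protocol rather than a concrete LLM class. The helper below is a hypothetical example written against the attributes added in this release, not part of the package.

from mcp_agent.mcp.interfaces import AugmentedLLMProtocol


def describe_context_usage(llm: AugmentedLLMProtocol) -> str:
    """Summarise context-window pressure for any LLM exposing usage_accumulator."""
    acc = llm.usage_accumulator
    pct = acc.context_usage_percentage
    if pct is None:
        return f"{acc.turn_count} turn(s), context window size unknown"
    return f"{acc.turn_count} turn(s), {pct:.1f}% of {acc.context_window_size} tokens used"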
{fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/WHEEL
File without changes
{fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/entry_points.txt
File without changes
{fast_agent_mcp-0.2.32.dist-info → fast_agent_mcp-0.2.34.dist-info}/licenses/LICENSE
File without changes