fast-agent-mcp 0.2.32__py3-none-any.whl → 0.2.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ from mcp_agent.llm.providers.multipart_converter_anthropic import (
 from mcp_agent.llm.providers.sampling_converter_anthropic import (
     AnthropicSamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -75,14 +76,14 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize Anthropic-specific default parameters"""
-        return RequestParams(
-            model=kwargs.get("model", DEFAULT_ANTHROPIC_MODEL),
-            maxTokens=4096,  # default haiku3
-            systemPrompt=self.instruction,
-            parallel_tool_calls=True,
-            max_iterations=20,
-            use_history=True,
-        )
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with Anthropic-specific settings
+        chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
+        base_params.model = chosen_model
+
+        return base_params
 
     def _base_url(self) -> str | None:
         assert self.context.config
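
The Anthropic provider above (and the OpenAI provider later in this diff) now delegates its defaults to the base class instead of building a RequestParams from scratch, so ModelDatabase-derived values such as maxTokens come from the parent lookup and only the model name is pinned per provider. A minimal sketch of that pattern follows; the ExampleProviderLLM class, the DEFAULT_EXAMPLE_MODEL constant, and the base-class import path are illustrative assumptions, not code from the package:

    # Illustrative sketch of the delegation pattern used by the updated providers.
    # ExampleProviderLLM and DEFAULT_EXAMPLE_MODEL are hypothetical names; the
    # AugmentedLLM import path is assumed.
    from mcp_agent.core.request_params import RequestParams
    from mcp_agent.llm.augmented_llm import AugmentedLLM

    DEFAULT_EXAMPLE_MODEL = "example-model-latest"


    class ExampleProviderLLM(AugmentedLLM):
        def _initialize_default_params(self, kwargs: dict) -> RequestParams:
            # Parent defaults are assumed to include the ModelDatabase lookup
            # (context window, max tokens, etc.) for the chosen model.
            base_params = super()._initialize_default_params(kwargs)

            # Provider-specific override: pin the model, keep everything else.
            base_params.model = kwargs.get("model", DEFAULT_EXAMPLE_MODEL)
            return base_params
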
@@ -158,6 +159,41 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
            response = executor_result[0]
 
+            # Track usage if response is valid and has usage data
+            if (
+                hasattr(response, "usage")
+                and response.usage
+                and not isinstance(response, BaseException)
+            ):
+                try:
+                    turn_usage = TurnUsage.from_anthropic(
+                        response.usage, model or DEFAULT_ANTHROPIC_MODEL
+                    )
+                    self.usage_accumulator.add_turn(turn_usage)
+
+                    # # Print raw usage for debugging
+                    # print(f"\n=== USAGE DEBUG ({model}) ===")
+                    # print(f"Raw usage: {response.usage}")
+                    # print(
+                    #     f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+                    # )
+                    # print(
+                    #     f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+                    # )
+                    # print(f"Effective input: {turn_usage.effective_input_tokens}")
+                    # print(
+                    #     f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+                    # )
+                    # if self.usage_accumulator.context_usage_percentage:
+                    #     print(
+                    #         f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+                    #     )
+                    # if self.usage_accumulator.cache_hit_rate:
+                    #     print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+                    # print("===========================\n")
+                except Exception as e:
+                    self.logger.warning(f"Failed to track usage: {e}")
+
            if isinstance(response, AuthenticationError):
                raise ProviderKeyError(
                    "Invalid Anthropic API key",
@@ -24,6 +24,7 @@ from mcp_agent.llm.provider_types import Provider
 
 # Import the new converter class
 from mcp_agent.llm.providers.google_converter import GoogleConverter
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
 # Define default model and potentially other Google-specific defaults
@@ -220,6 +221,7 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             parallel_tool_calls=True,  # Assume parallel tool calls are supported by default with native API
             max_iterations=20,
             use_history=True,
+            maxTokens=65536,  # Default max tokens for Google models
             # Include other relevant default parameters
         )
 
@@ -281,10 +283,25 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
                )
                self.logger.debug("Google generate_content response:", data=api_response)
 
+                # Track usage if response is valid and has usage data
+                if (
+                    hasattr(api_response, "usage_metadata")
+                    and api_response.usage_metadata
+                    and not isinstance(api_response, BaseException)
+                ):
+                    try:
+                        turn_usage = TurnUsage.from_google(
+                            api_response.usage_metadata, request_params.model
+                        )
+                        self.usage_accumulator.add_turn(turn_usage)
+
+                    except Exception as e:
+                        self.logger.warning(f"Failed to track usage: {e}")
+
            except errors.APIError as e:
                # Handle specific Google API errors
                self.logger.error(f"Google API Error: {e.code} - {e.message}")
-                raise ProviderKeyError(f"Google API Error: {e.code}", e.message) from e
+                raise ProviderKeyError(f"Google API Error: {e.code}", e.message or "") from e
            except Exception as e:
                self.logger.error(f"Error during Google generate_content call: {e}")
                # Decide how to handle other exceptions - potentially re-raise or return an error message
@@ -31,6 +31,7 @@ from mcp_agent.llm.providers.multipart_converter_openai import OpenAIConverter,
 from mcp_agent.llm.providers.sampling_converter_openai import (
     OpenAISamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -90,15 +91,14 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize OpenAI-specific default parameters"""
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with OpenAI-specific settings
         chosen_model = kwargs.get("model", DEFAULT_OPENAI_MODEL)
+        base_params.model = chosen_model
 
-        return RequestParams(
-            model=chosen_model,
-            systemPrompt=self.instruction,
-            parallel_tool_calls=True,
-            max_iterations=20,
-            use_history=True,
-        )
+        return base_params
 
     def _base_url(self) -> str:
         return self.context.config.openai.base_url if self.context.config.openai else None
@@ -166,6 +166,19 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
            response = executor_result[0]
 
+            # Track usage if response is valid and has usage data
+            if (
+                hasattr(response, "usage")
+                and response.usage
+                and not isinstance(response, BaseException)
+            ):
+                try:
+                    model_name = self.default_request_params.model or DEFAULT_OPENAI_MODEL
+                    turn_usage = TurnUsage.from_openai(response.usage, model_name)
+                    self.usage_accumulator.add_turn(turn_usage)
+                except Exception as e:
+                    self.logger.warning(f"Failed to track usage: {e}")
+
            self.logger.debug(
                "OpenAI completion response:",
                data=response,
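
Each of the provider hunks above builds a TurnUsage from the provider's raw usage payload and pushes it onto the LLM's usage_accumulator (the attribute added to AugmentedLLMProtocol at the end of this diff). A short sketch of reading the accumulated statistics afterwards, assuming `llm` is an AugmentedLLM instance that carries the new accumulator; the fields shown are defined in the new mcp_agent.llm.usage_tracking module below:

    # Sketch: inspecting accumulated usage after some completions have run.
    acc = llm.usage_accumulator          # assumption: handle to an AugmentedLLM instance

    summary = acc.get_summary()          # dict of cumulative counters
    print(summary["turn_count"], summary["cumulative_billing_tokens"])

    if acc.context_usage_percentage is not None:   # None when the context window is unknown
        print(f"context: {acc.context_usage_percentage:.1f}% of {acc.context_window_size}")
    if acc.cache_hit_rate is not None:
        print(f"cache hit rate: {acc.cache_hit_rate:.1f}%")
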
@@ -0,0 +1,385 @@
+"""
+Usage tracking system for LLM providers with comprehensive cache support.
+
+This module provides unified usage tracking across Anthropic, OpenAI, and Google providers,
+including detailed cache metrics and context window management.
+"""
+
+import time
+from typing import List, Optional, Union
+
+# Proper type imports for each provider
+from anthropic.types import Usage as AnthropicUsage
+from google.genai.types import GenerateContentResponseUsageMetadata as GoogleUsage
+from openai.types.completion_usage import CompletionUsage as OpenAIUsage
+from pydantic import BaseModel, Field, computed_field
+
+from mcp_agent.llm.model_database import ModelDatabase
+from mcp_agent.llm.provider_types import Provider
+
+
+# Fast-agent specific usage type for synthetic providers
+class FastAgentUsage(BaseModel):
+    """Usage data for fast-agent providers (passthrough, playback, slow)"""
+
+    input_chars: int = Field(description="Characters in input messages")
+    output_chars: int = Field(description="Characters in output messages")
+    model_type: str = Field(description="Type of fast-agent model (passthrough/playback/slow)")
+    tool_calls: int = Field(default=0, description="Number of tool calls made")
+    delay_seconds: float = Field(default=0.0, description="Artificial delays added")
+
+
+# Union type for raw usage data from any provider
+ProviderUsage = Union[AnthropicUsage, OpenAIUsage, GoogleUsage, FastAgentUsage]
+
+
+class ModelContextWindows:
+    """Context window sizes and cache configurations for various models"""
+
+    @classmethod
+    def get_context_window(cls, model: str) -> Optional[int]:
+        return ModelDatabase.get_context_window(model)
+
+
+class CacheUsage(BaseModel):
+    """Cache-specific usage metrics"""
+
+    cache_read_tokens: int = Field(default=0, description="Tokens read from cache")
+    cache_write_tokens: int = Field(default=0, description="Tokens written to cache")
+    cache_hit_tokens: int = Field(default=0, description="Total tokens served from cache")
+
+    @computed_field
+    @property
+    def total_cache_tokens(self) -> int:
+        """Total cache-related tokens"""
+        return self.cache_read_tokens + self.cache_write_tokens + self.cache_hit_tokens
+
+    @computed_field
+    @property
+    def has_cache_activity(self) -> bool:
+        """Whether any cache activity occurred"""
+        return self.total_cache_tokens > 0
+
+
+class TurnUsage(BaseModel):
+    """Usage data for a single turn/completion with cache support"""
+
+    provider: Provider
+    model: str
+    input_tokens: int
+    output_tokens: int
+    total_tokens: int
+    timestamp: float = Field(default_factory=time.time)
+
+    # Cache-specific metrics
+    cache_usage: CacheUsage = Field(default_factory=CacheUsage)
+
+    # Provider-specific token types
+    tool_use_tokens: int = Field(default=0, description="Tokens used for tool calling prompts")
+    reasoning_tokens: int = Field(default=0, description="Tokens used for reasoning/thinking")
+
+    # Raw usage data from provider (preserves all original data)
+    raw_usage: ProviderUsage
+
+    @computed_field
+    @property
+    def current_context_tokens(self) -> int:
+        """Current context size after this turn (input + output)"""
+        return self.input_tokens + self.output_tokens
+
+    @computed_field
+    @property
+    def effective_input_tokens(self) -> int:
+        """Input tokens excluding cache reads (tokens actually processed)"""
+        return max(
+            0,
+            self.input_tokens
+            - self.cache_usage.cache_read_tokens
+            - self.cache_usage.cache_hit_tokens,
+        )
+
+    @classmethod
+    def from_anthropic(cls, usage: AnthropicUsage, model: str) -> "TurnUsage":
+        # Extract cache tokens with proper null handling
+        cache_creation_tokens = getattr(usage, "cache_creation_input_tokens", 0) or 0
+        cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0
+
+        cache_usage = CacheUsage(
+            cache_read_tokens=cache_read_tokens,  # Tokens read from cache (90% discount)
+            cache_write_tokens=cache_creation_tokens,  # Tokens written to cache (25% surcharge)
+        )
+
+        return cls(
+            provider=Provider.ANTHROPIC,
+            model=model,
+            input_tokens=usage.input_tokens,
+            output_tokens=usage.output_tokens,
+            total_tokens=usage.input_tokens + usage.output_tokens,
+            cache_usage=cache_usage,
+            raw_usage=usage,  # Store the original Anthropic usage object
+        )
+
+    @classmethod
+    def from_openai(cls, usage: OpenAIUsage, model: str) -> "TurnUsage":
+        # Extract cache tokens with proper null handling
+        cached_tokens = 0
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            cached_tokens = getattr(usage.prompt_tokens_details, "cached_tokens", 0) or 0
+
+        cache_usage = CacheUsage(
+            cache_hit_tokens=cached_tokens  # These are tokens served from cache (50% discount)
+        )
+
+        return cls(
+            provider=Provider.OPENAI,
+            model=model,
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            total_tokens=usage.total_tokens,
+            cache_usage=cache_usage,
+            raw_usage=usage,  # Store the original OpenAI usage object
+        )
+
+    @classmethod
+    def from_google(cls, usage: GoogleUsage, model: str) -> "TurnUsage":
+        # Extract token counts with proper null handling
+        prompt_tokens = getattr(usage, "prompt_token_count", 0) or 0
+        candidates_tokens = getattr(usage, "candidates_token_count", 0) or 0
+        total_tokens = getattr(usage, "total_token_count", 0) or 0
+        cached_content_tokens = getattr(usage, "cached_content_token_count", 0) or 0
+
+        # Extract additional Google-specific token types
+        tool_use_tokens = getattr(usage, "tool_use_prompt_token_count", 0) or 0
+        thinking_tokens = getattr(usage, "thoughts_token_count", 0) or 0
+
+        # Google cache tokens are read hits (75% discount on Gemini 2.5)
+        cache_usage = CacheUsage(cache_hit_tokens=cached_content_tokens)
+
+        return cls(
+            provider=Provider.GOOGLE,
+            model=model,
+            input_tokens=prompt_tokens,
+            output_tokens=candidates_tokens,
+            total_tokens=total_tokens,
+            cache_usage=cache_usage,
+            tool_use_tokens=tool_use_tokens,
+            reasoning_tokens=thinking_tokens,
+            raw_usage=usage,  # Store the original Google usage object
+        )
+
+    @classmethod
+    def from_fast_agent(cls, usage: FastAgentUsage, model: str) -> "TurnUsage":
+        # For fast-agent providers, we use characters as "tokens"
+        # This provides a consistent unit of measurement across all providers
+        input_tokens = usage.input_chars
+        output_tokens = usage.output_chars
+        total_tokens = input_tokens + output_tokens
+
+        # Fast-agent providers don't have cache functionality
+        cache_usage = CacheUsage()
+
+        return cls(
+            provider=Provider.FAST_AGENT,
+            model=model,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=total_tokens,
+            cache_usage=cache_usage,
+            raw_usage=usage,  # Store the original FastAgentUsage object
+        )
+
+
+class UsageAccumulator(BaseModel):
+    """Accumulates usage data across multiple turns with cache analytics"""
+
+    turns: List[TurnUsage] = Field(default_factory=list)
+    model: Optional[str] = None
+
+    def add_turn(self, turn: TurnUsage) -> None:
+        """Add a new turn to the accumulator"""
+        self.turns.append(turn)
+        if self.model is None:
+            self.model = turn.model
+
+    @computed_field
+    @property
+    def cumulative_input_tokens(self) -> int:
+        """Total input tokens charged across all turns"""
+        return sum(turn.input_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_output_tokens(self) -> int:
+        """Total output tokens charged across all turns"""
+        return sum(turn.output_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_billing_tokens(self) -> int:
+        """Total tokens charged across all turns"""
+        return sum(turn.total_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_cache_read_tokens(self) -> int:
+        """Total tokens read from cache across all turns"""
+        return sum(turn.cache_usage.cache_read_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_cache_write_tokens(self) -> int:
+        """Total tokens written to cache across all turns"""
+        return sum(turn.cache_usage.cache_write_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_cache_hit_tokens(self) -> int:
+        """Total tokens served from cache across all turns"""
+        return sum(turn.cache_usage.cache_hit_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_effective_input_tokens(self) -> int:
+        """Total input tokens excluding cache reads across all turns"""
+        return sum(turn.effective_input_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_tool_use_tokens(self) -> int:
+        """Total tokens used for tool calling prompts across all turns"""
+        return sum(turn.tool_use_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cumulative_reasoning_tokens(self) -> int:
+        """Total tokens used for reasoning/thinking across all turns"""
+        return sum(turn.reasoning_tokens for turn in self.turns)
+
+    @computed_field
+    @property
+    def cache_hit_rate(self) -> Optional[float]:
+        """Percentage of input tokens served from cache"""
+        if self.cumulative_input_tokens == 0:
+            return None
+        cache_tokens = self.cumulative_cache_read_tokens + self.cumulative_cache_hit_tokens
+        return (cache_tokens / self.cumulative_input_tokens) * 100
+
+    @computed_field
+    @property
+    def current_context_tokens(self) -> int:
+        """Current context usage (last turn's context tokens)"""
+        if not self.turns:
+            return 0
+        return self.turns[-1].current_context_tokens
+
+    @computed_field
+    @property
+    def context_window_size(self) -> Optional[int]:
+        """Get context window size for current model"""
+        if self.model:
+            return ModelContextWindows.get_context_window(self.model)
+        return None
+
+    @computed_field
+    @property
+    def context_usage_percentage(self) -> Optional[float]:
+        """Percentage of context window used"""
+        window_size = self.context_window_size
+        if window_size and window_size > 0:
+            return (self.current_context_tokens / window_size) * 100
+        return None
+
+    @computed_field
+    @property
+    def turn_count(self) -> int:
+        """Number of turns accumulated"""
+        return len(self.turns)
+
+    def get_cache_summary(self) -> dict[str, Union[int, float, None]]:
+        """Get cache-specific metrics summary"""
+        return {
+            "cumulative_cache_read_tokens": self.cumulative_cache_read_tokens,
+            "cumulative_cache_write_tokens": self.cumulative_cache_write_tokens,
+            "cumulative_cache_hit_tokens": self.cumulative_cache_hit_tokens,
+            "cache_hit_rate_percent": self.cache_hit_rate,
+            "cumulative_effective_input_tokens": self.cumulative_effective_input_tokens,
+        }
+
+    def get_summary(self) -> dict[str, Union[int, float, str, None]]:
+        """Get comprehensive usage statistics"""
+        cache_summary = self.get_cache_summary()
+        return {
+            "model": self.model,
+            "turn_count": self.turn_count,
+            "cumulative_input_tokens": self.cumulative_input_tokens,
+            "cumulative_output_tokens": self.cumulative_output_tokens,
+            "cumulative_billing_tokens": self.cumulative_billing_tokens,
+            "cumulative_tool_use_tokens": self.cumulative_tool_use_tokens,
+            "cumulative_reasoning_tokens": self.cumulative_reasoning_tokens,
+            "current_context_tokens": self.current_context_tokens,
+            "context_window_size": self.context_window_size,
+            "context_usage_percentage": self.context_usage_percentage,
+            **cache_summary,
+        }
+
+
+# Utility functions for fast-agent integration
+def create_fast_agent_usage(
+    input_content: str,
+    output_content: str,
+    model_type: str,
+    tool_calls: int = 0,
+    delay_seconds: float = 0.0,
+) -> FastAgentUsage:
+    """
+    Create FastAgentUsage from message content.
+
+    Args:
+        input_content: Input message content
+        output_content: Output message content
+        model_type: Type of fast-agent model (passthrough/playback/slow)
+        tool_calls: Number of tool calls made
+        delay_seconds: Artificial delays added
+
+    Returns:
+        FastAgentUsage object with character counts
+    """
+    return FastAgentUsage(
+        input_chars=len(input_content),
+        output_chars=len(output_content),
+        model_type=model_type,
+        tool_calls=tool_calls,
+        delay_seconds=delay_seconds,
+    )
+
+
+def create_turn_usage_from_messages(
+    input_content: str,
+    output_content: str,
+    model: str,
+    model_type: str,
+    tool_calls: int = 0,
+    delay_seconds: float = 0.0,
+) -> TurnUsage:
+    """
+    Create TurnUsage directly from message content for fast-agent providers.
+
+    Args:
+        input_content: Input message content
+        output_content: Output message content
+        model: Model name (e.g., "passthrough", "playback", "slow")
+        model_type: Type for internal tracking
+        tool_calls: Number of tool calls made
+        delay_seconds: Artificial delays added
+
+    Returns:
+        TurnUsage object ready for accumulation
+    """
+    usage = create_fast_agent_usage(
+        input_content=input_content,
+        output_content=output_content,
+        model_type=model_type,
+        tool_calls=tool_calls,
+        delay_seconds=delay_seconds,
+    )
+    return TurnUsage.from_fast_agent(usage, model)
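
As a quick orientation to the new module, the sketch below drives it directly through the fast-agent helpers rather than via a provider; only the imported names come from usage_tracking itself, and the message contents are invented for illustration:

    # Sketch: using the usage tracking module stand-alone with the synthetic
    # (character-counting) fast-agent path.
    from mcp_agent.llm.usage_tracking import (
        UsageAccumulator,
        create_turn_usage_from_messages,
    )

    acc = UsageAccumulator()

    turn = create_turn_usage_from_messages(
        input_content="hello world",      # 11 characters counted as "input tokens"
        output_content="hello back",      # 10 characters counted as "output tokens"
        model="passthrough",
        model_type="passthrough",
    )
    acc.add_turn(turn)

    print(acc.get_summary()["cumulative_billing_tokens"])  # 21
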
@@ -5,6 +5,7 @@ This module defines protocols (interfaces) that can be used to break circular de
 
 from datetime import timedelta
 from typing import (
+    TYPE_CHECKING,
     Any,
     AsyncContextManager,
     Callable,
@@ -31,6 +32,9 @@ from mcp_agent.core.agent_types import AgentType
 from mcp_agent.core.request_params import RequestParams
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
+if TYPE_CHECKING:
+    from mcp_agent.llm.usage_tracking import UsageAccumulator
+
 
 @runtime_checkable
 class MCPConnectionManagerProtocol(Protocol):
@@ -132,6 +136,8 @@ class AugmentedLLMProtocol(Protocol):
         """
         ...
 
+    usage_accumulator: "UsageAccumulator"
+
 
 class AgentProtocol(AugmentedLLMProtocol, Protocol):
     """Protocol defining the standard agent interface"""