fast-agent-mcp 0.2.33__py3-none-any.whl → 0.2.34__py3-none-any.whl

This diff compares the contents of two publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two releases as they appear in the public registry.
@@ -30,11 +30,13 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.core.request_params import RequestParams
 from mcp_agent.event_progress import ProgressAction
 from mcp_agent.llm.memory import Memory, SimpleMemory
+from mcp_agent.llm.model_database import ModelDatabase
 from mcp_agent.llm.provider_types import Provider
 from mcp_agent.llm.sampling_format_converter import (
     BasicFormatConverter,
     ProviderFormatConverter,
 )
+from mcp_agent.llm.usage_tracking import UsageAccumulator
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.helpers.content_helpers import get_text
 from mcp_agent.mcp.interfaces import (
@@ -155,12 +157,11 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # Initialize the display component
         self.display = ConsoleDisplay(config=self.context.config)
 
-        # Initialize default parameters
-        self.default_request_params = self._initialize_default_params(kwargs)
-
-        # Apply model override if provided
+        # Initialize default parameters, passing model info
+        model_kwargs = kwargs.copy()
         if model:
-            self.default_request_params.model = model
+            model_kwargs["model"] = model
+        self.default_request_params = self._initialize_default_params(model_kwargs)
 
         # Merge with provided params if any
         if self._init_request_params:
@@ -171,13 +172,22 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         self.type_converter = type_converter
         self.verb = kwargs.get("verb")
 
+        # Initialize usage tracking
+        self.usage_accumulator = UsageAccumulator()
+
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize default parameters for the LLM.
         Should be overridden by provider implementations to set provider-specific defaults."""
+        # Get model-aware default max tokens
+        model = kwargs.get("model")
+        max_tokens = ModelDatabase.get_default_max_tokens(model)
+
         return RequestParams(
+            model=model,
+            maxTokens=max_tokens,
             systemPrompt=self.instruction,
             parallel_tool_calls=True,
-            max_iterations=10,
+            max_iterations=20,
             use_history=True,
         )
 
@@ -642,3 +652,13 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
         assert self.provider
         return ProviderKeyManager.get_api_key(self.provider.value, self.context.config)
+
+    def get_usage_summary(self) -> dict:
+        """
+        Get a summary of usage statistics for this LLM instance.
+
+        Returns:
+            Dictionary containing usage statistics including tokens, cache metrics,
+            and context window utilization.
+        """
+        return self.usage_accumulator.get_summary()
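
The new `get_usage_summary()` returns a plain dictionary, so callers can log or serialize it directly. A minimal sketch of how a caller might inspect usage after a conversation — `report_usage` is a hypothetical helper, not part of the package, and the accumulator attributes shown are the ones exercised by the commented-out debug block in the Anthropic provider further down:

```python
def report_usage(llm) -> None:
    # llm is any AugmentedLLM subclass instance that has completed one or more turns
    summary = llm.get_usage_summary()  # tokens, cache metrics, context utilization
    print(summary)

    acc = llm.usage_accumulator
    print(f"turns={acc.turn_count}, billed tokens={acc.cumulative_billing_tokens}")
    if acc.context_usage_percentage:
        print(f"context: {acc.context_usage_percentage:.1f}% of {acc.context_window_size}")
```
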
@@ -10,6 +10,7 @@ from mcp_agent.llm.augmented_llm import (
     RequestParams,
 )
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -48,13 +49,34 @@ class PassthroughLLM(AugmentedLLM):
         await self.show_assistant_message(message, title="ASSISTANT/PASSTHROUGH")
 
         # Handle PromptMessage by concatenating all parts
+        result = ""
         if isinstance(message, PromptMessage):
             parts_text = []
             for part in message.content:
                 parts_text.append(str(part))
-            return "\n".join(parts_text)
+            result = "\n".join(parts_text)
+        else:
+            result = str(message)
 
-        return str(message)
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = str(message)
+            output_content = result
+            tool_calls = 1 if input_content.startswith("***CALL_TOOL") else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result
 
     async def initialize(self) -> None:
         pass
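
The synthetic providers (passthrough, playback, slow) all reuse the pattern shown above: estimate token counts from the raw text with `create_turn_usage_from_messages()`, then feed the turn into the instance's accumulator. A standalone sketch of that pattern, using a fresh `UsageAccumulator` rather than one attached to a live LLM:

```python
from mcp_agent.llm.usage_tracking import UsageAccumulator, create_turn_usage_from_messages

accumulator = UsageAccumulator()
turn = create_turn_usage_from_messages(
    input_content="What is the capital of France?",
    output_content="Paris.",
    model="passthrough",
    model_type="passthrough",
    tool_calls=0,
    delay_seconds=0.0,
)
accumulator.add_turn(turn)
print(accumulator.get_summary())
```
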
@@ -146,6 +168,25 @@ class PassthroughLLM(AugmentedLLM):
         if self.is_tool_call(last_message):
             result = Prompt.assistant(await self.generate_str(last_message.first_text()))
             await self.show_assistant_message(result.first_text())
+
+            # Track usage for this tool call "turn"
+            try:
+                input_content = "\n".join(message.all_text() for message in multipart_messages)
+                output_content = result.first_text()
+
+                turn_usage = create_turn_usage_from_messages(
+                    input_content=input_content,
+                    output_content=output_content,
+                    model="passthrough",
+                    model_type="passthrough",
+                    tool_calls=1,  # This is definitely a tool call
+                    delay_seconds=0.0,
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
             return result
 
         if last_message.first_text().startswith(FIXED_RESPONSE_INDICATOR):
@@ -155,12 +196,33 @@ class PassthroughLLM(AugmentedLLM):
 
         if self._fixed_response:
             await self.show_assistant_message(self._fixed_response)
-            return Prompt.assistant(self._fixed_response)
+            result = Prompt.assistant(self._fixed_response)
         else:
             # TODO -- improve when we support Audio/Multimodal gen models e.g. gemini . This should really just return the input as "assistant"...
             concatenated: str = "\n".join(message.all_text() for message in multipart_messages)
             await self.show_assistant_message(concatenated)
-            return Prompt.assistant(concatenated)
+            result = Prompt.assistant(concatenated)
+
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = "\n".join(message.all_text() for message in multipart_messages)
+            output_content = result.first_text()
+            tool_calls = 1 if self.is_tool_call(last_message) else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result
 
     def is_tool_call(self, message: PromptMessageMultipart) -> bool:
         return message.first_text().startswith(CALL_TOOL_INDICATOR)
@@ -5,6 +5,7 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.llm.augmented_llm import RequestParams
 from mcp_agent.llm.augmented_llm_passthrough import PassthroughLLM
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 from mcp_agent.mcp.prompts.prompt_helpers import MessageContent
@@ -83,6 +84,24 @@ class PlaybackLLM(PassthroughLLM):
             message_text=MessageContent.get_first_text(response), title="ASSISTANT/PLAYBACK"
         )
 
+        # Track usage for this playback "turn"
+        try:
+            input_content = str(multipart_messages) if multipart_messages else ""
+            output_content = MessageContent.get_first_text(response)
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="playback",
+                model_type="playback",
+                tool_calls=0,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
         return response
 
     async def structured(
@@ -30,7 +30,18 @@ class SlowLLM(PassthroughLLM):
     ) -> str:
         """Sleep for 3 seconds then return the input message as a string."""
         await asyncio.sleep(3)
-        return await super().generate_str(message, request_params)
+        result = await super().generate_str(message, request_params)
+
+        # Override the last turn to include the 3-second delay
+        if self.usage_accumulator.turns:
+            last_turn = self.usage_accumulator.turns[-1]
+            # Update the raw usage to include delay
+            if hasattr(last_turn.raw_usage, 'delay_seconds'):
+                last_turn.raw_usage.delay_seconds = 3.0
+            # Print updated debug info
+            print("SlowLLM: Added 3.0s delay to turn usage")
+
+        return result
 
     async def _apply_prompt_provider_specific(
         self,
@@ -0,0 +1,236 @@
+"""
+Model database for LLM parameters.
+
+This module provides a centralized lookup for model parameters including
+context windows, max output tokens, and supported tokenization types.
+"""
+
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class ModelParameters(BaseModel):
+    """Configuration parameters for a specific model"""
+
+    context_window: int
+    """Maximum context window size in tokens"""
+
+    max_output_tokens: int
+    """Maximum output tokens the model can generate"""
+
+    tokenizes: List[str]
+    """List of supported content types for tokenization"""
+
+
+class ModelDatabase:
+    """Centralized model configuration database"""
+
+    # Common parameter sets
+    OPENAI_MULTIMODAL = ["text/plain", "image/jpeg", "image/png", "image/webp", "application/pdf"]
+    OPENAI_VISION = ["text/plain", "image/jpeg", "image/png", "image/webp"]
+    ANTHROPIC_MULTIMODAL = [
+        "text/plain",
+        "image/jpeg",
+        "image/png",
+        "image/webp",
+        "application/pdf",
+    ]
+    GOOGLE_MULTIMODAL = [
+        "text/plain",
+        "image/jpeg",
+        "image/png",
+        "image/webp",
+        "application/pdf",
+        "audio/wav",
+        "audio/mp3",
+        "video/mp4",
+    ]
+    QWEN_MULTIMODAL = ["text/plain", "image/jpeg", "image/png", "image/webp"]
+    TEXT_ONLY = ["text/plain"]
+
+    # Common parameter configurations
+    OPENAI_STANDARD = ModelParameters(
+        context_window=128000, max_output_tokens=16384, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_4_1_STANDARD = ModelParameters(
+        context_window=1047576, max_output_tokens=32768, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_O_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=OPENAI_VISION
+    )
+
+    ANTHROPIC_LEGACY = ModelParameters(
+        context_window=200000, max_output_tokens=4096, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    ANTHROPIC_35_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=8192, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    # TODO--- TO USE 64,000 NEED TO SUPPORT STREAMING
+    ANTHROPIC_37_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    GEMINI_FLASH = ModelParameters(
+        context_window=1048576, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    GEMINI_PRO = ModelParameters(
+        context_window=2097152, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    QWEN_STANDARD = ModelParameters(
+        context_window=32000, max_output_tokens=8192, tokenizes=QWEN_MULTIMODAL
+    )
+
+    FAST_AGENT_STANDARD = ModelParameters(
+        context_window=1000000, max_output_tokens=100000, tokenizes=TEXT_ONLY
+    )
+
+    OPENAI_4_1_SERIES = ModelParameters(
+        context_window=1047576, max_output_tokens=32768, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_4O_SERIES = ModelParameters(
+        context_window=128000, max_output_tokens=16384, tokenizes=OPENAI_VISION
+    )
+
+    OPENAI_O3_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_O3_MINI_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=TEXT_ONLY
+    )
+
+    # TODO update to 32000
+    ANTHROPIC_OPUS_4_VERSIONED = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+    # TODO update to 64000
+    ANTHROPIC_SONNET_4_VERSIONED = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    DEEPSEEK_CHAT_STANDARD = ModelParameters(
+        context_window=65536, max_output_tokens=8192, tokenizes=TEXT_ONLY
+    )
+
+    DEEPSEEK_REASONER = ModelParameters(
+        context_window=65536, max_output_tokens=32768, tokenizes=TEXT_ONLY
+    )
+
+    GEMINI_2_5_PRO = ModelParameters(
+        context_window=2097152, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    # Model configuration database
+    MODELS: Dict[str, ModelParameters] = {
+        # internal models
+        "passthrough": FAST_AGENT_STANDARD,
+        "playback": FAST_AGENT_STANDARD,
+        "slow": FAST_AGENT_STANDARD,
+        # aliyun models
+        "qwen-turbo": QWEN_STANDARD,
+        "qwen-plus": QWEN_STANDARD,
+        "qwen-max": QWEN_STANDARD,
+        "qwen-long": ModelParameters(
+            context_window=10000000, max_output_tokens=8192, tokenizes=TEXT_ONLY
+        ),
+        # OpenAI Models (vanilla aliases and versioned)
+        "gpt-4.1": OPENAI_4_1_SERIES,
+        "gpt-4.1-mini": OPENAI_4_1_SERIES,
+        "gpt-4.1-nano": OPENAI_4_1_SERIES,
+        "gpt-4.1-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4.1-mini-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4.1-nano-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4o": OPENAI_4O_SERIES,
+        "gpt-4o-2024-11-20": OPENAI_4O_SERIES,
+        "gpt-4o-mini-2024-07-18": OPENAI_4O_SERIES,
+        "o1": OPENAI_O_SERIES,
+        "o1-2024-12-17": OPENAI_O_SERIES,
+        "o3": OPENAI_O3_SERIES,
+        "o3-pro": ModelParameters(
+            context_window=200_000, max_output_tokens=100_000, tokenizes=TEXT_ONLY
+        ),
+        "o3-mini": OPENAI_O3_MINI_SERIES,
+        "o4-mini": OPENAI_O3_SERIES,
+        "o3-2025-04-16": OPENAI_O3_SERIES,
+        "o3-mini-2025-01-31": OPENAI_O3_MINI_SERIES,
+        "o4-mini-2025-04-16": OPENAI_O3_SERIES,
+        # Anthropic Models
+        "claude-3-haiku": ANTHROPIC_35_SERIES,
+        "claude-3-haiku-20240307": ANTHROPIC_LEGACY,
+        "claude-3-sonnet": ANTHROPIC_LEGACY,
+        "claude-3-opus": ANTHROPIC_LEGACY,
+        "claude-3-opus-20240229": ANTHROPIC_LEGACY,
+        "claude-3-opus-latest": ANTHROPIC_LEGACY,
+        "claude-3-5-haiku": ANTHROPIC_35_SERIES,
+        "claude-3-5-haiku-20241022": ANTHROPIC_35_SERIES,
+        "claude-3-5-haiku-latest": ANTHROPIC_35_SERIES,
+        "claude-3-sonnet-20240229": ANTHROPIC_LEGACY,
+        "claude-3-5-sonnet": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-20240620": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-20241022": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-latest": ANTHROPIC_35_SERIES,
+        "claude-3-7-sonnet": ANTHROPIC_37_SERIES,
+        "claude-3-7-sonnet-20250219": ANTHROPIC_37_SERIES,
+        "claude-3-7-sonnet-latest": ANTHROPIC_37_SERIES,
+        "claude-sonnet-4": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-sonnet-4-0": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-sonnet-4-20250514": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-opus-4": ANTHROPIC_OPUS_4_VERSIONED,
+        "claude-opus-4-0": ANTHROPIC_OPUS_4_VERSIONED,
+        "claude-opus-4-20250514": ANTHROPIC_OPUS_4_VERSIONED,
+        # DeepSeek Models
+        "deepseek-chat": DEEPSEEK_CHAT_STANDARD,
+        # Google Gemini Models (vanilla aliases and versioned)
+        "gemini-2.0-flash": GEMINI_FLASH,
+        "gemini-2.5-flash-preview": GEMINI_FLASH,
+        "gemini-2.5-pro-preview": GEMINI_2_5_PRO,
+        "gemini-2.5-flash-preview-05-20": GEMINI_FLASH,
+        "gemini-2.5-pro-preview-05-06": GEMINI_PRO,
+    }
+
+    @classmethod
+    def get_model_params(cls, model: str) -> Optional[ModelParameters]:
+        """Get model parameters for a given model name"""
+        return cls.MODELS.get(model)
+
+    @classmethod
+    def get_context_window(cls, model: str) -> Optional[int]:
+        """Get context window size for a model"""
+        params = cls.get_model_params(model)
+        return params.context_window if params else None
+
+    @classmethod
+    def get_max_output_tokens(cls, model: str) -> Optional[int]:
+        """Get maximum output tokens for a model"""
+        params = cls.get_model_params(model)
+        return params.max_output_tokens if params else None
+
+    @classmethod
+    def get_tokenizes(cls, model: str) -> Optional[List[str]]:
+        """Get supported tokenization types for a model"""
+        params = cls.get_model_params(model)
+        return params.tokenizes if params else None
+
+    @classmethod
+    def get_default_max_tokens(cls, model: str) -> int:
+        """Get default max_tokens for RequestParams based on model"""
+        if not model:
+            return 2048  # Fallback when no model specified
+
+        params = cls.get_model_params(model)
+        if params:
+            return params.max_output_tokens
+        return 2048  # Fallback for unknown models
+
+    @classmethod
+    def list_models(cls) -> List[str]:
+        """List all available model names"""
+        return list(cls.MODELS.keys())
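
Example lookups against the new database, with expected values taken from the table above; `get_default_max_tokens` is the entry point used by `AugmentedLLM._initialize_default_params`:

```python
from mcp_agent.llm.model_database import ModelDatabase

assert ModelDatabase.get_context_window("gpt-4.1") == 1047576
assert ModelDatabase.get_max_output_tokens("claude-sonnet-4-0") == 16384
assert "audio/wav" in ModelDatabase.get_tokenizes("gemini-2.0-flash")
assert ModelDatabase.get_default_max_tokens("gpt-4.1") == 32768
assert ModelDatabase.get_default_max_tokens("some-unknown-model") == 2048  # fallback
```
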
@@ -87,6 +87,7 @@ class ModelFactory:
         "o1-preview": Provider.OPENAI,
         "o3": Provider.OPENAI,
         "o3-mini": Provider.OPENAI,
+        "o4-mini": Provider.OPENAI,
         "claude-3-haiku-20240307": Provider.ANTHROPIC,
         "claude-3-5-haiku-20241022": Provider.ANTHROPIC,
         "claude-3-5-haiku-latest": Provider.ANTHROPIC,
@@ -10,6 +10,7 @@ from mcp_agent.llm.providers.multipart_converter_anthropic import (
 from mcp_agent.llm.providers.sampling_converter_anthropic import (
     AnthropicSamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -75,14 +76,14 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize Anthropic-specific default parameters"""
-        return RequestParams(
-            model=kwargs.get("model", DEFAULT_ANTHROPIC_MODEL),
-            maxTokens=4096,  # default haiku3
-            systemPrompt=self.instruction,
-            parallel_tool_calls=True,
-            max_iterations=20,
-            use_history=True,
-        )
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with Anthropic-specific settings
+        chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
+        base_params.model = chosen_model
+
+        return base_params
 
     def _base_url(self) -> str | None:
         assert self.context.config
@@ -158,6 +159,41 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
             response = executor_result[0]
 
+            # Track usage if response is valid and has usage data
+            if (
+                hasattr(response, "usage")
+                and response.usage
+                and not isinstance(response, BaseException)
+            ):
+                try:
+                    turn_usage = TurnUsage.from_anthropic(
+                        response.usage, model or DEFAULT_ANTHROPIC_MODEL
+                    )
+                    self.usage_accumulator.add_turn(turn_usage)
+
+                    # # Print raw usage for debugging
+                    # print(f"\n=== USAGE DEBUG ({model}) ===")
+                    # print(f"Raw usage: {response.usage}")
+                    # print(
+                    #     f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+                    # )
+                    # print(
+                    #     f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+                    # )
+                    # print(f"Effective input: {turn_usage.effective_input_tokens}")
+                    # print(
+                    #     f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+                    # )
+                    # if self.usage_accumulator.context_usage_percentage:
+                    #     print(
+                    #         f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+                    #     )
+                    # if self.usage_accumulator.cache_hit_rate:
+                    #     print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+                    # print("===========================\n")
+                except Exception as e:
+                    self.logger.warning(f"Failed to track usage: {e}")
+
             if isinstance(response, AuthenticationError):
                 raise ProviderKeyError(
                     "Invalid Anthropic API key",
@@ -24,6 +24,7 @@ from mcp_agent.llm.provider_types import Provider
 
 # Import the new converter class
 from mcp_agent.llm.providers.google_converter import GoogleConverter
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
 # Define default model and potentially other Google-specific defaults
@@ -220,6 +221,7 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             parallel_tool_calls=True,  # Assume parallel tool calls are supported by default with native API
             max_iterations=20,
             use_history=True,
+            maxTokens=65536,  # Default max tokens for Google models
             # Include other relevant default parameters
         )
 
@@ -281,10 +283,25 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
                 )
                 self.logger.debug("Google generate_content response:", data=api_response)
 
+                # Track usage if response is valid and has usage data
+                if (
+                    hasattr(api_response, "usage_metadata")
+                    and api_response.usage_metadata
+                    and not isinstance(api_response, BaseException)
+                ):
+                    try:
+                        turn_usage = TurnUsage.from_google(
+                            api_response.usage_metadata, request_params.model
+                        )
+                        self.usage_accumulator.add_turn(turn_usage)
+
+                    except Exception as e:
+                        self.logger.warning(f"Failed to track usage: {e}")
+
             except errors.APIError as e:
                 # Handle specific Google API errors
                 self.logger.error(f"Google API Error: {e.code} - {e.message}")
-                raise ProviderKeyError(f"Google API Error: {e.code}", e.message) from e
+                raise ProviderKeyError(f"Google API Error: {e.code}", e.message or "") from e
             except Exception as e:
                 self.logger.error(f"Error during Google generate_content call: {e}")
                 # Decide how to handle other exceptions - potentially re-raise or return an error message
@@ -31,6 +31,7 @@ from mcp_agent.llm.providers.multipart_converter_openai import OpenAIConverter,
 from mcp_agent.llm.providers.sampling_converter_openai import (
     OpenAISamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -90,15 +91,14 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize OpenAI-specific default parameters"""
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with OpenAI-specific settings
         chosen_model = kwargs.get("model", DEFAULT_OPENAI_MODEL)
+        base_params.model = chosen_model
 
-        return RequestParams(
-            model=chosen_model,
-            systemPrompt=self.instruction,
-            parallel_tool_calls=True,
-            max_iterations=20,
-            use_history=True,
-        )
+        return base_params
 
     def _base_url(self) -> str:
         return self.context.config.openai.base_url if self.context.config.openai else None
@@ -166,6 +166,19 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
             response = executor_result[0]
 
+            # Track usage if response is valid and has usage data
+            if (
+                hasattr(response, "usage")
+                and response.usage
+                and not isinstance(response, BaseException)
+            ):
+                try:
+                    model_name = self.default_request_params.model or DEFAULT_OPENAI_MODEL
+                    turn_usage = TurnUsage.from_openai(response.usage, model_name)
+                    self.usage_accumulator.add_turn(turn_usage)
+                except Exception as e:
+                    self.logger.warning(f"Failed to track usage: {e}")
+
             self.logger.debug(
                 "OpenAI completion response:",
                 data=response,