fast-agent-mcp 0.2.33__py3-none-any.whl → 0.2.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/METADATA +1 -1
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/RECORD +22 -19
- mcp_agent/agents/base_agent.py +13 -0
- mcp_agent/core/agent_app.py +41 -1
- mcp_agent/core/enhanced_prompt.py +9 -0
- mcp_agent/core/fastagent.py +14 -2
- mcp_agent/core/interactive_prompt.py +59 -13
- mcp_agent/core/usage_display.py +193 -0
- mcp_agent/llm/augmented_llm.py +26 -6
- mcp_agent/llm/augmented_llm_passthrough.py +66 -4
- mcp_agent/llm/augmented_llm_playback.py +19 -0
- mcp_agent/llm/augmented_llm_slow.py +12 -1
- mcp_agent/llm/model_database.py +236 -0
- mcp_agent/llm/model_factory.py +1 -0
- mcp_agent/llm/providers/augmented_llm_anthropic.py +44 -8
- mcp_agent/llm/providers/augmented_llm_google_native.py +18 -1
- mcp_agent/llm/providers/augmented_llm_openai.py +20 -7
- mcp_agent/llm/usage_tracking.py +385 -0
- mcp_agent/mcp/interfaces.py +6 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/licenses/LICENSE +0 -0
mcp_agent/llm/augmented_llm.py
CHANGED
@@ -30,11 +30,13 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.core.request_params import RequestParams
 from mcp_agent.event_progress import ProgressAction
 from mcp_agent.llm.memory import Memory, SimpleMemory
+from mcp_agent.llm.model_database import ModelDatabase
 from mcp_agent.llm.provider_types import Provider
 from mcp_agent.llm.sampling_format_converter import (
     BasicFormatConverter,
     ProviderFormatConverter,
 )
+from mcp_agent.llm.usage_tracking import UsageAccumulator
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.helpers.content_helpers import get_text
 from mcp_agent.mcp.interfaces import (
@@ -155,12 +157,11 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # Initialize the display component
         self.display = ConsoleDisplay(config=self.context.config)
 
-        # Initialize default parameters
-
-
-        # Apply model override if provided
+        # Initialize default parameters, passing model info
+        model_kwargs = kwargs.copy()
         if model:
-
+            model_kwargs["model"] = model
+        self.default_request_params = self._initialize_default_params(model_kwargs)
 
         # Merge with provided params if any
         if self._init_request_params:
@@ -171,13 +172,22 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         self.type_converter = type_converter
         self.verb = kwargs.get("verb")
 
+        # Initialize usage tracking
+        self.usage_accumulator = UsageAccumulator()
+
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize default parameters for the LLM.
         Should be overridden by provider implementations to set provider-specific defaults."""
+        # Get model-aware default max tokens
+        model = kwargs.get("model")
+        max_tokens = ModelDatabase.get_default_max_tokens(model)
+
         return RequestParams(
+            model=model,
+            maxTokens=max_tokens,
             systemPrompt=self.instruction,
             parallel_tool_calls=True,
-            max_iterations=
+            max_iterations=20,
             use_history=True,
         )
 
@@ -642,3 +652,13 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
         assert self.provider
         return ProviderKeyManager.get_api_key(self.provider.value, self.context.config)
+
+    def get_usage_summary(self) -> dict:
+        """
+        Get a summary of usage statistics for this LLM instance.
+
+        Returns:
+            Dictionary containing usage statistics including tokens, cache metrics,
+            and context window utilization.
+        """
+        return self.usage_accumulator.get_summary()
mcp_agent/llm/augmented_llm_passthrough.py
CHANGED
@@ -10,6 +10,7 @@ from mcp_agent.llm.augmented_llm import (
     RequestParams,
 )
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -48,13 +49,34 @@ class PassthroughLLM(AugmentedLLM):
         await self.show_assistant_message(message, title="ASSISTANT/PASSTHROUGH")
 
         # Handle PromptMessage by concatenating all parts
+        result = ""
         if isinstance(message, PromptMessage):
             parts_text = []
             for part in message.content:
                 parts_text.append(str(part))
-
+            result = "\n".join(parts_text)
+        else:
+            result = str(message)
 
-
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = str(message)
+            output_content = result
+            tool_calls = 1 if input_content.startswith("***CALL_TOOL") else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result
 
     async def initialize(self) -> None:
         pass
@@ -146,6 +168,25 @@ class PassthroughLLM(AugmentedLLM):
         if self.is_tool_call(last_message):
             result = Prompt.assistant(await self.generate_str(last_message.first_text()))
             await self.show_assistant_message(result.first_text())
+
+            # Track usage for this tool call "turn"
+            try:
+                input_content = "\n".join(message.all_text() for message in multipart_messages)
+                output_content = result.first_text()
+
+                turn_usage = create_turn_usage_from_messages(
+                    input_content=input_content,
+                    output_content=output_content,
+                    model="passthrough",
+                    model_type="passthrough",
+                    tool_calls=1,  # This is definitely a tool call
+                    delay_seconds=0.0,
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
             return result
 
         if last_message.first_text().startswith(FIXED_RESPONSE_INDICATOR):
@@ -155,12 +196,33 @@
 
         if self._fixed_response:
             await self.show_assistant_message(self._fixed_response)
-
+            result = Prompt.assistant(self._fixed_response)
         else:
             # TODO -- improve when we support Audio/Multimodal gen models e.g. gemini . This should really just return the input as "assistant"...
             concatenated: str = "\n".join(message.all_text() for message in multipart_messages)
             await self.show_assistant_message(concatenated)
-
+            result = Prompt.assistant(concatenated)
+
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = "\n".join(message.all_text() for message in multipart_messages)
+            output_content = result.first_text()
+            tool_calls = 1 if self.is_tool_call(last_message) else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result
 
     def is_tool_call(self, message: PromptMessageMultipart) -> bool:
         return message.first_text().startswith(CALL_TOOL_INDICATOR)
mcp_agent/llm/augmented_llm_playback.py
CHANGED
@@ -5,6 +5,7 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.llm.augmented_llm import RequestParams
 from mcp_agent.llm.augmented_llm_passthrough import PassthroughLLM
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 from mcp_agent.mcp.prompts.prompt_helpers import MessageContent
@@ -83,6 +84,24 @@ class PlaybackLLM(PassthroughLLM):
             message_text=MessageContent.get_first_text(response), title="ASSISTANT/PLAYBACK"
         )
 
+        # Track usage for this playback "turn"
+        try:
+            input_content = str(multipart_messages) if multipart_messages else ""
+            output_content = MessageContent.get_first_text(response)
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="playback",
+                model_type="playback",
+                tool_calls=0,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
         return response
 
     async def structured(
mcp_agent/llm/augmented_llm_slow.py
CHANGED
@@ -30,7 +30,18 @@ class SlowLLM(PassthroughLLM):
     ) -> str:
         """Sleep for 3 seconds then return the input message as a string."""
         await asyncio.sleep(3)
-
+        result = await super().generate_str(message, request_params)
+
+        # Override the last turn to include the 3-second delay
+        if self.usage_accumulator.turns:
+            last_turn = self.usage_accumulator.turns[-1]
+            # Update the raw usage to include delay
+            if hasattr(last_turn.raw_usage, 'delay_seconds'):
+                last_turn.raw_usage.delay_seconds = 3.0
+            # Print updated debug info
+            print("SlowLLM: Added 3.0s delay to turn usage")
+
+        return result
 
     async def _apply_prompt_provider_specific(
         self,
mcp_agent/llm/model_database.py
ADDED
@@ -0,0 +1,236 @@
+"""
+Model database for LLM parameters.
+
+This module provides a centralized lookup for model parameters including
+context windows, max output tokens, and supported tokenization types.
+"""
+
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class ModelParameters(BaseModel):
+    """Configuration parameters for a specific model"""
+
+    context_window: int
+    """Maximum context window size in tokens"""
+
+    max_output_tokens: int
+    """Maximum output tokens the model can generate"""
+
+    tokenizes: List[str]
+    """List of supported content types for tokenization"""
+
+
+class ModelDatabase:
+    """Centralized model configuration database"""
+
+    # Common parameter sets
+    OPENAI_MULTIMODAL = ["text/plain", "image/jpeg", "image/png", "image/webp", "application/pdf"]
+    OPENAI_VISION = ["text/plain", "image/jpeg", "image/png", "image/webp"]
+    ANTHROPIC_MULTIMODAL = [
+        "text/plain",
+        "image/jpeg",
+        "image/png",
+        "image/webp",
+        "application/pdf",
+    ]
+    GOOGLE_MULTIMODAL = [
+        "text/plain",
+        "image/jpeg",
+        "image/png",
+        "image/webp",
+        "application/pdf",
+        "audio/wav",
+        "audio/mp3",
+        "video/mp4",
+    ]
+    QWEN_MULTIMODAL = ["text/plain", "image/jpeg", "image/png", "image/webp"]
+    TEXT_ONLY = ["text/plain"]
+
+    # Common parameter configurations
+    OPENAI_STANDARD = ModelParameters(
+        context_window=128000, max_output_tokens=16384, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_4_1_STANDARD = ModelParameters(
+        context_window=1047576, max_output_tokens=32768, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_O_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=OPENAI_VISION
+    )
+
+    ANTHROPIC_LEGACY = ModelParameters(
+        context_window=200000, max_output_tokens=4096, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    ANTHROPIC_35_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=8192, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    # TODO--- TO USE 64,000 NEED TO SUPPORT STREAMING
+    ANTHROPIC_37_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    GEMINI_FLASH = ModelParameters(
+        context_window=1048576, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    GEMINI_PRO = ModelParameters(
+        context_window=2097152, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    QWEN_STANDARD = ModelParameters(
+        context_window=32000, max_output_tokens=8192, tokenizes=QWEN_MULTIMODAL
+    )
+
+    FAST_AGENT_STANDARD = ModelParameters(
+        context_window=1000000, max_output_tokens=100000, tokenizes=TEXT_ONLY
+    )
+
+    OPENAI_4_1_SERIES = ModelParameters(
+        context_window=1047576, max_output_tokens=32768, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_4O_SERIES = ModelParameters(
+        context_window=128000, max_output_tokens=16384, tokenizes=OPENAI_VISION
+    )
+
+    OPENAI_O3_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_O3_MINI_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=TEXT_ONLY
+    )
+
+    # TODO update to 32000
+    ANTHROPIC_OPUS_4_VERSIONED = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+    # TODO update to 64000
+    ANTHROPIC_SONNET_4_VERSIONED = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    DEEPSEEK_CHAT_STANDARD = ModelParameters(
+        context_window=65536, max_output_tokens=8192, tokenizes=TEXT_ONLY
+    )
+
+    DEEPSEEK_REASONER = ModelParameters(
+        context_window=65536, max_output_tokens=32768, tokenizes=TEXT_ONLY
+    )
+
+    GEMINI_2_5_PRO = ModelParameters(
+        context_window=2097152, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    # Model configuration database
+    MODELS: Dict[str, ModelParameters] = {
+        # internal models
+        "passthrough": FAST_AGENT_STANDARD,
+        "playback": FAST_AGENT_STANDARD,
+        "slow": FAST_AGENT_STANDARD,
+        # aliyun models
+        "qwen-turbo": QWEN_STANDARD,
+        "qwen-plus": QWEN_STANDARD,
+        "qwen-max": QWEN_STANDARD,
+        "qwen-long": ModelParameters(
+            context_window=10000000, max_output_tokens=8192, tokenizes=TEXT_ONLY
+        ),
+        # OpenAI Models (vanilla aliases and versioned)
+        "gpt-4.1": OPENAI_4_1_SERIES,
+        "gpt-4.1-mini": OPENAI_4_1_SERIES,
+        "gpt-4.1-nano": OPENAI_4_1_SERIES,
+        "gpt-4.1-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4.1-mini-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4.1-nano-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4o": OPENAI_4O_SERIES,
+        "gpt-4o-2024-11-20": OPENAI_4O_SERIES,
+        "gpt-4o-mini-2024-07-18": OPENAI_4O_SERIES,
+        "o1": OPENAI_O_SERIES,
+        "o1-2024-12-17": OPENAI_O_SERIES,
+        "o3": OPENAI_O3_SERIES,
+        "o3-pro": ModelParameters(
+            context_window=200_000, max_output_tokens=100_000, tokenizes=TEXT_ONLY
+        ),
+        "o3-mini": OPENAI_O3_MINI_SERIES,
+        "o4-mini": OPENAI_O3_SERIES,
+        "o3-2025-04-16": OPENAI_O3_SERIES,
+        "o3-mini-2025-01-31": OPENAI_O3_MINI_SERIES,
+        "o4-mini-2025-04-16": OPENAI_O3_SERIES,
+        # Anthropic Models
+        "claude-3-haiku": ANTHROPIC_35_SERIES,
+        "claude-3-haiku-20240307": ANTHROPIC_LEGACY,
+        "claude-3-sonnet": ANTHROPIC_LEGACY,
+        "claude-3-opus": ANTHROPIC_LEGACY,
+        "claude-3-opus-20240229": ANTHROPIC_LEGACY,
+        "claude-3-opus-latest": ANTHROPIC_LEGACY,
+        "claude-3-5-haiku": ANTHROPIC_35_SERIES,
+        "claude-3-5-haiku-20241022": ANTHROPIC_35_SERIES,
+        "claude-3-5-haiku-latest": ANTHROPIC_35_SERIES,
+        "claude-3-sonnet-20240229": ANTHROPIC_LEGACY,
+        "claude-3-5-sonnet": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-20240620": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-20241022": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-latest": ANTHROPIC_35_SERIES,
+        "claude-3-7-sonnet": ANTHROPIC_37_SERIES,
+        "claude-3-7-sonnet-20250219": ANTHROPIC_37_SERIES,
+        "claude-3-7-sonnet-latest": ANTHROPIC_37_SERIES,
+        "claude-sonnet-4": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-sonnet-4-0": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-sonnet-4-20250514": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-opus-4": ANTHROPIC_OPUS_4_VERSIONED,
+        "claude-opus-4-0": ANTHROPIC_OPUS_4_VERSIONED,
+        "claude-opus-4-20250514": ANTHROPIC_OPUS_4_VERSIONED,
+        # DeepSeek Models
+        "deepseek-chat": DEEPSEEK_CHAT_STANDARD,
+        # Google Gemini Models (vanilla aliases and versioned)
+        "gemini-2.0-flash": GEMINI_FLASH,
+        "gemini-2.5-flash-preview": GEMINI_FLASH,
+        "gemini-2.5-pro-preview": GEMINI_2_5_PRO,
+        "gemini-2.5-flash-preview-05-20": GEMINI_FLASH,
+        "gemini-2.5-pro-preview-05-06": GEMINI_PRO,
+    }
+
+    @classmethod
+    def get_model_params(cls, model: str) -> Optional[ModelParameters]:
+        """Get model parameters for a given model name"""
+        return cls.MODELS.get(model)
+
+    @classmethod
+    def get_context_window(cls, model: str) -> Optional[int]:
+        """Get context window size for a model"""
+        params = cls.get_model_params(model)
+        return params.context_window if params else None
+
+    @classmethod
+    def get_max_output_tokens(cls, model: str) -> Optional[int]:
+        """Get maximum output tokens for a model"""
+        params = cls.get_model_params(model)
+        return params.max_output_tokens if params else None
+
+    @classmethod
+    def get_tokenizes(cls, model: str) -> Optional[List[str]]:
+        """Get supported tokenization types for a model"""
+        params = cls.get_model_params(model)
+        return params.tokenizes if params else None
+
+    @classmethod
+    def get_default_max_tokens(cls, model: str) -> int:
+        """Get default max_tokens for RequestParams based on model"""
+        if not model:
+            return 2048  # Fallback when no model specified
+
+        params = cls.get_model_params(model)
+        if params:
+            return params.max_output_tokens
+        return 2048  # Fallback for unknown models
+
+    @classmethod
+    def list_models(cls) -> List[str]:
+        """List all available model names"""
+        return list(cls.MODELS.keys())
mcp_agent/llm/model_factory.py
CHANGED
@@ -87,6 +87,7 @@ class ModelFactory:
         "o1-preview": Provider.OPENAI,
         "o3": Provider.OPENAI,
         "o3-mini": Provider.OPENAI,
+        "o4-mini": Provider.OPENAI,
         "claude-3-haiku-20240307": Provider.ANTHROPIC,
         "claude-3-5-haiku-20241022": Provider.ANTHROPIC,
         "claude-3-5-haiku-latest": Provider.ANTHROPIC,
mcp_agent/llm/providers/augmented_llm_anthropic.py
CHANGED
@@ -10,6 +10,7 @@ from mcp_agent.llm.providers.multipart_converter_anthropic import (
 from mcp_agent.llm.providers.sampling_converter_anthropic import (
     AnthropicSamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -75,14 +76,14 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize Anthropic-specific default parameters"""
-
-
-
-
-
-
-
-
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with Anthropic-specific settings
+        chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
+        base_params.model = chosen_model
+
+        return base_params
 
     def _base_url(self) -> str | None:
         assert self.context.config
@@ -158,6 +159,41 @@
 
         response = executor_result[0]
 
+        # Track usage if response is valid and has usage data
+        if (
+            hasattr(response, "usage")
+            and response.usage
+            and not isinstance(response, BaseException)
+        ):
+            try:
+                turn_usage = TurnUsage.from_anthropic(
+                    response.usage, model or DEFAULT_ANTHROPIC_MODEL
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+
+                # # Print raw usage for debugging
+                # print(f"\n=== USAGE DEBUG ({model}) ===")
+                # print(f"Raw usage: {response.usage}")
+                # print(
+                #     f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+                # )
+                # print(
+                #     f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+                # )
+                # print(f"Effective input: {turn_usage.effective_input_tokens}")
+                # print(
+                #     f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+                # )
+                # if self.usage_accumulator.context_usage_percentage:
+                #     print(
+                #         f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+                #     )
+                # if self.usage_accumulator.cache_hit_rate:
+                #     print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+                # print("===========================\n")
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
         if isinstance(response, AuthenticationError):
             raise ProviderKeyError(
                 "Invalid Anthropic API key",
mcp_agent/llm/providers/augmented_llm_google_native.py
CHANGED
@@ -24,6 +24,7 @@ from mcp_agent.llm.provider_types import Provider
 
 # Import the new converter class
 from mcp_agent.llm.providers.google_converter import GoogleConverter
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
 # Define default model and potentially other Google-specific defaults
@@ -220,6 +221,7 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             parallel_tool_calls=True,  # Assume parallel tool calls are supported by default with native API
             max_iterations=20,
             use_history=True,
+            maxTokens=65536,  # Default max tokens for Google models
             # Include other relevant default parameters
         )
 
@@ -281,10 +283,25 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             )
             self.logger.debug("Google generate_content response:", data=api_response)
 
+            # Track usage if response is valid and has usage data
+            if (
+                hasattr(api_response, "usage_metadata")
+                and api_response.usage_metadata
+                and not isinstance(api_response, BaseException)
+            ):
+                try:
+                    turn_usage = TurnUsage.from_google(
+                        api_response.usage_metadata, request_params.model
+                    )
+                    self.usage_accumulator.add_turn(turn_usage)
+
+                except Exception as e:
+                    self.logger.warning(f"Failed to track usage: {e}")
+
         except errors.APIError as e:
             # Handle specific Google API errors
             self.logger.error(f"Google API Error: {e.code} - {e.message}")
-            raise ProviderKeyError(f"Google API Error: {e.code}", e.message) from e
+            raise ProviderKeyError(f"Google API Error: {e.code}", e.message or "") from e
         except Exception as e:
             self.logger.error(f"Error during Google generate_content call: {e}")
             # Decide how to handle other exceptions - potentially re-raise or return an error message
mcp_agent/llm/providers/augmented_llm_openai.py
CHANGED
@@ -31,6 +31,7 @@ from mcp_agent.llm.providers.multipart_converter_openai import OpenAIConverter,
 from mcp_agent.llm.providers.sampling_converter_openai import (
     OpenAISamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -90,15 +91,14 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize OpenAI-specific default parameters"""
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with OpenAI-specific settings
         chosen_model = kwargs.get("model", DEFAULT_OPENAI_MODEL)
+        base_params.model = chosen_model
 
-        return
-            model=chosen_model,
-            systemPrompt=self.instruction,
-            parallel_tool_calls=True,
-            max_iterations=20,
-            use_history=True,
-        )
+        return base_params
 
     def _base_url(self) -> str:
         return self.context.config.openai.base_url if self.context.config.openai else None
@@ -166,6 +166,19 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
         response = executor_result[0]
 
+        # Track usage if response is valid and has usage data
+        if (
+            hasattr(response, "usage")
+            and response.usage
+            and not isinstance(response, BaseException)
+        ):
+            try:
+                model_name = self.default_request_params.model or DEFAULT_OPENAI_MODEL
+                turn_usage = TurnUsage.from_openai(response.usage, model_name)
+                self.usage_accumulator.add_turn(turn_usage)
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
         self.logger.debug(
             "OpenAI completion response:",
             data=response,