fast-agent-mcp 0.2.33__py3-none-any.whl → 0.2.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/METADATA +1 -1
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/RECORD +22 -19
- mcp_agent/agents/base_agent.py +13 -0
- mcp_agent/core/agent_app.py +41 -1
- mcp_agent/core/enhanced_prompt.py +9 -0
- mcp_agent/core/fastagent.py +14 -2
- mcp_agent/core/interactive_prompt.py +59 -13
- mcp_agent/core/usage_display.py +193 -0
- mcp_agent/llm/augmented_llm.py +26 -6
- mcp_agent/llm/augmented_llm_passthrough.py +66 -4
- mcp_agent/llm/augmented_llm_playback.py +19 -0
- mcp_agent/llm/augmented_llm_slow.py +12 -1
- mcp_agent/llm/model_database.py +236 -0
- mcp_agent/llm/model_factory.py +1 -0
- mcp_agent/llm/providers/augmented_llm_anthropic.py +44 -8
- mcp_agent/llm/providers/augmented_llm_google_native.py +18 -1
- mcp_agent/llm/providers/augmented_llm_openai.py +20 -7
- mcp_agent/llm/usage_tracking.py +385 -0
- mcp_agent/mcp/interfaces.py +6 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.33.dist-info → fast_agent_mcp-0.2.34.dist-info}/licenses/LICENSE +0 -0
mcp_agent/llm/augmented_llm.py
CHANGED
@@ -30,11 +30,13 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.core.request_params import RequestParams
 from mcp_agent.event_progress import ProgressAction
 from mcp_agent.llm.memory import Memory, SimpleMemory
+from mcp_agent.llm.model_database import ModelDatabase
 from mcp_agent.llm.provider_types import Provider
 from mcp_agent.llm.sampling_format_converter import (
     BasicFormatConverter,
     ProviderFormatConverter,
 )
+from mcp_agent.llm.usage_tracking import UsageAccumulator
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.helpers.content_helpers import get_text
 from mcp_agent.mcp.interfaces import (
@@ -155,12 +157,11 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # Initialize the display component
         self.display = ConsoleDisplay(config=self.context.config)
 
-        # Initialize default parameters
-
-
-        # Apply model override if provided
+        # Initialize default parameters, passing model info
+        model_kwargs = kwargs.copy()
         if model:
-
+            model_kwargs["model"] = model
+        self.default_request_params = self._initialize_default_params(model_kwargs)
 
         # Merge with provided params if any
         if self._init_request_params:
@@ -171,13 +172,22 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         self.type_converter = type_converter
         self.verb = kwargs.get("verb")
 
+        # Initialize usage tracking
+        self.usage_accumulator = UsageAccumulator()
+
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize default parameters for the LLM.
         Should be overridden by provider implementations to set provider-specific defaults."""
+        # Get model-aware default max tokens
+        model = kwargs.get("model")
+        max_tokens = ModelDatabase.get_default_max_tokens(model)
+
         return RequestParams(
+            model=model,
+            maxTokens=max_tokens,
             systemPrompt=self.instruction,
             parallel_tool_calls=True,
-            max_iterations=
+            max_iterations=20,
             use_history=True,
         )
 
@@ -642,3 +652,13 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
         assert self.provider
         return ProviderKeyManager.get_api_key(self.provider.value, self.context.config)
+
+    def get_usage_summary(self) -> dict:
+        """
+        Get a summary of usage statistics for this LLM instance.
+
+        Returns:
+            Dictionary containing usage statistics including tokens, cache metrics,
+            and context window utilization.
+        """
+        return self.usage_accumulator.get_summary()
mcp_agent/llm/augmented_llm_passthrough.py
CHANGED
@@ -10,6 +10,7 @@ from mcp_agent.llm.augmented_llm import (
     RequestParams,
 )
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -48,13 +49,34 @@ class PassthroughLLM(AugmentedLLM):
         await self.show_assistant_message(message, title="ASSISTANT/PASSTHROUGH")
 
         # Handle PromptMessage by concatenating all parts
+        result = ""
         if isinstance(message, PromptMessage):
             parts_text = []
             for part in message.content:
                 parts_text.append(str(part))
-
+            result = "\n".join(parts_text)
+        else:
+            result = str(message)
 
-
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = str(message)
+            output_content = result
+            tool_calls = 1 if input_content.startswith("***CALL_TOOL") else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result
 
     async def initialize(self) -> None:
         pass
@@ -146,6 +168,25 @@ class PassthroughLLM(AugmentedLLM):
         if self.is_tool_call(last_message):
             result = Prompt.assistant(await self.generate_str(last_message.first_text()))
             await self.show_assistant_message(result.first_text())
+
+            # Track usage for this tool call "turn"
+            try:
+                input_content = "\n".join(message.all_text() for message in multipart_messages)
+                output_content = result.first_text()
+
+                turn_usage = create_turn_usage_from_messages(
+                    input_content=input_content,
+                    output_content=output_content,
+                    model="passthrough",
+                    model_type="passthrough",
+                    tool_calls=1,  # This is definitely a tool call
+                    delay_seconds=0.0,
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
             return result
 
         if last_message.first_text().startswith(FIXED_RESPONSE_INDICATOR):
@@ -155,12 +196,33 @@
 
         if self._fixed_response:
             await self.show_assistant_message(self._fixed_response)
-
+            result = Prompt.assistant(self._fixed_response)
         else:
             # TODO -- improve when we support Audio/Multimodal gen models e.g. gemini . This should really just return the input as "assistant"...
             concatenated: str = "\n".join(message.all_text() for message in multipart_messages)
             await self.show_assistant_message(concatenated)
-
+            result = Prompt.assistant(concatenated)
+
+        # Track usage for this passthrough "turn"
+        try:
+            input_content = "\n".join(message.all_text() for message in multipart_messages)
+            output_content = result.first_text()
+            tool_calls = 1 if self.is_tool_call(last_message) else 0
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="passthrough",
+                model_type="passthrough",
+                tool_calls=tool_calls,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
+        return result
 
     def is_tool_call(self, message: PromptMessageMultipart) -> bool:
         return message.first_text().startswith(CALL_TOOL_INDICATOR)
mcp_agent/llm/augmented_llm_playback.py
CHANGED
@@ -5,6 +5,7 @@ from mcp_agent.core.prompt import Prompt
 from mcp_agent.llm.augmented_llm import RequestParams
 from mcp_agent.llm.augmented_llm_passthrough import PassthroughLLM
 from mcp_agent.llm.provider_types import Provider
+from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 from mcp_agent.mcp.prompts.prompt_helpers import MessageContent
@@ -83,6 +84,24 @@ class PlaybackLLM(PassthroughLLM):
             message_text=MessageContent.get_first_text(response), title="ASSISTANT/PLAYBACK"
         )
 
+        # Track usage for this playback "turn"
+        try:
+            input_content = str(multipart_messages) if multipart_messages else ""
+            output_content = MessageContent.get_first_text(response)
+
+            turn_usage = create_turn_usage_from_messages(
+                input_content=input_content,
+                output_content=output_content,
+                model="playback",
+                model_type="playback",
+                tool_calls=0,
+                delay_seconds=0.0,
+            )
+            self.usage_accumulator.add_turn(turn_usage)
+
+        except Exception as e:
+            self.logger.warning(f"Failed to track usage: {e}")
+
         return response
 
     async def structured(
mcp_agent/llm/augmented_llm_slow.py
CHANGED
@@ -30,7 +30,18 @@ class SlowLLM(PassthroughLLM):
     ) -> str:
         """Sleep for 3 seconds then return the input message as a string."""
         await asyncio.sleep(3)
-
+        result = await super().generate_str(message, request_params)
+
+        # Override the last turn to include the 3-second delay
+        if self.usage_accumulator.turns:
+            last_turn = self.usage_accumulator.turns[-1]
+            # Update the raw usage to include delay
+            if hasattr(last_turn.raw_usage, 'delay_seconds'):
+                last_turn.raw_usage.delay_seconds = 3.0
+            # Print updated debug info
+            print("SlowLLM: Added 3.0s delay to turn usage")
+
+        return result
 
     async def _apply_prompt_provider_specific(
         self,
mcp_agent/llm/model_database.py
ADDED
@@ -0,0 +1,236 @@
+"""
+Model database for LLM parameters.
+
+This module provides a centralized lookup for model parameters including
+context windows, max output tokens, and supported tokenization types.
+"""
+
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+
+class ModelParameters(BaseModel):
+    """Configuration parameters for a specific model"""
+
+    context_window: int
+    """Maximum context window size in tokens"""
+
+    max_output_tokens: int
+    """Maximum output tokens the model can generate"""
+
+    tokenizes: List[str]
+    """List of supported content types for tokenization"""
+
+
+class ModelDatabase:
+    """Centralized model configuration database"""
+
+    # Common parameter sets
+    OPENAI_MULTIMODAL = ["text/plain", "image/jpeg", "image/png", "image/webp", "application/pdf"]
+    OPENAI_VISION = ["text/plain", "image/jpeg", "image/png", "image/webp"]
+    ANTHROPIC_MULTIMODAL = [
+        "text/plain",
+        "image/jpeg",
+        "image/png",
+        "image/webp",
+        "application/pdf",
+    ]
+    GOOGLE_MULTIMODAL = [
+        "text/plain",
+        "image/jpeg",
+        "image/png",
+        "image/webp",
+        "application/pdf",
+        "audio/wav",
+        "audio/mp3",
+        "video/mp4",
+    ]
+    QWEN_MULTIMODAL = ["text/plain", "image/jpeg", "image/png", "image/webp"]
+    TEXT_ONLY = ["text/plain"]
+
+    # Common parameter configurations
+    OPENAI_STANDARD = ModelParameters(
+        context_window=128000, max_output_tokens=16384, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_4_1_STANDARD = ModelParameters(
+        context_window=1047576, max_output_tokens=32768, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_O_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=OPENAI_VISION
+    )
+
+    ANTHROPIC_LEGACY = ModelParameters(
+        context_window=200000, max_output_tokens=4096, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    ANTHROPIC_35_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=8192, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    # TODO--- TO USE 64,000 NEED TO SUPPORT STREAMING
+    ANTHROPIC_37_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    GEMINI_FLASH = ModelParameters(
+        context_window=1048576, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    GEMINI_PRO = ModelParameters(
+        context_window=2097152, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    QWEN_STANDARD = ModelParameters(
+        context_window=32000, max_output_tokens=8192, tokenizes=QWEN_MULTIMODAL
+    )
+
+    FAST_AGENT_STANDARD = ModelParameters(
+        context_window=1000000, max_output_tokens=100000, tokenizes=TEXT_ONLY
+    )
+
+    OPENAI_4_1_SERIES = ModelParameters(
+        context_window=1047576, max_output_tokens=32768, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_4O_SERIES = ModelParameters(
+        context_window=128000, max_output_tokens=16384, tokenizes=OPENAI_VISION
+    )
+
+    OPENAI_O3_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=OPENAI_MULTIMODAL
+    )
+
+    OPENAI_O3_MINI_SERIES = ModelParameters(
+        context_window=200000, max_output_tokens=100000, tokenizes=TEXT_ONLY
+    )
+
+    # TODO update to 32000
+    ANTHROPIC_OPUS_4_VERSIONED = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+    # TODO update to 64000
+    ANTHROPIC_SONNET_4_VERSIONED = ModelParameters(
+        context_window=200000, max_output_tokens=16384, tokenizes=ANTHROPIC_MULTIMODAL
+    )
+
+    DEEPSEEK_CHAT_STANDARD = ModelParameters(
+        context_window=65536, max_output_tokens=8192, tokenizes=TEXT_ONLY
+    )
+
+    DEEPSEEK_REASONER = ModelParameters(
+        context_window=65536, max_output_tokens=32768, tokenizes=TEXT_ONLY
+    )
+
+    GEMINI_2_5_PRO = ModelParameters(
+        context_window=2097152, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
+    )
+
+    # Model configuration database
+    MODELS: Dict[str, ModelParameters] = {
+        # internal models
+        "passthrough": FAST_AGENT_STANDARD,
+        "playback": FAST_AGENT_STANDARD,
+        "slow": FAST_AGENT_STANDARD,
+        # aliyun models
+        "qwen-turbo": QWEN_STANDARD,
+        "qwen-plus": QWEN_STANDARD,
+        "qwen-max": QWEN_STANDARD,
+        "qwen-long": ModelParameters(
+            context_window=10000000, max_output_tokens=8192, tokenizes=TEXT_ONLY
+        ),
+        # OpenAI Models (vanilla aliases and versioned)
+        "gpt-4.1": OPENAI_4_1_SERIES,
+        "gpt-4.1-mini": OPENAI_4_1_SERIES,
+        "gpt-4.1-nano": OPENAI_4_1_SERIES,
+        "gpt-4.1-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4.1-mini-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4.1-nano-2025-04-14": OPENAI_4_1_SERIES,
+        "gpt-4o": OPENAI_4O_SERIES,
+        "gpt-4o-2024-11-20": OPENAI_4O_SERIES,
+        "gpt-4o-mini-2024-07-18": OPENAI_4O_SERIES,
+        "o1": OPENAI_O_SERIES,
+        "o1-2024-12-17": OPENAI_O_SERIES,
+        "o3": OPENAI_O3_SERIES,
+        "o3-pro": ModelParameters(
+            context_window=200_000, max_output_tokens=100_000, tokenizes=TEXT_ONLY
+        ),
+        "o3-mini": OPENAI_O3_MINI_SERIES,
+        "o4-mini": OPENAI_O3_SERIES,
+        "o3-2025-04-16": OPENAI_O3_SERIES,
+        "o3-mini-2025-01-31": OPENAI_O3_MINI_SERIES,
+        "o4-mini-2025-04-16": OPENAI_O3_SERIES,
+        # Anthropic Models
+        "claude-3-haiku": ANTHROPIC_35_SERIES,
+        "claude-3-haiku-20240307": ANTHROPIC_LEGACY,
+        "claude-3-sonnet": ANTHROPIC_LEGACY,
+        "claude-3-opus": ANTHROPIC_LEGACY,
+        "claude-3-opus-20240229": ANTHROPIC_LEGACY,
+        "claude-3-opus-latest": ANTHROPIC_LEGACY,
+        "claude-3-5-haiku": ANTHROPIC_35_SERIES,
+        "claude-3-5-haiku-20241022": ANTHROPIC_35_SERIES,
+        "claude-3-5-haiku-latest": ANTHROPIC_35_SERIES,
+        "claude-3-sonnet-20240229": ANTHROPIC_LEGACY,
+        "claude-3-5-sonnet": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-20240620": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-20241022": ANTHROPIC_35_SERIES,
+        "claude-3-5-sonnet-latest": ANTHROPIC_35_SERIES,
+        "claude-3-7-sonnet": ANTHROPIC_37_SERIES,
+        "claude-3-7-sonnet-20250219": ANTHROPIC_37_SERIES,
+        "claude-3-7-sonnet-latest": ANTHROPIC_37_SERIES,
+        "claude-sonnet-4": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-sonnet-4-0": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-sonnet-4-20250514": ANTHROPIC_SONNET_4_VERSIONED,
+        "claude-opus-4": ANTHROPIC_OPUS_4_VERSIONED,
+        "claude-opus-4-0": ANTHROPIC_OPUS_4_VERSIONED,
+        "claude-opus-4-20250514": ANTHROPIC_OPUS_4_VERSIONED,
+        # DeepSeek Models
+        "deepseek-chat": DEEPSEEK_CHAT_STANDARD,
+        # Google Gemini Models (vanilla aliases and versioned)
+        "gemini-2.0-flash": GEMINI_FLASH,
+        "gemini-2.5-flash-preview": GEMINI_FLASH,
+        "gemini-2.5-pro-preview": GEMINI_2_5_PRO,
+        "gemini-2.5-flash-preview-05-20": GEMINI_FLASH,
+        "gemini-2.5-pro-preview-05-06": GEMINI_PRO,
+    }
+
+    @classmethod
+    def get_model_params(cls, model: str) -> Optional[ModelParameters]:
+        """Get model parameters for a given model name"""
+        return cls.MODELS.get(model)
+
+    @classmethod
+    def get_context_window(cls, model: str) -> Optional[int]:
+        """Get context window size for a model"""
+        params = cls.get_model_params(model)
+        return params.context_window if params else None
+
+    @classmethod
+    def get_max_output_tokens(cls, model: str) -> Optional[int]:
+        """Get maximum output tokens for a model"""
+        params = cls.get_model_params(model)
+        return params.max_output_tokens if params else None
+
+    @classmethod
+    def get_tokenizes(cls, model: str) -> Optional[List[str]]:
+        """Get supported tokenization types for a model"""
+        params = cls.get_model_params(model)
+        return params.tokenizes if params else None
+
+    @classmethod
+    def get_default_max_tokens(cls, model: str) -> int:
+        """Get default max_tokens for RequestParams based on model"""
+        if not model:
+            return 2048  # Fallback when no model specified
+
+        params = cls.get_model_params(model)
+        if params:
+            return params.max_output_tokens
+        return 2048  # Fallback for unknown models
+
+    @classmethod
+    def list_models(cls) -> List[str]:
+        """List all available model names"""
+        return list(cls.MODELS.keys())
mcp_agent/llm/model_factory.py
CHANGED
@@ -87,6 +87,7 @@ class ModelFactory:
         "o1-preview": Provider.OPENAI,
         "o3": Provider.OPENAI,
         "o3-mini": Provider.OPENAI,
+        "o4-mini": Provider.OPENAI,
         "claude-3-haiku-20240307": Provider.ANTHROPIC,
         "claude-3-5-haiku-20241022": Provider.ANTHROPIC,
         "claude-3-5-haiku-latest": Provider.ANTHROPIC,
mcp_agent/llm/providers/augmented_llm_anthropic.py
CHANGED
@@ -10,6 +10,7 @@ from mcp_agent.llm.providers.multipart_converter_anthropic import (
 from mcp_agent.llm.providers.sampling_converter_anthropic import (
     AnthropicSamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.interfaces import ModelT
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -75,14 +76,14 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize Anthropic-specific default parameters"""
-
-
-
-
-
-
-
-
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with Anthropic-specific settings
+        chosen_model = kwargs.get("model", DEFAULT_ANTHROPIC_MODEL)
+        base_params.model = chosen_model
+
+        return base_params
 
     def _base_url(self) -> str | None:
         assert self.context.config
@@ -158,6 +159,41 @@
 
         response = executor_result[0]
 
+        # Track usage if response is valid and has usage data
+        if (
+            hasattr(response, "usage")
+            and response.usage
+            and not isinstance(response, BaseException)
+        ):
+            try:
+                turn_usage = TurnUsage.from_anthropic(
+                    response.usage, model or DEFAULT_ANTHROPIC_MODEL
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+
+                # # Print raw usage for debugging
+                # print(f"\n=== USAGE DEBUG ({model}) ===")
+                # print(f"Raw usage: {response.usage}")
+                # print(
+                #     f"Turn usage: input={turn_usage.input_tokens}, output={turn_usage.output_tokens}, current_context={turn_usage.current_context_tokens}"
+                # )
+                # print(
+                #     f"Cache: read={turn_usage.cache_usage.cache_read_tokens}, write={turn_usage.cache_usage.cache_write_tokens}"
+                # )
+                # print(f"Effective input: {turn_usage.effective_input_tokens}")
+                # print(
+                #     f"Accumulator: total_turns={self.usage_accumulator.turn_count}, cumulative_billing={self.usage_accumulator.cumulative_billing_tokens}, current_context={self.usage_accumulator.current_context_tokens}"
+                # )
+                # if self.usage_accumulator.context_usage_percentage:
+                #     print(
+                #         f"Context usage: {self.usage_accumulator.context_usage_percentage:.1f}% of {self.usage_accumulator.context_window_size}"
+                #     )
+                # if self.usage_accumulator.cache_hit_rate:
+                #     print(f"Cache hit rate: {self.usage_accumulator.cache_hit_rate:.1f}%")
+                # print("===========================\n")
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
         if isinstance(response, AuthenticationError):
             raise ProviderKeyError(
                 "Invalid Anthropic API key",
mcp_agent/llm/providers/augmented_llm_google_native.py
CHANGED
@@ -24,6 +24,7 @@ from mcp_agent.llm.provider_types import Provider
 
 # Import the new converter class
 from mcp_agent.llm.providers.google_converter import GoogleConverter
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
 # Define default model and potentially other Google-specific defaults
@@ -220,6 +221,7 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             parallel_tool_calls=True,  # Assume parallel tool calls are supported by default with native API
             max_iterations=20,
             use_history=True,
+            maxTokens=65536,  # Default max tokens for Google models
             # Include other relevant default parameters
         )
 
@@ -281,10 +283,25 @@ class GoogleNativeAugmentedLLM(AugmentedLLM[types.Content, types.Content]):
             )
             self.logger.debug("Google generate_content response:", data=api_response)
 
+            # Track usage if response is valid and has usage data
+            if (
+                hasattr(api_response, "usage_metadata")
+                and api_response.usage_metadata
+                and not isinstance(api_response, BaseException)
+            ):
+                try:
+                    turn_usage = TurnUsage.from_google(
+                        api_response.usage_metadata, request_params.model
+                    )
+                    self.usage_accumulator.add_turn(turn_usage)
+
+                except Exception as e:
+                    self.logger.warning(f"Failed to track usage: {e}")
+
         except errors.APIError as e:
             # Handle specific Google API errors
             self.logger.error(f"Google API Error: {e.code} - {e.message}")
-            raise ProviderKeyError(f"Google API Error: {e.code}", e.message) from e
+            raise ProviderKeyError(f"Google API Error: {e.code}", e.message or "") from e
         except Exception as e:
             self.logger.error(f"Error during Google generate_content call: {e}")
             # Decide how to handle other exceptions - potentially re-raise or return an error message
mcp_agent/llm/providers/augmented_llm_openai.py
CHANGED
@@ -31,6 +31,7 @@ from mcp_agent.llm.providers.multipart_converter_openai import OpenAIConverter,
 from mcp_agent.llm.providers.sampling_converter_openai import (
     OpenAISamplingConverter,
 )
+from mcp_agent.llm.usage_tracking import TurnUsage
 from mcp_agent.logging.logger import get_logger
 from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
 
@@ -90,15 +91,14 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize OpenAI-specific default parameters"""
+        # Get base defaults from parent (includes ModelDatabase lookup)
+        base_params = super()._initialize_default_params(kwargs)
+
+        # Override with OpenAI-specific settings
         chosen_model = kwargs.get("model", DEFAULT_OPENAI_MODEL)
+        base_params.model = chosen_model
 
-        return
-            model=chosen_model,
-            systemPrompt=self.instruction,
-            parallel_tool_calls=True,
-            max_iterations=20,
-            use_history=True,
-        )
+        return base_params
 
     def _base_url(self) -> str:
         return self.context.config.openai.base_url if self.context.config.openai else None
@@ -166,6 +166,19 @@ class OpenAIAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletion
 
         response = executor_result[0]
 
+        # Track usage if response is valid and has usage data
+        if (
+            hasattr(response, "usage")
+            and response.usage
+            and not isinstance(response, BaseException)
+        ):
+            try:
+                model_name = self.default_request_params.model or DEFAULT_OPENAI_MODEL
+                turn_usage = TurnUsage.from_openai(response.usage, model_name)
+                self.usage_accumulator.add_turn(turn_usage)
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
         self.logger.debug(
             "OpenAI completion response:",
             data=response,