fast-agent-mcp 0.2.58__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of fast-agent-mcp has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (234)
  1. fast_agent/__init__.py +127 -0
  2. fast_agent/agents/__init__.py +36 -0
  3. {mcp_agent/core → fast_agent/agents}/agent_types.py +2 -1
  4. fast_agent/agents/llm_agent.py +217 -0
  5. fast_agent/agents/llm_decorator.py +486 -0
  6. mcp_agent/agents/base_agent.py → fast_agent/agents/mcp_agent.py +377 -385
  7. fast_agent/agents/tool_agent.py +168 -0
  8. {mcp_agent → fast_agent}/agents/workflow/chain_agent.py +43 -33
  9. {mcp_agent → fast_agent}/agents/workflow/evaluator_optimizer.py +31 -35
  10. {mcp_agent → fast_agent}/agents/workflow/iterative_planner.py +56 -47
  11. {mcp_agent → fast_agent}/agents/workflow/orchestrator_models.py +4 -4
  12. {mcp_agent → fast_agent}/agents/workflow/parallel_agent.py +34 -41
  13. {mcp_agent → fast_agent}/agents/workflow/router_agent.py +54 -39
  14. {mcp_agent → fast_agent}/cli/__main__.py +5 -3
  15. {mcp_agent → fast_agent}/cli/commands/check_config.py +95 -66
  16. {mcp_agent → fast_agent}/cli/commands/go.py +20 -11
  17. {mcp_agent → fast_agent}/cli/commands/quickstart.py +4 -4
  18. {mcp_agent → fast_agent}/cli/commands/server_helpers.py +1 -1
  19. {mcp_agent → fast_agent}/cli/commands/setup.py +75 -134
  20. {mcp_agent → fast_agent}/cli/commands/url_parser.py +9 -8
  21. {mcp_agent → fast_agent}/cli/main.py +36 -16
  22. {mcp_agent → fast_agent}/cli/terminal.py +2 -2
  23. {mcp_agent → fast_agent}/config.py +10 -2
  24. fast_agent/constants.py +8 -0
  25. {mcp_agent → fast_agent}/context.py +24 -19
  26. {mcp_agent → fast_agent}/context_dependent.py +9 -5
  27. fast_agent/core/__init__.py +52 -0
  28. {mcp_agent → fast_agent}/core/agent_app.py +39 -36
  29. fast_agent/core/core_app.py +135 -0
  30. {mcp_agent → fast_agent}/core/direct_decorators.py +12 -26
  31. {mcp_agent → fast_agent}/core/direct_factory.py +95 -73
  32. {mcp_agent → fast_agent/core}/executor/executor.py +4 -5
  33. {mcp_agent → fast_agent}/core/fastagent.py +32 -32
  34. fast_agent/core/logging/__init__.py +5 -0
  35. {mcp_agent → fast_agent/core}/logging/events.py +3 -3
  36. {mcp_agent → fast_agent/core}/logging/json_serializer.py +1 -1
  37. {mcp_agent → fast_agent/core}/logging/listeners.py +85 -7
  38. {mcp_agent → fast_agent/core}/logging/logger.py +7 -7
  39. {mcp_agent → fast_agent/core}/logging/transport.py +10 -11
  40. fast_agent/core/prompt.py +9 -0
  41. {mcp_agent → fast_agent}/core/validation.py +4 -4
  42. fast_agent/event_progress.py +61 -0
  43. fast_agent/history/history_exporter.py +44 -0
  44. {mcp_agent → fast_agent}/human_input/__init__.py +9 -12
  45. {mcp_agent → fast_agent}/human_input/elicitation_handler.py +26 -8
  46. {mcp_agent → fast_agent}/human_input/elicitation_state.py +7 -7
  47. {mcp_agent → fast_agent}/human_input/simple_form.py +6 -4
  48. {mcp_agent → fast_agent}/human_input/types.py +1 -18
  49. fast_agent/interfaces.py +228 -0
  50. fast_agent/llm/__init__.py +9 -0
  51. mcp_agent/llm/augmented_llm.py → fast_agent/llm/fastagent_llm.py +127 -218
  52. fast_agent/llm/internal/passthrough.py +137 -0
  53. mcp_agent/llm/augmented_llm_playback.py → fast_agent/llm/internal/playback.py +29 -25
  54. mcp_agent/llm/augmented_llm_silent.py → fast_agent/llm/internal/silent.py +10 -17
  55. fast_agent/llm/internal/slow.py +38 -0
  56. {mcp_agent → fast_agent}/llm/memory.py +40 -30
  57. {mcp_agent → fast_agent}/llm/model_database.py +35 -2
  58. {mcp_agent → fast_agent}/llm/model_factory.py +103 -77
  59. fast_agent/llm/model_info.py +126 -0
  60. {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/anthropic_utils.py +7 -7
  61. fast_agent/llm/provider/anthropic/llm_anthropic.py +603 -0
  62. {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/multipart_converter_anthropic.py +79 -86
  63. {mcp_agent/llm/providers → fast_agent/llm/provider/bedrock}/bedrock_utils.py +3 -1
  64. mcp_agent/llm/providers/augmented_llm_bedrock.py → fast_agent/llm/provider/bedrock/llm_bedrock.py +833 -717
  65. {mcp_agent/llm/providers → fast_agent/llm/provider/google}/google_converter.py +66 -14
  66. fast_agent/llm/provider/google/llm_google_native.py +431 -0
  67. mcp_agent/llm/providers/augmented_llm_aliyun.py → fast_agent/llm/provider/openai/llm_aliyun.py +6 -7
  68. mcp_agent/llm/providers/augmented_llm_azure.py → fast_agent/llm/provider/openai/llm_azure.py +4 -4
  69. mcp_agent/llm/providers/augmented_llm_deepseek.py → fast_agent/llm/provider/openai/llm_deepseek.py +10 -11
  70. mcp_agent/llm/providers/augmented_llm_generic.py → fast_agent/llm/provider/openai/llm_generic.py +4 -4
  71. mcp_agent/llm/providers/augmented_llm_google_oai.py → fast_agent/llm/provider/openai/llm_google_oai.py +4 -4
  72. mcp_agent/llm/providers/augmented_llm_groq.py → fast_agent/llm/provider/openai/llm_groq.py +14 -16
  73. mcp_agent/llm/providers/augmented_llm_openai.py → fast_agent/llm/provider/openai/llm_openai.py +133 -207
  74. mcp_agent/llm/providers/augmented_llm_openrouter.py → fast_agent/llm/provider/openai/llm_openrouter.py +6 -6
  75. mcp_agent/llm/providers/augmented_llm_tensorzero_openai.py → fast_agent/llm/provider/openai/llm_tensorzero_openai.py +17 -16
  76. mcp_agent/llm/providers/augmented_llm_xai.py → fast_agent/llm/provider/openai/llm_xai.py +6 -6
  77. {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/multipart_converter_openai.py +125 -63
  78. {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_multipart.py +12 -12
  79. {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_utils.py +18 -16
  80. {mcp_agent → fast_agent}/llm/provider_key_manager.py +2 -2
  81. {mcp_agent → fast_agent}/llm/provider_types.py +2 -0
  82. {mcp_agent → fast_agent}/llm/sampling_converter.py +15 -12
  83. {mcp_agent → fast_agent}/llm/usage_tracking.py +23 -5
  84. fast_agent/mcp/__init__.py +54 -0
  85. {mcp_agent → fast_agent}/mcp/elicitation_factory.py +3 -3
  86. {mcp_agent → fast_agent}/mcp/elicitation_handlers.py +19 -10
  87. {mcp_agent → fast_agent}/mcp/gen_client.py +3 -3
  88. fast_agent/mcp/helpers/__init__.py +36 -0
  89. fast_agent/mcp/helpers/content_helpers.py +183 -0
  90. {mcp_agent → fast_agent}/mcp/helpers/server_config_helpers.py +8 -8
  91. {mcp_agent → fast_agent}/mcp/hf_auth.py +25 -23
  92. fast_agent/mcp/interfaces.py +93 -0
  93. {mcp_agent → fast_agent}/mcp/logger_textio.py +4 -4
  94. {mcp_agent → fast_agent}/mcp/mcp_agent_client_session.py +49 -44
  95. {mcp_agent → fast_agent}/mcp/mcp_aggregator.py +66 -115
  96. {mcp_agent → fast_agent}/mcp/mcp_connection_manager.py +16 -23
  97. {mcp_agent/core → fast_agent/mcp}/mcp_content.py +23 -15
  98. {mcp_agent → fast_agent}/mcp/mime_utils.py +39 -0
  99. fast_agent/mcp/prompt.py +159 -0
  100. mcp_agent/mcp/prompt_message_multipart.py → fast_agent/mcp/prompt_message_extended.py +27 -20
  101. {mcp_agent → fast_agent}/mcp/prompt_render.py +21 -19
  102. {mcp_agent → fast_agent}/mcp/prompt_serialization.py +46 -46
  103. fast_agent/mcp/prompts/__main__.py +7 -0
  104. {mcp_agent → fast_agent}/mcp/prompts/prompt_helpers.py +31 -30
  105. {mcp_agent → fast_agent}/mcp/prompts/prompt_load.py +8 -8
  106. {mcp_agent → fast_agent}/mcp/prompts/prompt_server.py +11 -19
  107. {mcp_agent → fast_agent}/mcp/prompts/prompt_template.py +18 -18
  108. {mcp_agent → fast_agent}/mcp/resource_utils.py +1 -1
  109. {mcp_agent → fast_agent}/mcp/sampling.py +31 -26
  110. {mcp_agent/mcp_server → fast_agent/mcp/server}/__init__.py +1 -1
  111. {mcp_agent/mcp_server → fast_agent/mcp/server}/agent_server.py +5 -6
  112. fast_agent/mcp/ui_agent.py +48 -0
  113. fast_agent/mcp/ui_mixin.py +209 -0
  114. fast_agent/mcp_server_registry.py +90 -0
  115. {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis-campaign.py +5 -4
  116. {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis.py +1 -1
  117. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/forms_demo.py +3 -3
  118. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character.py +2 -2
  119. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character_handler.py +1 -1
  120. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/tool_call.py +1 -1
  121. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_one.py +1 -1
  122. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_two.py +1 -1
  123. {mcp_agent → fast_agent}/resources/examples/researcher/researcher-eval.py +1 -1
  124. {mcp_agent → fast_agent}/resources/examples/researcher/researcher-imp.py +1 -1
  125. {mcp_agent → fast_agent}/resources/examples/researcher/researcher.py +1 -1
  126. {mcp_agent → fast_agent}/resources/examples/tensorzero/agent.py +2 -2
  127. {mcp_agent → fast_agent}/resources/examples/tensorzero/image_demo.py +3 -3
  128. {mcp_agent → fast_agent}/resources/examples/tensorzero/simple_agent.py +1 -1
  129. {mcp_agent → fast_agent}/resources/examples/workflows/chaining.py +1 -1
  130. {mcp_agent → fast_agent}/resources/examples/workflows/evaluator.py +3 -3
  131. {mcp_agent → fast_agent}/resources/examples/workflows/human_input.py +5 -3
  132. {mcp_agent → fast_agent}/resources/examples/workflows/orchestrator.py +1 -1
  133. {mcp_agent → fast_agent}/resources/examples/workflows/parallel.py +2 -2
  134. {mcp_agent → fast_agent}/resources/examples/workflows/router.py +5 -2
  135. fast_agent/resources/setup/.gitignore +24 -0
  136. fast_agent/resources/setup/agent.py +18 -0
  137. fast_agent/resources/setup/fastagent.config.yaml +44 -0
  138. fast_agent/resources/setup/fastagent.secrets.yaml.example +38 -0
  139. fast_agent/resources/setup/pyproject.toml.tmpl +17 -0
  140. fast_agent/tools/elicitation.py +369 -0
  141. fast_agent/types/__init__.py +32 -0
  142. fast_agent/types/llm_stop_reason.py +77 -0
  143. fast_agent/ui/__init__.py +38 -0
  144. fast_agent/ui/console_display.py +1005 -0
  145. {mcp_agent/human_input → fast_agent/ui}/elicitation_form.py +17 -12
  146. mcp_agent/human_input/elicitation_forms.py → fast_agent/ui/elicitation_style.py +1 -1
  147. {mcp_agent/core → fast_agent/ui}/enhanced_prompt.py +96 -25
  148. {mcp_agent/core → fast_agent/ui}/interactive_prompt.py +330 -125
  149. fast_agent/ui/mcp_ui_utils.py +224 -0
  150. {mcp_agent → fast_agent/ui}/progress_display.py +2 -2
  151. {mcp_agent/logging → fast_agent/ui}/rich_progress.py +4 -4
  152. {mcp_agent/core → fast_agent/ui}/usage_display.py +3 -8
  153. {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.1.dist-info}/METADATA +7 -7
  154. fast_agent_mcp-0.3.1.dist-info/RECORD +203 -0
  155. fast_agent_mcp-0.3.1.dist-info/entry_points.txt +5 -0
  156. fast_agent_mcp-0.2.58.dist-info/RECORD +0 -193
  157. fast_agent_mcp-0.2.58.dist-info/entry_points.txt +0 -6
  158. mcp_agent/__init__.py +0 -114
  159. mcp_agent/agents/agent.py +0 -92
  160. mcp_agent/agents/workflow/__init__.py +0 -1
  161. mcp_agent/agents/workflow/orchestrator_agent.py +0 -597
  162. mcp_agent/app.py +0 -175
  163. mcp_agent/core/__init__.py +0 -26
  164. mcp_agent/core/prompt.py +0 -191
  165. mcp_agent/event_progress.py +0 -134
  166. mcp_agent/human_input/handler.py +0 -81
  167. mcp_agent/llm/__init__.py +0 -2
  168. mcp_agent/llm/augmented_llm_passthrough.py +0 -232
  169. mcp_agent/llm/augmented_llm_slow.py +0 -53
  170. mcp_agent/llm/providers/__init__.py +0 -8
  171. mcp_agent/llm/providers/augmented_llm_anthropic.py +0 -718
  172. mcp_agent/llm/providers/augmented_llm_google_native.py +0 -496
  173. mcp_agent/llm/providers/sampling_converter_anthropic.py +0 -57
  174. mcp_agent/llm/providers/sampling_converter_openai.py +0 -26
  175. mcp_agent/llm/sampling_format_converter.py +0 -37
  176. mcp_agent/logging/__init__.py +0 -0
  177. mcp_agent/mcp/__init__.py +0 -50
  178. mcp_agent/mcp/helpers/__init__.py +0 -25
  179. mcp_agent/mcp/helpers/content_helpers.py +0 -187
  180. mcp_agent/mcp/interfaces.py +0 -266
  181. mcp_agent/mcp/prompts/__init__.py +0 -0
  182. mcp_agent/mcp/prompts/__main__.py +0 -10
  183. mcp_agent/mcp_server_registry.py +0 -343
  184. mcp_agent/tools/tool_definition.py +0 -14
  185. mcp_agent/ui/console_display.py +0 -790
  186. mcp_agent/ui/console_display_legacy.py +0 -401
  187. {mcp_agent → fast_agent}/agents/workflow/orchestrator_prompts.py +0 -0
  188. {mcp_agent/agents → fast_agent/cli}/__init__.py +0 -0
  189. {mcp_agent → fast_agent}/cli/constants.py +0 -0
  190. {mcp_agent → fast_agent}/core/error_handling.py +0 -0
  191. {mcp_agent → fast_agent}/core/exceptions.py +0 -0
  192. {mcp_agent/cli → fast_agent/core/executor}/__init__.py +0 -0
  193. {mcp_agent → fast_agent/core}/executor/task_registry.py +0 -0
  194. {mcp_agent → fast_agent/core}/executor/workflow_signal.py +0 -0
  195. {mcp_agent → fast_agent}/human_input/form_fields.py +0 -0
  196. {mcp_agent → fast_agent}/llm/prompt_utils.py +0 -0
  197. {mcp_agent/core → fast_agent/llm}/request_params.py +0 -0
  198. {mcp_agent → fast_agent}/mcp/common.py +0 -0
  199. {mcp_agent/executor → fast_agent/mcp/prompts}/__init__.py +0 -0
  200. {mcp_agent → fast_agent}/mcp/prompts/prompt_constants.py +0 -0
  201. {mcp_agent → fast_agent}/py.typed +0 -0
  202. {mcp_agent → fast_agent}/resources/examples/data-analysis/fastagent.config.yaml +0 -0
  203. {mcp_agent → fast_agent}/resources/examples/data-analysis/mount-point/WA_Fn-UseC_-HR-Employee-Attrition.csv +0 -0
  204. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_account_server.py +0 -0
  205. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_forms_server.py +0 -0
  206. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_game_server.py +0 -0
  207. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.config.yaml +0 -0
  208. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.secrets.yaml.example +0 -0
  209. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.config.yaml +0 -0
  210. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.secrets.yaml.example +0 -0
  211. {mcp_agent → fast_agent}/resources/examples/researcher/fastagent.config.yaml +0 -0
  212. {mcp_agent → fast_agent}/resources/examples/tensorzero/.env.sample +0 -0
  213. {mcp_agent → fast_agent}/resources/examples/tensorzero/Makefile +0 -0
  214. {mcp_agent → fast_agent}/resources/examples/tensorzero/README.md +0 -0
  215. {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/clam.jpg +0 -0
  216. {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/crab.png +0 -0
  217. {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/shrimp.png +0 -0
  218. {mcp_agent → fast_agent}/resources/examples/tensorzero/docker-compose.yml +0 -0
  219. {mcp_agent → fast_agent}/resources/examples/tensorzero/fastagent.config.yaml +0 -0
  220. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/Dockerfile +0 -0
  221. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/entrypoint.sh +0 -0
  222. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/mcp_server.py +0 -0
  223. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/pyproject.toml +0 -0
  224. {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_schema.json +0 -0
  225. {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_template.minijinja +0 -0
  226. {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/tensorzero.toml +0 -0
  227. {mcp_agent → fast_agent}/resources/examples/workflows/fastagent.config.yaml +0 -0
  228. {mcp_agent → fast_agent}/resources/examples/workflows/graded_report.md +0 -0
  229. {mcp_agent → fast_agent}/resources/examples/workflows/short_story.md +0 -0
  230. {mcp_agent → fast_agent}/resources/examples/workflows/short_story.txt +0 -0
  231. {mcp_agent → fast_agent/ui}/console.py +0 -0
  232. {mcp_agent/core → fast_agent/ui}/mermaid_utils.py +0 -0
  233. {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.1.dist-info}/WHEEL +0 -0
  234. {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,137 @@
1
+ import json # Import at the module level
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ from mcp import CallToolRequest, Tool
5
+ from mcp.types import CallToolRequestParams, PromptMessage
6
+
7
+ from fast_agent.core.logging.logger import get_logger
8
+ from fast_agent.core.prompt import Prompt
9
+ from fast_agent.llm.fastagent_llm import (
10
+ FastAgentLLM,
11
+ RequestParams,
12
+ )
13
+ from fast_agent.llm.provider_types import Provider
14
+ from fast_agent.llm.usage_tracking import create_turn_usage_from_messages
15
+ from fast_agent.mcp.helpers.content_helpers import get_text
16
+ from fast_agent.types import PromptMessageExtended
17
+ from fast_agent.types.llm_stop_reason import LlmStopReason
18
+
19
+ CALL_TOOL_INDICATOR = "***CALL_TOOL"
20
+ FIXED_RESPONSE_INDICATOR = "***FIXED_RESPONSE"
21
+
22
+
23
+ class PassthroughLLM(FastAgentLLM):
24
+ """
25
+ A specialized LLM implementation that simply passes through input messages without modification.
26
+
27
+ This is useful for cases where you need an object with the AugmentedLLM interface
28
+ but want to preserve the original message without any processing, such as in a
29
+ parallel workflow where no fan-in aggregation is needed.
30
+ """
31
+
32
+ def __init__(
33
+ self, provider=Provider.FAST_AGENT, name: str = "Passthrough", **kwargs: dict[str, Any]
34
+ ) -> None:
35
+ super().__init__(name=name, provider=provider, **kwargs)
36
+ self.logger = get_logger(__name__)
37
+ self._messages = [PromptMessage]
38
+ self._fixed_response: str | None = None
39
+ self._correlation_id: int = 0
40
+
41
+ async def initialize(self) -> None:
42
+ pass
43
+
44
+ def _parse_tool_command(self, command: str) -> tuple[str, Optional[dict]]:
45
+ """
46
+ Parse a tool command string into tool name and arguments.
47
+
48
+ Args:
49
+ command: The command string in format "***CALL_TOOL <tool_name> [arguments_json]"
50
+
51
+ Returns:
52
+ Tuple of (tool_name, arguments_dict)
53
+
54
+ Raises:
55
+ ValueError: If command format is invalid
56
+ """
57
+ parts = command.split(" ", 2)
58
+ if len(parts) < 2:
59
+ raise ValueError("Invalid format. Expected '***CALL_TOOL <tool_name> [arguments_json]'")
60
+
61
+ tool_name = parts[1].strip()
62
+ arguments = None
63
+
64
+ if len(parts) > 2:
65
+ try:
66
+ arguments = json.loads(parts[2])
67
+ except json.JSONDecodeError:
68
+ raise ValueError(f"Invalid JSON arguments: {parts[2]}")
69
+
70
+ self.logger.info(f"Calling tool {tool_name} with arguments {arguments}")
71
+ return tool_name, arguments
72
+
73
+ async def _apply_prompt_provider_specific(
74
+ self,
75
+ multipart_messages: List["PromptMessageExtended"],
76
+ request_params: RequestParams | None = None,
77
+ tools: list[Tool] | None = None,
78
+ is_template: bool = False,
79
+ ) -> PromptMessageExtended:
80
+ # Add messages to history with proper is_prompt flag
81
+ self.history.extend(multipart_messages, is_prompt=is_template)
82
+
83
+ last_message = multipart_messages[-1]
84
+ tool_calls: Dict[str, CallToolRequest] = {}
85
+ stop_reason: LlmStopReason = LlmStopReason.END_TURN
86
+ if self.is_tool_call(last_message):
87
+ tool_name, arguments = self._parse_tool_command(last_message.first_text())
88
+ tool_calls["correlationId" + str(self._correlation_id)] = CallToolRequest(
89
+ method="tools/call",
90
+ params=CallToolRequestParams(name=tool_name, arguments=arguments),
91
+ )
92
+ self._correlation_id += 1
93
+ stop_reason = LlmStopReason.TOOL_USE
94
+
95
+ if last_message.first_text().startswith(FIXED_RESPONSE_INDICATOR):
96
+ self._fixed_response = (
97
+ last_message.first_text().split(FIXED_RESPONSE_INDICATOR, 1)[1].strip()
98
+ )
99
+
100
+ if len(last_message.tool_results or {}) > 0:
101
+ assert last_message.tool_results
102
+ concatenated_content = " ".join(
103
+ [
104
+ (get_text(tool_result.content[0]) or "<empty>")
105
+ for tool_result in last_message.tool_results.values()
106
+ ]
107
+ )
108
+ result = Prompt.assistant(concatenated_content, stop_reason=stop_reason)
109
+
110
+ elif self._fixed_response:
111
+ result = Prompt.assistant(
112
+ self._fixed_response, tool_calls=tool_calls, stop_reason=stop_reason
113
+ )
114
+ else:
115
+ concatenated_content = "\n".join(
116
+ [message.all_text() for message in multipart_messages if "user" == message.role]
117
+ )
118
+ result = Prompt.assistant(
119
+ concatenated_content,
120
+ tool_calls=tool_calls,
121
+ stop_reason=stop_reason,
122
+ )
123
+
124
+ turn_usage = create_turn_usage_from_messages(
125
+ input_content=multipart_messages[-1].all_text(),
126
+ output_content=result.all_text(),
127
+ model="passthrough",
128
+ model_type="passthrough",
129
+ tool_calls=len(tool_calls),
130
+ delay_seconds=0.0,
131
+ )
132
+ self.usage_accumulator.add_turn(turn_usage)
133
+
134
+ return result
135
+
136
+ def is_tool_call(self, message: PromptMessageExtended) -> bool:
137
+ return message.first_text().startswith(CALL_TOOL_INDICATOR)
@@ -1,16 +1,19 @@
1
1
  from typing import Any, List, Type, Union
2
2
 
3
+ from mcp import Tool
3
4
  from mcp.types import PromptMessage
4
5
 
5
- from mcp_agent.core.exceptions import ModelConfigError
6
- from mcp_agent.core.prompt import Prompt
7
- from mcp_agent.llm.augmented_llm import RequestParams
8
- from mcp_agent.llm.augmented_llm_passthrough import PassthroughLLM
9
- from mcp_agent.llm.provider_types import Provider
10
- from mcp_agent.llm.usage_tracking import create_turn_usage_from_messages
11
- from mcp_agent.mcp.interfaces import ModelT
12
- from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
13
- from mcp_agent.mcp.prompts.prompt_helpers import MessageContent
6
+ from fast_agent.core.exceptions import ModelConfigError
7
+ from fast_agent.core.prompt import Prompt
8
+ from fast_agent.interfaces import ModelT
9
+ from fast_agent.llm.internal.passthrough import PassthroughLLM
10
+ from fast_agent.llm.provider_types import Provider
11
+ from fast_agent.llm.usage_tracking import create_turn_usage_from_messages
12
+ from fast_agent.mcp.helpers.content_helpers import normalize_to_extended_list
13
+ from fast_agent.mcp.prompts.prompt_helpers import MessageContent
14
+ from fast_agent.types import PromptMessageExtended, RequestParams
15
+
16
+ # TODO -- support tool usage/replay
14
17
 
15
18
 
16
19
  class PlaybackLLM(PassthroughLLM):
@@ -28,11 +31,11 @@ class PlaybackLLM(PassthroughLLM):
28
31
 
29
32
  def __init__(self, name: str = "Playback", **kwargs: dict[str, Any]) -> None:
30
33
  super().__init__(name=name, provider=Provider.FAST_AGENT, **kwargs)
31
- self._messages: List[PromptMessageMultipart] = []
34
+ self._messages: List[PromptMessageExtended] = []
32
35
  self._current_index = -1
33
36
  self._overage = -1
34
37
 
35
- def _get_next_assistant_message(self) -> PromptMessageMultipart:
38
+ def _get_next_assistant_message(self) -> PromptMessageExtended:
36
39
  """
37
40
  Get the next assistant message from the loaded messages.
38
41
  Increments the current message index and skips user messages.
@@ -53,14 +56,23 @@ class PlaybackLLM(PassthroughLLM):
53
56
 
54
57
  async def generate(
55
58
  self,
56
- multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
59
+ messages: Union[
60
+ str,
61
+ PromptMessage,
62
+ PromptMessageExtended,
63
+ List[Union[str, PromptMessage, PromptMessageExtended]],
64
+ ],
57
65
  request_params: RequestParams | None = None,
58
- ) -> PromptMessageMultipart:
66
+ tools: List[Tool] | None = None,
67
+ ) -> PromptMessageExtended:
59
68
  """
60
69
  Handle playback of messages in two modes:
61
70
  1. First call: store messages for playback and return "HISTORY LOADED"
62
71
  2. Subsequent calls: return the next assistant message
63
72
  """
73
+ # Normalize all input types to a list of PromptMessageExtended
74
+ multipart_messages = normalize_to_extended_list(messages)
75
+
64
76
  # If this is the first call (initialization) or we're loading a prompt template
65
77
  # with multiple messages (comes from apply_prompt)
66
78
  if -1 == self._current_index:
@@ -72,24 +84,16 @@ class PlaybackLLM(PassthroughLLM):
72
84
  # Reset the index to the beginning for proper playback
73
85
  self._current_index = 0
74
86
 
75
- await self.show_assistant_message(
76
- message_text=f"HISTORY LOADED ({len(self._messages)} messages)",
77
- title="ASSISTANT/PLAYBACK",
78
- )
79
-
80
87
  # In PlaybackLLM, we always return "HISTORY LOADED" on initialization,
81
88
  # regardless of the prompt content. The next call will return messages.
82
- return Prompt.assistant("HISTORY LOADED")
89
+ return Prompt.assistant(f"HISTORY LOADED ({len(self._messages)}) messages")
83
90
 
84
91
  response = self._get_next_assistant_message()
85
- await self.show_assistant_message(
86
- message_text=MessageContent.get_first_text(response), title="ASSISTANT/PLAYBACK"
87
- )
88
92
 
89
93
  # Track usage for this playback "turn"
90
94
  try:
91
95
  input_content = str(multipart_messages) if multipart_messages else ""
92
- output_content = MessageContent.get_first_text(response)
96
+ output_content = MessageContent.get_first_text(response) or ""
93
97
 
94
98
  turn_usage = create_turn_usage_from_messages(
95
99
  input_content=input_content,
@@ -108,10 +112,10 @@ class PlaybackLLM(PassthroughLLM):
108
112
 
109
113
  async def structured(
110
114
  self,
111
- multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
115
+ messages: List[PromptMessageExtended],
112
116
  model: Type[ModelT],
113
117
  request_params: RequestParams | None = None,
114
- ) -> tuple[ModelT | None, PromptMessageMultipart]:
118
+ ) -> tuple[ModelT | None, PromptMessageExtended]:
115
119
  """
116
120
  Handle structured requests by returning the next assistant message.
117
121
  """
@@ -2,47 +2,40 @@
2
2
 
3
3
  from typing import Any
4
4
 
5
- from mcp_agent.llm.augmented_llm_passthrough import PassthroughLLM
6
- from mcp_agent.llm.provider_types import Provider
7
- from mcp_agent.llm.usage_tracking import TurnUsage, UsageAccumulator
5
+ from fast_agent.llm.internal.passthrough import PassthroughLLM
6
+ from fast_agent.llm.provider_types import Provider
7
+ from fast_agent.llm.usage_tracking import TurnUsage, UsageAccumulator
8
8
 
9
9
 
10
10
class ZeroUsageAccumulator(UsageAccumulator):
    """Usage accumulator that always reports zero usage.

    Used by SilentLLM so that fan-in/aggregation turns do not count toward
    token usage totals.
    """

    def add_turn(self, turn: TurnUsage) -> None:
        """Override to do nothing - no usage accumulation."""
        # Intentionally discard the turn: this accumulator must stay at zero.
        pass
16
16
 
17
17
 
18
# TODO -- this won't work anymore
# NOTE(review): the TODO above predates this refactor -- the user/assistant
# display overrides were removed in this version, so only tool-call/result
# display is still suppressed here; confirm whether full silencing is restored elsewhere.
class SilentLLM(PassthroughLLM):
    """
    A specialized LLM that processes messages like PassthroughLLM but suppresses all display output.

    This is particularly useful for parallel agent workflows where the fan-in agent
    should aggregate results without polluting the console with intermediate output.
    Token counting is disabled - the model always reports zero usage.
    """

    def __init__(
        self, provider=Provider.FAST_AGENT, name: str = "Silent", **kwargs: dict[str, Any]
    ) -> None:
        """Initialize like PassthroughLLM, but with usage accounting disabled."""
        super().__init__(name=name, provider=provider, **kwargs)
        # Override with zero usage accumulator - silent model reports no usage
        self.usage_accumulator = ZeroUsageAccumulator()

    def show_tool_calls(self, tool_calls: Any, **kwargs) -> None:
        """Override to suppress tool call display."""
        pass

    def show_tool_results(self, tool_results: Any, **kwargs) -> None:
        """Override to suppress tool result display."""
        pass
@@ -0,0 +1,38 @@
1
+ import asyncio
2
+ from typing import Any, List
3
+
4
+ from mcp import Tool
5
+
6
+ from fast_agent.llm.fastagent_llm import (
7
+ RequestParams,
8
+ )
9
+ from fast_agent.llm.internal.passthrough import PassthroughLLM
10
+ from fast_agent.llm.provider_types import Provider
11
+ from fast_agent.types import PromptMessageExtended
12
+
13
+
14
class SlowLLM(PassthroughLLM):
    """
    A specialized LLM implementation that sleeps before responding like PassthroughLLM.

    This is useful for testing scenarios where you want to simulate slow responses
    or for debugging timing-related issues in parallel workflows.

    The delay is configurable via the ``delay_seconds`` keyword (default 3.0
    seconds, matching the historical hard-coded behaviour).
    """

    # Historical default delay; kept at 3 seconds for backward compatibility.
    DEFAULT_DELAY_SECONDS: float = 3.0

    def __init__(
        self,
        provider=Provider.FAST_AGENT,
        name: str = "Slow",
        delay_seconds: float = DEFAULT_DELAY_SECONDS,
        **kwargs: dict[str, Any],
    ) -> None:
        """
        Args:
            provider: Provider identity reported by this model.
            name: Display name for the model instance.
            delay_seconds: Artificial delay applied before each response;
                negative values are clamped to zero.
            **kwargs: Forwarded to PassthroughLLM/FastAgentLLM.
        """
        super().__init__(name=name, provider=provider, **kwargs)
        # Clamp rather than raise: a negative delay is meaningless, not fatal.
        self._delay_seconds: float = max(0.0, delay_seconds)

    async def _apply_prompt_provider_specific(
        self,
        multipart_messages: List["PromptMessageExtended"],
        request_params: RequestParams | None = None,
        tools: list[Tool] | None = None,
        is_template: bool = False,
    ) -> PromptMessageExtended:
        """Sleep for the configured delay, then apply prompt like PassthroughLLM."""
        await asyncio.sleep(self._delay_seconds)
        return await super()._apply_prompt_provider_specific(
            multipart_messages, request_params, tools, is_template
        )
@@ -35,7 +35,9 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
35
35
  def __init__(self) -> None:
36
36
  self.history: List[MessageParamT] = []
37
37
  self.prompt_messages: List[MessageParamT] = [] # Always included
38
- self.conversation_cache_positions: List[int] = [] # Track active conversation cache positions
38
+ self.conversation_cache_positions: List[
39
+ int
40
+ ] = [] # Track active conversation cache positions
39
41
  self.cache_walk_distance: int = 6 # Messages between cache blocks
40
42
  self.max_conversation_cache_blocks: int = 2 # Maximum conversation cache blocks
41
43
 
@@ -109,79 +111,85 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
109
111
  def should_apply_conversation_cache(self) -> bool:
110
112
  """
111
113
  Determine if conversation caching should be applied based on walking algorithm.
112
-
114
+
113
115
  Returns:
114
116
  True if we should add or update cache blocks
115
117
  """
116
118
  total_messages = len(self.history)
117
-
119
+
118
120
  # Need at least cache_walk_distance messages to start caching
119
121
  if total_messages < self.cache_walk_distance:
120
122
  return False
121
-
123
+
122
124
  # Check if we need to add a new cache block
123
- return len(self._calculate_cache_positions(total_messages)) != len(self.conversation_cache_positions)
124
-
125
+ return len(self._calculate_cache_positions(total_messages)) != len(
126
+ self.conversation_cache_positions
127
+ )
128
+
125
129
  def _calculate_cache_positions(self, total_conversation_messages: int) -> List[int]:
126
130
  """
127
131
  Calculate where cache blocks should be placed using walking algorithm.
128
-
132
+
129
133
  Args:
130
134
  total_conversation_messages: Number of conversation messages (not including prompts)
131
-
135
+
132
136
  Returns:
133
137
  List of positions (relative to conversation start) where cache should be placed
134
138
  """
135
139
  positions = []
136
-
140
+
137
141
  # Place cache blocks every cache_walk_distance messages
138
- for i in range(self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance):
142
+ for i in range(
143
+ self.cache_walk_distance - 1, total_conversation_messages, self.cache_walk_distance
144
+ ):
139
145
  positions.append(i)
140
146
  if len(positions) >= self.max_conversation_cache_blocks:
141
147
  break
142
-
148
+
143
149
  # Keep only the most recent cache blocks (walking behavior)
144
150
  if len(positions) > self.max_conversation_cache_blocks:
145
- positions = positions[-self.max_conversation_cache_blocks:]
146
-
151
+ positions = positions[-self.max_conversation_cache_blocks :]
152
+
147
153
  return positions
148
-
154
+
149
155
  def get_conversation_cache_updates(self) -> dict:
150
156
  """
151
157
  Get cache position updates needed for the walking algorithm.
152
-
158
+
153
159
  Returns:
154
160
  Dict with 'add', 'remove', and 'active' position lists (relative to full message array)
155
161
  """
156
162
  total_conversation_messages = len(self.history)
157
163
  new_positions = self._calculate_cache_positions(total_conversation_messages)
158
-
164
+
159
165
  # Convert to absolute positions (including prompt messages)
160
166
  prompt_offset = len(self.prompt_messages)
161
167
  new_absolute_positions = [pos + prompt_offset for pos in new_positions]
162
-
168
+
163
169
  old_positions_set = set(self.conversation_cache_positions)
164
170
  new_positions_set = set(new_absolute_positions)
165
-
171
+
166
172
  return {
167
- 'add': sorted(new_positions_set - old_positions_set),
168
- 'remove': sorted(old_positions_set - new_positions_set),
169
- 'active': sorted(new_absolute_positions)
173
+ "add": sorted(new_positions_set - old_positions_set),
174
+ "remove": sorted(old_positions_set - new_positions_set),
175
+ "active": sorted(new_absolute_positions),
170
176
  }
171
-
177
+
172
178
  def apply_conversation_cache_updates(self, updates: dict) -> None:
173
179
  """
174
180
  Apply cache position updates.
175
-
181
+
176
182
  Args:
177
183
  updates: Dict from get_conversation_cache_updates()
178
184
  """
179
- self.conversation_cache_positions = updates['active'].copy()
185
+ self.conversation_cache_positions = updates["active"].copy()
180
186
 
181
- def remove_cache_control_from_messages(self, messages: List[MessageParamT], positions: List[int]) -> None:
187
+ def remove_cache_control_from_messages(
188
+ self, messages: List[MessageParamT], positions: List[int]
189
+ ) -> None:
182
190
  """
183
191
  Remove cache control from specified message positions.
184
-
192
+
185
193
  Args:
186
194
  messages: The message array to modify
187
195
  positions: List of positions to remove cache control from
@@ -196,14 +204,16 @@ class SimpleMemory(Memory, Generic[MessageParamT]):
196
204
  if isinstance(content_block, dict) and "cache_control" in content_block:
197
205
  del content_block["cache_control"]
198
206
 
199
- def add_cache_control_to_messages(self, messages: List[MessageParamT], positions: List[int]) -> int:
207
+ def add_cache_control_to_messages(
208
+ self, messages: List[MessageParamT], positions: List[int]
209
+ ) -> int:
200
210
  """
201
211
  Add cache control to specified message positions.
202
-
212
+
203
213
  Args:
204
- messages: The message array to modify
214
+ messages: The message array to modify
205
215
  positions: List of positions to add cache control to
206
-
216
+
207
217
  Returns:
208
218
  Number of cache blocks successfully applied
209
219
  """
@@ -158,8 +158,9 @@ class ModelDatabase:
158
158
  context_window=2097152, max_output_tokens=8192, tokenizes=GOOGLE_MULTIMODAL
159
159
  )
160
160
 
161
+ # 31/08/25 switched to object mode (even though groq says schema supported and used to work..)
161
162
  KIMI_MOONSHOT = ModelParameters(
162
- context_window=131072, max_output_tokens=16384, tokenizes=TEXT_ONLY
163
+ context_window=262144, max_output_tokens=16384, tokenizes=TEXT_ONLY, json_mode="object"
163
164
  )
164
165
 
165
166
  # FIXME: xAI has not documented the max output tokens for Grok 4. Using Grok 3 as a placeholder. Will need to update when available (if ever)
@@ -245,7 +246,7 @@ class ModelDatabase:
245
246
  "grok-3-mini": GROK_3,
246
247
  "grok-3-fast": GROK_3,
247
248
  "grok-3-mini-fast": GROK_3,
248
- "moonshotai/kimi-k2-instruct": KIMI_MOONSHOT,
249
+ "moonshotai/kimi-k2-instruct-0905": KIMI_MOONSHOT,
249
250
  "qwen/qwen3-32b": QWEN3_REASONER,
250
251
  "deepseek-r1-distill-llama-70b": DEEPSEEK_DISTILL,
251
252
  "openai/gpt-oss-120b": OPENAI_GPT_OSS_SERIES,
@@ -275,6 +276,38 @@ class ModelDatabase:
275
276
  params = cls.get_model_params(model)
276
277
  return params.tokenizes if params else None
277
278
 
279
+ @classmethod
280
+ def supports_mime(cls, model: str, mime_type: str) -> bool:
281
+ """
282
+ Return True if the given model supports the provided MIME type.
283
+
284
+ Normalizes common aliases (e.g., image/jpg->image/jpeg, document/pdf->application/pdf)
285
+ and also accepts bare extensions like "pdf" or "png".
286
+ """
287
+ from fast_agent.mcp.mime_utils import normalize_mime_type
288
+
289
+ tokenizes = cls.get_tokenizes(model) or []
290
+
291
+ # Normalize the candidate and the database entries to lowercase
292
+ normalized_supported = [t.lower() for t in tokenizes]
293
+
294
+ # Handle wildcard inputs like "image/*" quickly
295
+ mt = (mime_type or "").strip().lower()
296
+ if mt.endswith("/*") and "/" in mt:
297
+ prefix = mt.split("/", 1)[0] + "/"
298
+ return any(s.startswith(prefix) for s in normalized_supported)
299
+
300
+ normalized = normalize_mime_type(mime_type)
301
+ if not normalized:
302
+ return False
303
+
304
+ return normalized.lower() in normalized_supported
305
+
306
+ @classmethod
307
+ def supports_any_mime(cls, model: str, mime_types: List[str]) -> bool:
308
+ """Return True if the model supports any of the provided MIME types."""
309
+ return any(cls.supports_mime(model, m) for m in mime_types)
310
+
278
311
  @classmethod
279
312
  def get_json_mode(cls, model: str) -> str | None:
280
313
  """Get supported json mode (structured output) for a model"""