agno 2.3.1__py3-none-any.whl → 2.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +514 -186
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +176 -0
- agno/db/dynamo/dynamo.py +11 -0
- agno/db/firestore/firestore.py +5 -1
- agno/db/gcs_json/gcs_json_db.py +5 -2
- agno/db/in_memory/in_memory_db.py +5 -2
- agno/db/json/json_db.py +5 -1
- agno/db/migrations/manager.py +4 -4
- agno/db/mongo/async_mongo.py +158 -34
- agno/db/mongo/mongo.py +6 -2
- agno/db/mysql/mysql.py +48 -54
- agno/db/postgres/async_postgres.py +61 -51
- agno/db/postgres/postgres.py +42 -50
- agno/db/redis/redis.py +5 -0
- agno/db/redis/utils.py +5 -5
- agno/db/schemas/memory.py +7 -5
- agno/db/singlestore/singlestore.py +99 -108
- agno/db/sqlite/async_sqlite.py +32 -30
- agno/db/sqlite/sqlite.py +34 -30
- agno/knowledge/reader/pdf_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +0 -1
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +67 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/anthropic/claude.py +84 -80
- agno/models/aws/bedrock.py +38 -16
- agno/models/aws/claude.py +97 -277
- agno/models/azure/ai_foundry.py +8 -4
- agno/models/base.py +101 -14
- agno/models/cerebras/cerebras.py +18 -7
- agno/models/cerebras/cerebras_openai.py +4 -2
- agno/models/cohere/chat.py +8 -4
- agno/models/google/gemini.py +578 -20
- agno/models/groq/groq.py +18 -5
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/litellm/chat.py +17 -7
- agno/models/message.py +19 -5
- agno/models/meta/llama.py +20 -4
- agno/models/mistral/mistral.py +8 -4
- agno/models/ollama/chat.py +17 -6
- agno/models/openai/chat.py +17 -6
- agno/models/openai/responses.py +23 -9
- agno/models/vertexai/claude.py +99 -5
- agno/os/interfaces/agui/router.py +1 -0
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/router.py +16 -1
- agno/os/routers/memory/memory.py +146 -0
- agno/os/routers/memory/schemas.py +26 -0
- agno/os/schema.py +21 -6
- agno/os/utils.py +134 -10
- agno/run/base.py +2 -1
- agno/run/workflow.py +1 -1
- agno/team/team.py +571 -225
- agno/tools/mcp/mcp.py +1 -1
- agno/utils/agent.py +119 -1
- agno/utils/dttm.py +33 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +12 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +37 -2
- agno/utils/print_response/team.py +52 -0
- agno/utils/tokens.py +41 -0
- agno/workflow/types.py +2 -2
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/METADATA +45 -40
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/RECORD +75 -68
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/WHEEL +0 -0
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.1.dist-info → agno-2.3.3.dist-info}/top_level.txt +0 -0
agno/models/groq/groq.py
CHANGED
@@ -221,19 +221,28 @@ class Groq(Model):
         self,
         message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        compress_tool_results: bool = False,
     ) -> Dict[str, Any]:
         """
         Format a message into the format expected by Groq.

         Args:
             message (Message): The message to format.
+            response_format: Optional response format specification.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content,
+            "content": content,
             "name": message.name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -276,6 +285,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Groq API.
@@ -287,7 +297,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -316,6 +326,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Groq API.
@@ -327,7 +338,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -356,6 +367,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Groq API.
@@ -368,7 +380,7 @@ class Groq(Model):

         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         ):
@@ -396,6 +408,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Groq API.
@@ -409,7 +422,7 @@ class Groq(Model):

         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
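The change above is the template every provider in this release follows: a tool message may carry a compressed_content alongside its original output, and the formatter picks one of the two just before the payload is built. A minimal, self-contained sketch of that selection logic (illustrative names, not the actual agno classes):

from dataclasses import dataclass
from typing import Optional


@dataclass
class ToyMessage:
    """Stand-in for agno's Message: a role, the raw content, an optional compressed form."""
    role: str
    content: Optional[str] = None
    compressed_content: Optional[str] = None

    def get_content(self, use_compressed_content: bool = False) -> Optional[str]:
        # Prefer the compressed form only when it was requested and actually exists
        if use_compressed_content and self.compressed_content is not None:
            return self.compressed_content
        return self.content


def format_message(message: ToyMessage, compress_tool_results: bool = False) -> dict:
    # Only tool results are ever swapped for their compressed form
    if message.role == "tool":
        content = message.get_content(use_compressed_content=compress_tool_results)
    else:
        content = message.content
    return {"role": message.role, "content": content}


tool_msg = ToyMessage(role="tool", content="3,000 lines of raw JSON ...", compressed_content="3 rows matched")
assert format_message(tool_msg)["content"] == "3,000 lines of raw JSON ..."
assert format_message(tool_msg, compress_tool_results=True)["content"] == "3 rows matched"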
agno/models/huggingface/huggingface.py
CHANGED
@@ -191,19 +191,26 @@ class HuggingFace(Model):
         cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
         return cleaned_dict

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by HuggingFace.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content if message.content is not None else "",
+            "content": content,
             "name": message.name or message.tool_name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -236,6 +243,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the HuggingFace Hub.
@@ -247,7 +255,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -269,6 +277,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
@@ -280,7 +289,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -302,6 +311,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the HuggingFace API.
@@ -314,7 +324,7 @@ class HuggingFace(Model):

         stream = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
@@ -340,6 +350,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[Any]:
         """
         Sends an asynchronous streaming chat completion request to the HuggingFace API.
@@ -351,7 +362,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
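One HuggingFace-specific detail worth noting: unlike the Groq formatter, non-tool messages with content=None are sent as an empty string, presumably because the Hub client expects a string there. A short sketch of that fallback (a hypothetical helper, same idea as the diff):

from typing import Optional


def select_hf_content(
    role: str,
    content: Optional[str],
    compressed_content: Optional[str] = None,
    compress_tool_results: bool = False,
) -> Optional[str]:
    """Sketch: compressed tool output when enabled, raw content otherwise, None coerced to ""."""
    if role == "tool":
        if compress_tool_results and compressed_content is not None:
            return compressed_content
        return content
    return content if content is not None else ""


assert select_hf_content("assistant", None) == ""
assert select_hf_content("tool", "full stack trace ...", "IndexError at step 3", compress_tool_results=True) == "IndexError at step 3"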
agno/models/ibm/watsonx.py
CHANGED
@@ -129,12 +129,13 @@ class WatsonX(Model):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by WatsonX.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
@@ -151,7 +152,12 @@ class WatsonX(Model):
         if message.videos is not None and len(message.videos) > 0:
             log_warning("Video input is currently unsupported.")

-        return message.to_dict()
+        message_dict = message.to_dict()
+
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool" and compress_tool_results:
+            message_dict["content"] = message.get_content(use_compressed_content=True)
+        return message_dict

     def invoke(
         self,
@@ -161,6 +167,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the WatsonX API.
@@ -171,7 +178,7 @@ class WatsonX(Model):

         client = self.get_client()

-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
         )
@@ -196,6 +203,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Any:
         """
         Sends an asynchronous chat completion request to the WatsonX API.
@@ -205,7 +213,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()

         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -231,13 +239,14 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the WatsonX API.
         """
         try:
             client = self.get_client()
-            formatted_messages = [self._format_message(m) for m in messages]
+            formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

             request_params = self.get_request_params(
                 response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -265,6 +274,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the WatsonX API.
@@ -274,7 +284,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()

         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]

         # Get parameters for chat
         request_params = self.get_request_params(
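WatsonX is the one provider here that does not rebuild the message dict field by field: it serializes the whole message first and only then overrides the content key when tool-result compression is active, so every other field keeps whatever to_dict() produced. A rough sketch of that shape (duck-typed, not the agno implementation):

from typing import Any, Dict


def format_watsonx_style(message, compress_tool_results: bool = False) -> Dict[str, Any]:
    """Sketch: serialize everything, then patch the content for compressed tool results."""
    # Stand-in for message.to_dict(); the real call returns the fully serialized message.
    message_dict: Dict[str, Any] = dict(message.to_dict())

    if message.role == "tool" and compress_tool_results:
        # get_content falls back to the raw content when no compressed form exists,
        # so this override is safe even for uncompressed tool results.
        message_dict["content"] = message.get_content(use_compressed_content=True)
    return message_dict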
agno/models/litellm/chat.py
CHANGED
@@ -74,11 +74,17 @@ class LiteLLM(Model):
         self.client = litellm
         return self.client

-    def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+    def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
         for m in messages:
-            msg = {"role": m.role, "content": m.content if m.content is not None else ""}
+            # Use compressed content for tool messages if compression is active
+            if m.role == "tool":
+                content = m.get_content(use_compressed_content=compress_tool_results)
+            else:
+                content = m.content if m.content is not None else ""
+
+            msg = {"role": m.role, "content": content}

             # Handle media
             if (m.images is not None and len(m.images) > 0) or (m.audio is not None and len(m.audio) > 0):
@@ -98,7 +104,7 @@ class LiteLLM(Model):
                 if isinstance(msg["content"], str):
                     content_list = [{"type": "text", "text": msg["content"]}]
                 else:
-                    content_list = msg["content"]
+                    content_list = msg["content"] if isinstance(msg["content"], list) else []
                 for file in m.files:
                     file_part = _format_file_for_message(file)
                     if file_part:
@@ -186,10 +192,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends a chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -211,10 +218,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """Sends a streaming chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}

@@ -236,10 +244,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends an asynchronous chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -261,10 +270,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """Sends an asynchronous streaming chat request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}

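Beyond threading the flag into _format_messages, the LiteLLM diff also hardens the media path: content_list now falls back to an empty list instead of reusing msg["content"] unchecked, so appending image/file parts can no longer fail when the content is None or some other non-list value. A small self-contained sketch of that guard:

from typing import Any, Dict, List, Optional, Union


def build_content_list(content: Optional[Union[str, list]], file_parts: List[Dict[str, Any]]) -> list:
    """Sketch of the guarded media handling: always end up with a list of content parts."""
    if isinstance(content, str):
        content_list: list = [{"type": "text", "text": content}]
    else:
        # Pre-2.3.3 this branch reused `content` directly; a None here would break
        # the append below, hence the isinstance guard.
        content_list = content if isinstance(content, list) else []
    for part in file_parts:
        if part:
            content_list.append(part)
    return content_list


assert build_content_list(None, [{"type": "file", "file_id": "f-123"}]) == [{"type": "file", "file_id": "f-123"}]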
agno/models/message.py
CHANGED
@@ -59,6 +59,9 @@ class Message(BaseModel):
     role: str
     # The contents of the message.
     content: Optional[Union[List[Any], str]] = None
+    # Compressed content of the message
+    compressed_content: Optional[str] = None
+
     # An optional name for the participant.
     # Provides the model information to differentiate between participants of the same role.
     name: Optional[str] = None
@@ -123,6 +126,12 @@ class Message(BaseModel):
             return json.dumps(self.content)
         return ""

+    def get_content(self, use_compressed_content: bool = False) -> Optional[Union[List[Any], str]]:
+        """Return tool result content to send to API"""
+        if use_compressed_content and self.compressed_content is not None:
+            return self.compressed_content
+        return self.content
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "Message":
         # Handle image reconstruction properly
@@ -266,6 +275,7 @@ class Message(BaseModel):
             "content": self.content,
             "reasoning_content": self.reasoning_content,
             "from_history": self.from_history,
+            "compressed_content": self.compressed_content,
             "stop_after_tool_call": self.stop_after_tool_call,
             "role": self.role,
             "name": self.name,
@@ -315,13 +325,14 @@ class Message(BaseModel):
             "created_at": self.created_at,
         }

-    def log(self, metrics: bool = True, level: Optional[str] = None):
+    def log(self, metrics: bool = True, level: Optional[str] = None, use_compressed_content: bool = False):
         """Log the message to the console

         Args:
             metrics (bool): Whether to log the metrics.
             level (str): The level to log the message at. One of debug, info, warning, or error.
                 Defaults to debug.
+            use_compressed_content (bool): Whether to use compressed content.
         """
         _logger = log_debug
         if level == "info":
@@ -348,10 +359,13 @@ class Message(BaseModel):
         if self.reasoning_content:
             _logger(f"<reasoning>\n{self.reasoning_content}\n</reasoning>")
         if self.content:
-            if isinstance(self.content, str) or isinstance(self.content, list):
-                _logger(self.content)
-            elif isinstance(self.content, dict):
-                _logger(json.dumps(self.content, indent=2))
+            if use_compressed_content and self.compressed_content:
+                _logger("Compressed content:\n" + self.compressed_content)
+            else:
+                if isinstance(self.content, str) or isinstance(self.content, list):
+                    _logger(self.content)
+                elif isinstance(self.content, dict):
+                    _logger(json.dumps(self.content, indent=2))
         if self.tool_calls:
             tool_calls_list = ["Tool Calls:"]
             for tool_call in self.tool_calls:
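The Message change is what the provider diffs above build on: a new compressed_content field, a get_content() accessor that prefers it only on request, and dict serialization plus log() that are aware of it. Assuming the class is importable as agno.models.message.Message (the path shown in this diff) and that role remains the only required field, usage looks roughly like this:

from agno.models.message import Message

# A tool result with a compressed summary attached (values are illustrative)
msg = Message(
    role="tool",
    content="[{'id': 1, 'status': 'ok'}, ...] " * 500,      # large raw tool output
    compressed_content="500 rows returned, all status=ok",   # short form
)

# Default behaviour is unchanged: the raw content is returned
assert msg.get_content() == msg.content

# With compression requested, the short form wins when it exists
assert msg.get_content(use_compressed_content=True) == "500 rows returned, all status=ok"

# Messages without a compressed form silently fall back to the raw content
plain = Message(role="tool", content="small result")
assert plain.get_content(use_compressed_content=True) == "small result"

The serialization hunk above also writes compressed_content into the message dict, so the short form survives round-trips through whatever consumes that dict.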
agno/models/meta/llama.py
CHANGED
@@ -217,6 +217,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Llama API.
@@ -225,7 +226,10 @@ class Llama(Model):

         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )

@@ -242,6 +246,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Llama API.
@@ -253,7 +258,10 @@ class Llama(Model):

         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )

@@ -270,6 +278,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Llama API.
@@ -282,7 +291,10 @@ class Llama(Model):

         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format),
         ):
@@ -302,6 +314,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Llama API.
@@ -314,7 +327,10 @@ class Llama(Model):
         try:
             async for chunk in await self.get_async_client().chat.completions.create(
                 model=self.id,
-                messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+                messages=[
+                    format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                    for m in messages
+                ],
                 stream=True,
                 **self.get_request_params(tools=tools, response_format=response_format),
             ):
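Llama is the odd one out in that formatting lives in a standalone format_message helper rather than a method (agno/utils/models/llama.py also changes by +9 -2 in the file list above), and the call sites now pass compress_tool_results next to the existing tool_calls switch. A sketch of how such a helper could look; the internals here are an assumption, only the call signature comes from the diff:

from typing import Any, Dict, List


def format_message(message, tool_calls: bool = False, compress_tool_results: bool = False) -> Dict[str, Any]:
    """Sketch of a standalone formatter matching the call signature in the diff."""
    if message.role == "tool":
        content = message.get_content(use_compressed_content=compress_tool_results)
    else:
        content = message.content
    formatted: Dict[str, Any] = {"role": message.role, "content": content}
    # Assumed handling: only attach tool calls when the run actually supplied tools
    if tool_calls and getattr(message, "tool_calls", None):
        formatted["tool_calls"] = message.tool_calls
    return formatted


def build_payload(messages: List[Any], tools=None, compress_tool_results: bool = False) -> List[Dict[str, Any]]:
    # Mirrors the list comprehension used by Llama.invoke() and its async/stream variants
    return [
        format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)
        for m in messages
    ]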
agno/models/mistral/mistral.py
CHANGED
@@ -174,11 +174,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Mistral model.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)
         try:
             response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
             if (
@@ -229,11 +230,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Stream the response from the Mistral model.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)

         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -265,11 +267,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send an asynchronous chat completion request to the Mistral API.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)
         try:
             response: Union[ChatCompletionResponse, ParsedChatCompletionResponse]
             if (
@@ -316,11 +319,12 @@ class MistralChat(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Stream an asynchronous response from the Mistral API.
         """
-        mistral_messages = format_messages(messages)
+        mistral_messages = format_messages(messages, compress_tool_results)
         try:
             if run_response and run_response.metrics:
                 run_response.metrics.set_time_to_first_token()
agno/models/ollama/chat.py
CHANGED
@@ -147,19 +147,26 @@ class Ollama(Model):
         cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
         return cleaned_dict

-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by Ollama.

         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.

         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         _message: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content,
+            "content": content,
         }

         if message.role == "assistant" and message.tool_calls is not None:
@@ -228,6 +235,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat request to the Ollama API.
@@ -241,7 +249,7 @@ class Ollama(Model):

         provider_response = self.get_client().chat(
             model=self.id.strip(),
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **request_kwargs,
         )  # type: ignore

@@ -258,6 +266,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat request to the Ollama API.
@@ -271,7 +280,7 @@ class Ollama(Model):

         provider_response = await self.get_async_client().chat(
             model=self.id.strip(),
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **request_kwargs,
         )  # type: ignore

@@ -288,6 +297,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Sends a streaming chat request to the Ollama API.
@@ -299,7 +309,7 @@ class Ollama(Model):

         for chunk in self.get_client().chat(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(tools=tools),
         ):
@@ -315,6 +325,7 @@ class Ollama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Ollama API.
@@ -326,7 +337,7 @@ class Ollama(Model):

         async for chunk in await self.get_async_client().chat(
             model=self.id.strip(),
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(tools=tools),
         ):
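Taken together, every invoke, ainvoke, invoke_stream, and ainvoke_stream variant shown above gains the same keyword with a False default, so existing call sites keep their pre-2.3.3 payloads and compression is strictly opt-in. A closing, self-contained illustration of what the flag buys at the payload level (plain dicts rather than agno objects):

# Two messages: a short user prompt and a very large tool result with a compressed summary.
raw_tool_output = "x" * 20_000
messages = [
    {"role": "user", "content": "Why did the job fail?", "compressed_content": None},
    {"role": "tool", "content": raw_tool_output, "compressed_content": "traceback points at step 42"},
]


def build_payload(compress_tool_results: bool) -> list:
    payload = []
    for m in messages:
        content = m["content"]
        # Only tool results are swapped; user/assistant/system messages are never touched.
        if m["role"] == "tool" and compress_tool_results and m["compressed_content"]:
            content = m["compressed_content"]
        payload.append({"role": m["role"], "content": content})
    return payload


assert len(str(build_payload(False))) > len(str(build_payload(True)))  # compressed payload is far smaller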