agno-2.3.2-py3-none-any.whl → agno-2.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. agno/agent/agent.py +513 -185
  2. agno/compression/__init__.py +3 -0
  3. agno/compression/manager.py +176 -0
  4. agno/db/dynamo/dynamo.py +11 -0
  5. agno/db/firestore/firestore.py +5 -1
  6. agno/db/gcs_json/gcs_json_db.py +5 -2
  7. agno/db/in_memory/in_memory_db.py +5 -2
  8. agno/db/json/json_db.py +5 -1
  9. agno/db/migrations/manager.py +4 -4
  10. agno/db/mongo/async_mongo.py +158 -34
  11. agno/db/mongo/mongo.py +6 -2
  12. agno/db/mysql/mysql.py +48 -54
  13. agno/db/postgres/async_postgres.py +66 -52
  14. agno/db/postgres/postgres.py +42 -50
  15. agno/db/redis/redis.py +5 -0
  16. agno/db/redis/utils.py +5 -5
  17. agno/db/singlestore/singlestore.py +99 -108
  18. agno/db/sqlite/async_sqlite.py +29 -27
  19. agno/db/sqlite/sqlite.py +30 -26
  20. agno/knowledge/reader/pdf_reader.py +2 -2
  21. agno/knowledge/reader/tavily_reader.py +0 -1
  22. agno/memory/__init__.py +14 -1
  23. agno/memory/manager.py +217 -4
  24. agno/memory/strategies/__init__.py +15 -0
  25. agno/memory/strategies/base.py +67 -0
  26. agno/memory/strategies/summarize.py +196 -0
  27. agno/memory/strategies/types.py +37 -0
  28. agno/models/aimlapi/aimlapi.py +18 -0
  29. agno/models/anthropic/claude.py +87 -81
  30. agno/models/aws/bedrock.py +38 -16
  31. agno/models/aws/claude.py +97 -277
  32. agno/models/azure/ai_foundry.py +8 -4
  33. agno/models/base.py +101 -14
  34. agno/models/cerebras/cerebras.py +25 -9
  35. agno/models/cerebras/cerebras_openai.py +22 -2
  36. agno/models/cohere/chat.py +18 -6
  37. agno/models/cometapi/cometapi.py +19 -1
  38. agno/models/deepinfra/deepinfra.py +19 -1
  39. agno/models/fireworks/fireworks.py +19 -1
  40. agno/models/google/gemini.py +583 -21
  41. agno/models/groq/groq.py +23 -6
  42. agno/models/huggingface/huggingface.py +22 -7
  43. agno/models/ibm/watsonx.py +21 -7
  44. agno/models/internlm/internlm.py +19 -1
  45. agno/models/langdb/langdb.py +10 -0
  46. agno/models/litellm/chat.py +17 -7
  47. agno/models/litellm/litellm_openai.py +19 -1
  48. agno/models/message.py +19 -5
  49. agno/models/meta/llama.py +25 -5
  50. agno/models/meta/llama_openai.py +18 -0
  51. agno/models/mistral/mistral.py +13 -5
  52. agno/models/nvidia/nvidia.py +19 -1
  53. agno/models/ollama/chat.py +17 -6
  54. agno/models/openai/chat.py +22 -7
  55. agno/models/openai/responses.py +28 -10
  56. agno/models/openrouter/openrouter.py +20 -0
  57. agno/models/perplexity/perplexity.py +17 -0
  58. agno/models/requesty/requesty.py +18 -0
  59. agno/models/sambanova/sambanova.py +19 -1
  60. agno/models/siliconflow/siliconflow.py +19 -1
  61. agno/models/together/together.py +19 -1
  62. agno/models/vercel/v0.py +19 -1
  63. agno/models/vertexai/claude.py +99 -5
  64. agno/models/xai/xai.py +18 -0
  65. agno/os/interfaces/agui/router.py +1 -0
  66. agno/os/interfaces/agui/utils.py +97 -57
  67. agno/os/router.py +16 -0
  68. agno/os/routers/memory/memory.py +143 -0
  69. agno/os/routers/memory/schemas.py +26 -0
  70. agno/os/schema.py +33 -6
  71. agno/os/utils.py +134 -10
  72. agno/run/base.py +2 -1
  73. agno/run/workflow.py +1 -1
  74. agno/team/team.py +566 -219
  75. agno/tools/mcp/mcp.py +1 -1
  76. agno/utils/agent.py +119 -1
  77. agno/utils/models/ai_foundry.py +9 -2
  78. agno/utils/models/claude.py +12 -5
  79. agno/utils/models/cohere.py +9 -2
  80. agno/utils/models/llama.py +9 -2
  81. agno/utils/models/mistral.py +4 -2
  82. agno/utils/print_response/agent.py +37 -2
  83. agno/utils/print_response/team.py +52 -0
  84. agno/utils/tokens.py +41 -0
  85. agno/workflow/types.py +2 -2
  86. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/METADATA +45 -40
  87. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/RECORD +90 -83
  88. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/WHEEL +0 -0
  89. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/licenses/LICENSE +0 -0
  90. {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/top_level.txt +0 -0
agno/models/groq/groq.py CHANGED
@@ -74,7 +74,11 @@ class Groq(Model):
         if not self.api_key:
             self.api_key = getenv("GROQ_API_KEY")
             if not self.api_key:
-                log_error("GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.")
+                raise ModelProviderError(
+                    message="GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
 
         # Define base client params
         base_params = {
@@ -221,19 +225,28 @@ class Groq(Model):
         self,
         message: Message,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+        compress_tool_results: bool = False,
     ) -> Dict[str, Any]:
         """
         Format a message into the format expected by Groq.
 
         Args:
             message (Message): The message to format.
+            response_format: Optional response format specification.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content,
+            "content": content,
             "name": message.name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -276,6 +289,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Groq API.
@@ -287,7 +301,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -316,6 +330,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Groq API.
@@ -327,7 +342,7 @@ class Groq(Model):
         assistant_message.metrics.start_timer()
         response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -356,6 +371,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Groq API.
@@ -368,7 +384,7 @@ class Groq(Model):
 
         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         ):
@@ -396,6 +412,7 @@ class Groq(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Groq API.
@@ -409,7 +426,7 @@ class Groq(Model):
 
         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self.format_message(m) for m in messages],  # type: ignore
+            messages=[self.format_message(m, response_format, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
         )
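
Note on the change above: a missing GROQ_API_KEY now fails fast with a ModelProviderError when client parameters are resolved (for example, the first time the client is built), instead of only emitting an error log. A minimal sketch of the new behavior, assuming the import paths agno.models.groq.Groq and agno.exceptions.ModelProviderError shown in this diff; the model id is illustrative:

    # Sketch: simulate an unset key and observe the raised error
    import os
    from agno.exceptions import ModelProviderError
    from agno.models.groq import Groq

    os.environ.pop("GROQ_API_KEY", None)  # ensure the key is not set
    try:
        Groq(id="llama-3.3-70b-versatile").get_client()  # client params are resolved here
    except ModelProviderError as exc:
        print(exc)  # GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.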
agno/models/huggingface/huggingface.py CHANGED
@@ -73,7 +73,11 @@ class HuggingFace(Model):
     def get_client_params(self) -> Dict[str, Any]:
         self.api_key = self.api_key or getenv("HF_TOKEN")
         if not self.api_key:
-            log_error("HF_TOKEN not set. Please set the HF_TOKEN environment variable.")
+            raise ModelProviderError(
+                message="HF_TOKEN not set. Please set the HF_TOKEN environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         _client_params: Dict[str, Any] = {}
         if self.api_key is not None:
@@ -191,19 +195,26 @@ class HuggingFace(Model):
         cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
         return cleaned_dict
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by HuggingFace.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content": message.content if message.content is not None else "",
+            "content": content,
             "name": message.name or message.tool_name,
             "tool_call_id": message.tool_call_id,
             "tool_calls": message.tool_calls,
@@ -236,6 +247,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the HuggingFace Hub.
@@ -247,7 +259,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -269,6 +281,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
@@ -280,7 +293,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
         )
         assistant_message.metrics.stop_timer()
@@ -302,6 +315,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the HuggingFace API.
@@ -314,7 +328,7 @@ class HuggingFace(Model):
 
         stream = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
@@ -340,6 +354,7 @@ class HuggingFace(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[Any]:
         """
         Sends an asynchronous streaming chat completion request to the HuggingFace API.
@@ -351,7 +366,7 @@ class HuggingFace(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],
+            messages=[self._format_message(m, compress_tool_results) for m in messages],
             stream=True,
             stream_options=ChatCompletionInputStreamOptions(include_usage=True),  # type: ignore
             **self.get_request_params(tools=tools, tool_choice=tool_choice),
agno/models/ibm/watsonx.py CHANGED
@@ -59,7 +59,11 @@ class WatsonX(Model):
         # Fetch API key and project ID from env if not already set
         self.api_key = self.api_key or getenv("IBM_WATSONX_API_KEY")
         if not self.api_key:
-            log_error("IBM_WATSONX_API_KEY not set. Please set the IBM_WATSONX_API_KEY environment variable.")
+            raise ModelProviderError(
+                message="IBM_WATSONX_API_KEY not set. Please set the IBM_WATSONX_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         self.project_id = self.project_id or getenv("IBM_WATSONX_PROJECT_ID")
         if not self.project_id:
@@ -129,12 +133,13 @@ class WatsonX(Model):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by WatsonX.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
@@ -151,7 +156,12 @@ class WatsonX(Model):
         if message.videos is not None and len(message.videos) > 0:
             log_warning("Video input is currently unsupported.")
 
-        return message.to_dict()
+        message_dict = message.to_dict()
+
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool" and compress_tool_results:
+            message_dict["content"] = message.get_content(use_compressed_content=True)
+        return message_dict
 
     def invoke(
         self,
@@ -161,6 +171,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the WatsonX API.
@@ -171,7 +182,7 @@ class WatsonX(Model):
 
         client = self.get_client()
 
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
         )
@@ -196,6 +207,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Any:
         """
         Sends an asynchronous chat completion request to the WatsonX API.
@@ -205,7 +217,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()
 
         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
         request_params = self.get_request_params(
             response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -231,13 +243,14 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the WatsonX API.
         """
         try:
             client = self.get_client()
-            formatted_messages = [self._format_message(m) for m in messages]
+            formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
             request_params = self.get_request_params(
                 response_format=response_format, tools=tools, tool_choice=tool_choice
@@ -265,6 +278,7 @@ class WatsonX(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the WatsonX API.
@@ -274,7 +288,7 @@ class WatsonX(Model):
             run_response.metrics.set_time_to_first_token()
 
         client = self.get_client()
-        formatted_messages = [self._format_message(m) for m in messages]
+        formatted_messages = [self._format_message(m, compress_tool_results) for m in messages]
 
         # Get parameters for chat
         request_params = self.get_request_params(
agno/models/internlm/internlm.py CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -24,3 +25,20 @@ class InternLM(OpenAILike):
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("INTERNLM_API_KEY"))
     base_url: Optional[str] = "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/chat/completions"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for INTERNLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("INTERNLM_API_KEY")
+            if not self.api_key:
+                raise ModelProviderError(
+                    message="INTERNLM_API_KEY not set. Please set the INTERNLM_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
+        return super()._get_client_params()
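
The InternLM change above, and the LangDB and LiteLLMOpenAI changes that follow, apply the same guard: the OpenAILike subclass resolves its API key inside _get_client_params(), raises ModelProviderError if it is still unset, and otherwise defers to super()._get_client_params(). A rough sketch of that pattern for a hypothetical provider; ExampleChat, EXAMPLE_API_KEY, and the base_url are illustrative and not part of the package:

    from dataclasses import dataclass, field
    from os import getenv
    from typing import Any, Dict, Optional

    from agno.exceptions import ModelProviderError
    from agno.models.openai.like import OpenAILike


    @dataclass
    class ExampleChat(OpenAILike):
        # Hypothetical OpenAI-compatible provider used only to illustrate the shared pattern
        id: str = "example-chat-1"
        name: str = "ExampleChat"
        api_key: Optional[str] = field(default_factory=lambda: getenv("EXAMPLE_API_KEY"))
        base_url: Optional[str] = "https://api.example.com/v1"

        def _get_client_params(self) -> Dict[str, Any]:
            if not self.api_key:
                self.api_key = getenv("EXAMPLE_API_KEY")
                if not self.api_key:
                    raise ModelProviderError(
                        message="EXAMPLE_API_KEY not set. Please set the EXAMPLE_API_KEY environment variable.",
                        model_name=self.name,
                        model_id=self.id,
                    )
            return super()._get_client_params()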
agno/models/langdb/langdb.py CHANGED
@@ -2,6 +2,7 @@ from dataclasses import dataclass, field
 from os import getenv
 from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -32,6 +33,15 @@ class LangDB(OpenAILike):
     default_headers: Optional[dict] = None
 
     def _get_client_params(self) -> Dict[str, Any]:
+        if not self.api_key:
+            self.api_key = getenv("LANGDB_API_KEY")
+            if not self.api_key:
+                raise ModelProviderError(
+                    message="LANGDB_API_KEY not set. Please set the LANGDB_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
+
         if not self.project_id:
             raise ValueError("LANGDB_PROJECT_ID not set in the environment")
 
agno/models/litellm/chat.py CHANGED
@@ -74,11 +74,17 @@ class LiteLLM(Model):
         self.client = litellm
         return self.client
 
-    def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+    def _format_messages(self, messages: List[Message], compress_tool_results: bool = False) -> List[Dict[str, Any]]:
         """Format messages for LiteLLM API."""
         formatted_messages = []
         for m in messages:
-            msg = {"role": m.role, "content": m.content if m.content is not None else ""}
+            # Use compressed content for tool messages if compression is active
+            if m.role == "tool":
+                content = m.get_content(use_compressed_content=compress_tool_results)
+            else:
+                content = m.content if m.content is not None else ""
+
+            msg = {"role": m.role, "content": content}
 
             # Handle media
             if (m.images is not None and len(m.images) > 0) or (m.audio is not None and len(m.audio) > 0):
@@ -98,7 +104,7 @@ class LiteLLM(Model):
                 if isinstance(msg["content"], str):
                     content_list = [{"type": "text", "text": msg["content"]}]
                 else:
-                    content_list = msg["content"]
+                    content_list = msg["content"] if isinstance(msg["content"], list) else []
                 for file in m.files:
                     file_part = _format_file_for_message(file)
                     if file_part:
@@ -186,10 +192,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends a chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
 
         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -211,10 +218,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """Sends a streaming chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}
 
@@ -236,10 +244,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """Sends an asynchronous chat completion request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
 
         if run_response and run_response.metrics:
             run_response.metrics.set_time_to_first_token()
@@ -261,10 +270,11 @@ class LiteLLM(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """Sends an asynchronous streaming chat request to the LiteLLM API."""
         completion_kwargs = self.get_request_params(tools=tools)
-        completion_kwargs["messages"] = self._format_messages(messages)
+        completion_kwargs["messages"] = self._format_messages(messages, compress_tool_results)
         completion_kwargs["stream"] = True
         completion_kwargs["stream_options"] = {"include_usage": True}
 
agno/models/litellm/litellm_openai.py CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, field
 from os import getenv
-from typing import Optional
+from typing import Any, Dict, Optional
 
+from agno.exceptions import ModelProviderError
 from agno.models.openai.like import OpenAILike
 
 
@@ -23,3 +24,20 @@ class LiteLLMOpenAI(OpenAILike):
 
     api_key: Optional[str] = field(default_factory=lambda: getenv("LITELLM_API_KEY"))
     base_url: str = "http://0.0.0.0:4000"
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for LITELLM_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("LITELLM_API_KEY")
+            if not self.api_key:
+                raise ModelProviderError(
+                    message="LITELLM_API_KEY not set. Please set the LITELLM_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
+        return super()._get_client_params()
agno/models/message.py CHANGED
@@ -59,6 +59,9 @@ class Message(BaseModel):
     role: str
     # The contents of the message.
     content: Optional[Union[List[Any], str]] = None
+    # Compressed content of the message
+    compressed_content: Optional[str] = None
+
     # An optional name for the participant.
     # Provides the model information to differentiate between participants of the same role.
     name: Optional[str] = None
@@ -123,6 +126,12 @@ class Message(BaseModel):
             return json.dumps(self.content)
         return ""
 
+    def get_content(self, use_compressed_content: bool = False) -> Optional[Union[List[Any], str]]:
+        """Return tool result content to send to API"""
+        if use_compressed_content and self.compressed_content is not None:
+            return self.compressed_content
+        return self.content
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "Message":
         # Handle image reconstruction properly
@@ -266,6 +275,7 @@ class Message(BaseModel):
             "content": self.content,
             "reasoning_content": self.reasoning_content,
             "from_history": self.from_history,
+            "compressed_content": self.compressed_content,
             "stop_after_tool_call": self.stop_after_tool_call,
             "role": self.role,
             "name": self.name,
@@ -315,13 +325,14 @@ class Message(BaseModel):
             "created_at": self.created_at,
         }
 
-    def log(self, metrics: bool = True, level: Optional[str] = None):
+    def log(self, metrics: bool = True, level: Optional[str] = None, use_compressed_content: bool = False):
         """Log the message to the console
 
        Args:
            metrics (bool): Whether to log the metrics.
            level (str): The level to log the message at. One of debug, info, warning, or error.
                Defaults to debug.
+           use_compressed_content (bool): Whether to use compressed content.
        """
         _logger = log_debug
         if level == "info":
@@ -348,10 +359,13 @@ class Message(BaseModel):
         if self.reasoning_content:
             _logger(f"<reasoning>\n{self.reasoning_content}\n</reasoning>")
         if self.content:
-            if isinstance(self.content, str) or isinstance(self.content, list):
-                _logger(self.content)
-            elif isinstance(self.content, dict):
-                _logger(json.dumps(self.content, indent=2))
+            if use_compressed_content and self.compressed_content:
+                _logger("Compressed content:\n" + self.compressed_content)
+            else:
+                if isinstance(self.content, str) or isinstance(self.content, list):
+                    _logger(self.content)
+                elif isinstance(self.content, dict):
+                    _logger(json.dumps(self.content, indent=2))
         if self.tool_calls:
             tool_calls_list = ["Tool Calls:"]
             for tool_call in self.tool_calls:
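
Taken together, the Message changes add an optional compressed_content field, persist it in to_dict(), and expose get_content(), which returns the compressed text only when it is both requested and present. A minimal sketch of that accessor, following the logic shown above:

    from agno.models.message import Message

    tool_msg = Message(
        role="tool",
        content="...long raw tool output...",
        compressed_content="short summary of the tool output",
    )

    # Compression requested and available -> the compressed text is returned
    assert tool_msg.get_content(use_compressed_content=True) == "short summary of the tool output"

    # Compression not requested, or no compressed_content stored -> the original content is returned
    assert tool_msg.get_content() == "...long raw tool output..."
    assert Message(role="tool", content="raw").get_content(use_compressed_content=True) == "raw"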
agno/models/meta/llama.py CHANGED
@@ -74,7 +74,11 @@ class Llama(Model):
         if not self.api_key:
             self.api_key = getenv("LLAMA_API_KEY")
             if not self.api_key:
-                log_error("LLAMA_API_KEY not set. Please set the LLAMA_API_KEY environment variable.")
+                raise ModelProviderError(
+                    message="LLAMA_API_KEY not set. Please set the LLAMA_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
 
         # Define base client params
         base_params = {
@@ -217,6 +221,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Llama API.
@@ -225,7 +230,10 @@ class Llama(Model):
 
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )
 
@@ -242,6 +250,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Llama API.
@@ -253,7 +262,10 @@ class Llama(Model):
 
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             **self.get_request_params(tools=tools, response_format=response_format),
         )
 
@@ -270,6 +282,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Llama API.
@@ -282,7 +295,10 @@ class Llama(Model):
 
         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+            messages=[
+                format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                for m in messages
+            ],
             stream=True,
             **self.get_request_params(tools=tools, response_format=response_format),
         ):
@@ -302,6 +318,7 @@ class Llama(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
    ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Llama API.
@@ -314,7 +331,10 @@ class Llama(Model):
         try:
             async for chunk in await self.get_async_client().chat.completions.create(
                 model=self.id,
-                messages=[format_message(m, tool_calls=bool(tools)) for m in messages],  # type: ignore
+                messages=[
+                    format_message(m, tool_calls=bool(tools), compress_tool_results=compress_tool_results)  # type: ignore
+                    for m in messages
+                ],
                 stream=True,
                 **self.get_request_params(tools=tools, response_format=response_format),
             ):
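
Across the provider diffs above, compress_tool_results is threaded from invoke/ainvoke and their streaming variants into each provider's message-formatting helper, which swaps in compressed_content only for messages with role "tool". A small sketch of that formatting step, assuming the Groq.format_message signature from this diff; the api_key and model id are placeholders:

    from agno.models.groq import Groq
    from agno.models.message import Message

    model = Groq(id="llama-3.3-70b-versatile", api_key="placeholder-key")

    tool_msg = Message(
        role="tool",
        tool_call_id="call_1",
        content="...long raw tool output...",
        compressed_content="short summary",
    )

    # With compression active, the compressed text is what gets sent to the provider
    assert model.format_message(tool_msg, compress_tool_results=True)["content"] == "short summary"

    # Non-tool messages are never compressed
    user_msg = Message(role="user", content="hello")
    assert model.format_message(user_msg, compress_tool_results=True)["content"] == "hello"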