agno-2.3.2-py3-none-any.whl → agno-2.3.4-py3-none-any.whl
- agno/agent/agent.py +513 -185
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +176 -0
- agno/db/dynamo/dynamo.py +11 -0
- agno/db/firestore/firestore.py +5 -1
- agno/db/gcs_json/gcs_json_db.py +5 -2
- agno/db/in_memory/in_memory_db.py +5 -2
- agno/db/json/json_db.py +5 -1
- agno/db/migrations/manager.py +4 -4
- agno/db/mongo/async_mongo.py +158 -34
- agno/db/mongo/mongo.py +6 -2
- agno/db/mysql/mysql.py +48 -54
- agno/db/postgres/async_postgres.py +66 -52
- agno/db/postgres/postgres.py +42 -50
- agno/db/redis/redis.py +5 -0
- agno/db/redis/utils.py +5 -5
- agno/db/singlestore/singlestore.py +99 -108
- agno/db/sqlite/async_sqlite.py +29 -27
- agno/db/sqlite/sqlite.py +30 -26
- agno/knowledge/reader/pdf_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +0 -1
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +217 -4
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +67 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +18 -0
- agno/models/anthropic/claude.py +87 -81
- agno/models/aws/bedrock.py +38 -16
- agno/models/aws/claude.py +97 -277
- agno/models/azure/ai_foundry.py +8 -4
- agno/models/base.py +101 -14
- agno/models/cerebras/cerebras.py +25 -9
- agno/models/cerebras/cerebras_openai.py +22 -2
- agno/models/cohere/chat.py +18 -6
- agno/models/cometapi/cometapi.py +19 -1
- agno/models/deepinfra/deepinfra.py +19 -1
- agno/models/fireworks/fireworks.py +19 -1
- agno/models/google/gemini.py +583 -21
- agno/models/groq/groq.py +23 -6
- agno/models/huggingface/huggingface.py +22 -7
- agno/models/ibm/watsonx.py +21 -7
- agno/models/internlm/internlm.py +19 -1
- agno/models/langdb/langdb.py +10 -0
- agno/models/litellm/chat.py +17 -7
- agno/models/litellm/litellm_openai.py +19 -1
- agno/models/message.py +19 -5
- agno/models/meta/llama.py +25 -5
- agno/models/meta/llama_openai.py +18 -0
- agno/models/mistral/mistral.py +13 -5
- agno/models/nvidia/nvidia.py +19 -1
- agno/models/ollama/chat.py +17 -6
- agno/models/openai/chat.py +22 -7
- agno/models/openai/responses.py +28 -10
- agno/models/openrouter/openrouter.py +20 -0
- agno/models/perplexity/perplexity.py +17 -0
- agno/models/requesty/requesty.py +18 -0
- agno/models/sambanova/sambanova.py +19 -1
- agno/models/siliconflow/siliconflow.py +19 -1
- agno/models/together/together.py +19 -1
- agno/models/vercel/v0.py +19 -1
- agno/models/vertexai/claude.py +99 -5
- agno/models/xai/xai.py +18 -0
- agno/os/interfaces/agui/router.py +1 -0
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/router.py +16 -0
- agno/os/routers/memory/memory.py +143 -0
- agno/os/routers/memory/schemas.py +26 -0
- agno/os/schema.py +33 -6
- agno/os/utils.py +134 -10
- agno/run/base.py +2 -1
- agno/run/workflow.py +1 -1
- agno/team/team.py +566 -219
- agno/tools/mcp/mcp.py +1 -1
- agno/utils/agent.py +119 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +12 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +37 -2
- agno/utils/print_response/team.py +52 -0
- agno/utils/tokens.py +41 -0
- agno/workflow/types.py +2 -2
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/METADATA +45 -40
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/RECORD +90 -83
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/WHEEL +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.2.dist-info → agno-2.3.4.dist-info}/top_level.txt +0 -0
agno/models/base.py
CHANGED

```diff
@@ -312,6 +312,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> ModelResponse:
         """
         Generate a response from the model.
@@ -348,6 +349,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         while True:
             # Get response from model
             assistant_message = Message(role=self.assistant_message_role)
@@ -359,13 +362,14 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
 
             # Add assistant message to messages
             messages.append(assistant_message)
 
             # Log response and metrics
-            assistant_message.log(metrics=True)
+            assistant_message.log(metrics=True, use_compressed_content=_compress_tool_results)
 
             # Handle tool calls if present
             if assistant_message.tool_calls:
@@ -433,9 +437,17 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)
 
+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    compression_manager.compress(all_messages)
+
                 # Format and add results to messages
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                 )
 
                 if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
@@ -447,7 +459,7 @@ class Model(ABC):
                 )
 
                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
@@ -499,6 +511,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> ModelResponse:
         """
         Generate an asynchronous response from the model.
@@ -523,6 +536,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0
 
         while True:
@@ -536,6 +551,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
 
             # Add assistant message to messages
@@ -609,9 +625,17 @@ class Model(ABC):
                 # Add a function call for each successful execution
                 function_call_count += len(function_call_results)
 
+                all_messages = messages + function_call_results
+                # Compress tool results
+                if compression_manager and compression_manager.should_compress(all_messages):
+                    await compression_manager.acompress(all_messages)
+
                 # Format and add results to messages
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                 )
 
                 if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
@@ -623,7 +647,7 @@ class Model(ABC):
                 )
 
                 for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
                 # Check if we should stop after tool calls
                 if any(m.stop_after_tool_call for m in function_call_results):
@@ -675,6 +699,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -690,6 +715,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
@@ -730,6 +756,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -745,6 +772,7 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
             assistant_message=assistant_message,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
@@ -855,6 +883,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -867,6 +896,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         ):
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
@@ -887,6 +917,7 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate a streaming response from the model.
@@ -919,6 +950,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0
 
         while True:
@@ -936,6 +969,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             ):
                 if self.cache_response and isinstance(response, ModelResponse):
                     streaming_responses.append(response)
@@ -949,6 +983,8 @@ class Model(ABC):
                 response_format=response_format,
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
+                run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
             if self.cache_response:
                 streaming_responses.append(model_response)
@@ -980,18 +1016,31 @@ class Model(ABC):
             # Add a function call for each successful execution
             function_call_count += len(function_call_results)
 
+            all_messages = messages + function_call_results
+            # Compress tool results
+            if compression_manager and compression_manager.should_compress(all_messages):
+                compression_manager.compress(all_messages)
+
             # Format and add results to messages
             if stream_data and stream_data.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **stream_data.extra,
                 )
             elif model_response and model_response.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra,
                 )
             else:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
                 )
 
             # Handle function call media
@@ -1003,7 +1052,7 @@ class Model(ABC):
             )
 
             for function_call_result in function_call_results:
-                function_call_result.log(metrics=True)
+                function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
             # Check if we should stop after tool calls
             if any(m.stop_after_tool_call for m in function_call_results):
@@ -1053,6 +1102,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -1064,6 +1114,7 @@ class Model(ABC):
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
        ):  # type: ignore
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
@@ -1084,6 +1135,7 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional[Any] = None,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate an asynchronous streaming response from the model.
@@ -1116,6 +1168,8 @@ class Model(ABC):
         _tool_dicts = self._format_tools(tools) if tools is not None else []
         _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
+        _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+
         function_call_count = 0
 
         while True:
@@ -1133,6 +1187,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             ):
                 if self.cache_response and isinstance(model_response, ModelResponse):
                     streaming_responses.append(model_response)
@@ -1147,6 +1202,7 @@ class Model(ABC):
                 tools=_tool_dicts,
                 tool_choice=tool_choice or self._tool_choice,
                 run_response=run_response,
+                compress_tool_results=_compress_tool_results,
             )
             if self.cache_response:
                 streaming_responses.append(model_response)
@@ -1178,18 +1234,31 @@ class Model(ABC):
             # Add a function call for each successful execution
             function_call_count += len(function_call_results)
 
+            all_messages = messages + function_call_results
+            # Compress tool results
+            if compression_manager and compression_manager.should_compress(all_messages):
+                await compression_manager.acompress(all_messages)
+
             # Format and add results to messages
             if stream_data and stream_data.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **stream_data.extra,
                 )
             elif model_response and model_response.extra is not None:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
+                    **model_response.extra or {},
                )
             else:
                 self.format_function_call_results(
-                    messages=messages,
+                    messages=messages,
+                    function_call_results=function_call_results,
+                    compress_tool_results=_compress_tool_results,
                 )
 
             # Handle function call media
@@ -1201,7 +1270,7 @@ class Model(ABC):
             )
 
             for function_call_result in function_call_results:
-                function_call_result.log(metrics=True)
+                function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
             # Check if we should stop after tool calls
             if any(m.stop_after_tool_call for m in function_call_results):
@@ -1490,11 +1559,15 @@ class Model(ABC):
 
         # Run function calls sequentially
         function_execution_result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
+        stop_after_tool_call_from_exception = False
        try:
             function_execution_result = function_call.execute()
         except AgentRunException as a_exc:
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
         # Set function call success to False if an exception occurred
         except Exception as e:
             log_error(f"Error executing function {function_call.function.name}: {e}")
@@ -1583,6 +1656,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
@@ -2022,10 +2098,14 @@ class Model(ABC):
         updated_session_state = function_execution_result.updated_session_state
 
         # Handle AgentRunException
+        stop_after_tool_call_from_exception = False
         if isinstance(function_call_success, AgentRunException):
             a_exc = function_call_success
             # Update additional messages from function call
             _handle_agent_exception(a_exc, additional_input)
+            # If stop_execution is True, mark that we should stop after this tool call
+            if a_exc.stop_execution:
+                stop_after_tool_call_from_exception = True
             # Set function call success to False if an exception occurred
             function_call_success = False
 
@@ -2097,6 +2177,9 @@ class Model(ABC):
             timer=function_call_timer,
             function_execution_result=function_execution_result,
         )
+        # Override stop_after_tool_call if set by exception
+        if stop_after_tool_call_from_exception:
+            function_call_result.stop_after_tool_call = True
         yield ModelResponse(
             content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
@@ -2146,7 +2229,11 @@ class Model(ABC):
         return function_calls_to_run
 
     def format_function_call_results(
-        self,
+        self,
+        messages: List[Message],
+        function_call_results: List[Message],
+        compress_tool_results: bool = False,
+        **kwargs,
     ) -> None:
         """
         Format function call results.
```
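Taken together, the base.py hunks treat the compression manager as a duck-typed dependency: the base `Model` only ever touches `compression_manager.compress_tool_results`, `should_compress(...)`, `compress(...)`, and `acompress(...)`. Below is a minimal sketch of that implied interface, written as a `typing.Protocol` purely for illustration; the concrete class ships in the new `agno/compression/manager.py`, which this diff does not expand.

```python
from typing import List, Protocol

from agno.models.message import Message


class CompressionManagerLike(Protocol):
    """The surface the base Model relies on in the hunks above.

    Illustrative only: the real implementation is agno's new
    agno/compression/manager.py, which is not shown in this diff.
    """

    # When True, providers serialize tool messages with their compressed
    # content and Message.log() is called with use_compressed_content=True.
    compress_tool_results: bool

    def should_compress(self, messages: List[Message]) -> bool:
        """Decide whether the accumulated messages warrant compression."""
        ...

    def compress(self, messages: List[Message]) -> None:
        """Compress tool results in place (sync code paths)."""
        ...

    async def acompress(self, messages: List[Message]) -> None:
        """Compress tool results in place (async code paths)."""
        ...
```

Note that `all_messages = messages + function_call_results` copies only the list, not the `Message` objects, so compressing `all_messages` mutates the same messages that `format_function_call_results(...)` then appends; providers therefore serialize the already-compressed tool content.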
agno/models/cerebras/cerebras.py
CHANGED

```diff
@@ -7,13 +7,14 @@ from typing import Any, Dict, Iterator, List, Optional, Type, Union
 import httpx
 from pydantic import BaseModel
 
+from agno.exceptions import ModelProviderError
 from agno.models.base import Model
 from agno.models.message import Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
 from agno.run.agent import RunOutput
 from agno.utils.http import get_default_async_client, get_default_sync_client
-from agno.utils.log import log_debug,
+from agno.utils.log import log_debug, log_warning
 
 try:
     from cerebras.cloud.sdk import AsyncCerebras as AsyncCerebrasClient
@@ -77,7 +78,11 @@ class Cerebras(Model):
         if not self.api_key:
             self.api_key = getenv("CEREBRAS_API_KEY")
             if not self.api_key:
-
+                raise ModelProviderError(
+                    message="CEREBRAS_API_KEY not set. Please set the CEREBRAS_API_KEY environment variable.",
+                    model_name=self.name,
+                    model_id=self.id,
+                )
 
         # Define base client params
         base_params = {
@@ -212,6 +217,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Send a chat completion request to the Cerebras API.
@@ -228,7 +234,7 @@ class Cerebras(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools),
         )
         assistant_message.metrics.stop_timer()
@@ -245,6 +251,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Sends an asynchronous chat completion request to the Cerebras API.
@@ -261,7 +268,7 @@ class Cerebras(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             **self.get_request_params(response_format=response_format, tools=tools),
         )
         assistant_message.metrics.stop_timer()
@@ -278,6 +285,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Send a streaming chat completion request to the Cerebras API.
@@ -295,7 +303,7 @@ class Cerebras(Model):
 
         for chunk in self.get_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools),
         ):
@@ -311,6 +319,7 @@ class Cerebras(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Sends an asynchronous streaming chat completion request to the Cerebras API.
@@ -328,7 +337,7 @@ class Cerebras(Model):
 
         async_stream = await self.get_async_client().chat.completions.create(
             model=self.id,
-            messages=[self._format_message(m) for m in messages],  # type: ignore
+            messages=[self._format_message(m, compress_tool_results) for m in messages],  # type: ignore
             stream=True,
             **self.get_request_params(response_format=response_format, tools=tools),
         )
@@ -338,20 +347,27 @@ class Cerebras(Model):
 
         assistant_message.metrics.stop_timer()
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by the Cerebras API.
 
         Args:
             message (Message): The message to format.
+            compress_tool_results: Whether to compress tool results.
 
         Returns:
             Dict[str, Any]: The formatted message.
         """
+        # Use compressed content for tool messages if compression is active
+        if message.role == "tool":
+            content = message.get_content(use_compressed_content=compress_tool_results)
+        else:
+            content = message.content if message.content is not None else ""
+
         # Basic message content
         message_dict: Dict[str, Any] = {
             "role": message.role,
-            "content":
+            "content": content,
         }
 
         # Add name if present
@@ -380,7 +396,7 @@ class Cerebras(Model):
             message_dict = {
                 "role": "tool",
                 "tool_call_id": message.tool_call_id,
-                "content":
+                "content": content,
             }
 
         # Ensure no None values in the message
```

agno/models/cerebras/cerebras_openai.py
CHANGED

```diff
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Type, Union
 
 from pydantic import BaseModel
 
+from agno.exceptions import ModelProviderError
 from agno.models.message import Message
 from agno.models.openai.like import OpenAILike
 from agno.utils.log import log_debug
@@ -20,6 +21,23 @@ class CerebrasOpenAI(OpenAILike):
     base_url: str = "https://api.cerebras.ai/v1"
     api_key: Optional[str] = field(default_factory=lambda: getenv("CEREBRAS_API_KEY", None))
 
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Returns client parameters for API requests, checking for CEREBRAS_API_KEY.
+
+        Returns:
+            Dict[str, Any]: A dictionary of client parameters for API requests.
+        """
+        if not self.api_key:
+            self.api_key = getenv("CEREBRAS_API_KEY")
+        if not self.api_key:
+            raise ModelProviderError(
+                message="CEREBRAS_API_KEY not set. Please set the CEREBRAS_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
+        return super()._get_client_params()
+
     def get_request_params(
         self,
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
@@ -61,7 +79,7 @@ class CerebrasOpenAI(OpenAILike):
         log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
         return request_params
 
-    def _format_message(self, message: Message) -> Dict[str, Any]:
+    def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
         """
         Format a message into the format expected by the Cerebras API.
 
@@ -71,6 +89,7 @@ class CerebrasOpenAI(OpenAILike):
         Returns:
             Dict[str, Any]: The formatted message.
         """
+
         # Basic message content
         message_dict: Dict[str, Any] = {
             "role": message.role,
@@ -100,10 +119,11 @@ class CerebrasOpenAI(OpenAILike):
 
         # Handle tool responses
         if message.role == "tool" and message.tool_call_id:
+            content = message.get_content(use_compressed_content=compress_tool_results)
             message_dict = {
                 "role": "tool",
                 "tool_call_id": message.tool_call_id,
-                "content":
+                "content": content if message.content is not None else "",
             }
 
         # Ensure no None values in the message
```
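The `_format_message` changes are the per-provider half of the feature: only `tool`-role messages swap in compressed content, while every other role is serialized unchanged. Here is a toy sketch of that dispatch using a stand-in message class; the real `Message.get_content(use_compressed_content=...)` lands in this release's `agno/models/message.py` changes, and the `compressed_content` field name below is a guess for illustration only.

```python
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class ToyMessage:
    """Stand-in for agno.models.message.Message; illustration only."""

    role: str
    content: Optional[str] = None
    compressed_content: Optional[str] = None  # hypothetical field name

    def get_content(self, use_compressed_content: bool = False) -> str:
        # Prefer the compressed form when requested and available.
        if use_compressed_content and self.compressed_content is not None:
            return self.compressed_content
        return self.content or ""


def format_message(message: ToyMessage, compress_tool_results: bool = False) -> Dict[str, Any]:
    # Mirrors the Cerebras _format_message logic above: only tool
    # results are eligible for compressed content.
    if message.role == "tool":
        content = message.get_content(use_compressed_content=compress_tool_results)
    else:
        content = message.content if message.content is not None else ""
    return {"role": message.role, "content": content}


tool_msg = ToyMessage(role="tool", content="...full tool output...", compressed_content="3-line summary")
assert format_message(tool_msg, compress_tool_results=True)["content"] == "3-line summary"
assert format_message(tool_msg)["content"] == "...full tool output..."
```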
agno/models/cohere/chat.py
CHANGED

```diff
@@ -65,7 +65,11 @@ class Cohere(Model):
 
         self.api_key = self.api_key or getenv("CO_API_KEY")
         if not self.api_key:
-
+            raise ModelProviderError(
+                message="CO_API_KEY not set. Please set the CO_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         _client_params["api_key"] = self.api_key
 
@@ -92,7 +96,11 @@ class Cohere(Model):
         self.api_key = self.api_key or getenv("CO_API_KEY")
 
         if not self.api_key:
-
+            raise ModelProviderError(
+                message="CO_API_KEY not set. Please set the CO_API_KEY environment variable.",
+                model_name=self.name,
+                model_id=self.id,
+            )
 
         _client_params["api_key"] = self.api_key
 
@@ -181,6 +189,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Invoke a non-streamed chat response from the Cohere API.
@@ -194,7 +203,7 @@ class Cohere(Model):
         assistant_message.metrics.start_timer()
         provider_response = self.get_client().chat(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         )  # type: ignore
         assistant_message.metrics.stop_timer()
@@ -215,6 +224,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
         Invoke a streamed chat response from the Cohere API.
@@ -231,7 +241,7 @@ class Cohere(Model):
 
         for response in self.get_client().chat_stream(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         ):
             model_response, tool_use = self._parse_provider_response_delta(response, tool_use=tool_use)
@@ -251,6 +261,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> ModelResponse:
         """
         Asynchronously invoke a non-streamed chat response from the Cohere API.
@@ -264,7 +275,7 @@ class Cohere(Model):
         assistant_message.metrics.start_timer()
         provider_response = await self.get_async_client().chat(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         )
         assistant_message.metrics.stop_timer()
@@ -285,6 +296,7 @@ class Cohere(Model):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[RunOutput] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
         Asynchronously invoke a streamed chat response from the Cohere API.
@@ -301,7 +313,7 @@ class Cohere(Model):
 
         async for response in self.get_async_client().chat_stream(
             model=self.id,
-            messages=format_messages(messages),  # type: ignore
+            messages=format_messages(messages, compress_tool_results),  # type: ignore
             **request_kwargs,
         ):
             model_response, tool_use = self._parse_provider_response_delta(response, tool_use=tool_use)
```
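The Cohere hunks share a second theme with the Cerebras ones: a missing API key now raises `ModelProviderError` when client parameters are assembled, instead of surfacing as an opaque failure inside the provider SDK. A sketch of what that looks like from calling code, assuming `CO_API_KEY` is unset in the environment and that building the client (e.g. via `get_client()`) triggers the parameter check, as the hunks suggest:

```python
from agno.exceptions import ModelProviderError
from agno.models.cohere import Cohere

try:
    # No api_key argument and no CO_API_KEY in the environment:
    # the parameter check raises before any request is sent.
    Cohere().get_client()
except ModelProviderError as exc:
    print(exc)  # CO_API_KEY not set. Please set the CO_API_KEY environment variable.
```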