agno 2.2.6__py3-none-any.whl → 2.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agno/api/os.py CHANGED
@@ -14,4 +14,4 @@ def log_os_telemetry(launch: OSLaunch) -> None:
         )
         response.raise_for_status()
     except Exception as e:
-        log_debug(f"Could not create OS launch: {e}")
+        log_debug(f"Could not register OS launch for telemetry: {e}")
agno/culture/manager.py CHANGED
@@ -134,9 +134,10 @@ class CultureManager:
         if not self.db:
             return None
 
-        self.db = cast(AsyncBaseDb, self.db)
-
-        return await self.db.get_all_cultural_knowledge(name=name)
+        if isinstance(self.db, AsyncBaseDb):
+            return await self.db.get_all_cultural_knowledge(name=name)
+        else:
+            return self.db.get_all_cultural_knowledge(name=name)
 
     def add_cultural_knowledge(
         self,
@@ -230,7 +231,11 @@ class CultureManager:
         if not messages or not isinstance(messages, list):
             raise ValueError("Invalid messages list")
 
-        knowledge = self.get_all_knowledge()
+        if isinstance(self.db, AsyncBaseDb):
+            knowledge = await self.aget_all_knowledge()
+        else:
+            knowledge = self.get_all_knowledge()
+
         if knowledge is None:
             knowledge = []
 
@@ -32,6 +32,7 @@ class PromptInjectionGuardrail(BaseGuardrail):
         "ignore safeguards",
         "admin override",
         "root access",
+        "forget everything",
     ]
 
     def check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
@@ -0,0 +1,262 @@
+import asyncio
+from dataclasses import dataclass
+from os import getenv
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+
+from agno.knowledge.embedder.base import Embedder
+from agno.utils.log import logger
+
+try:
+    from vllm import LLM  # type: ignore
+    from vllm.outputs import EmbeddingRequestOutput  # type: ignore
+except ImportError:
+    raise ImportError("`vllm` not installed. Please install using `pip install vllm`.")
+
+if TYPE_CHECKING:
+    from openai import AsyncOpenAI
+    from openai import OpenAI as OpenAIClient
+    from openai.types.create_embedding_response import CreateEmbeddingResponse
+
+
+@dataclass
+class VLLMEmbedder(Embedder):
+    """
+    VLLM Embedder supporting both local and remote deployment modes.
+
+    Local Mode (default):
+    - Loads model locally and runs inference on your GPU/CPU
+    - No API key required
+    - Example: VLLMEmbedder(id="intfloat/e5-mistral-7b-instruct")
+
+    Remote Mode:
+    - Connects to a remote vLLM server via OpenAI-compatible API
+    - Uses OpenAI SDK to communicate with vLLM's OpenAI-compatible endpoint
+    - Requires base_url and optionally api_key
+    - Example: VLLMEmbedder(base_url="http://localhost:8000/v1", api_key="your-key")
+    - Ref: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
+    """
+
+    id: str = "sentence-transformers/all-MiniLM-L6-v2"
+    dimensions: int = 4096
+    # Local mode parameters
+    enforce_eager: bool = True
+    vllm_kwargs: Optional[Dict[str, Any]] = None
+    vllm_client: Optional[LLM] = None
+    # Remote mode parameters
+    api_key: Optional[str] = getenv("VLLM_API_KEY")
+    base_url: Optional[str] = None
+    request_params: Optional[Dict[str, Any]] = None
+    client_params: Optional[Dict[str, Any]] = None
+    remote_client: Optional["OpenAIClient"] = None  # OpenAI-compatible client for vLLM server
+    async_remote_client: Optional["AsyncOpenAI"] = None  # Async OpenAI-compatible client for vLLM server
+
+    @property
+    def is_remote(self) -> bool:
+        """Determine if we should use remote mode."""
+        return self.base_url is not None
+
+    def _get_vllm_client(self) -> LLM:
+        """Get local VLLM client."""
+        if self.vllm_client:
+            return self.vllm_client
+
+        _vllm_params: Dict[str, Any] = {
+            "model": self.id,
+            "task": "embed",
+            "enforce_eager": self.enforce_eager,
+        }
+        if self.vllm_kwargs:
+            _vllm_params.update(self.vllm_kwargs)
+        self.vllm_client = LLM(**_vllm_params)
+        return self.vllm_client
+
+    def _get_remote_client(self) -> "OpenAIClient":
+        """Get OpenAI-compatible client for remote vLLM server."""
+        if self.remote_client:
+            return self.remote_client
+
+        try:
+            from openai import OpenAI as OpenAIClient
+        except ImportError:
+            raise ImportError("`openai` package required for remote vLLM mode. ")
+
+        _client_params: Dict[str, Any] = {
+            "api_key": self.api_key or "EMPTY",  # VLLM can run without API key
+            "base_url": self.base_url,
+        }
+        if self.client_params:
+            _client_params.update(self.client_params)
+        self.remote_client = OpenAIClient(**_client_params)
+        return self.remote_client
+
+    def _get_async_remote_client(self) -> "AsyncOpenAI":
+        """Get async OpenAI-compatible client for remote vLLM server."""
+        if self.async_remote_client:
+            return self.async_remote_client
+
+        try:
+            from openai import AsyncOpenAI
+        except ImportError:
+            raise ImportError("`openai` package required for remote vLLM mode. ")
+
+        _client_params: Dict[str, Any] = {
+            "api_key": self.api_key or "EMPTY",
+            "base_url": self.base_url,
+        }
+        if self.client_params:
+            _client_params.update(self.client_params)
+        self.async_remote_client = AsyncOpenAI(**_client_params)
+        return self.async_remote_client
+
+    def _create_embedding_local(self, text: str) -> Optional[EmbeddingRequestOutput]:
+        """Create embedding using local VLLM."""
+        try:
+            outputs = self._get_vllm_client().embed([text])
+            return outputs[0] if outputs else None
+        except Exception as e:
+            logger.warning(f"Error creating local embedding: {e}")
+            return None
+
+    def _create_embedding_remote(self, text: str) -> "CreateEmbeddingResponse":
+        """Create embedding using remote vLLM server."""
+        _request_params: Dict[str, Any] = {
+            "input": text,
+            "model": self.id,
+        }
+        if self.request_params:
+            _request_params.update(self.request_params)
+        return self._get_remote_client().embeddings.create(**_request_params)
+
+    def get_embedding(self, text: str) -> List[float]:
+        try:
+            if self.is_remote:
+                # Remote mode: OpenAI-compatible API
+                response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
+                return response.data[0].embedding
+            else:
+                # Local mode: Direct VLLM
+                output = self._create_embedding_local(text=text)
+                if output and hasattr(output, "outputs") and hasattr(output.outputs, "embedding"):
+                    embedding = output.outputs.embedding
+                    if len(embedding) != self.dimensions:
+                        logger.warning(f"Expected embedding dimension {self.dimensions}, but got {len(embedding)}")
+                    return embedding
+                return []
+        except Exception as e:
+            logger.warning(f"Error extracting embedding: {e}")
+            return []
+
+    def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
+        if self.is_remote:
+            try:
+                response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
+                embedding = response.data[0].embedding
+                usage = response.usage
+                if usage:
+                    return embedding, usage.model_dump()
+                return embedding, None
+            except Exception as e:
+                logger.warning(f"Error in remote embedding: {e}")
+                return [], None
+        else:
+            embedding = self.get_embedding(text=text)
+            # Local VLLM doesn't provide usage information
+            return embedding, None
+
+    async def async_get_embedding(self, text: str) -> List[float]:
+        """Async version of get_embedding using thread executor for local mode."""
+        if self.is_remote:
+            # Remote mode: async client for vLLM server
+            try:
+                req: Dict[str, Any] = {
+                    "input": text,
+                    "model": self.id,
+                }
+                if self.request_params:
+                    req.update(self.request_params)
+                response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
+                return response.data[0].embedding
+            except Exception as e:
+                logger.warning(f"Error in async remote embedding: {e}")
+                return []
+        else:
+            # Local mode: use thread executor for CPU-bound operations
+            loop = asyncio.get_event_loop()
+            return await loop.run_in_executor(None, self.get_embedding, text)
+
+    async def async_get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
+        """Async version of get_embedding_and_usage using thread executor for local mode."""
+        if self.is_remote:
+            try:
+                req: Dict[str, Any] = {
+                    "input": text,
+                    "model": self.id,
+                }
+                if self.request_params:
+                    req.update(self.request_params)
+                response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
+                embedding = response.data[0].embedding
+                usage = response.usage
+                return embedding, usage.model_dump() if usage else None
+            except Exception as e:
+                logger.warning(f"Error in async remote embedding: {e}")
+                return [], None
+        else:
+            # Local mode: use thread executor for CPU-bound operations
+            try:
+                loop = asyncio.get_event_loop()
+                return await loop.run_in_executor(None, self.get_embedding_and_usage, text)
+            except Exception as e:
+                logger.warning(f"Error in async local embedding: {e}")
+                return [], None
+
+    async def async_get_embeddings_batch_and_usage(
+        self, texts: List[str]
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """
+        Get embeddings and usage for multiple texts in batches (async version).
+
+        Args:
+            texts: List of text strings to embed
+
+        Returns:
+            Tuple of (List of embedding vectors, List of usage dictionaries)
+        """
+        all_embeddings = []
+        all_usage = []
+        logger.info(f"Getting embeddings for {len(texts)} texts in batches of {self.batch_size} (async)")
+
+        for i in range(0, len(texts), self.batch_size):
+            batch_texts = texts[i : i + self.batch_size]
+
+            try:
+                if self.is_remote:
+                    # Remote mode: use batch API
+                    req: Dict[str, Any] = {
+                        "input": batch_texts,
+                        "model": self.id,
+                    }
+                    if self.request_params:
+                        req.update(self.request_params)
+                    response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
+                    batch_embeddings = [data.embedding for data in response.data]
+                    all_embeddings.extend(batch_embeddings)
+
+                    # For each embedding in the batch, add the same usage information
+                    usage_dict = response.usage.model_dump() if response.usage else None
+                    all_usage.extend([usage_dict] * len(batch_embeddings))
+                else:
+                    # Local mode: process individually using thread executor
+                    for text in batch_texts:
+                        embedding, usage = await self.async_get_embedding_and_usage(text)
+                        all_embeddings.append(embedding)
+                        all_usage.append(usage)
+
+            except Exception as e:
+                logger.warning(f"Error in async batch embedding: {e}")
+                # Fallback: add empty results for failed batch
+                for _ in batch_texts:
+                    all_embeddings.append([])
+                    all_usage.append(None)
+
+        return all_embeddings, all_usage
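The class docstring above already sketches both construction styles. A minimal usage sketch based on that docstring follows; the import path is an assumption, since this hunk does not show where the new file lives in the package, and it presumes either a local `vllm` install or a running vLLM server reachable through the `openai` client.

# Hedged usage sketch; the module path below is assumed, not shown in the diff.
from agno.knowledge.embedder.vllm import VLLMEmbedder  # assumed location of the new file

# Local mode (default): loads the model in-process on your GPU/CPU, no API key needed.
local_embedder = VLLMEmbedder(id="intfloat/e5-mistral-7b-instruct")
vector = local_embedder.get_embedding("The quick brown fox")

# Remote mode: connects to a vLLM server's OpenAI-compatible endpoint.
remote_embedder = VLLMEmbedder(
    id="intfloat/e5-mistral-7b-instruct",
    base_url="http://localhost:8000/v1",
    api_key="your-key",  # optional; vLLM servers can run without auth
)
vector, usage = remote_embedder.get_embedding_and_usage("The quick brown fox")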
@@ -4,7 +4,6 @@ import io
 import time
 from dataclasses import dataclass
 from enum import Enum
-from functools import cached_property
 from io import BytesIO
 from os.path import basename
 from pathlib import Path
@@ -187,10 +186,14 @@ class Knowledge:
         paths: Optional[List[str]] = None,
         urls: Optional[List[str]] = None,
         metadata: Optional[Dict[str, str]] = None,
+        topics: Optional[List[str]] = None,
+        text_contents: Optional[List[str]] = None,
+        reader: Optional[Reader] = None,
         include: Optional[List[str]] = None,
         exclude: Optional[List[str]] = None,
         upsert: bool = True,
         skip_if_exists: bool = False,
+        remote_content: Optional[RemoteContent] = None,
     ) -> None: ...
 
     def add_contents(self, *args, **kwargs) -> None:
@@ -208,10 +211,14 @@ class Knowledge:
             paths: Optional list of file paths to load content from
             urls: Optional list of URLs to load content from
             metadata: Optional metadata dictionary to apply to all content
+            topics: Optional list of topics to add
+            text_contents: Optional list of text content strings to add
+            reader: Optional reader to use for processing content
             include: Optional list of file patterns to include
             exclude: Optional list of file patterns to exclude
             upsert: Whether to update existing content if it already exists
             skip_if_exists: Whether to skip adding content if it already exists
+            remote_content: Optional remote content (S3, GCS, etc.) to add
         """
         asyncio.run(self.add_contents_async(*args, **kwargs))
 
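The docstring hunk above documents the new add_contents parameters (topics, text_contents, reader, remote_content). A hypothetical call using them, assuming an already-configured Knowledge instance named knowledge; constructor arguments and reader/remote_content values are omitted because the diff does not show them.

# Hypothetical sketch of the extended add_contents signature; `knowledge` is an
# assumed, pre-configured Knowledge instance.
knowledge.add_contents(
    urls=["https://example.com/guide.pdf"],
    text_contents=["Agno agents can search this knowledge base."],
    topics=["agents"],
    metadata={"source": "docs"},
    upsert=True,
    skip_if_exists=False,
)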
@@ -1449,14 +1456,16 @@ class Knowledge:
     def get_valid_filters(self) -> Set[str]:
         if self.valid_metadata_filters is None:
             self.valid_metadata_filters = set()
-            self.valid_metadata_filters.update(self._get_filters_from_db)
+            self.valid_metadata_filters.update(self._get_filters_from_db())
         return self.valid_metadata_filters
 
-    def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+    async def aget_valid_filters(self) -> Set[str]:
         if self.valid_metadata_filters is None:
             self.valid_metadata_filters = set()
-            self.valid_metadata_filters.update(self._get_filters_from_db)
+            self.valid_metadata_filters.update(await self._aget_filters_from_db())
+        return self.valid_metadata_filters
 
+    def _validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
         if not filters:
             return {}, []
 
@@ -1480,6 +1489,20 @@
 
         return valid_filters, invalid_keys
 
+    def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+        if self.valid_metadata_filters is None:
+            self.valid_metadata_filters = set()
+            self.valid_metadata_filters.update(self._get_filters_from_db())
+
+        return self._validate_filters(filters)
+
+    async def async_validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
+        if self.valid_metadata_filters is None:
+            self.valid_metadata_filters = set()
+            self.valid_metadata_filters.update(await self._aget_filters_from_db())
+
+        return self._validate_filters(filters)
+
     def add_filters(self, metadata: Dict[str, Any]) -> None:
         if self.valid_metadata_filters is None:
             self.valid_metadata_filters = set()
@@ -1488,7 +1511,6 @@
         for key in metadata.keys():
             self.valid_metadata_filters.add(key)
 
-    @cached_property
     def _get_filters_from_db(self) -> Set[str]:
         if self.contents_db is None:
             return set()
@@ -1499,6 +1521,16 @@
                 valid_filters.update(content.metadata.keys())
         return valid_filters
 
+    async def _aget_filters_from_db(self) -> Set[str]:
+        if self.contents_db is None:
+            return set()
+        contents, _ = await self.aget_content()
+        valid_filters: Set[str] = set()
+        for content in contents:
+            if content.metadata:
+                valid_filters.update(content.metadata.keys())
+        return valid_filters
+
     def remove_vector_by_id(self, id: str) -> bool:
         from agno.vectordb import VectorDb
 
agno/models/base.py CHANGED
@@ -31,7 +31,7 @@ from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
 from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
 from agno.run.team import RunContentEvent as TeamRunContentEvent
-from agno.run.team import TeamRunOutputEvent
+from agno.run.team import TeamRunOutput, TeamRunOutputEvent
 from agno.run.workflow import WorkflowRunOutputEvent
 from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
 from agno.utils.log import log_debug, log_error, log_info, log_warning
@@ -53,6 +53,8 @@ class MessageData:
     response_video: Optional[Video] = None
     response_file: Optional[File] = None
 
+    response_metrics: Optional[Metrics] = None
+
     # Data from the provider that we might need on subsequent messages
     response_provider_data: Optional[Dict[str, Any]] = None
 
@@ -308,7 +310,7 @@ class Model(ABC):
         tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
@@ -482,6 +484,7 @@ class Model(ABC):
         tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
@@ -517,6 +520,7 @@ class Model(ABC):
             response_format=response_format,
             tools=_tool_dicts,
             tool_choice=tool_choice or self._tool_choice,
+            run_response=run_response,
         )
 
         # Add assistant message to messages
@@ -644,7 +648,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -697,7 +701,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -757,7 +761,6 @@ class Model(ABC):
         Returns:
             Message: The populated assistant message
         """
-        # Add role to assistant message
         if provider_response.role is not None:
             assistant_message.role = provider_response.role
 
@@ -821,7 +824,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Iterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -835,14 +838,14 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
            run_response=run_response,
         ):
-            yield from self._populate_stream_data_and_assistant_message(
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
-            )
+            ):
+                yield model_response_delta
 
-        # Add final metrics to assistant message
-        self._populate_assistant_message(assistant_message=assistant_message, provider_response=response_delta)
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
 
     def response_stream(
         self,
@@ -852,7 +855,7 @@ class Model(ABC):
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
@@ -906,22 +909,6 @@ class Model(ABC):
                     streaming_responses.append(response)
                 yield response
 
-            # Populate assistant message from stream data
-            if stream_data.response_content:
-                assistant_message.content = stream_data.response_content
-            if stream_data.response_reasoning_content:
-                assistant_message.reasoning_content = stream_data.response_reasoning_content
-            if stream_data.response_redacted_reasoning_content:
-                assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-            if stream_data.response_provider_data:
-                assistant_message.provider_data = stream_data.response_provider_data
-            if stream_data.response_citations:
-                assistant_message.citations = stream_data.response_citations
-            if stream_data.response_audio:
-                assistant_message.audio_output = stream_data.response_audio
-            if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
-
         else:
             self._process_model_response(
                 messages=messages,
@@ -1020,7 +1007,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
    ) -> AsyncIterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -1033,15 +1020,14 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
         ):  # type: ignore
-            for model_response in self._populate_stream_data_and_assistant_message(
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
             ):
-                yield model_response
+                yield model_response_delta
 
-        # Populate the assistant message
-        self._populate_assistant_message(assistant_message=assistant_message, provider_response=model_response)
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
 
     async def aresponse_stream(
         self,
@@ -1051,7 +1037,7 @@ class Model(ABC):
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
@@ -1105,20 +1091,6 @@ class Model(ABC):
                     streaming_responses.append(model_response)
                 yield model_response
 
-            # Populate assistant message from stream data
-            if stream_data.response_content:
-                assistant_message.content = stream_data.response_content
-            if stream_data.response_reasoning_content:
-                assistant_message.reasoning_content = stream_data.response_reasoning_content
-            if stream_data.response_redacted_reasoning_content:
-                assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-            if stream_data.response_provider_data:
-                assistant_message.provider_data = stream_data.response_provider_data
-            if stream_data.response_audio:
-                assistant_message.audio_output = stream_data.response_audio
-            if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
-
         else:
             await self._aprocess_model_response(
                 messages=messages,
@@ -1210,15 +1182,51 @@ class Model(ABC):
         if self.cache_response and cache_key and streaming_responses:
             self._save_streaming_responses_to_cache(cache_key, streaming_responses)
 
-    def _populate_stream_data_and_assistant_message(
-        self, stream_data: MessageData, assistant_message: Message, model_response_delta: ModelResponse
+    def _populate_assistant_message_from_stream_data(
+        self, assistant_message: Message, stream_data: MessageData
+    ) -> None:
+        """
+        Populate an assistant message with the stream data.
+        """
+        if stream_data.response_role is not None:
+            assistant_message.role = stream_data.response_role
+        if stream_data.response_metrics is not None:
+            assistant_message.metrics = stream_data.response_metrics
+        if stream_data.response_content:
+            assistant_message.content = stream_data.response_content
+        if stream_data.response_reasoning_content:
+            assistant_message.reasoning_content = stream_data.response_reasoning_content
+        if stream_data.response_redacted_reasoning_content:
+            assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
+        if stream_data.response_provider_data:
+            assistant_message.provider_data = stream_data.response_provider_data
+        if stream_data.response_citations:
+            assistant_message.citations = stream_data.response_citations
+        if stream_data.response_audio:
+            assistant_message.audio_output = stream_data.response_audio
+        if stream_data.response_image:
+            assistant_message.image_output = stream_data.response_image
+        if stream_data.response_video:
+            assistant_message.video_output = stream_data.response_video
+        if stream_data.response_file:
+            assistant_message.file_output = stream_data.response_file
+        if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
+            assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+
+    def _populate_stream_data(
+        self, stream_data: MessageData, model_response_delta: ModelResponse
     ) -> Iterator[ModelResponse]:
         """Update the stream data and assistant message with the model response."""
-        # Add role to assistant message
-        if model_response_delta.role is not None:
-            assistant_message.role = model_response_delta.role
 
         should_yield = False
+        if model_response_delta.role is not None:
+            stream_data.response_role = model_response_delta.role  # type: ignore
+
+        if model_response_delta.response_usage is not None:
+            if stream_data.response_metrics is None:
+                stream_data.response_metrics = Metrics()
+            stream_data.response_metrics += model_response_delta.response_usage
+
         # Update stream_data content
         if model_response_delta.content is not None:
             stream_data.response_content += model_response_delta.content