PyPI - agno - Versions diffs - 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

agno 2.0.0rc2-py3-none-any.whl → 2.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (331) hide show

agno/agent/agent.py +6009 -2874
agno/api/api.py +2 -0
agno/api/os.py +1 -1
agno/culture/__init__.py +3 -0
agno/culture/manager.py +956 -0
agno/db/async_postgres/__init__.py +3 -0
agno/db/base.py +385 -6
agno/db/dynamo/dynamo.py +388 -81
agno/db/dynamo/schemas.py +47 -10
agno/db/dynamo/utils.py +63 -4
agno/db/firestore/firestore.py +435 -64
agno/db/firestore/schemas.py +11 -0
agno/db/firestore/utils.py +102 -4
agno/db/gcs_json/gcs_json_db.py +384 -42
agno/db/gcs_json/utils.py +60 -26
agno/db/in_memory/in_memory_db.py +351 -66
agno/db/in_memory/utils.py +60 -2
agno/db/json/json_db.py +339 -48
agno/db/json/utils.py +60 -26
agno/db/migrations/manager.py +199 -0
agno/db/migrations/v1_to_v2.py +510 -37
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +938 -0
agno/db/mongo/__init__.py +15 -1
agno/db/mongo/async_mongo.py +2036 -0
agno/db/mongo/mongo.py +653 -76
agno/db/mongo/schemas.py +13 -0
agno/db/mongo/utils.py +80 -8
agno/db/mysql/mysql.py +687 -25
agno/db/mysql/schemas.py +61 -37
agno/db/mysql/utils.py +60 -2
agno/db/postgres/__init__.py +2 -1
agno/db/postgres/async_postgres.py +2001 -0
agno/db/postgres/postgres.py +676 -57
agno/db/postgres/schemas.py +43 -18
agno/db/postgres/utils.py +164 -2
agno/db/redis/redis.py +344 -38
agno/db/redis/schemas.py +18 -0
agno/db/redis/utils.py +60 -2
agno/db/schemas/__init__.py +2 -1
agno/db/schemas/culture.py +120 -0
agno/db/schemas/memory.py +13 -0
agno/db/singlestore/schemas.py +26 -1
agno/db/singlestore/singlestore.py +687 -53
agno/db/singlestore/utils.py +60 -2
agno/db/sqlite/__init__.py +2 -1
agno/db/sqlite/async_sqlite.py +2371 -0
agno/db/sqlite/schemas.py +24 -0
agno/db/sqlite/sqlite.py +774 -85
agno/db/sqlite/utils.py +168 -5
agno/db/surrealdb/__init__.py +3 -0
agno/db/surrealdb/metrics.py +292 -0
agno/db/surrealdb/models.py +309 -0
agno/db/surrealdb/queries.py +71 -0
agno/db/surrealdb/surrealdb.py +1361 -0
agno/db/surrealdb/utils.py +147 -0
agno/db/utils.py +50 -22
agno/eval/accuracy.py +50 -43
agno/eval/performance.py +6 -3
agno/eval/reliability.py +6 -3
agno/eval/utils.py +33 -16
agno/exceptions.py +68 -1
agno/filters.py +354 -0
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +52 -0
agno/integrations/discord/client.py +1 -0
agno/knowledge/chunking/agentic.py +13 -10
agno/knowledge/chunking/fixed.py +1 -1
agno/knowledge/chunking/semantic.py +40 -8
agno/knowledge/chunking/strategy.py +59 -15
agno/knowledge/embedder/aws_bedrock.py +9 -4
agno/knowledge/embedder/azure_openai.py +54 -0
agno/knowledge/embedder/base.py +2 -0
agno/knowledge/embedder/cohere.py +184 -5
agno/knowledge/embedder/fastembed.py +1 -1
agno/knowledge/embedder/google.py +79 -1
agno/knowledge/embedder/huggingface.py +9 -4
agno/knowledge/embedder/jina.py +63 -0
agno/knowledge/embedder/mistral.py +78 -11
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/ollama.py +13 -0
agno/knowledge/embedder/openai.py +37 -65
agno/knowledge/embedder/sentence_transformer.py +8 -4
agno/knowledge/embedder/vllm.py +262 -0
agno/knowledge/embedder/voyageai.py +69 -16
agno/knowledge/knowledge.py +595 -187
agno/knowledge/reader/base.py +9 -2
agno/knowledge/reader/csv_reader.py +8 -10
agno/knowledge/reader/docx_reader.py +5 -6
agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
agno/knowledge/reader/json_reader.py +6 -5
agno/knowledge/reader/markdown_reader.py +13 -13
agno/knowledge/reader/pdf_reader.py +43 -68
agno/knowledge/reader/pptx_reader.py +101 -0
agno/knowledge/reader/reader_factory.py +51 -6
agno/knowledge/reader/s3_reader.py +3 -15
agno/knowledge/reader/tavily_reader.py +194 -0
agno/knowledge/reader/text_reader.py +13 -13
agno/knowledge/reader/web_search_reader.py +2 -43
agno/knowledge/reader/website_reader.py +43 -25
agno/knowledge/reranker/__init__.py +3 -0
agno/knowledge/types.py +9 -0
agno/knowledge/utils.py +20 -0
agno/media.py +339 -266
agno/memory/manager.py +336 -82
agno/models/aimlapi/aimlapi.py +2 -2
agno/models/anthropic/claude.py +183 -37
agno/models/aws/bedrock.py +52 -112
agno/models/aws/claude.py +33 -1
agno/models/azure/ai_foundry.py +33 -15
agno/models/azure/openai_chat.py +25 -8
agno/models/base.py +1011 -566
agno/models/cerebras/cerebras.py +19 -13
agno/models/cerebras/cerebras_openai.py +8 -5
agno/models/cohere/chat.py +27 -1
agno/models/cometapi/__init__.py +5 -0
agno/models/cometapi/cometapi.py +57 -0
agno/models/dashscope/dashscope.py +1 -0
agno/models/deepinfra/deepinfra.py +2 -2
agno/models/deepseek/deepseek.py +2 -2
agno/models/fireworks/fireworks.py +2 -2
agno/models/google/gemini.py +110 -37
agno/models/groq/groq.py +28 -11
agno/models/huggingface/huggingface.py +2 -1
agno/models/internlm/internlm.py +2 -2
agno/models/langdb/langdb.py +4 -4
agno/models/litellm/chat.py +18 -1
agno/models/litellm/litellm_openai.py +2 -2
agno/models/llama_cpp/__init__.py +5 -0
agno/models/llama_cpp/llama_cpp.py +22 -0
agno/models/message.py +143 -4
agno/models/meta/llama.py +27 -10
agno/models/meta/llama_openai.py +5 -17
agno/models/nebius/nebius.py +6 -6
agno/models/nexus/__init__.py +3 -0
agno/models/nexus/nexus.py +22 -0
agno/models/nvidia/nvidia.py +2 -2
agno/models/ollama/chat.py +60 -6
agno/models/openai/chat.py +102 -43
agno/models/openai/responses.py +103 -106
agno/models/openrouter/openrouter.py +41 -3
agno/models/perplexity/perplexity.py +4 -5
agno/models/portkey/portkey.py +3 -3
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +52 -0
agno/models/response.py +81 -5
agno/models/sambanova/sambanova.py +2 -2
agno/models/siliconflow/__init__.py +5 -0
agno/models/siliconflow/siliconflow.py +25 -0
agno/models/together/together.py +2 -2
agno/models/utils.py +254 -8
agno/models/vercel/v0.py +2 -2
agno/models/vertexai/__init__.py +0 -0
agno/models/vertexai/claude.py +96 -0
agno/models/vllm/vllm.py +1 -0
agno/models/xai/xai.py +3 -2
agno/os/app.py +543 -175
agno/os/auth.py +24 -14
agno/os/config.py +1 -0
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/a2a/__init__.py +3 -0
agno/os/interfaces/a2a/a2a.py +42 -0
agno/os/interfaces/a2a/router.py +250 -0
agno/os/interfaces/a2a/utils.py +924 -0
agno/os/interfaces/agui/agui.py +23 -7
agno/os/interfaces/agui/router.py +27 -3
agno/os/interfaces/agui/utils.py +242 -142
agno/os/interfaces/base.py +6 -2
agno/os/interfaces/slack/router.py +81 -23
agno/os/interfaces/slack/slack.py +29 -14
agno/os/interfaces/whatsapp/router.py +11 -4
agno/os/interfaces/whatsapp/whatsapp.py +14 -7
agno/os/mcp.py +111 -54
agno/os/middleware/__init__.py +7 -0
agno/os/middleware/jwt.py +233 -0
agno/os/router.py +556 -139
agno/os/routers/evals/evals.py +71 -34
agno/os/routers/evals/schemas.py +31 -31
agno/os/routers/evals/utils.py +6 -5
agno/os/routers/health.py +31 -0
agno/os/routers/home.py +52 -0
agno/os/routers/knowledge/knowledge.py +185 -38
agno/os/routers/knowledge/schemas.py +82 -22
agno/os/routers/memory/memory.py +158 -53
agno/os/routers/memory/schemas.py +20 -16
agno/os/routers/metrics/metrics.py +20 -8
agno/os/routers/metrics/schemas.py +16 -16
agno/os/routers/session/session.py +499 -38
agno/os/schema.py +308 -198
agno/os/utils.py +401 -41
agno/reasoning/anthropic.py +80 -0
agno/reasoning/azure_ai_foundry.py +2 -2
agno/reasoning/deepseek.py +2 -2
agno/reasoning/default.py +3 -1
agno/reasoning/gemini.py +73 -0
agno/reasoning/groq.py +2 -2
agno/reasoning/ollama.py +2 -2
agno/reasoning/openai.py +7 -2
agno/reasoning/vertexai.py +76 -0
agno/run/__init__.py +6 -0
agno/run/agent.py +266 -112
agno/run/base.py +53 -24
agno/run/team.py +252 -111
agno/run/workflow.py +156 -45
agno/session/agent.py +105 -89
agno/session/summary.py +65 -25
agno/session/team.py +176 -96
agno/session/workflow.py +406 -40
agno/team/team.py +3854 -1692
agno/tools/brightdata.py +3 -3
agno/tools/cartesia.py +3 -5
agno/tools/dalle.py +9 -8
agno/tools/decorator.py +4 -2
agno/tools/desi_vocal.py +2 -2
agno/tools/duckduckgo.py +15 -11
agno/tools/e2b.py +20 -13
agno/tools/eleven_labs.py +26 -28
agno/tools/exa.py +21 -16
agno/tools/fal.py +4 -4
agno/tools/file.py +153 -23
agno/tools/file_generation.py +350 -0
agno/tools/firecrawl.py +4 -4
agno/tools/function.py +257 -37
agno/tools/giphy.py +2 -2
agno/tools/gmail.py +238 -14
agno/tools/google_drive.py +270 -0
agno/tools/googlecalendar.py +36 -8
agno/tools/googlesheets.py +20 -5
agno/tools/jira.py +20 -0
agno/tools/knowledge.py +3 -3
agno/tools/lumalab.py +3 -3
agno/tools/mcp/__init__.py +10 -0
agno/tools/mcp/mcp.py +331 -0
agno/tools/mcp/multi_mcp.py +347 -0
agno/tools/mcp/params.py +24 -0
agno/tools/mcp_toolbox.py +284 -0
agno/tools/mem0.py +11 -17
agno/tools/memori.py +1 -53
agno/tools/memory.py +419 -0
agno/tools/models/azure_openai.py +2 -2
agno/tools/models/gemini.py +3 -3
agno/tools/models/groq.py +3 -5
agno/tools/models/nebius.py +7 -7
agno/tools/models_labs.py +25 -15
agno/tools/notion.py +204 -0
agno/tools/openai.py +4 -9
agno/tools/opencv.py +3 -3
agno/tools/parallel.py +314 -0
agno/tools/replicate.py +7 -7
agno/tools/scrapegraph.py +58 -31
agno/tools/searxng.py +2 -2
agno/tools/serper.py +2 -2
agno/tools/slack.py +18 -3
agno/tools/spider.py +2 -2
agno/tools/tavily.py +146 -0
agno/tools/whatsapp.py +1 -1
agno/tools/workflow.py +278 -0
agno/tools/yfinance.py +12 -11
agno/utils/agent.py +820 -0
agno/utils/audio.py +27 -0
agno/utils/common.py +90 -1
agno/utils/events.py +222 -7
agno/utils/gemini.py +181 -23
agno/utils/hooks.py +57 -0
agno/utils/http.py +111 -0
agno/utils/knowledge.py +12 -5
agno/utils/log.py +1 -0
agno/utils/mcp.py +95 -5
agno/utils/media.py +188 -10
agno/utils/merge_dict.py +22 -1
agno/utils/message.py +60 -0
agno/utils/models/claude.py +40 -11
agno/utils/models/cohere.py +1 -1
agno/utils/models/watsonx.py +1 -1
agno/utils/openai.py +1 -1
agno/utils/print_response/agent.py +105 -21
agno/utils/print_response/team.py +103 -38
agno/utils/print_response/workflow.py +251 -34
agno/utils/reasoning.py +22 -1
agno/utils/serialize.py +32 -0
agno/utils/streamlit.py +16 -10
agno/utils/string.py +41 -0
agno/utils/team.py +98 -9
agno/utils/tools.py +1 -1
agno/vectordb/base.py +23 -4
agno/vectordb/cassandra/cassandra.py +65 -9
agno/vectordb/chroma/chromadb.py +182 -38
agno/vectordb/clickhouse/clickhousedb.py +64 -11
agno/vectordb/couchbase/couchbase.py +105 -10
agno/vectordb/lancedb/lance_db.py +183 -135
agno/vectordb/langchaindb/langchaindb.py +25 -7
agno/vectordb/lightrag/lightrag.py +17 -3
agno/vectordb/llamaindex/__init__.py +3 -0
agno/vectordb/llamaindex/llamaindexdb.py +46 -7
agno/vectordb/milvus/milvus.py +126 -9
agno/vectordb/mongodb/__init__.py +7 -1
agno/vectordb/mongodb/mongodb.py +112 -7
agno/vectordb/pgvector/pgvector.py +142 -21
agno/vectordb/pineconedb/pineconedb.py +80 -8
agno/vectordb/qdrant/qdrant.py +125 -39
agno/vectordb/redis/__init__.py +9 -0
agno/vectordb/redis/redisdb.py +694 -0
agno/vectordb/singlestore/singlestore.py +111 -25
agno/vectordb/surrealdb/surrealdb.py +31 -5
agno/vectordb/upstashdb/upstashdb.py +76 -8
agno/vectordb/weaviate/weaviate.py +86 -15
agno/workflow/__init__.py +2 -0
agno/workflow/agent.py +299 -0
agno/workflow/condition.py +112 -18
agno/workflow/loop.py +69 -10
agno/workflow/parallel.py +266 -118
agno/workflow/router.py +110 -17
agno/workflow/step.py +645 -136
agno/workflow/steps.py +65 -6
agno/workflow/types.py +71 -33
agno/workflow/workflow.py +2113 -300
agno-2.3.0.dist-info/METADATA +618 -0
agno-2.3.0.dist-info/RECORD +577 -0
agno-2.3.0.dist-info/licenses/LICENSE +201 -0
agno/knowledge/reader/url_reader.py +0 -128
agno/tools/googlesearch.py +0 -98
agno/tools/mcp.py +0 -610
agno/utils/models/aws_claude.py +0 -170
agno-2.0.0rc2.dist-info/METADATA +0 -355
agno-2.0.0rc2.dist-info/RECORD +0 -515
agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0

agno/models/base.py CHANGED Viewed

@@ -1,7 +1,11 @@
 import asyncio
 import collections.abc
+import json
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
+from hashlib import md5
+from pathlib import Path
+from time import time
 from types import AsyncGeneratorType, GeneratorType
 from typing import (
     Any,
@@ -21,15 +25,16 @@ from uuid import uuid4
 from pydantic import BaseModel
 from agno.exceptions import AgentRunException
-from agno.media import Audio, AudioArtifact, AudioResponse, Image, ImageArtifact, Video, VideoArtifact
+from agno.media import Audio, File, Image, Video
 from agno.models.message import Citations, Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
 from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
 from agno.run.team import RunContentEvent as TeamRunContentEvent
-from agno.run.team import TeamRunOutputEvent
+from agno.run.team import TeamRunOutput, TeamRunOutputEvent
+from agno.run.workflow import WorkflowRunOutputEvent
 from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
-from agno.utils.log import log_debug, log_error, log_warning
+from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.utils.timer import Timer
 from agno.utils.tools import get_function_call_for_tool_call, get_function_call_for_tool_execution
@@ -43,9 +48,12 @@ class MessageData:
     response_citations: Optional[Citations] = None
     response_tool_calls: List[Dict[str, Any]] = field(default_factory=list)
-    response_audio: Optional[AudioResponse] = None
-    response_image: Optional[ImageArtifact] = None
-    response_video: Optional[VideoArtifact] = None
+    response_audio: Optional[Audio] = None
+    response_image: Optional[Image] = None
+    response_video: Optional[Video] = None
+    response_file: Optional[File] = None
+    response_metrics: Optional[Metrics] = None
     # Data from the provider that we might need on subsequent messages
     response_provider_data: Optional[Dict[str, Any]] = None
@@ -132,6 +140,11 @@ class Model(ABC):
     # The role of the assistant message.
     assistant_message_role: str = "assistant"
+    # Cache model responses to avoid redundant API calls during development
+    cache_response: bool = False
+    cache_ttl: Optional[int] = None
+    cache_dir: Optional[str] = None
     def __post_init__(self):
         if self.provider is None and self.name is not None:
             self.provider = f"{self.name} ({self.id})"
@@ -144,6 +157,100 @@ class Model(ABC):
     def get_provider(self) -> str:
         return self.provider or self.name or self.__class__.__name__
+    def _get_model_cache_key(self, messages: List[Message], stream: bool, **kwargs: Any) -> str:
+        """Generate a cache key based on model messages and core parameters."""
+        message_data = []
+        for msg in messages:
+            msg_dict = {
+                "role": msg.role,
+                "content": msg.content,
+            }
+            message_data.append(msg_dict)
+        # Include tools parameter in cache key
+        has_tools = bool(kwargs.get("tools"))
+        cache_data = {
+            "model_id": self.id,
+            "messages": message_data,
+            "has_tools": has_tools,
+            "response_format": kwargs.get("response_format"),
+            "stream": stream,
+        }
+        cache_str = json.dumps(cache_data, sort_keys=True)
+        return md5(cache_str.encode()).hexdigest()
+    def _get_model_cache_file_path(self, cache_key: str) -> Path:
+        """Get the file path for a cache key."""
+        if self.cache_dir:
+            cache_dir = Path(self.cache_dir)
+        else:
+            cache_dir = Path.home() / ".agno" / "cache" / "model_responses"
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        return cache_dir / f"{cache_key}.json"
+    def _get_cached_model_response(self, cache_key: str) -> Optional[Dict[str, Any]]:
+        """Retrieve a cached response if it exists and is not expired."""
+        cache_file = self._get_model_cache_file_path(cache_key)
+        if not cache_file.exists():
+            return None
+        try:
+            with open(cache_file, "r") as f:
+                cached_data = json.load(f)
+            # Check TTL if set (None means no expiration)
+            if self.cache_ttl is not None:
+                if time() - cached_data["timestamp"] > self.cache_ttl:
+                    return None
+            return cached_data
+        except Exception:
+            return None
+    def _save_model_response_to_cache(self, cache_key: str, result: ModelResponse, is_streaming: bool = False) -> None:
+        """Save a model response to cache."""
+        try:
+            cache_file = self._get_model_cache_file_path(cache_key)
+            cache_data = {
+                "timestamp": int(time()),
+                "is_streaming": is_streaming,
+                "result": result.to_dict(),
+            }
+            with open(cache_file, "w") as f:
+                json.dump(cache_data, f)
+        except Exception:
+            pass
+    def _save_streaming_responses_to_cache(self, cache_key: str, responses: List[ModelResponse]) -> None:
+        """Save streaming responses to cache."""
+        cache_file = self._get_model_cache_file_path(cache_key)
+        cache_data = {
+            "timestamp": int(time()),
+            "is_streaming": True,
+            "streaming_responses": [r.to_dict() for r in responses],
+        }
+        try:
+            with open(cache_file, "w") as f:
+                json.dump(cache_data, f)
+        except Exception:
+            pass
+    def _model_response_from_cache(self, cached_data: Dict[str, Any]) -> ModelResponse:
+        """Reconstruct a ModelResponse from cached data."""
+        return ModelResponse.from_dict(cached_data["result"])
+    def _streaming_responses_from_cache(self, cached_data: list) -> Iterator[ModelResponse]:
+        """Reconstruct streaming responses from cached data."""
+        for cached_response in cached_data:
+            yield ModelResponse.from_dict(cached_response)
     @abstractmethod
     def invoke(self, *args, **kwargs) -> ModelResponse:
         pass
@@ -186,278 +293,377 @@ class Model(ABC):
         """
         pass
+    def _format_tools(self, tools: Optional[List[Union[Function, dict]]]) -> List[Dict[str, Any]]:
+        _tool_dicts = []
+        for tool in tools or []:
+            if isinstance(tool, Function):
+                _tool_dicts.append({"type": "function", "function": tool.to_dict()})
+            else:
+                # If a dict is passed, it is a builtin tool
+                _tool_dicts.append(tool)
+        return _tool_dicts
     def response(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
         Generate a response from the model.
+        Args:
+            messages: List of messages to send to the model
+            response_format: Response format to use
+            tools: List of tools to use. This includes the original Function objects and dicts for built-in tools.
+            tool_choice: Tool choice to use
+            tool_call_limit: Tool call limit
+            run_response: Run response to use
+            send_media_to_model: Whether to send media to the model
         """
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
-        log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
-        model_response = ModelResponse()
-        function_call_count = 0
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            self._process_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            )
+                if cached_data:
+                    log_info("Cache hit for model response")
+                    return self._model_response_from_cache(cached_data)
-            # Add assistant message to messages
-            messages.append(assistant_message)
+            log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
-            # Log response and metrics
-            assistant_message.log(metrics=True)
+            _log_messages(messages)
+            model_response = ModelResponse()
-            # Handle tool calls if present
-            if assistant_message.tool_calls:
-                # Prepare function calls
-                function_calls_to_run = self._prepare_function_calls(
-                    assistant_message=assistant_message,
+            function_call_count = 0
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+            while True:
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                self._process_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-                    functions=functions,
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
                 )
-                function_call_results: List[Message] = []
-                # Execute function calls
-                for function_call_response in self.run_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if isinstance(function_call_response, ModelResponse):
-                        # The session state is updated by the function call
-                        if function_call_response.updated_session_state is not None:
-                            model_response.updated_session_state = function_call_response.updated_session_state
-                        # Media artifacts are generated by the function call
-                        if function_call_response.images is not None:
-                            if model_response.images is None:
-                                model_response.images = []
-                            model_response.images.extend(function_call_response.images)
-                        if function_call_response.audios is not None:
-                            if model_response.audios is None:
-                                model_response.audios = []
-                            model_response.audios.extend(function_call_response.audios)
-                        if function_call_response.videos is not None:
-                            if model_response.videos is None:
-                                model_response.videos = []
-                            model_response.videos.extend(function_call_response.videos)
-                        if (
-                            function_call_response.event
-                            in [
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                # Log response and metrics
+                assistant_message.log(metrics=True)
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-                                ModelResponseEvent.tool_call_paused.value,
-                            ]
-                            and function_call_response.tool_executions is not None
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-                        elif function_call_response.event not in [
-                            ModelResponseEvent.tool_call_started.value,
-                            ModelResponseEvent.tool_call_completed.value,
-                        ]:
-                            if function_call_response.content:
-                                model_response.content += function_call_response.content  # type: ignore
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore
-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+                    # Format and add results to messages
+                    self.format_function_call_results(
+                        messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                    )
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)
-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
+                    # Continue loop to get next response
+                    continue
-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
+            log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:  # type: ignore
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
-        log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
         return model_response
     async def aresponse(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
         Generate an asynchronous response from the model.
         """
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
-        log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
-        model_response = ModelResponse()
-        function_call_count = 0
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            await self._aprocess_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-            )
+                if cached_data:
+                    log_info("Cache hit for model response")
+                    return self._model_response_from_cache(cached_data)
-            # Add assistant message to messages
-            messages.append(assistant_message)
+            log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
+            model_response = ModelResponse()
-            # Log response and metrics
-            assistant_message.log(metrics=True)
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
-            # Handle tool calls if present
-            if assistant_message.tool_calls:
-                # Prepare function calls
-                function_calls_to_run = self._prepare_function_calls(
-                    assistant_message=assistant_message,
+            function_call_count = 0
+            while True:
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                await self._aprocess_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-                    functions=functions,
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
                 )
-                function_call_results: List[Message] = []
-                # Execute function calls
-                async for function_call_response in self.arun_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if isinstance(function_call_response, ModelResponse):
-                        # The session state is updated by the function call
-                        if function_call_response.updated_session_state is not None:
-                            model_response.updated_session_state = function_call_response.updated_session_state
-                        # Media artifacts are generated by the function call
-                        if function_call_response.images is not None:
-                            if model_response.images is None:
-                                model_response.images = []
-                            model_response.images.extend(function_call_response.images)
-                        if function_call_response.audios is not None:
-                            if model_response.audios is None:
-                                model_response.audios = []
-                            model_response.audios.extend(function_call_response.audios)
-                        if function_call_response.videos is not None:
-                            if model_response.videos is None:
-                                model_response.videos = []
-                            model_response.videos.extend(function_call_response.videos)
-                        if (
-                            function_call_response.event
-                            in [
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                # Log response and metrics
+                assistant_message.log(metrics=True)
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-                                ModelResponseEvent.tool_call_paused.value,
-                            ]
-                            and function_call_response.tool_executions is not None
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-                        elif function_call_response.event not in [
-                            ModelResponseEvent.tool_call_started.value,
-                            ModelResponseEvent.tool_call_completed.value,
-                        ]:
-                            if function_call_response.content:
-                                model_response.content += function_call_response.content  # type: ignore
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore
-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    # Format and add results to messages
+                    self.format_function_call_results(
+                        messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                    )
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)
-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
+                    # Continue loop to get next response
+                    continue
+                # No tool calls or finished processing them
+                break
+            log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
-            # No tool calls or finished processing them
-            break
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
-        log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
         return model_response
     def _process_model_response(
@@ -468,7 +674,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -502,9 +708,7 @@ class Model(ABC):
         if assistant_message.citations is not None:
             model_response.citations = assistant_message.citations
         if assistant_message.audio_output is not None:
-            if isinstance(assistant_message.audio_output, AudioArtifact):
-                model_response.audios = [assistant_message.audio_output]
-            elif isinstance(assistant_message.audio_output, AudioResponse):
+            if isinstance(assistant_message.audio_output, Audio):
                 model_response.audio = assistant_message.audio_output
         if assistant_message.image_output is not None:
             model_response.images = [assistant_message.image_output]
@@ -514,6 +718,8 @@ class Model(ABC):
             if model_response.extra is None:
                 model_response.extra = {}
             model_response.extra.update(provider_response.extra)
+        if provider_response.provider_data is not None:
+            model_response.provider_data = provider_response.provider_data
     async def _aprocess_model_response(
         self,
@@ -523,7 +729,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -557,9 +763,7 @@ class Model(ABC):
         if assistant_message.citations is not None:
             model_response.citations = assistant_message.citations
         if assistant_message.audio_output is not None:
-            if isinstance(assistant_message.audio_output, AudioArtifact):
-                model_response.audios = [assistant_message.audio_output]
-            elif isinstance(assistant_message.audio_output, AudioResponse):
+            if isinstance(assistant_message.audio_output, Audio):
                 model_response.audio = assistant_message.audio_output
         if assistant_message.image_output is not None:
             model_response.images = [assistant_message.image_output]
@@ -569,6 +773,8 @@ class Model(ABC):
             if model_response.extra is None:
                 model_response.extra = {}
             model_response.extra.update(provider_response.extra)
+        if provider_response.provider_data is not None:
+            model_response.provider_data = provider_response.provider_data
     def _populate_assistant_message(
         self,
@@ -585,7 +791,6 @@ class Model(ABC):
         Returns:
             Message: The populated assistant message
         """
-        # Add role to assistant message
         if provider_response.role is not None:
             assistant_message.role = provider_response.role
@@ -611,6 +816,10 @@ class Model(ABC):
             if provider_response.videos:
                 assistant_message.video_output = provider_response.videos[-1]  # Taking last (most recent) video
+        if provider_response.files is not None:
+            if provider_response.files:
+                assistant_message.file_output = provider_response.files[-1]  # Taking last (most recent) file
         if provider_response.audios is not None:
             if provider_response.audios:
                 assistant_message.audio_output = provider_response.audios[-1]  # Taking last (most recent) audio
@@ -645,7 +854,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Iterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -659,142 +868,181 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
         ):
-            yield from self._populate_stream_data_and_assistant_message(
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
-            )
+            ):
+                yield model_response_delta
-        # Add final metrics to assistant message
-        self._populate_assistant_message(assistant_message=assistant_message, provider_response=response_delta)
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
     def response_stream(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        send_media_to_model: bool = True,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate a streaming response from the model.
         """
+        try:
+            # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+            cache_key = None
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=True, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
-        log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
+                if cached_data:
+                    log_info("Cache hit for streaming model response")
+                    # Yield cached responses
+                    for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+                        yield response
+                    return
-        function_call_count = 0
+                log_info("Cache miss for streaming model response")
-        while True:
-            assistant_message = Message(role=self.assistant_message_role)
-            # Create assistant message and stream data
-            stream_data = MessageData()
-            if stream_model_response:
-                # Generate response
-                yield from self.process_response_stream(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    stream_data=stream_data,
-                    response_format=response_format,
-                    tools=tools,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
-                )
+            # Track streaming responses for caching
+            streaming_responses: List[ModelResponse] = []
-                # Populate assistant message from stream data
-                if stream_data.response_content:
-                    assistant_message.content = stream_data.response_content
-                if stream_data.response_reasoning_content:
-                    assistant_message.reasoning_content = stream_data.response_reasoning_content
-                if stream_data.response_redacted_reasoning_content:
-                    assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-                if stream_data.response_provider_data:
-                    assistant_message.provider_data = stream_data.response_provider_data
-                if stream_data.response_citations:
-                    assistant_message.citations = stream_data.response_citations
-                if stream_data.response_audio:
-                    assistant_message.audio_output = stream_data.response_audio
-                if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                    assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+            log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
-            else:
-                model_response = ModelResponse()
-                self._process_model_response(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    model_response=model_response,
-                    response_format=response_format,
-                    tools=tools,
-                    tool_choice=tool_choice or self._tool_choice,
-                )
-                yield model_response
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
-            # Add assistant message to messages
-            messages.append(assistant_message)
-            assistant_message.log(metrics=True)
+            function_call_count = 0
+            while True:
+                assistant_message = Message(role=self.assistant_message_role)
+                # Create assistant message and stream data
+                stream_data = MessageData()
+                model_response = ModelResponse()
+                if stream_model_response:
+                    # Generate response
+                    for response in self.process_response_stream(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        stream_data=stream_data,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                    ):
+                        if self.cache_response and isinstance(response, ModelResponse):
+                            streaming_responses.append(response)
+                        yield response
-            # Handle tool calls if present
-            if assistant_message.tool_calls is not None:
-                # Prepare function calls
-                function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-                    assistant_message, messages, functions
-                )
-                function_call_results: List[Message] = []
-                # Execute function calls
-                for function_call_response in self.run_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    yield function_call_response
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-                # Format and add results to messages
-                if stream_data and stream_data.extra is not None:
-                    self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
-                    )
                 else:
-                    self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    self._process_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                    )
+                    if self.cache_response:
+                        streaming_responses.append(model_response)
+                    yield model_response
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
+                    )
+                    function_call_results: List[Message] = []
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **stream_data.extra
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **model_response.extra
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results
+                        )
-                # Handle function call media
-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
-                # If we have any tool calls that require confirmation, break the loop
-                if any(fc.function.requires_confirmation for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
-                # If we have any tool calls that require external execution, break the loop
-                if any(fc.function.external_execution for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
-                # If we have any tool calls that require user input, break the loop
-                if any(fc.function.requires_user_input for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
-                # Continue loop to get next response
-                continue
+                    # Continue loop to get next response
+                    continue
-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
-        log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+            log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
     async def aprocess_response_stream(
         self,
@@ -804,7 +1052,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> AsyncIterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -817,153 +1065,229 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
         ):  # type: ignore
-            for model_response in self._populate_stream_data_and_assistant_message(
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
             ):
-                yield model_response
+                yield model_response_delta
-        # Populate the assistant message
-        self._populate_assistant_message(assistant_message=assistant_message, provider_response=model_response)
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
     async def aresponse_stream(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        send_media_to_model: bool = True,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate an asynchronous streaming response from the model.
         """
+        try:
+            # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+            cache_key = None
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=True, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
-        log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
+                if cached_data:
+                    log_info("Cache hit for async streaming model response")
+                    # Yield cached responses
+                    for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+                        yield response
+                    return
-        function_call_count = 0
+                log_info("Cache miss for async streaming model response")
-        while True:
-            # Create assistant message and stream data
-            assistant_message = Message(role=self.assistant_message_role)
-            stream_data = MessageData()
-            if stream_model_response:
-                # Generate response
-                async for response in self.aprocess_response_stream(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    stream_data=stream_data,
-                    response_format=response_format,
-                    tools=tools,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
-                ):
-                    yield response
-                # Populate assistant message from stream data
-                if stream_data.response_content:
-                    assistant_message.content = stream_data.response_content
-                if stream_data.response_reasoning_content:
-                    assistant_message.reasoning_content = stream_data.response_reasoning_content
-                if stream_data.response_redacted_reasoning_content:
-                    assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-                if stream_data.response_provider_data:
-                    assistant_message.provider_data = stream_data.response_provider_data
-                if stream_data.response_audio:
-                    assistant_message.audio_output = stream_data.response_audio
-                if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                    assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+            # Track streaming responses for caching
+            streaming_responses: List[ModelResponse] = []
-            else:
-                model_response = ModelResponse()
-                await self._aprocess_model_response(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    model_response=model_response,
-                    response_format=response_format,
-                    tools=tools,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
-                )
-                yield model_response
+            log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
-            # Add assistant message to messages
-            messages.append(assistant_message)
-            assistant_message.log(metrics=True)
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+            function_call_count = 0
+            while True:
+                # Create assistant message and stream data
+                assistant_message = Message(role=self.assistant_message_role)
+                stream_data = MessageData()
+                model_response = ModelResponse()
+                if stream_model_response:
+                    # Generate response
+                    async for model_response in self.aprocess_response_stream(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        stream_data=stream_data,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                    ):
+                        if self.cache_response and isinstance(model_response, ModelResponse):
+                            streaming_responses.append(model_response)
+                        yield model_response
-            # Handle tool calls if present
-            if assistant_message.tool_calls is not None:
-                # Prepare function calls
-                function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-                    assistant_message, messages, functions
-                )
-                function_call_results: List[Message] = []
-                # Execute function calls
-                async for function_call_response in self.arun_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    yield function_call_response
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-                # Format and add results to messages
-                if stream_data and stream_data.extra is not None:
-                    self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
-                    )
                 else:
-                    self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    await self._aprocess_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                    )
+                    if self.cache_response:
+                        streaming_responses.append(model_response)
+                    yield model_response
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
+                    )
+                    function_call_results: List[Message] = []
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
-                # Handle function call media
-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Increase the running function call count by the number of results just produced
+                    function_call_count += len(function_call_results)
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **stream_data.extra
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results
+                        )
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
-                # If we have any tool calls that require confirmation, break the loop
-                if any(fc.function.requires_confirmation for fc in function_calls_to_run):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)
-                # If we have any tool calls that require external execution, break the loop
-                if any(fc.function.external_execution for fc in function_calls_to_run):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
-                # If we have any tool calls that require user input, break the loop
-                if any(fc.function.requires_user_input for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
+                    # Continue loop to get next response
+                    continue
-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
-        log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
+            log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
-    def _populate_stream_data_and_assistant_message(
-        self, stream_data: MessageData, assistant_message: Message, model_response_delta: ModelResponse
+    def _populate_assistant_message_from_stream_data(
+        self, assistant_message: Message, stream_data: MessageData
+    ) -> None:
+        """
+        Populate an assistant message with the stream data.
+        """
+        if stream_data.response_role is not None:
+            assistant_message.role = stream_data.response_role
+        if stream_data.response_metrics is not None:
+            assistant_message.metrics = stream_data.response_metrics
+        if stream_data.response_content:
+            assistant_message.content = stream_data.response_content
+        if stream_data.response_reasoning_content:
+            assistant_message.reasoning_content = stream_data.response_reasoning_content
+        if stream_data.response_redacted_reasoning_content:
+            assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
+        if stream_data.response_provider_data:
+            assistant_message.provider_data = stream_data.response_provider_data
+        if stream_data.response_citations:
+            assistant_message.citations = stream_data.response_citations
+        if stream_data.response_audio:
+            assistant_message.audio_output = stream_data.response_audio
+        if stream_data.response_image:
+            assistant_message.image_output = stream_data.response_image
+        if stream_data.response_video:
+            assistant_message.video_output = stream_data.response_video
+        if stream_data.response_file:
+            assistant_message.file_output = stream_data.response_file
+        if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
+            assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+    def _populate_stream_data(
+        self, stream_data: MessageData, model_response_delta: ModelResponse
     ) -> Iterator[ModelResponse]:
-        """Update the stream data and assistant message with the model response."""
+        """Update the stream data with the model response delta."""
-        # Add role to assistant message
-        if model_response_delta.role is not None:
-            assistant_message.role = model_response_delta.role
         should_yield = False
+        if model_response_delta.role is not None:
+            stream_data.response_role = model_response_delta.role  # type: ignore
+        if model_response_delta.response_usage is not None:
+            if stream_data.response_metrics is None:
+                stream_data.response_metrics = Metrics()
+            stream_data.response_metrics += model_response_delta.response_usage
         # Update stream_data content
         if model_response_delta.content is not None:
             stream_data.response_content += model_response_delta.content
@@ -993,13 +1317,13 @@ class Model(ABC):
             stream_data.response_tool_calls.extend(model_response_delta.tool_calls)
             should_yield = True
-        if model_response_delta.audio is not None and isinstance(model_response_delta.audio, AudioResponse):
+        if model_response_delta.audio is not None and isinstance(model_response_delta.audio, Audio):
             if stream_data.response_audio is None:
-                stream_data.response_audio = AudioResponse(id=str(uuid4()), content="", transcript="")
+                stream_data.response_audio = Audio(id=str(uuid4()), content="", transcript="")
             from typing import cast
-            audio_response = cast(AudioResponse, model_response_delta.audio)
+            audio_response = cast(Audio, model_response_delta.audio)
             # Update the stream data with audio information
             if audio_response.id is not None:
@@ -1030,7 +1354,13 @@ class Model(ABC):
         if model_response_delta.extra is not None:
             if stream_data.extra is None:
                 stream_data.extra = {}
-            stream_data.extra.update(model_response_delta.extra)
+            for key in model_response_delta.extra:
+                if isinstance(model_response_delta.extra[key], list):
+                    if not stream_data.extra.get(key):
+                        stream_data.extra[key] = []
+                    stream_data.extra[key].extend(model_response_delta.extra[key])
+                else:
+                    stream_data.extra[key] = model_response_delta.extra[key]
         if should_yield:
             yield model_response_delta
@@ -1102,43 +1432,14 @@ class Model(ABC):
         images = None
         videos = None
         audios = None
+        files = None
         if success and function_execution_result:
-            # Convert ImageArtifacts to Images for message compatibility
-            if function_execution_result.images:
-                from agno.media import Image
-                images = []
-                for img_artifact in function_execution_result.images:
-                    if img_artifact.url:
-                        images.append(Image(url=img_artifact.url))
-                    elif img_artifact.content:
-                        images.append(Image(content=img_artifact.content))
-            # Convert VideoArtifacts to Videos for message compatibility
-            if function_execution_result.videos:
-                from agno.media import Video
-                videos = []
-                for vid_artifact in function_execution_result.videos:
-                    if vid_artifact.url:
-                        videos.append(Video(url=vid_artifact.url))
-                    elif vid_artifact.content:
-                        videos.append(Video(content=vid_artifact.content))
-            # Convert AudioArtifacts to Audio for message compatibility
-            if function_execution_result.audios:
-                from agno.media import Audio
-                audios = []
-                for aud_artifact in function_execution_result.audios:
-                    if aud_artifact.url:
-                        audios.append(Audio(url=aud_artifact.url))
-                    elif aud_artifact.base64_audio:
-                        import base64
-                        audio_bytes = base64.b64decode(aud_artifact.base64_audio)
-                        audios.append(Audio(content=audio_bytes))
+            # With unified classes, no conversion needed - use directly
+            images = function_execution_result.images
+            videos = function_execution_result.videos
+            audios = function_execution_result.audios
+            files = function_execution_result.files
         return Message(
             role=self.tool_message_role,
@@ -1151,6 +1452,7 @@ class Model(ABC):
             images=images,
             videos=videos,
             audio=audios,
+            files=files,
             **kwargs,  # type: ignore
         )
@@ -1207,32 +1509,49 @@ class Model(ABC):
         function_call_output: str = ""
         if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
-            for item in function_execution_result.result:
-                # This function yields agent/team run events
-                if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
-                    item, tuple(get_args(TeamRunOutputEvent))
-                ):
-                    # We only capture content events
-                    if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
-                        if item.content is not None and isinstance(item.content, BaseModel):
-                            function_call_output += item.content.model_dump_json()
-                        else:
-                            # Capture output
-                            function_call_output += item.content or ""
+            try:
+                for item in function_execution_result.result:
+                    # This function yields agent/team/workflow run events
+                    if (
+                        isinstance(item, tuple(get_args(RunOutputEvent)))
+                        or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
+                        or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
+                    ):
+                        # We only capture content events for output accumulation
+                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                            if item.content is not None and isinstance(item.content, BaseModel):
+                                function_call_output += item.content.model_dump_json()
+                            else:
+                                # Capture output
+                                function_call_output += item.content or ""
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=item.content)
+                            if function_call.function.show_result and item.content is not None:
+                                yield ModelResponse(content=item.content)
                         if isinstance(item, CustomEvent):
                             function_call_output += str(item)
-                    # Yield the event itself to bubble it up
-                    yield item
+                        # For WorkflowCompletedEvent, extract content for final output
+                        from agno.run.workflow import WorkflowCompletedEvent
-                else:
-                    function_call_output += str(item)
-                    if function_call.function.show_result:
-                        yield ModelResponse(content=str(item))
+                        if isinstance(item, WorkflowCompletedEvent):
+                            if item.content is not None:
+                                if isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    function_call_output += str(item.content)
+                        # Yield the event itself to bubble it up
+                        yield item
+                    else:
+                        function_call_output += str(item)
+                        if function_call.function.show_result and item is not None:
+                            yield ModelResponse(content=str(item))
+            except Exception as e:
+                log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                function_call.error = str(e)
+                function_call_success = False
         else:
             from agno.tools.function import ToolResult
@@ -1248,10 +1567,12 @@ class Model(ABC):
                     function_execution_result.videos = tool_result.videos
                 if tool_result.audios:
                     function_execution_result.audios = tool_result.audios
+                if tool_result.files:
+                    function_execution_result.files = tool_result.files
             else:
                 function_call_output = str(function_execution_result.result) if function_execution_result.result else ""
-            if function_call.function.show_result:
+            if function_call.function.show_result and function_call_output is not None:
                 yield ModelResponse(content=function_call_output)
         # Create and yield function call result
@@ -1263,7 +1584,7 @@ class Model(ABC):
             function_execution_result=function_execution_result,
         )
         yield ModelResponse(
-            content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
+            content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
                 ToolExecution(
                     tool_call_id=function_call_result.tool_call_id,
@@ -1281,6 +1602,7 @@ class Model(ABC):
             images=function_execution_result.images,
             videos=function_execution_result.videos,
             audios=function_execution_result.audios,
+            files=function_execution_result.files,
         )
         # Add function call to function call results
@@ -1399,6 +1721,7 @@ class Model(ABC):
         function_call_timer = Timer()
         function_call_timer.start()
         success: Union[bool, AgentRunException] = False
+        result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
         try:
             if (
@@ -1564,57 +1887,41 @@ class Model(ABC):
             *(self.arun_function_call(fc) for fc in function_calls_to_run), return_exceptions=True
         )
-        # Process results
+        # Separate async generators from other results for concurrent processing
+        async_generator_results: List[Any] = []
+        non_async_generator_results: List[Any] = []
         for result in results:
-            # If result is an exception, skip processing it
             if isinstance(result, BaseException):
-                log_error(f"Error during function call: {result}")
-                raise result
+                non_async_generator_results.append(result)
+                continue
-            # Unpack result
             function_call_success, function_call_timer, function_call, function_execution_result = result
-            updated_session_state = function_execution_result.updated_session_state
-            # Handle AgentRunException
-            if isinstance(function_call_success, AgentRunException):
-                a_exc = function_call_success
-                # Update additional messages from function call
-                _handle_agent_exception(a_exc, additional_input)
-                # Set function call success to False if an exception occurred
-                function_call_success = False
+            # Check if this result contains an async generator
+            if isinstance(function_call.result, (AsyncGeneratorType, AsyncIterator)):
+                async_generator_results.append(result)
+            else:
+                non_async_generator_results.append(result)
-            # Process function call output
-            function_call_output: str = ""
-            if isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
-                for item in function_call.result:
-                    # This function yields agent/team run events
-                    if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
-                        item, tuple(get_args(TeamRunOutputEvent))
-                    ):
-                        # We only capture content events
-                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
-                            if item.content is not None and isinstance(item.content, BaseModel):
-                                function_call_output += item.content.model_dump_json()
-                            else:
-                                # Capture output
-                                function_call_output += item.content or ""
+        # Process async generators with real-time event streaming using asyncio.Queue
+        async_generator_outputs: Dict[int, Tuple[Any, str, Optional[BaseException]]] = {}
+        event_queue: asyncio.Queue = asyncio.Queue()
+        active_generators_count: int = len(async_generator_results)
-                            if function_call.function.show_result:
-                                yield ModelResponse(content=item.content)
-                                continue
+        # Create background tasks for each async generator
+        async def process_async_generator(result, generator_id):
+            function_call_success, function_call_timer, function_call, function_execution_result = result
+            function_call_output = ""
-                        # Yield the event itself to bubble it up
-                        yield item
-                    else:
-                        function_call_output += str(item)
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=str(item))
-            elif isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+            try:
                 async for item in function_call.result:
-                    # This function yields agent/team run events
-                    if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
-                        item, tuple(get_args(TeamRunOutputEvent))
+                    # This function yields agent/team/workflow run events
+                    if isinstance(
+                        item,
+                        tuple(get_args(RunOutputEvent))
+                        + tuple(get_args(TeamRunOutputEvent))
+                        + tuple(get_args(WorkflowRunOutputEvent)),
                     ):
                         # We only capture content events
                         if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -1624,21 +1931,143 @@ class Model(ABC):
                                 # Capture output
                                 function_call_output += item.content or ""
-                            if function_call.function.show_result:
-                                yield ModelResponse(content=item.content)
+                            if function_call.function.show_result and item.content is not None:
+                                await event_queue.put(ModelResponse(content=item.content))
                                 continue
-                            if isinstance(item, CustomEvent):
-                                function_call_output += str(item)
+                        if isinstance(item, CustomEvent):
+                            function_call_output += str(item)
-                        # Yield the event itself to bubble it up
-                        yield item
+                            # For WorkflowCompletedEvent, extract content for final output
+                            from agno.run.workflow import WorkflowCompletedEvent
+                            if isinstance(item, WorkflowCompletedEvent):
+                                if item.content is not None:
+                                    if isinstance(item.content, BaseModel):
+                                        function_call_output += item.content.model_dump_json()
+                                    else:
+                                        function_call_output += str(item.content)
+                        # Put the event into the queue to be yielded
+                        await event_queue.put(item)
                     # Yield custom events emitted by the tool
                     else:
                         function_call_output += str(item)
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=str(item))
+                        if function_call.function.show_result and item is not None:
+                            await event_queue.put(ModelResponse(content=str(item)))
+                # Store the final output for this generator
+                async_generator_outputs[generator_id] = (result, function_call_output, None)
+            except Exception as e:
+                # Store the exception
+                async_generator_outputs[generator_id] = (result, "", e)
+            # Signal that this generator is done
+            await event_queue.put(("GENERATOR_DONE", generator_id))
+        # Start all async generator tasks
+        generator_tasks = []
+        for i, result in enumerate(async_generator_results):
+            task = asyncio.create_task(process_async_generator(result, i))
+            generator_tasks.append(task)
+        # Stream events from the queue as they arrive
+        completed_generators_count = 0
+        while completed_generators_count < active_generators_count:
+            try:
+                event = await event_queue.get()
+                # Check if this is a completion signal
+                if isinstance(event, tuple) and event[0] == "GENERATOR_DONE":
+                    completed_generators_count += 1
+                    continue
+                # Yield the actual event
+                yield event
+            except Exception as e:
+                log_error(f"Error processing async generator event: {e}")
+                break
+        # Now process all results (non-async generators and completed async generators)
+        for i, original_result in enumerate(results):
+            # If result is an exception, skip processing it
+            if isinstance(original_result, BaseException):
+                log_error(f"Error during function call: {original_result}")
+                raise original_result
+            # Unpack result
+            function_call_success, function_call_timer, function_call, function_execution_result = original_result
+            # Check if this was an async generator that was already processed
+            async_function_call_output = None
+            if isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+                # Find the corresponding processed result
+                async_gen_index = 0
+                for j, result in enumerate(results[: i + 1]):
+                    if not isinstance(result, BaseException):
+                        _, _, fc, _ = result
+                        if isinstance(fc.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+                            if j == i:  # This is our async generator
+                                if async_gen_index in async_generator_outputs:
+                                    _, async_function_call_output, error = async_generator_outputs[async_gen_index]
+                                    if error:
+                                        log_error(f"Error in async generator: {error}")
+                                        raise error
+                                break
+                            async_gen_index += 1
+            updated_session_state = function_execution_result.updated_session_state
+            # Handle AgentRunException
+            if isinstance(function_call_success, AgentRunException):
+                a_exc = function_call_success
+                # Update additional messages from function call
+                _handle_agent_exception(a_exc, additional_input)
+                # Set function call success to False if an exception occurred
+                function_call_success = False
+            # Process function call output
+            function_call_output: str = ""
+            # Check if this was an async generator that was already processed
+            if async_function_call_output is not None:
+                function_call_output = async_function_call_output
+                # Events from async generators were already yielded in real-time above
+            elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
+                try:
+                    for item in function_call.result:
+                        # This function yields agent/team/workflow run events
+                        if isinstance(
+                            item,
+                            tuple(get_args(RunOutputEvent))
+                            + tuple(get_args(TeamRunOutputEvent))
+                            + tuple(get_args(WorkflowRunOutputEvent)),
+                        ):
+                            # We only capture content events
+                            if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                                if item.content is not None and isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    # Capture output
+                                    function_call_output += item.content or ""
+                                if function_call.function.show_result and item.content is not None:
+                                    yield ModelResponse(content=item.content)
+                                    continue
+                            # Yield the event itself to bubble it up
+                            yield item
+                        else:
+                            function_call_output += str(item)
+                            if function_call.function.show_result and item is not None:
+                                yield ModelResponse(content=str(item))
+                except Exception as e:
+                    log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                    function_call.error = str(e)
+                    function_call_success = False
             else:
                 from agno.tools.function import ToolResult
@@ -1652,10 +2081,12 @@ class Model(ABC):
                         function_execution_result.videos = tool_result.videos
                     if tool_result.audios:
                         function_execution_result.audios = tool_result.audios
+                    if tool_result.files:
+                        function_execution_result.files = tool_result.files
                 else:
                     function_call_output = str(function_call.result)
-                if function_call.function.show_result:
+                if function_call.function.show_result and function_call_output is not None:
                     yield ModelResponse(content=function_call_output)
             # Create and yield function call result
@@ -1667,7 +2098,7 @@ class Model(ABC):
                 function_execution_result=function_execution_result,
             )
             yield ModelResponse(
-                content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
+                content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
                 tool_executions=[
                     ToolExecution(
                         tool_call_id=function_call_result.tool_call_id,
@@ -1684,6 +2115,7 @@ class Model(ABC):
                 images=function_execution_result.images,
                 videos=function_execution_result.videos,
                 audios=function_execution_result.audios,
+                files=function_execution_result.files,
             )
             # Add function call result to function call results
@@ -1709,7 +2141,7 @@ class Model(ABC):
             model_response.tool_calls = []
         function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-            assistant_message, messages, functions
+            assistant_message=assistant_message, messages=messages, functions=functions
         )
         return function_calls_to_run
@@ -1722,7 +2154,9 @@ class Model(ABC):
         if len(function_call_results) > 0:
             messages.extend(function_call_results)
-    def _handle_function_call_media(self, messages: List[Message], function_call_results: List[Message]) -> None:
+    def _handle_function_call_media(
+        self, messages: List[Message], function_call_results: List[Message], send_media_to_model: bool = True
+    ) -> None:
         """
         Handle media artifacts from function calls by adding follow-up user messages for generated media if needed.
         """
@@ -1733,6 +2167,7 @@ class Model(ABC):
         all_images: List[Image] = []
         all_videos: List[Video] = []
         all_audio: List[Audio] = []
+        all_files: List[File] = []
         for result_message in function_call_results:
             if result_message.images:
@@ -1748,15 +2183,21 @@ class Model(ABC):
                 all_audio.extend(result_message.audio)
                 result_message.audio = None
-        # If we have media artifacts, add a follow-up "user" message instead of a "tool"
-        # message with the media artifacts which throws error for some models
-        if all_images or all_videos or all_audio:
+            if result_message.files:
+                all_files.extend(result_message.files)
+                result_message.files = None
+        # Only add media message if we should send media to model
+        if send_media_to_model and (all_images or all_videos or all_audio or all_files):
+            # If we have media artifacts, add a follow-up "user" message instead of a "tool"
+            # message with the media artifacts which throws error for some models
             media_message = Message(
                 role="user",
                 content="Take note of the following content",
                 images=all_images if all_images else None,
                 videos=all_videos if all_videos else None,
                 audio=all_audio if all_audio else None,
+                files=all_files if all_files else None,
             )
             messages.append(media_message)
@@ -1782,10 +2223,14 @@ class Model(ABC):
         new_model = cls.__new__(cls)
         memo[id(self)] = new_model
-        # Deep copy all attributes
+        # Deep copy all attributes except client objects
         for k, v in self.__dict__.items():
             if k in {"response_format", "_tools", "_functions"}:
                 continue
+            # Skip client objects
+            if k in {"client", "async_client", "http_client", "mistral_client", "model_client"}:
+                setattr(new_model, k, None)
+                continue
             try:
                 setattr(new_model, k, deepcopy(v, memo))
             except Exception:

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl