agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registries.
Files changed (314)
  1. agno/agent/agent.py +6015 -2823
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +594 -186
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +2 -8
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +72 -0
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +999 -519
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +103 -31
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +139 -0
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +59 -5
  142. agno/models/openai/chat.py +69 -29
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +77 -1
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -178
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +248 -94
  205. agno/run/base.py +44 -5
  206. agno/run/team.py +238 -97
  207. agno/run/workflow.py +144 -33
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1610
  213. agno/tools/dalle.py +2 -4
  214. agno/tools/decorator.py +4 -2
  215. agno/tools/duckduckgo.py +15 -11
  216. agno/tools/e2b.py +14 -7
  217. agno/tools/eleven_labs.py +23 -25
  218. agno/tools/exa.py +21 -16
  219. agno/tools/file.py +153 -23
  220. agno/tools/file_generation.py +350 -0
  221. agno/tools/firecrawl.py +4 -4
  222. agno/tools/function.py +250 -30
  223. agno/tools/gmail.py +238 -14
  224. agno/tools/google_drive.py +270 -0
  225. agno/tools/googlecalendar.py +36 -8
  226. agno/tools/googlesheets.py +20 -5
  227. agno/tools/jira.py +20 -0
  228. agno/tools/knowledge.py +3 -3
  229. agno/tools/mcp/__init__.py +10 -0
  230. agno/tools/mcp/mcp.py +331 -0
  231. agno/tools/mcp/multi_mcp.py +347 -0
  232. agno/tools/mcp/params.py +24 -0
  233. agno/tools/mcp_toolbox.py +284 -0
  234. agno/tools/mem0.py +11 -17
  235. agno/tools/memori.py +1 -53
  236. agno/tools/memory.py +419 -0
  237. agno/tools/models/nebius.py +5 -5
  238. agno/tools/models_labs.py +20 -10
  239. agno/tools/notion.py +204 -0
  240. agno/tools/parallel.py +314 -0
  241. agno/tools/scrapegraph.py +58 -31
  242. agno/tools/searxng.py +2 -2
  243. agno/tools/serper.py +2 -2
  244. agno/tools/slack.py +18 -3
  245. agno/tools/spider.py +2 -2
  246. agno/tools/tavily.py +146 -0
  247. agno/tools/whatsapp.py +1 -1
  248. agno/tools/workflow.py +278 -0
  249. agno/tools/yfinance.py +12 -11
  250. agno/utils/agent.py +820 -0
  251. agno/utils/audio.py +27 -0
  252. agno/utils/common.py +90 -1
  253. agno/utils/events.py +217 -2
  254. agno/utils/gemini.py +180 -22
  255. agno/utils/hooks.py +57 -0
  256. agno/utils/http.py +111 -0
  257. agno/utils/knowledge.py +12 -5
  258. agno/utils/log.py +1 -0
  259. agno/utils/mcp.py +92 -2
  260. agno/utils/media.py +188 -10
  261. agno/utils/merge_dict.py +22 -1
  262. agno/utils/message.py +60 -0
  263. agno/utils/models/claude.py +40 -11
  264. agno/utils/print_response/agent.py +105 -21
  265. agno/utils/print_response/team.py +103 -38
  266. agno/utils/print_response/workflow.py +251 -34
  267. agno/utils/reasoning.py +22 -1
  268. agno/utils/serialize.py +32 -0
  269. agno/utils/streamlit.py +16 -10
  270. agno/utils/string.py +41 -0
  271. agno/utils/team.py +98 -9
  272. agno/utils/tools.py +1 -1
  273. agno/vectordb/base.py +23 -4
  274. agno/vectordb/cassandra/cassandra.py +65 -9
  275. agno/vectordb/chroma/chromadb.py +182 -38
  276. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  277. agno/vectordb/couchbase/couchbase.py +105 -10
  278. agno/vectordb/lancedb/lance_db.py +124 -133
  279. agno/vectordb/langchaindb/langchaindb.py +25 -7
  280. agno/vectordb/lightrag/lightrag.py +17 -3
  281. agno/vectordb/llamaindex/__init__.py +3 -0
  282. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  283. agno/vectordb/milvus/milvus.py +126 -9
  284. agno/vectordb/mongodb/__init__.py +7 -1
  285. agno/vectordb/mongodb/mongodb.py +112 -7
  286. agno/vectordb/pgvector/pgvector.py +142 -21
  287. agno/vectordb/pineconedb/pineconedb.py +80 -8
  288. agno/vectordb/qdrant/qdrant.py +125 -39
  289. agno/vectordb/redis/__init__.py +9 -0
  290. agno/vectordb/redis/redisdb.py +694 -0
  291. agno/vectordb/singlestore/singlestore.py +111 -25
  292. agno/vectordb/surrealdb/surrealdb.py +31 -5
  293. agno/vectordb/upstashdb/upstashdb.py +76 -8
  294. agno/vectordb/weaviate/weaviate.py +86 -15
  295. agno/workflow/__init__.py +2 -0
  296. agno/workflow/agent.py +299 -0
  297. agno/workflow/condition.py +112 -18
  298. agno/workflow/loop.py +69 -10
  299. agno/workflow/parallel.py +266 -118
  300. agno/workflow/router.py +110 -17
  301. agno/workflow/step.py +638 -129
  302. agno/workflow/steps.py +65 -6
  303. agno/workflow/types.py +61 -23
  304. agno/workflow/workflow.py +2085 -272
  305. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
  306. agno-2.3.0.dist-info/RECORD +577 -0
  307. agno/knowledge/reader/url_reader.py +0 -128
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -610
  310. agno/utils/models/aws_claude.py +0 -170
  311. agno-2.0.1.dist-info/RECORD +0 -515
  312. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  313. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
agno/models/base.py CHANGED
@@ -1,7 +1,11 @@
 import asyncio
 import collections.abc
+import json
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
+from hashlib import md5
+from pathlib import Path
+from time import time
 from types import AsyncGeneratorType, GeneratorType
 from typing import (
     Any,
@@ -21,15 +25,16 @@ from uuid import uuid4
 from pydantic import BaseModel

 from agno.exceptions import AgentRunException
-from agno.media import Audio, Image, Video
+from agno.media import Audio, File, Image, Video
 from agno.models.message import Citations, Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
 from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
 from agno.run.team import RunContentEvent as TeamRunContentEvent
-from agno.run.team import TeamRunOutputEvent
+from agno.run.team import TeamRunOutput, TeamRunOutputEvent
+from agno.run.workflow import WorkflowRunOutputEvent
 from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
-from agno.utils.log import log_debug, log_error, log_warning
+from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.utils.timer import Timer
 from agno.utils.tools import get_function_call_for_tool_call, get_function_call_for_tool_execution

@@ -46,6 +51,9 @@ class MessageData:
     response_audio: Optional[Audio] = None
     response_image: Optional[Image] = None
     response_video: Optional[Video] = None
+    response_file: Optional[File] = None
+
+    response_metrics: Optional[Metrics] = None

     # Data from the provider that we might need on subsequent messages
     response_provider_data: Optional[Dict[str, Any]] = None
@@ -132,6 +140,11 @@ class Model(ABC):
     # The role of the assistant message.
     assistant_message_role: str = "assistant"

+    # Cache model responses to avoid redundant API calls during development
+    cache_response: bool = False
+    cache_ttl: Optional[int] = None
+    cache_dir: Optional[str] = None
+
     def __post_init__(self):
         if self.provider is None and self.name is not None:
             self.provider = f"{self.name} ({self.id})"
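Note: the three cache_* fields above are plain dataclass fields on the abstract Model, so every provider subclass inherits them. A minimal usage sketch (the model class, id, and constructor pass-through are illustrative assumptions, not taken from this diff):

    # Hypothetical: enable the new disk-backed response cache on a model.
    from agno.models.openai import OpenAIChat

    model = OpenAIChat(
        id="gpt-4o",                  # assumed model id
        cache_response=True,          # replay identical requests from disk
        cache_ttl=3600,               # entries expire after an hour; None = never
        cache_dir="/tmp/agno-cache",  # default: ~/.agno/cache/model_responses
    )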
@@ -144,6 +157,100 @@ class Model(ABC):
     def get_provider(self) -> str:
         return self.provider or self.name or self.__class__.__name__

+    def _get_model_cache_key(self, messages: List[Message], stream: bool, **kwargs: Any) -> str:
+        """Generate a cache key based on model messages and core parameters."""
+        message_data = []
+        for msg in messages:
+            msg_dict = {
+                "role": msg.role,
+                "content": msg.content,
+            }
+            message_data.append(msg_dict)
+
+        # Include tools parameter in cache key
+        has_tools = bool(kwargs.get("tools"))
+
+        cache_data = {
+            "model_id": self.id,
+            "messages": message_data,
+            "has_tools": has_tools,
+            "response_format": kwargs.get("response_format"),
+            "stream": stream,
+        }
+
+        cache_str = json.dumps(cache_data, sort_keys=True)
+        return md5(cache_str.encode()).hexdigest()
+
+    def _get_model_cache_file_path(self, cache_key: str) -> Path:
+        """Get the file path for a cache key."""
+        if self.cache_dir:
+            cache_dir = Path(self.cache_dir)
+        else:
+            cache_dir = Path.home() / ".agno" / "cache" / "model_responses"
+
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        return cache_dir / f"{cache_key}.json"
+
+    def _get_cached_model_response(self, cache_key: str) -> Optional[Dict[str, Any]]:
+        """Retrieve a cached response if it exists and is not expired."""
+        cache_file = self._get_model_cache_file_path(cache_key)
+
+        if not cache_file.exists():
+            return None
+
+        try:
+            with open(cache_file, "r") as f:
+                cached_data = json.load(f)
+
+            # Check TTL if set (None means no expiration)
+            if self.cache_ttl is not None:
+                if time() - cached_data["timestamp"] > self.cache_ttl:
+                    return None
+
+            return cached_data
+        except Exception:
+            return None
+
+    def _save_model_response_to_cache(self, cache_key: str, result: ModelResponse, is_streaming: bool = False) -> None:
+        """Save a model response to cache."""
+        try:
+            cache_file = self._get_model_cache_file_path(cache_key)
+
+            cache_data = {
+                "timestamp": int(time()),
+                "is_streaming": is_streaming,
+                "result": result.to_dict(),
+            }
+            with open(cache_file, "w") as f:
+                json.dump(cache_data, f)
+        except Exception:
+            pass
+
+    def _save_streaming_responses_to_cache(self, cache_key: str, responses: List[ModelResponse]) -> None:
+        """Save streaming responses to cache."""
+        cache_file = self._get_model_cache_file_path(cache_key)
+
+        cache_data = {
+            "timestamp": int(time()),
+            "is_streaming": True,
+            "streaming_responses": [r.to_dict() for r in responses],
+        }
+
+        try:
+            with open(cache_file, "w") as f:
+                json.dump(cache_data, f)
+        except Exception:
+            pass
+
+    def _model_response_from_cache(self, cached_data: Dict[str, Any]) -> ModelResponse:
+        """Reconstruct a ModelResponse from cached data."""
+        return ModelResponse.from_dict(cached_data["result"])
+
+    def _streaming_responses_from_cache(self, cached_data: list) -> Iterator[ModelResponse]:
+        """Reconstruct streaming responses from cached data."""
+        for cached_response in cached_data:
+            yield ModelResponse.from_dict(cached_response)
+
     @abstractmethod
     def invoke(self, *args, **kwargs) -> ModelResponse:
         pass
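Together these helpers form a content-addressed file cache: the key is an md5 digest of a sorted-key JSON payload (model id, message roles and contents, tool presence, response format, stream flag), and each entry is a JSON file named after the key. A standalone sketch of the key derivation, mirroring _get_model_cache_key and _get_model_cache_file_path above:

    import json
    from hashlib import md5
    from pathlib import Path

    # Same fields _get_model_cache_key hashes, with example values:
    payload = {
        "has_tools": False,
        "messages": [{"role": "user", "content": "hello"}],
        "model_id": "gpt-4o",
        "response_format": None,
        "stream": False,
    }
    cache_key = md5(json.dumps(payload, sort_keys=True).encode()).hexdigest()
    # Default entry location when cache_dir is unset:
    print(Path.home() / ".agno" / "cache" / "model_responses" / f"{cache_key}.json")

Only message roles and contents enter the key, so attached media, or edits to an individual tool's schema (as opposed to adding or removing tools entirely), do not invalidate an entry.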
@@ -186,278 +293,377 @@ class Model(ABC):
         """
         pass

+    def _format_tools(self, tools: Optional[List[Union[Function, dict]]]) -> List[Dict[str, Any]]:
+        _tool_dicts = []
+        for tool in tools or []:
+            if isinstance(tool, Function):
+                _tool_dicts.append({"type": "function", "function": tool.to_dict()})
+            else:
+                # If a dict is passed, it is a builtin tool
+                _tool_dicts.append(tool)
+        return _tool_dicts
+
     def response(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
         Generate a response from the model.
+
+        Args:
+            messages: List of messages to send to the model
+            response_format: Response format to use
+            tools: List of tools to use. This includes the original Function objects and dicts for built-in tools.
+            tool_choice: Tool choice to use
+            tool_call_limit: Tool call limit
+            run_response: Run response to use
+            send_media_to_model: Whether to send media to the model
         """
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)

-        log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-
-        _log_messages(messages)
-        model_response = ModelResponse()
-
-        function_call_count = 0
-
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            self._process_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            )
+                if cached_data:
+                    log_info("Cache hit for model response")
+                    return self._model_response_from_cache(cached_data)

-            # Add assistant message to messages
-            messages.append(assistant_message)
+            log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")

-            # Log response and metrics
-            assistant_message.log(metrics=True)
+            _log_messages(messages)
+            model_response = ModelResponse()

-            # Handle tool calls if present
-            if assistant_message.tool_calls:
-                # Prepare function calls
-                function_calls_to_run = self._prepare_function_calls(
-                    assistant_message=assistant_message,
+            function_call_count = 0
+
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+
+            while True:
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                self._process_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-                    functions=functions,
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
                 )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                for function_call_response in self.run_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if isinstance(function_call_response, ModelResponse):
-                        # The session state is updated by the function call
-                        if function_call_response.updated_session_state is not None:
-                            model_response.updated_session_state = function_call_response.updated_session_state
-
-                        # Media artifacts are generated by the function call
-                        if function_call_response.images is not None:
-                            if model_response.images is None:
-                                model_response.images = []
-                            model_response.images.extend(function_call_response.images)
-
-                        if function_call_response.audios is not None:
-                            if model_response.audios is None:
-                                model_response.audios = []
-                            model_response.audios.extend(function_call_response.audios)
-
-                        if function_call_response.videos is not None:
-                            if model_response.videos is None:
-                                model_response.videos = []
-                            model_response.videos.extend(function_call_response.videos)
-
-                        if (
-                            function_call_response.event
-                            in [
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+
+                # Log response and metrics
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-                                ModelResponseEvent.tool_call_paused.value,
-                            ]
-                            and function_call_response.tool_executions is not None
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-
-                        elif function_call_response.event not in [
-                            ModelResponseEvent.tool_call_started.value,
-                            ModelResponseEvent.tool_call_completed.value,
-                        ]:
-                            if function_call_response.content:
-                                model_response.content += function_call_response.content  # type: ignore
-
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore

-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    self.format_function_call_results(
+                        messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                    )

-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )

-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)

-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break

-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break

-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break

-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
+
+                    # Continue loop to get next response
+                    continue

-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
+
+            log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
+
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:  # type: ignore
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )

-        log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
         return model_response

     async def aresponse(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        send_media_to_model: bool = True,
     ) -> ModelResponse:
         """
         Generate an asynchronous response from the model.
         """
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)

-        log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
-        model_response = ModelResponse()
-
-        function_call_count = 0
-
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            await self._aprocess_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-            )
+                if cached_data:
+                    log_info("Cache hit for model response")
+                    return self._model_response_from_cache(cached_data)

-            # Add assistant message to messages
-            messages.append(assistant_message)
+            log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
+            model_response = ModelResponse()

-            # Log response and metrics
-            assistant_message.log(metrics=True)
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}

-            # Handle tool calls if present
-            if assistant_message.tool_calls:
-                # Prepare function calls
-                function_calls_to_run = self._prepare_function_calls(
-                    assistant_message=assistant_message,
+            function_call_count = 0
+
+            while True:
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                await self._aprocess_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-                    functions=functions,
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
                 )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                async for function_call_response in self.arun_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if isinstance(function_call_response, ModelResponse):
-                        # The session state is updated by the function call
-                        if function_call_response.updated_session_state is not None:
-                            model_response.updated_session_state = function_call_response.updated_session_state
-
-                        # Media artifacts are generated by the function call
-                        if function_call_response.images is not None:
-                            if model_response.images is None:
-                                model_response.images = []
-                            model_response.images.extend(function_call_response.images)
-
-                        if function_call_response.audios is not None:
-                            if model_response.audios is None:
-                                model_response.audios = []
-                            model_response.audios.extend(function_call_response.audios)
-
-                        if function_call_response.videos is not None:
-                            if model_response.videos is None:
-                                model_response.videos = []
-                            model_response.videos.extend(function_call_response.videos)
-
-                        if (
-                            function_call_response.event
-                            in [
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+
+                # Log response and metrics
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-                                ModelResponseEvent.tool_call_paused.value,
-                            ]
-                            and function_call_response.tool_executions is not None
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-                        elif function_call_response.event not in [
-                            ModelResponseEvent.tool_call_started.value,
-                            ModelResponseEvent.tool_call_completed.value,
-                        ]:
-                            if function_call_response.content:
-                                model_response.content += function_call_response.content  # type: ignore
-
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore

-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)

-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    # Format and add results to messages
+                    self.format_function_call_results(
+                        messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                    )

-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )

-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)

-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break

-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break

-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
+
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
+
+                    # Continue loop to get next response
+                    continue
+
+                # No tool calls or finished processing them
+                break
+
+            log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")

-            # No tool calls or finished processing them
-            break
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )

-        log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
         return model_response

     def _process_model_response(
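The visible API change in response() and aresponse(): the separate functions dict is gone, and tools now accepts Function objects alongside raw dicts for provider built-in tools; _format_tools and the _functions comprehension split them back apart internally. A hedged calling sketch (Function.from_callable is assumed to be available in agno.tools.function; verify against the version you run):

    from agno.models.message import Message
    from agno.tools.function import Function

    def get_weather(city: str) -> str:
        # Toy tool used only for this sketch.
        return f"Sunny in {city}"

    # One list now carries both kinds of tools:
    model_response = model.response(
        messages=[Message(role="user", content="What is the weather in Paris?")],
        tools=[Function.from_callable(get_weather)],
        # ...and/or raw dicts for provider built-in tools.
    )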
@@ -468,7 +674,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -512,6 +718,8 @@ class Model(ABC):
             if model_response.extra is None:
                 model_response.extra = {}
             model_response.extra.update(provider_response.extra)
+        if provider_response.provider_data is not None:
+            model_response.provider_data = provider_response.provider_data

     async def _aprocess_model_response(
         self,
@@ -521,7 +729,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
    ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -565,6 +773,8 @@ class Model(ABC):
             if model_response.extra is None:
                 model_response.extra = {}
             model_response.extra.update(provider_response.extra)
+        if provider_response.provider_data is not None:
+            model_response.provider_data = provider_response.provider_data

     def _populate_assistant_message(
         self,
@@ -581,7 +791,6 @@ class Model(ABC):
         Returns:
             Message: The populated assistant message
         """
-        # Add role to assistant message
         if provider_response.role is not None:
             assistant_message.role = provider_response.role

@@ -607,6 +816,10 @@ class Model(ABC):
             if provider_response.videos:
                 assistant_message.video_output = provider_response.videos[-1]  # Taking last (most recent) video

+        if provider_response.files is not None:
+            if provider_response.files:
+                assistant_message.file_output = provider_response.files[-1]  # Taking last (most recent) file
+
         if provider_response.audios is not None:
             if provider_response.audios:
                 assistant_message.audio_output = provider_response.audios[-1]  # Taking last (most recent) audio
@@ -641,7 +854,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> Iterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -655,142 +868,181 @@ class Model(ABC):
655
868
  tool_choice=tool_choice or self._tool_choice,
656
869
  run_response=run_response,
657
870
  ):
658
- yield from self._populate_stream_data_and_assistant_message(
871
+ for model_response_delta in self._populate_stream_data(
659
872
  stream_data=stream_data,
660
- assistant_message=assistant_message,
661
873
  model_response_delta=response_delta,
662
- )
874
+ ):
875
+ yield model_response_delta
663
876
 
664
- # Add final metrics to assistant message
665
- self._populate_assistant_message(assistant_message=assistant_message, provider_response=response_delta)
877
+ # Populate assistant message from stream data after the stream ends
878
+ self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
666
879
 
667
880
  def response_stream(
668
881
  self,
669
882
  messages: List[Message],
670
883
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
671
- tools: Optional[List[Dict[str, Any]]] = None,
672
- functions: Optional[Dict[str, Function]] = None,
884
+ tools: Optional[List[Union[Function, dict]]] = None,
673
885
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
674
886
  tool_call_limit: Optional[int] = None,
675
887
  stream_model_response: bool = True,
676
- run_response: Optional[RunOutput] = None,
888
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
889
+ send_media_to_model: bool = True,
677
890
  ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
678
891
  """
679
892
  Generate a streaming response from the model.
680
893
  """
894
+ try:
895
+ # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
896
+ cache_key = None
897
+ if self.cache_response:
898
+ cache_key = self._get_model_cache_key(
899
+ messages, stream=True, response_format=response_format, tools=tools
900
+ )
901
+ cached_data = self._get_cached_model_response(cache_key)
681
902
 
682
- log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
683
- log_debug(f"Model: {self.id}", center=True, symbol="-")
684
- _log_messages(messages)
903
+ if cached_data:
904
+ log_info("Cache hit for streaming model response")
905
+ # Yield cached responses
906
+ for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
907
+ yield response
908
+ return
685
909
 
686
- function_call_count = 0
910
+ log_info("Cache miss for streaming model response")
687
911
 
688
- while True:
689
- assistant_message = Message(role=self.assistant_message_role)
690
- # Create assistant message and stream data
691
- stream_data = MessageData()
692
- if stream_model_response:
693
- # Generate response
694
- yield from self.process_response_stream(
695
- messages=messages,
696
- assistant_message=assistant_message,
697
- stream_data=stream_data,
698
- response_format=response_format,
699
- tools=tools,
700
- tool_choice=tool_choice or self._tool_choice,
701
- run_response=run_response,
702
- )
912
+ # Track streaming responses for caching
913
+ streaming_responses: List[ModelResponse] = []
703
914
 
704
- # Populate assistant message from stream data
705
- if stream_data.response_content:
706
- assistant_message.content = stream_data.response_content
707
- if stream_data.response_reasoning_content:
708
- assistant_message.reasoning_content = stream_data.response_reasoning_content
709
- if stream_data.response_redacted_reasoning_content:
710
- assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
711
- if stream_data.response_provider_data:
712
- assistant_message.provider_data = stream_data.response_provider_data
713
- if stream_data.response_citations:
714
- assistant_message.citations = stream_data.response_citations
715
- if stream_data.response_audio:
716
- assistant_message.audio_output = stream_data.response_audio
717
- if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
718
- assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
915
+ log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
916
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
917
+ _log_messages(messages)
719
918
 
720
- else:
721
- model_response = ModelResponse()
722
- self._process_model_response(
723
- messages=messages,
724
- assistant_message=assistant_message,
725
- model_response=model_response,
726
- response_format=response_format,
727
- tools=tools,
728
- tool_choice=tool_choice or self._tool_choice,
729
- )
730
- yield model_response
919
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
920
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
731
921
 
732
- # Add assistant message to messages
733
- messages.append(assistant_message)
734
- assistant_message.log(metrics=True)
922
+ function_call_count = 0
923
+
924
+ while True:
925
+ assistant_message = Message(role=self.assistant_message_role)
926
+ # Create assistant message and stream data
927
+ stream_data = MessageData()
928
+ model_response = ModelResponse()
929
+ if stream_model_response:
930
+ # Generate response
931
+ for response in self.process_response_stream(
932
+ messages=messages,
933
+ assistant_message=assistant_message,
934
+ stream_data=stream_data,
935
+ response_format=response_format,
936
+ tools=_tool_dicts,
937
+ tool_choice=tool_choice or self._tool_choice,
938
+ run_response=run_response,
939
+ ):
940
+ if self.cache_response and isinstance(response, ModelResponse):
941
+ streaming_responses.append(response)
942
+ yield response
735
943
 
736
- # Handle tool calls if present
737
- if assistant_message.tool_calls is not None:
738
- # Prepare function calls
739
- function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
740
- assistant_message, messages, functions
741
- )
742
- function_call_results: List[Message] = []
743
-
744
- # Execute function calls
745
- for function_call_response in self.run_function_calls(
746
- function_calls=function_calls_to_run,
747
- function_call_results=function_call_results,
748
- current_function_call_count=function_call_count,
749
- function_call_limit=tool_call_limit,
750
- ):
751
- yield function_call_response
752
-
753
- # Add a function call for each successful execution
754
- function_call_count += len(function_call_results)
755
-
756
- # Format and add results to messages
757
- if stream_data and stream_data.extra is not None:
758
- self.format_function_call_results(
759
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
-                    )
                else:
-                    self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    self._process_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                    )
+                    if self.cache_response:
+                        streaming_responses.append(model_response)
+                    yield model_response
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
+
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **stream_data.extra
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **model_response.extra
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results
+                        )
 
-                # Handle function call media
-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(fc.function.requires_confirmation for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(fc.function.external_execution for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(fc.function.requires_user_input for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # Continue loop to get next response
+                    continue
 
-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
 
-        log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+            log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
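Editor's note: the new streaming path above caches a full stream by deriving the cache key *before* any chunk is produced, recording every yielded response, and saving only once the stream completes. Below is a minimal, self-contained sketch of that pattern; `make_cache_key` and the `CACHE` dict are illustrative stand-ins, not agno's private `_get_model_cache_key` / `_save_streaming_responses_to_cache` APIs.

```python
# Sketch of the cache-key-first streaming cache pattern (stand-in names, not agno APIs).
import hashlib
import json
from typing import Dict, Iterator, List

CACHE: Dict[str, List[str]] = {}

def make_cache_key(messages: List[dict], stream: bool) -> str:
    # Derive the key from the request *before* streaming starts, so tool
    # messages appended during the run cannot change it mid-stream.
    payload = json.dumps({"messages": messages, "stream": stream}, sort_keys=True)
    return hashlib.sha256(payload.encode()).hexdigest()

def stream_with_cache(messages: List[dict]) -> Iterator[str]:
    key = make_cache_key(messages, stream=True)
    if key in CACHE:
        yield from CACHE[key]  # cache hit: replay the recorded chunks
        return
    chunks: List[str] = []
    for chunk in ("Hello", ", ", "world"):  # stand-in for the real model stream
        chunks.append(chunk)
        yield chunk
    CACHE[key] = chunks  # save only after the full stream completed
```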
     async def aprocess_response_stream(
         self,
@@ -800,7 +1052,7 @@ class Model(ABC):
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
     ) -> AsyncIterator[ModelResponse]:
         """
         Process a streaming response from the model.
@@ -813,153 +1065,229 @@ class Model(ABC):
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
         ):  # type: ignore
-            for model_response in self._populate_stream_data_and_assistant_message(
+            for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
-                assistant_message=assistant_message,
                 model_response_delta=response_delta,
             ):
-                yield model_response
+                yield model_response_delta
 
-        # Populate the assistant message
-        self._populate_assistant_message(assistant_message=assistant_message, provider_response=model_response)
+        # Populate assistant message from stream data after the stream ends
+        self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
 
     async def aresponse_stream(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
         stream_model_response: bool = True,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        send_media_to_model: bool = True,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate an asynchronous streaming response from the model.
         """
+        try:
+            # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+            cache_key = None
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=True, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-        log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
+                if cached_data:
+                    log_info("Cache hit for async streaming model response")
+                    # Yield cached responses
+                    for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+                        yield response
+                    return
 
-        function_call_count = 0
+                log_info("Cache miss for async streaming model response")
 
-        while True:
-            # Create assistant message and stream data
-            assistant_message = Message(role=self.assistant_message_role)
-            stream_data = MessageData()
-            if stream_model_response:
-                # Generate response
-                async for response in self.aprocess_response_stream(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    stream_data=stream_data,
-                    response_format=response_format,
-                    tools=tools,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
-                ):
-                    yield response
-
-                # Populate assistant message from stream data
-                if stream_data.response_content:
-                    assistant_message.content = stream_data.response_content
-                if stream_data.response_reasoning_content:
-                    assistant_message.reasoning_content = stream_data.response_reasoning_content
-                if stream_data.response_redacted_reasoning_content:
-                    assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
-                if stream_data.response_provider_data:
-                    assistant_message.provider_data = stream_data.response_provider_data
-                if stream_data.response_audio:
-                    assistant_message.audio_output = stream_data.response_audio
-                if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
-                    assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+            # Track streaming responses for caching
+            streaming_responses: List[ModelResponse] = []
 
-            else:
-                model_response = ModelResponse()
-                await self._aprocess_model_response(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    model_response=model_response,
-                    response_format=response_format,
-                    tools=tools,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
-                )
-                yield model_response
+            log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
 
-            # Add assistant message to messages
-            messages.append(assistant_message)
-            assistant_message.log(metrics=True)
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+
+            function_call_count = 0
+
+            while True:
+                # Create assistant message and stream data
+                assistant_message = Message(role=self.assistant_message_role)
+                stream_data = MessageData()
+                model_response = ModelResponse()
+                if stream_model_response:
+                    # Generate response
+                    async for model_response in self.aprocess_response_stream(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        stream_data=stream_data,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                    ):
+                        if self.cache_response and isinstance(model_response, ModelResponse):
+                            streaming_responses.append(model_response)
+                        yield model_response
 
-            # Handle tool calls if present
-            if assistant_message.tool_calls is not None:
-                # Prepare function calls
-                function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-                    assistant_message, messages, functions
-                )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                async for function_call_response in self.arun_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    yield function_call_response
-
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-
-                # Format and add results to messages
-                if stream_data and stream_data.extra is not None:
-                    self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
-                    )
                 else:
-                    self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    await self._aprocess_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                    )
+                    if self.cache_response:
+                        streaming_responses.append(model_response)
+                    yield model_response
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
 
-                # Handle function call media
-                if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
-                    self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **stream_data.extra
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages, function_call_results=function_call_results
+                        )
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(fc.function.requires_confirmation for fc in function_calls_to_run):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True)
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(fc.function.external_execution for fc in function_calls_to_run):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(fc.function.requires_user_input for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
+
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
+
+                    # Continue loop to get next response
+                    continue
 
-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
 
-        log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
+            log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
+
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
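Editor's note: both streaming paths now close the google-genai client in a `finally` block (`close()` in the sync path, `await client.aio.aclose()` in the async one), falling back to a warning when the installed SDK predates the close API. A minimal sketch of that defensive-close idiom, with a stand-in client object rather than google-genai:

```python
# Sketch of the defensive close idiom: call the newer close API if present,
# warn instead of crashing when the installed client is too old to have it.
import logging

log = logging.getLogger(__name__)

class OldClient:
    pass  # no close() method, like an outdated SDK

def close_client(client) -> None:
    try:
        client.close()  # newer SDKs expose an explicit close
    except AttributeError:
        log.warning("Client is outdated; upgrade it so resources can be released.")

close_client(OldClient())  # logs a warning instead of raising
```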
- def _populate_stream_data_and_assistant_message(
955
- self, stream_data: MessageData, assistant_message: Message, model_response_delta: ModelResponse
1246
+ def _populate_assistant_message_from_stream_data(
1247
+ self, assistant_message: Message, stream_data: MessageData
1248
+ ) -> None:
1249
+ """
1250
+ Populate an assistant message with the stream data.
1251
+ """
1252
+ if stream_data.response_role is not None:
1253
+ assistant_message.role = stream_data.response_role
1254
+ if stream_data.response_metrics is not None:
1255
+ assistant_message.metrics = stream_data.response_metrics
1256
+ if stream_data.response_content:
1257
+ assistant_message.content = stream_data.response_content
1258
+ if stream_data.response_reasoning_content:
1259
+ assistant_message.reasoning_content = stream_data.response_reasoning_content
1260
+ if stream_data.response_redacted_reasoning_content:
1261
+ assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
1262
+ if stream_data.response_provider_data:
1263
+ assistant_message.provider_data = stream_data.response_provider_data
1264
+ if stream_data.response_citations:
1265
+ assistant_message.citations = stream_data.response_citations
1266
+ if stream_data.response_audio:
1267
+ assistant_message.audio_output = stream_data.response_audio
1268
+ if stream_data.response_image:
1269
+ assistant_message.image_output = stream_data.response_image
1270
+ if stream_data.response_video:
1271
+ assistant_message.video_output = stream_data.response_video
1272
+ if stream_data.response_file:
1273
+ assistant_message.file_output = stream_data.response_file
1274
+ if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
1275
+ assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
1276
+
1277
+ def _populate_stream_data(
1278
+ self, stream_data: MessageData, model_response_delta: ModelResponse
956
1279
  ) -> Iterator[ModelResponse]:
957
1280
  """Update the stream data and assistant message with the model response."""
958
- # Add role to assistant message
959
- if model_response_delta.role is not None:
960
- assistant_message.role = model_response_delta.role
961
1281
 
962
1282
  should_yield = False
1283
+ if model_response_delta.role is not None:
1284
+ stream_data.response_role = model_response_delta.role # type: ignore
1285
+
1286
+ if model_response_delta.response_usage is not None:
1287
+ if stream_data.response_metrics is None:
1288
+ stream_data.response_metrics = Metrics()
1289
+ stream_data.response_metrics += model_response_delta.response_usage
1290
+
963
1291
  # Update stream_data content
964
1292
  if model_response_delta.content is not None:
965
1293
  stream_data.response_content += model_response_delta.content
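Editor's note: the refactor above splits streaming into two phases: `_populate_stream_data` only folds each delta into a `MessageData` buffer, and the assistant message is built once, after the stream ends, by `_populate_assistant_message_from_stream_data`. A simplified sketch of that accumulate-then-populate split; the dataclasses are illustrative stand-ins for agno's `MessageData` and `Message`:

```python
# Sketch of the accumulate-then-populate split (stand-in types, not agno's).
from dataclasses import dataclass
from typing import Optional

@dataclass
class StreamBuffer:
    content: str = ""
    role: Optional[str] = None

@dataclass
class AssistantMessage:
    role: str = "assistant"
    content: Optional[str] = None

def fold_delta(buf: StreamBuffer, role: Optional[str], content: Optional[str]) -> None:
    if role is not None:
        buf.role = role           # last role seen wins
    if content is not None:
        buf.content += content    # content chunks concatenate

def finalize(buf: StreamBuffer) -> AssistantMessage:
    msg = AssistantMessage()
    if buf.role is not None:
        msg.role = buf.role
    if buf.content:
        msg.content = buf.content
    return msg

buf = StreamBuffer()
for role, chunk in [("assistant", "Hel"), (None, "lo")]:
    fold_delta(buf, role, chunk)
assert finalize(buf).content == "Hello"
```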
@@ -1026,7 +1354,13 @@ class Model(ABC):
         if model_response_delta.extra is not None:
             if stream_data.extra is None:
                 stream_data.extra = {}
-            stream_data.extra.update(model_response_delta.extra)
+            for key in model_response_delta.extra:
+                if isinstance(model_response_delta.extra[key], list):
+                    if not stream_data.extra.get(key):
+                        stream_data.extra[key] = []
+                    stream_data.extra[key].extend(model_response_delta.extra[key])
+                else:
+                    stream_data.extra[key] = model_response_delta.extra[key]
 
         if should_yield:
             yield model_response_delta
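Editor's note: the hunk above replaces a blind `dict.update` with a merge rule: list-valued keys accumulate across deltas while scalar keys are last-write-wins, so per-chunk data (e.g. citations) is no longer dropped. A standalone sketch of the same rule:

```python
# Sketch of the merge rule introduced above for stream_data.extra.
from typing import Any, Dict

def merge_extra(acc: Dict[str, Any], delta: Dict[str, Any]) -> None:
    for key, value in delta.items():
        if isinstance(value, list):
            if not acc.get(key):
                acc[key] = []
            acc[key].extend(value)   # list entries accumulate across deltas
        else:
            acc[key] = value         # scalars are last-write-wins

acc: Dict[str, Any] = {}
merge_extra(acc, {"citations": [1], "model": "a"})
merge_extra(acc, {"citations": [2], "model": "b"})
assert acc == {"citations": [1, 2], "model": "b"}
```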
@@ -1098,12 +1432,14 @@ class Model(ABC):
         images = None
         videos = None
         audios = None
+        files = None
 
         if success and function_execution_result:
             # With unified classes, no conversion needed - use directly
             images = function_execution_result.images
             videos = function_execution_result.videos
             audios = function_execution_result.audios
+            files = function_execution_result.files
 
         return Message(
             role=self.tool_message_role,
@@ -1116,6 +1452,7 @@ class Model(ABC):
             images=images,
             videos=videos,
             audio=audios,
+            files=files,
             **kwargs,  # type: ignore
         )
 
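Editor's note: the two hunks above extend the media pass-through from tool results into the tool message with a fourth channel, `files`. A simplified sketch of that optional pass-through, using stand-in types rather than agno's `Message`/`FunctionExecutionResult`:

```python
# Sketch of optional media pass-through from a tool result into the tool
# message (stand-in types; None consistently means "no media of that kind").
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class ToolResultStub:
    images: Optional[List[str]] = None
    files: Optional[List[str]] = None

@dataclass
class ToolMessage:
    role: str
    content: str
    images: Optional[List[str]] = None
    files: Optional[List[str]] = None

def build_tool_message(success: bool, result: Optional[ToolResultStub], content: str) -> ToolMessage:
    images = files = None
    if success and result:
        images = result.images  # pass through unchanged
        files = result.files
    return ToolMessage(role="tool", content=content, images=images, files=files)

msg = build_tool_message(True, ToolResultStub(files=["report.pdf"]), "done")
assert msg.files == ["report.pdf"]
```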
@@ -1172,32 +1509,49 @@ class Model(ABC):
         function_call_output: str = ""
 
         if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
-            for item in function_execution_result.result:
-                # This function yields agent/team run events
-                if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
-                    item, tuple(get_args(TeamRunOutputEvent))
-                ):
-                    # We only capture content events
-                    if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
-                        if item.content is not None and isinstance(item.content, BaseModel):
-                            function_call_output += item.content.model_dump_json()
-                        else:
-                            # Capture output
-                            function_call_output += item.content or ""
+            try:
+                for item in function_execution_result.result:
+                    # This function yields agent/team/workflow run events
+                    if (
+                        isinstance(item, tuple(get_args(RunOutputEvent)))
+                        or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
+                        or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
+                    ):
+                        # We only capture content events for output accumulation
+                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                            if item.content is not None and isinstance(item.content, BaseModel):
+                                function_call_output += item.content.model_dump_json()
+                            else:
+                                # Capture output
+                                function_call_output += item.content or ""
 
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=item.content)
+                            if function_call.function.show_result and item.content is not None:
+                                yield ModelResponse(content=item.content)
 
                         if isinstance(item, CustomEvent):
                             function_call_output += str(item)
 
-                # Yield the event itself to bubble it up
-                yield item
+                        # For WorkflowCompletedEvent, extract content for final output
+                        from agno.run.workflow import WorkflowCompletedEvent
 
-            else:
-                function_call_output += str(item)
-                if function_call.function.show_result:
-                    yield ModelResponse(content=str(item))
+                        if isinstance(item, WorkflowCompletedEvent):
+                            if item.content is not None:
+                                if isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    function_call_output += str(item.content)
+
+                        # Yield the event itself to bubble it up
+                        yield item
+
+                    else:
+                        function_call_output += str(item)
+                        if function_call.function.show_result and item is not None:
+                            yield ModelResponse(content=str(item))
+            except Exception as e:
+                log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                function_call.error = str(e)
+                function_call_success = False
         else:
             from agno.tools.function import ToolResult
 
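Editor's note: the hunk above wraps the iteration of generator-returning tools in `try/except`, so a tool that fails mid-stream marks the call as failed instead of crashing the whole response loop. A self-contained sketch of that defensive drain:

```python
# Sketch of defensive iteration over a tool's result generator: capture the
# error and report failure instead of letting the exception escape the loop.
import logging
from typing import Iterator, Optional, Tuple

log = logging.getLogger(__name__)

def flaky_tool() -> Iterator[str]:
    yield "partial "
    raise RuntimeError("backend went away")

def drain(gen: Iterator[str]) -> Tuple[str, bool, Optional[str]]:
    output, success, error = "", True, None
    try:
        for item in gen:
            output += item  # accumulate whatever was produced before the failure
    except Exception as e:  # capture, don't propagate
        log.error("tool generator failed: %s", e)
        success, error = False, str(e)
    return output, success, error

out, ok, err = drain(flaky_tool())
assert not ok and out == "partial " and err is not None
```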
@@ -1213,10 +1567,12 @@ class Model(ABC):
                 function_execution_result.videos = tool_result.videos
             if tool_result.audios:
                 function_execution_result.audios = tool_result.audios
+            if tool_result.files:
+                function_execution_result.files = tool_result.files
         else:
             function_call_output = str(function_execution_result.result) if function_execution_result.result else ""
 
-        if function_call.function.show_result:
+        if function_call.function.show_result and function_call_output is not None:
             yield ModelResponse(content=function_call_output)
 
         # Create and yield function call result
@@ -1228,7 +1584,7 @@ class Model(ABC):
             function_execution_result=function_execution_result,
         )
         yield ModelResponse(
-            content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
+            content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
                 ToolExecution(
                     tool_call_id=function_call_result.tool_call_id,
@@ -1246,6 +1602,7 @@ class Model(ABC):
             images=function_execution_result.images,
             videos=function_execution_result.videos,
             audios=function_execution_result.audios,
+            files=function_execution_result.files,
         )
 
         # Add function call to function call results
@@ -1364,6 +1721,7 @@ class Model(ABC):
         function_call_timer = Timer()
         function_call_timer.start()
         success: Union[bool, AgentRunException] = False
+        result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
 
         try:
             if (
@@ -1529,57 +1887,41 @@ class Model(ABC):
             *(self.arun_function_call(fc) for fc in function_calls_to_run), return_exceptions=True
         )
 
-        # Process results
+        # Separate async generators from other results for concurrent processing
+        async_generator_results: List[Any] = []
+        non_async_generator_results: List[Any] = []
+
         for result in results:
-            # If result is an exception, skip processing it
             if isinstance(result, BaseException):
-                log_error(f"Error during function call: {result}")
-                raise result
+                non_async_generator_results.append(result)
+                continue
 
-            # Unpack result
             function_call_success, function_call_timer, function_call, function_execution_result = result
 
-            updated_session_state = function_execution_result.updated_session_state
-
-            # Handle AgentRunException
-            if isinstance(function_call_success, AgentRunException):
-                a_exc = function_call_success
-                # Update additional messages from function call
-                _handle_agent_exception(a_exc, additional_input)
-                # Set function call success to False if an exception occurred
-                function_call_success = False
+            # Check if this result contains an async generator
+            if isinstance(function_call.result, (AsyncGeneratorType, AsyncIterator)):
+                async_generator_results.append(result)
+            else:
+                non_async_generator_results.append(result)
 
-            # Process function call output
-            function_call_output: str = ""
-            if isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
-                for item in function_call.result:
-                    # This function yields agent/team run events
-                    if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
-                        item, tuple(get_args(TeamRunOutputEvent))
-                    ):
-                        # We only capture content events
-                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
-                            if item.content is not None and isinstance(item.content, BaseModel):
-                                function_call_output += item.content.model_dump_json()
-                            else:
-                                # Capture output
-                                function_call_output += item.content or ""
+        # Process async generators with real-time event streaming using asyncio.Queue
+        async_generator_outputs: Dict[int, Tuple[Any, str, Optional[BaseException]]] = {}
+        event_queue: asyncio.Queue = asyncio.Queue()
+        active_generators_count: int = len(async_generator_results)
 
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=item.content)
-                        continue
+        # Create background tasks for each async generator
+        async def process_async_generator(result, generator_id):
+            function_call_success, function_call_timer, function_call, function_execution_result = result
+            function_call_output = ""
 
-                        # Yield the event itself to bubble it up
-                        yield item
-                    else:
-                        function_call_output += str(item)
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=str(item))
-            elif isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+            try:
                 async for item in function_call.result:
-                    # This function yields agent/team run events
-                    if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
-                        item, tuple(get_args(TeamRunOutputEvent))
+                    # This function yields agent/team/workflow run events
+                    if isinstance(
+                        item,
+                        tuple(get_args(RunOutputEvent))
+                        + tuple(get_args(TeamRunOutputEvent))
+                        + tuple(get_args(WorkflowRunOutputEvent)),
                     ):
                         # We only capture content events
                         if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -1589,21 +1931,143 @@ class Model(ABC):
                                 # Capture output
                                 function_call_output += item.content or ""
 
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=item.content)
+                            if function_call.function.show_result and item.content is not None:
+                                await event_queue.put(ModelResponse(content=item.content))
                             continue
 
-                    if isinstance(item, CustomEvent):
-                        function_call_output += str(item)
+                        if isinstance(item, CustomEvent):
+                            function_call_output += str(item)
 
-                    # Yield the event itself to bubble it up
-                    yield item
+                        # For WorkflowCompletedEvent, extract content for final output
+                        from agno.run.workflow import WorkflowCompletedEvent
+
+                        if isinstance(item, WorkflowCompletedEvent):
+                            if item.content is not None:
+                                if isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    function_call_output += str(item.content)
+
+                        # Put the event into the queue to be yielded
+                        await event_queue.put(item)
 
                     # Yield custom events emitted by the tool
                     else:
                         function_call_output += str(item)
-                        if function_call.function.show_result:
-                            yield ModelResponse(content=str(item))
+                        if function_call.function.show_result and item is not None:
+                            await event_queue.put(ModelResponse(content=str(item)))
+
+                # Store the final output for this generator
+                async_generator_outputs[generator_id] = (result, function_call_output, None)
+
+            except Exception as e:
+                # Store the exception
+                async_generator_outputs[generator_id] = (result, "", e)
+
+            # Signal that this generator is done
+            await event_queue.put(("GENERATOR_DONE", generator_id))
+
+        # Start all async generator tasks
+        generator_tasks = []
+        for i, result in enumerate(async_generator_results):
+            task = asyncio.create_task(process_async_generator(result, i))
+            generator_tasks.append(task)
+
+        # Stream events from the queue as they arrive
+        completed_generators_count = 0
+        while completed_generators_count < active_generators_count:
+            try:
+                event = await event_queue.get()
+
+                # Check if this is a completion signal
+                if isinstance(event, tuple) and event[0] == "GENERATOR_DONE":
+                    completed_generators_count += 1
+                    continue
+
+                # Yield the actual event
+                yield event
+
+            except Exception as e:
+                log_error(f"Error processing async generator event: {e}")
+                break
+
+        # Now process all results (non-async generators and completed async generators)
+        for i, original_result in enumerate(results):
+            # If result is an exception, skip processing it
+            if isinstance(original_result, BaseException):
+                log_error(f"Error during function call: {original_result}")
+                raise original_result
+
+            # Unpack result
+            function_call_success, function_call_timer, function_call, function_execution_result = original_result
+
+            # Check if this was an async generator that was already processed
+            async_function_call_output = None
+            if isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+                # Find the corresponding processed result
+                async_gen_index = 0
+                for j, result in enumerate(results[: i + 1]):
+                    if not isinstance(result, BaseException):
+                        _, _, fc, _ = result
+                        if isinstance(fc.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
+                            if j == i:  # This is our async generator
+                                if async_gen_index in async_generator_outputs:
+                                    _, async_function_call_output, error = async_generator_outputs[async_gen_index]
+                                    if error:
+                                        log_error(f"Error in async generator: {error}")
+                                        raise error
+                                break
+                            async_gen_index += 1
+
+            updated_session_state = function_execution_result.updated_session_state
+
+            # Handle AgentRunException
+            if isinstance(function_call_success, AgentRunException):
+                a_exc = function_call_success
+                # Update additional messages from function call
+                _handle_agent_exception(a_exc, additional_input)
+                # Set function call success to False if an exception occurred
+                function_call_success = False
+
+            # Process function call output
+            function_call_output: str = ""
+
+            # Check if this was an async generator that was already processed
+            if async_function_call_output is not None:
+                function_call_output = async_function_call_output
+                # Events from async generators were already yielded in real-time above
+            elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
+                try:
+                    for item in function_call.result:
+                        # This function yields agent/team/workflow run events
+                        if isinstance(
+                            item,
+                            tuple(get_args(RunOutputEvent))
+                            + tuple(get_args(TeamRunOutputEvent))
+                            + tuple(get_args(WorkflowRunOutputEvent)),
+                        ):
+                            # We only capture content events
+                            if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                                if item.content is not None and isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    # Capture output
+                                    function_call_output += item.content or ""
+
+                                if function_call.function.show_result and item.content is not None:
+                                    yield ModelResponse(content=item.content)
+                                continue
+
+                            # Yield the event itself to bubble it up
+                            yield item
+                        else:
+                            function_call_output += str(item)
+                            if function_call.function.show_result and item is not None:
+                                yield ModelResponse(content=str(item))
+                except Exception as e:
+                    log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                    function_call.error = str(e)
+                    function_call_success = False
         else:
             from agno.tools.function import ToolResult
 
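Editor's note: the rewrite above runs every async-generator tool as its own task, funnels all events through a shared `asyncio.Queue`, and uses a `("GENERATOR_DONE", id)` sentinel so the consumer knows when all producers have finished. A compact, runnable sketch of that fan-in pattern (the generators here are toy stand-ins for tool result streams):

```python
# Sketch of queue-based fan-in with a completion sentinel per producer.
import asyncio
from typing import AsyncIterator

async def gen(name: str, n: int) -> AsyncIterator[str]:
    for i in range(n):
        await asyncio.sleep(0)  # simulate work, allow interleaving
        yield f"{name}-{i}"

async def pump(g: AsyncIterator[str], gid: int, q: asyncio.Queue) -> None:
    async for item in g:
        await q.put(item)
    await q.put(("GENERATOR_DONE", gid))  # completion sentinel

async def fan_in() -> list:
    q: asyncio.Queue = asyncio.Queue()
    gens = [gen("a", 2), gen("b", 2)]
    tasks = [asyncio.create_task(pump(g, gid, q)) for gid, g in enumerate(gens)]
    events, done = [], 0
    while done < len(gens):
        event = await q.get()
        if isinstance(event, tuple) and event[0] == "GENERATOR_DONE":
            done += 1
            continue
        events.append(event)  # events from all producers interleave in real time
    await asyncio.gather(*tasks)
    return events

print(asyncio.run(fan_in()))
```

The sentinel makes the consumer loop terminate deterministically without polling task state, which is why the rewritten code counts `GENERATOR_DONE` signals rather than awaiting the tasks directly.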
@@ -1617,10 +2081,12 @@ class Model(ABC):
                 function_execution_result.videos = tool_result.videos
             if tool_result.audios:
                 function_execution_result.audios = tool_result.audios
+            if tool_result.files:
+                function_execution_result.files = tool_result.files
         else:
             function_call_output = str(function_call.result)
 
-        if function_call.function.show_result:
+        if function_call.function.show_result and function_call_output is not None:
             yield ModelResponse(content=function_call_output)
 
         # Create and yield function call result
@@ -1632,7 +2098,7 @@ class Model(ABC):
             function_execution_result=function_execution_result,
         )
         yield ModelResponse(
-            content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
+            content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
             tool_executions=[
                 ToolExecution(
                     tool_call_id=function_call_result.tool_call_id,
@@ -1649,6 +2115,7 @@ class Model(ABC):
             images=function_execution_result.images,
             videos=function_execution_result.videos,
             audios=function_execution_result.audios,
+            files=function_execution_result.files,
         )
 
         # Add function call result to function call results
@@ -1674,7 +2141,7 @@ class Model(ABC):
         model_response.tool_calls = []
 
         function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-            assistant_message, messages, functions
+            assistant_message=assistant_message, messages=messages, functions=functions
        )
        return function_calls_to_run
 
@@ -1687,7 +2154,9 @@ class Model(ABC):
         if len(function_call_results) > 0:
             messages.extend(function_call_results)
 
-    def _handle_function_call_media(self, messages: List[Message], function_call_results: List[Message]) -> None:
+    def _handle_function_call_media(
+        self, messages: List[Message], function_call_results: List[Message], send_media_to_model: bool = True
+    ) -> None:
         """
         Handle media artifacts from function calls by adding follow-up user messages for generated media if needed.
         """
@@ -1698,6 +2167,7 @@ class Model(ABC):
         all_images: List[Image] = []
         all_videos: List[Video] = []
         all_audio: List[Audio] = []
+        all_files: List[File] = []
 
         for result_message in function_call_results:
             if result_message.images:
@@ -1713,15 +2183,21 @@ class Model(ABC):
                 all_audio.extend(result_message.audio)
                 result_message.audio = None
 
-        # If we have media artifacts, add a follow-up "user" message instead of a "tool"
-        # message with the media artifacts which throws error for some models
-        if all_images or all_videos or all_audio:
+            if result_message.files:
+                all_files.extend(result_message.files)
+                result_message.files = None
+
+        # Only add media message if we should send media to model
+        if send_media_to_model and (all_images or all_videos or all_audio or all_files):
+            # If we have media artifacts, add a follow-up "user" message instead of a "tool"
+            # message with the media artifacts which throws error for some models
             media_message = Message(
                 role="user",
                 content="Take note of the following content",
                 images=all_images if all_images else None,
                 videos=all_videos if all_videos else None,
                 audio=all_audio if all_audio else None,
+                files=all_files if all_files else None,
             )
             messages.append(media_message)
 
@@ -1747,10 +2223,14 @@ class Model(ABC):
         new_model = cls.__new__(cls)
         memo[id(self)] = new_model
 
-        # Deep copy all attributes
+        # Deep copy all attributes except client objects
         for k, v in self.__dict__.items():
             if k in {"response_format", "_tools", "_functions"}:
                 continue
+            # Skip client objects
+            if k in {"client", "async_client", "http_client", "mistral_client", "model_client"}:
+                setattr(new_model, k, None)
+                continue
             try:
                 setattr(new_model, k, deepcopy(v, memo))
             except Exception: