agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (331)
  1. agno/agent/agent.py +6009 -2874
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +595 -187
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +3 -0
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +339 -266
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +1011 -566
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +110 -37
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +143 -4
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +60 -6
  142. agno/models/openai/chat.py +102 -43
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +81 -5
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -175
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +266 -112
  205. agno/run/base.py +53 -24
  206. agno/run/team.py +252 -111
  207. agno/run/workflow.py +156 -45
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1692
  213. agno/tools/brightdata.py +3 -3
  214. agno/tools/cartesia.py +3 -5
  215. agno/tools/dalle.py +9 -8
  216. agno/tools/decorator.py +4 -2
  217. agno/tools/desi_vocal.py +2 -2
  218. agno/tools/duckduckgo.py +15 -11
  219. agno/tools/e2b.py +20 -13
  220. agno/tools/eleven_labs.py +26 -28
  221. agno/tools/exa.py +21 -16
  222. agno/tools/fal.py +4 -4
  223. agno/tools/file.py +153 -23
  224. agno/tools/file_generation.py +350 -0
  225. agno/tools/firecrawl.py +4 -4
  226. agno/tools/function.py +257 -37
  227. agno/tools/giphy.py +2 -2
  228. agno/tools/gmail.py +238 -14
  229. agno/tools/google_drive.py +270 -0
  230. agno/tools/googlecalendar.py +36 -8
  231. agno/tools/googlesheets.py +20 -5
  232. agno/tools/jira.py +20 -0
  233. agno/tools/knowledge.py +3 -3
  234. agno/tools/lumalab.py +3 -3
  235. agno/tools/mcp/__init__.py +10 -0
  236. agno/tools/mcp/mcp.py +331 -0
  237. agno/tools/mcp/multi_mcp.py +347 -0
  238. agno/tools/mcp/params.py +24 -0
  239. agno/tools/mcp_toolbox.py +284 -0
  240. agno/tools/mem0.py +11 -17
  241. agno/tools/memori.py +1 -53
  242. agno/tools/memory.py +419 -0
  243. agno/tools/models/azure_openai.py +2 -2
  244. agno/tools/models/gemini.py +3 -3
  245. agno/tools/models/groq.py +3 -5
  246. agno/tools/models/nebius.py +7 -7
  247. agno/tools/models_labs.py +25 -15
  248. agno/tools/notion.py +204 -0
  249. agno/tools/openai.py +4 -9
  250. agno/tools/opencv.py +3 -3
  251. agno/tools/parallel.py +314 -0
  252. agno/tools/replicate.py +7 -7
  253. agno/tools/scrapegraph.py +58 -31
  254. agno/tools/searxng.py +2 -2
  255. agno/tools/serper.py +2 -2
  256. agno/tools/slack.py +18 -3
  257. agno/tools/spider.py +2 -2
  258. agno/tools/tavily.py +146 -0
  259. agno/tools/whatsapp.py +1 -1
  260. agno/tools/workflow.py +278 -0
  261. agno/tools/yfinance.py +12 -11
  262. agno/utils/agent.py +820 -0
  263. agno/utils/audio.py +27 -0
  264. agno/utils/common.py +90 -1
  265. agno/utils/events.py +222 -7
  266. agno/utils/gemini.py +181 -23
  267. agno/utils/hooks.py +57 -0
  268. agno/utils/http.py +111 -0
  269. agno/utils/knowledge.py +12 -5
  270. agno/utils/log.py +1 -0
  271. agno/utils/mcp.py +95 -5
  272. agno/utils/media.py +188 -10
  273. agno/utils/merge_dict.py +22 -1
  274. agno/utils/message.py +60 -0
  275. agno/utils/models/claude.py +40 -11
  276. agno/utils/models/cohere.py +1 -1
  277. agno/utils/models/watsonx.py +1 -1
  278. agno/utils/openai.py +1 -1
  279. agno/utils/print_response/agent.py +105 -21
  280. agno/utils/print_response/team.py +103 -38
  281. agno/utils/print_response/workflow.py +251 -34
  282. agno/utils/reasoning.py +22 -1
  283. agno/utils/serialize.py +32 -0
  284. agno/utils/streamlit.py +16 -10
  285. agno/utils/string.py +41 -0
  286. agno/utils/team.py +98 -9
  287. agno/utils/tools.py +1 -1
  288. agno/vectordb/base.py +23 -4
  289. agno/vectordb/cassandra/cassandra.py +65 -9
  290. agno/vectordb/chroma/chromadb.py +182 -38
  291. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  292. agno/vectordb/couchbase/couchbase.py +105 -10
  293. agno/vectordb/lancedb/lance_db.py +183 -135
  294. agno/vectordb/langchaindb/langchaindb.py +25 -7
  295. agno/vectordb/lightrag/lightrag.py +17 -3
  296. agno/vectordb/llamaindex/__init__.py +3 -0
  297. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  298. agno/vectordb/milvus/milvus.py +126 -9
  299. agno/vectordb/mongodb/__init__.py +7 -1
  300. agno/vectordb/mongodb/mongodb.py +112 -7
  301. agno/vectordb/pgvector/pgvector.py +142 -21
  302. agno/vectordb/pineconedb/pineconedb.py +80 -8
  303. agno/vectordb/qdrant/qdrant.py +125 -39
  304. agno/vectordb/redis/__init__.py +9 -0
  305. agno/vectordb/redis/redisdb.py +694 -0
  306. agno/vectordb/singlestore/singlestore.py +111 -25
  307. agno/vectordb/surrealdb/surrealdb.py +31 -5
  308. agno/vectordb/upstashdb/upstashdb.py +76 -8
  309. agno/vectordb/weaviate/weaviate.py +86 -15
  310. agno/workflow/__init__.py +2 -0
  311. agno/workflow/agent.py +299 -0
  312. agno/workflow/condition.py +112 -18
  313. agno/workflow/loop.py +69 -10
  314. agno/workflow/parallel.py +266 -118
  315. agno/workflow/router.py +110 -17
  316. agno/workflow/step.py +645 -136
  317. agno/workflow/steps.py +65 -6
  318. agno/workflow/types.py +71 -33
  319. agno/workflow/workflow.py +2113 -300
  320. agno-2.3.0.dist-info/METADATA +618 -0
  321. agno-2.3.0.dist-info/RECORD +577 -0
  322. agno-2.3.0.dist-info/licenses/LICENSE +201 -0
  323. agno/knowledge/reader/url_reader.py +0 -128
  324. agno/tools/googlesearch.py +0 -98
  325. agno/tools/mcp.py +0 -610
  326. agno/utils/models/aws_claude.py +0 -170
  327. agno-2.0.0rc2.dist-info/METADATA +0 -355
  328. agno-2.0.0rc2.dist-info/RECORD +0 -515
  329. agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
  330. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  331. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
agno/models/base.py CHANGED
@@ -1,7 +1,11 @@
1
1
  import asyncio
2
2
  import collections.abc
3
+ import json
3
4
  from abc import ABC, abstractmethod
4
5
  from dataclasses import dataclass, field
6
+ from hashlib import md5
7
+ from pathlib import Path
8
+ from time import time
5
9
  from types import AsyncGeneratorType, GeneratorType
6
10
  from typing import (
7
11
  Any,
@@ -21,15 +25,16 @@ from uuid import uuid4
21
25
  from pydantic import BaseModel
22
26
 
23
27
  from agno.exceptions import AgentRunException
24
- from agno.media import Audio, AudioArtifact, AudioResponse, Image, ImageArtifact, Video, VideoArtifact
28
+ from agno.media import Audio, File, Image, Video
25
29
  from agno.models.message import Citations, Message
26
30
  from agno.models.metrics import Metrics
27
31
  from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
28
32
  from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
29
33
  from agno.run.team import RunContentEvent as TeamRunContentEvent
30
- from agno.run.team import TeamRunOutputEvent
34
+ from agno.run.team import TeamRunOutput, TeamRunOutputEvent
35
+ from agno.run.workflow import WorkflowRunOutputEvent
31
36
  from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
32
- from agno.utils.log import log_debug, log_error, log_warning
37
+ from agno.utils.log import log_debug, log_error, log_info, log_warning
33
38
  from agno.utils.timer import Timer
34
39
  from agno.utils.tools import get_function_call_for_tool_call, get_function_call_for_tool_execution
35
40
 
@@ -43,9 +48,12 @@ class MessageData:
43
48
  response_citations: Optional[Citations] = None
44
49
  response_tool_calls: List[Dict[str, Any]] = field(default_factory=list)
45
50
 
46
- response_audio: Optional[AudioResponse] = None
47
- response_image: Optional[ImageArtifact] = None
48
- response_video: Optional[VideoArtifact] = None
51
+ response_audio: Optional[Audio] = None
52
+ response_image: Optional[Image] = None
53
+ response_video: Optional[Video] = None
54
+ response_file: Optional[File] = None
55
+
56
+ response_metrics: Optional[Metrics] = None
49
57
 
50
58
  # Data from the provider that we might need on subsequent messages
51
59
  response_provider_data: Optional[Dict[str, Any]] = None
@@ -132,6 +140,11 @@ class Model(ABC):
132
140
  # The role of the assistant message.
133
141
  assistant_message_role: str = "assistant"
134
142
 
143
+ # Cache model responses to avoid redundant API calls during development
144
+ cache_response: bool = False
145
+ cache_ttl: Optional[int] = None
146
+ cache_dir: Optional[str] = None
147
+
135
148
  def __post_init__(self):
136
149
  if self.provider is None and self.name is not None:
137
150
  self.provider = f"{self.name} ({self.id})"
@@ -144,6 +157,100 @@ class Model(ABC):
144
157
  def get_provider(self) -> str:
145
158
  return self.provider or self.name or self.__class__.__name__
146
159
 
160
+ def _get_model_cache_key(self, messages: List[Message], stream: bool, **kwargs: Any) -> str:
161
+ """Generate a cache key based on model messages and core parameters."""
162
+ message_data = []
163
+ for msg in messages:
164
+ msg_dict = {
165
+ "role": msg.role,
166
+ "content": msg.content,
167
+ }
168
+ message_data.append(msg_dict)
169
+
170
+ # Include tools parameter in cache key
171
+ has_tools = bool(kwargs.get("tools"))
172
+
173
+ cache_data = {
174
+ "model_id": self.id,
175
+ "messages": message_data,
176
+ "has_tools": has_tools,
177
+ "response_format": kwargs.get("response_format"),
178
+ "stream": stream,
179
+ }
180
+
181
+ cache_str = json.dumps(cache_data, sort_keys=True)
182
+ return md5(cache_str.encode()).hexdigest()
183
+
184
+ def _get_model_cache_file_path(self, cache_key: str) -> Path:
185
+ """Get the file path for a cache key."""
186
+ if self.cache_dir:
187
+ cache_dir = Path(self.cache_dir)
188
+ else:
189
+ cache_dir = Path.home() / ".agno" / "cache" / "model_responses"
190
+
191
+ cache_dir.mkdir(parents=True, exist_ok=True)
192
+ return cache_dir / f"{cache_key}.json"
193
+
194
+ def _get_cached_model_response(self, cache_key: str) -> Optional[Dict[str, Any]]:
195
+ """Retrieve a cached response if it exists and is not expired."""
196
+ cache_file = self._get_model_cache_file_path(cache_key)
197
+
198
+ if not cache_file.exists():
199
+ return None
200
+
201
+ try:
202
+ with open(cache_file, "r") as f:
203
+ cached_data = json.load(f)
204
+
205
+ # Check TTL if set (None means no expiration)
206
+ if self.cache_ttl is not None:
207
+ if time() - cached_data["timestamp"] > self.cache_ttl:
208
+ return None
209
+
210
+ return cached_data
211
+ except Exception:
212
+ return None
213
+
214
+ def _save_model_response_to_cache(self, cache_key: str, result: ModelResponse, is_streaming: bool = False) -> None:
215
+ """Save a model response to cache."""
216
+ try:
217
+ cache_file = self._get_model_cache_file_path(cache_key)
218
+
219
+ cache_data = {
220
+ "timestamp": int(time()),
221
+ "is_streaming": is_streaming,
222
+ "result": result.to_dict(),
223
+ }
224
+ with open(cache_file, "w") as f:
225
+ json.dump(cache_data, f)
226
+ except Exception:
227
+ pass
228
+
229
+ def _save_streaming_responses_to_cache(self, cache_key: str, responses: List[ModelResponse]) -> None:
230
+ """Save streaming responses to cache."""
231
+ cache_file = self._get_model_cache_file_path(cache_key)
232
+
233
+ cache_data = {
234
+ "timestamp": int(time()),
235
+ "is_streaming": True,
236
+ "streaming_responses": [r.to_dict() for r in responses],
237
+ }
238
+
239
+ try:
240
+ with open(cache_file, "w") as f:
241
+ json.dump(cache_data, f)
242
+ except Exception:
243
+ pass
244
+
245
+ def _model_response_from_cache(self, cached_data: Dict[str, Any]) -> ModelResponse:
246
+ """Reconstruct a ModelResponse from cached data."""
247
+ return ModelResponse.from_dict(cached_data["result"])
248
+
249
+ def _streaming_responses_from_cache(self, cached_data: list) -> Iterator[ModelResponse]:
250
+ """Reconstruct streaming responses from cached data."""
251
+ for cached_response in cached_data:
252
+ yield ModelResponse.from_dict(cached_response)
253
+
147
254
  @abstractmethod
148
255
  def invoke(self, *args, **kwargs) -> ModelResponse:
149
256
  pass
@@ -186,278 +293,377 @@ class Model(ABC):
186
293
  """
187
294
  pass
188
295
 
296
+ def _format_tools(self, tools: Optional[List[Union[Function, dict]]]) -> List[Dict[str, Any]]:
297
+ _tool_dicts = []
298
+ for tool in tools or []:
299
+ if isinstance(tool, Function):
300
+ _tool_dicts.append({"type": "function", "function": tool.to_dict()})
301
+ else:
302
+ # If a dict is passed, it is a builtin tool
303
+ _tool_dicts.append(tool)
304
+ return _tool_dicts
305
+
189
306
  def response(
190
307
  self,
191
308
  messages: List[Message],
192
309
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
193
- tools: Optional[List[Dict[str, Any]]] = None,
194
- functions: Optional[Dict[str, Function]] = None,
310
+ tools: Optional[List[Union[Function, dict]]] = None,
195
311
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
196
312
  tool_call_limit: Optional[int] = None,
197
- run_response: Optional[RunOutput] = None,
313
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
314
+ send_media_to_model: bool = True,
198
315
  ) -> ModelResponse:
199
316
  """
200
317
  Generate a response from the model.
318
+
319
+ Args:
320
+ messages: List of messages to send to the model
321
+ response_format: Response format to use
322
+ tools: List of tools to use. This includes the original Function objects and dicts for built-in tools.
323
+ tool_choice: Tool choice to use
324
+ tool_call_limit: Tool call limit
325
+ run_response: Run response to use
326
+ send_media_to_model: Whether to send media to the model
201
327
  """
328
+ try:
329
+ # Check cache if enabled
330
+ if self.cache_response:
331
+ cache_key = self._get_model_cache_key(
332
+ messages, stream=False, response_format=response_format, tools=tools
333
+ )
334
+ cached_data = self._get_cached_model_response(cache_key)
202
335
 
203
- log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
204
- log_debug(f"Model: {self.id}", center=True, symbol="-")
205
-
206
- _log_messages(messages)
207
- model_response = ModelResponse()
208
-
209
- function_call_count = 0
210
-
211
- while True:
212
- # Get response from model
213
- assistant_message = Message(role=self.assistant_message_role)
214
- self._process_model_response(
215
- messages=messages,
216
- assistant_message=assistant_message,
217
- model_response=model_response,
218
- response_format=response_format,
219
- tools=tools,
220
- tool_choice=tool_choice or self._tool_choice,
221
- run_response=run_response,
222
- )
336
+ if cached_data:
337
+ log_info("Cache hit for model response")
338
+ return self._model_response_from_cache(cached_data)
223
339
 
224
- # Add assistant message to messages
225
- messages.append(assistant_message)
340
+ log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
341
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
226
342
 
227
- # Log response and metrics
228
- assistant_message.log(metrics=True)
343
+ _log_messages(messages)
344
+ model_response = ModelResponse()
229
345
 
230
- # Handle tool calls if present
231
- if assistant_message.tool_calls:
232
- # Prepare function calls
233
- function_calls_to_run = self._prepare_function_calls(
234
- assistant_message=assistant_message,
346
+ function_call_count = 0
347
+
348
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
349
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
350
+
351
+ while True:
352
+ # Get response from model
353
+ assistant_message = Message(role=self.assistant_message_role)
354
+ self._process_model_response(
235
355
  messages=messages,
356
+ assistant_message=assistant_message,
236
357
  model_response=model_response,
237
- functions=functions,
358
+ response_format=response_format,
359
+ tools=_tool_dicts,
360
+ tool_choice=tool_choice or self._tool_choice,
361
+ run_response=run_response,
238
362
  )
239
- function_call_results: List[Message] = []
240
-
241
- # Execute function calls
242
- for function_call_response in self.run_function_calls(
243
- function_calls=function_calls_to_run,
244
- function_call_results=function_call_results,
245
- current_function_call_count=function_call_count,
246
- function_call_limit=tool_call_limit,
247
- ):
248
- if isinstance(function_call_response, ModelResponse):
249
- # The session state is updated by the function call
250
- if function_call_response.updated_session_state is not None:
251
- model_response.updated_session_state = function_call_response.updated_session_state
252
-
253
- # Media artifacts are generated by the function call
254
- if function_call_response.images is not None:
255
- if model_response.images is None:
256
- model_response.images = []
257
- model_response.images.extend(function_call_response.images)
258
-
259
- if function_call_response.audios is not None:
260
- if model_response.audios is None:
261
- model_response.audios = []
262
- model_response.audios.extend(function_call_response.audios)
263
-
264
- if function_call_response.videos is not None:
265
- if model_response.videos is None:
266
- model_response.videos = []
267
- model_response.videos.extend(function_call_response.videos)
268
-
269
- if (
270
- function_call_response.event
271
- in [
363
+
364
+ # Add assistant message to messages
365
+ messages.append(assistant_message)
366
+
367
+ # Log response and metrics
368
+ assistant_message.log(metrics=True)
369
+
370
+ # Handle tool calls if present
371
+ if assistant_message.tool_calls:
372
+ # Prepare function calls
373
+ function_calls_to_run = self._prepare_function_calls(
374
+ assistant_message=assistant_message,
375
+ messages=messages,
376
+ model_response=model_response,
377
+ functions=_functions,
378
+ )
379
+ function_call_results: List[Message] = []
380
+
381
+ # Execute function calls
382
+ for function_call_response in self.run_function_calls(
383
+ function_calls=function_calls_to_run,
384
+ function_call_results=function_call_results,
385
+ current_function_call_count=function_call_count,
386
+ function_call_limit=tool_call_limit,
387
+ ):
388
+ if isinstance(function_call_response, ModelResponse):
389
+ # The session state is updated by the function call
390
+ if function_call_response.updated_session_state is not None:
391
+ model_response.updated_session_state = function_call_response.updated_session_state
392
+
393
+ # Media artifacts are generated by the function call
394
+ if function_call_response.images is not None:
395
+ if model_response.images is None:
396
+ model_response.images = []
397
+ model_response.images.extend(function_call_response.images)
398
+
399
+ if function_call_response.audios is not None:
400
+ if model_response.audios is None:
401
+ model_response.audios = []
402
+ model_response.audios.extend(function_call_response.audios)
403
+
404
+ if function_call_response.videos is not None:
405
+ if model_response.videos is None:
406
+ model_response.videos = []
407
+ model_response.videos.extend(function_call_response.videos)
408
+
409
+ if function_call_response.files is not None:
410
+ if model_response.files is None:
411
+ model_response.files = []
412
+ model_response.files.extend(function_call_response.files)
413
+
414
+ if (
415
+ function_call_response.event
416
+ in [
417
+ ModelResponseEvent.tool_call_completed.value,
418
+ ModelResponseEvent.tool_call_paused.value,
419
+ ]
420
+ and function_call_response.tool_executions is not None
421
+ ):
422
+ if model_response.tool_executions is None:
423
+ model_response.tool_executions = []
424
+ model_response.tool_executions.extend(function_call_response.tool_executions)
425
+
426
+ elif function_call_response.event not in [
427
+ ModelResponseEvent.tool_call_started.value,
272
428
  ModelResponseEvent.tool_call_completed.value,
273
- ModelResponseEvent.tool_call_paused.value,
274
- ]
275
- and function_call_response.tool_executions is not None
276
- ):
277
- if model_response.tool_executions is None:
278
- model_response.tool_executions = []
279
- model_response.tool_executions.extend(function_call_response.tool_executions)
280
-
281
- elif function_call_response.event not in [
282
- ModelResponseEvent.tool_call_started.value,
283
- ModelResponseEvent.tool_call_completed.value,
284
- ]:
285
- if function_call_response.content:
286
- model_response.content += function_call_response.content # type: ignore
287
-
288
- # Add a function call for each successful execution
289
- function_call_count += len(function_call_results)
290
-
291
- # Format and add results to messages
292
- self.format_function_call_results(
293
- messages=messages, function_call_results=function_call_results, **model_response.extra or {}
294
- )
429
+ ]:
430
+ if function_call_response.content:
431
+ model_response.content += function_call_response.content # type: ignore
295
432
 
296
- if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
297
- # Handle function call media
298
- self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
433
+ # Add a function call for each successful execution
434
+ function_call_count += len(function_call_results)
435
+
436
+ # Format and add results to messages
437
+ self.format_function_call_results(
438
+ messages=messages, function_call_results=function_call_results, **model_response.extra or {}
439
+ )
299
440
 
300
- for function_call_result in function_call_results:
301
- function_call_result.log(metrics=True)
441
+ if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
442
+ # Handle function call media
443
+ self._handle_function_call_media(
444
+ messages=messages,
445
+ function_call_results=function_call_results,
446
+ send_media_to_model=send_media_to_model,
447
+ )
302
448
 
303
- # Check if we should stop after tool calls
304
- if any(m.stop_after_tool_call for m in function_call_results):
305
- break
449
+ for function_call_result in function_call_results:
450
+ function_call_result.log(metrics=True)
306
451
 
307
- # If we have any tool calls that require confirmation, break the loop
308
- if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
309
- break
452
+ # Check if we should stop after tool calls
453
+ if any(m.stop_after_tool_call for m in function_call_results):
454
+ break
310
455
 
311
- # If we have any tool calls that require external execution, break the loop
312
- if any(tc.external_execution_required for tc in model_response.tool_executions or []):
313
- break
456
+ # If we have any tool calls that require confirmation, break the loop
457
+ if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
458
+ break
314
459
 
315
- # If we have any tool calls that require user input, break the loop
316
- if any(tc.requires_user_input for tc in model_response.tool_executions or []):
317
- break
460
+ # If we have any tool calls that require external execution, break the loop
461
+ if any(tc.external_execution_required for tc in model_response.tool_executions or []):
462
+ break
318
463
 
319
- # Continue loop to get next response
320
- continue
464
+ # If we have any tool calls that require user input, break the loop
465
+ if any(tc.requires_user_input for tc in model_response.tool_executions or []):
466
+ break
467
+
468
+ # Continue loop to get next response
469
+ continue
321
470
 
322
- # No tool calls or finished processing them
323
- break
471
+ # No tool calls or finished processing them
472
+ break
473
+
474
+ log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
475
+
476
+ # Save to cache if enabled
477
+ if self.cache_response:
478
+ self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
479
+ finally:
480
+ # Close the Gemini client
481
+ if self.__class__.__name__ == "Gemini" and self.client is not None: # type: ignore
482
+ try:
483
+ self.client.close() # type: ignore
484
+ self.client = None
485
+ except AttributeError:
486
+ log_warning(
487
+ "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
488
+ " please upgrade Gemini to the latest version: pip install -U google-genai"
489
+ )
324
490
 
325
- log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
326
491
  return model_response
327
492
 
328
493
  async def aresponse(
329
494
  self,
330
495
  messages: List[Message],
331
496
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
332
- tools: Optional[List[Dict[str, Any]]] = None,
333
- functions: Optional[Dict[str, Function]] = None,
497
+ tools: Optional[List[Union[Function, dict]]] = None,
334
498
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
335
499
  tool_call_limit: Optional[int] = None,
500
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
501
+ send_media_to_model: bool = True,
336
502
  ) -> ModelResponse:
337
503
  """
338
504
  Generate an asynchronous response from the model.
339
505
  """
506
+ try:
507
+ # Check cache if enabled
508
+ if self.cache_response:
509
+ cache_key = self._get_model_cache_key(
510
+ messages, stream=False, response_format=response_format, tools=tools
511
+ )
512
+ cached_data = self._get_cached_model_response(cache_key)
340
513
 
341
- log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
342
- log_debug(f"Model: {self.id}", center=True, symbol="-")
343
- _log_messages(messages)
344
- model_response = ModelResponse()
345
-
346
- function_call_count = 0
347
-
348
- while True:
349
- # Get response from model
350
- assistant_message = Message(role=self.assistant_message_role)
351
- await self._aprocess_model_response(
352
- messages=messages,
353
- assistant_message=assistant_message,
354
- model_response=model_response,
355
- response_format=response_format,
356
- tools=tools,
357
- tool_choice=tool_choice or self._tool_choice,
358
- )
514
+ if cached_data:
515
+ log_info("Cache hit for model response")
516
+ return self._model_response_from_cache(cached_data)
359
517
 
360
- # Add assistant message to messages
361
- messages.append(assistant_message)
518
+ log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
519
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
520
+ _log_messages(messages)
521
+ model_response = ModelResponse()
362
522
 
363
- # Log response and metrics
364
- assistant_message.log(metrics=True)
523
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
524
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
365
525
 
366
- # Handle tool calls if present
367
- if assistant_message.tool_calls:
368
- # Prepare function calls
369
- function_calls_to_run = self._prepare_function_calls(
370
- assistant_message=assistant_message,
526
+ function_call_count = 0
527
+
528
+ while True:
529
+ # Get response from model
530
+ assistant_message = Message(role=self.assistant_message_role)
531
+ await self._aprocess_model_response(
371
532
  messages=messages,
533
+ assistant_message=assistant_message,
372
534
  model_response=model_response,
373
- functions=functions,
535
+ response_format=response_format,
536
+ tools=_tool_dicts,
537
+ tool_choice=tool_choice or self._tool_choice,
538
+ run_response=run_response,
374
539
  )
375
- function_call_results: List[Message] = []
376
-
377
- # Execute function calls
378
- async for function_call_response in self.arun_function_calls(
379
- function_calls=function_calls_to_run,
380
- function_call_results=function_call_results,
381
- current_function_call_count=function_call_count,
382
- function_call_limit=tool_call_limit,
383
- ):
384
- if isinstance(function_call_response, ModelResponse):
385
- # The session state is updated by the function call
386
- if function_call_response.updated_session_state is not None:
387
- model_response.updated_session_state = function_call_response.updated_session_state
388
-
389
- # Media artifacts are generated by the function call
390
- if function_call_response.images is not None:
391
- if model_response.images is None:
392
- model_response.images = []
393
- model_response.images.extend(function_call_response.images)
394
-
395
- if function_call_response.audios is not None:
396
- if model_response.audios is None:
397
- model_response.audios = []
398
- model_response.audios.extend(function_call_response.audios)
399
-
400
- if function_call_response.videos is not None:
401
- if model_response.videos is None:
402
- model_response.videos = []
403
- model_response.videos.extend(function_call_response.videos)
404
-
405
- if (
406
- function_call_response.event
407
- in [
540
+
541
+ # Add assistant message to messages
542
+ messages.append(assistant_message)
543
+
544
+ # Log response and metrics
545
+ assistant_message.log(metrics=True)
546
+
547
+ # Handle tool calls if present
548
+ if assistant_message.tool_calls:
549
+ # Prepare function calls
550
+ function_calls_to_run = self._prepare_function_calls(
551
+ assistant_message=assistant_message,
552
+ messages=messages,
553
+ model_response=model_response,
554
+ functions=_functions,
555
+ )
556
+ function_call_results: List[Message] = []
557
+
558
+ # Execute function calls
559
+ async for function_call_response in self.arun_function_calls(
560
+ function_calls=function_calls_to_run,
561
+ function_call_results=function_call_results,
562
+ current_function_call_count=function_call_count,
563
+ function_call_limit=tool_call_limit,
564
+ ):
565
+ if isinstance(function_call_response, ModelResponse):
566
+ # The session state is updated by the function call
567
+ if function_call_response.updated_session_state is not None:
568
+ model_response.updated_session_state = function_call_response.updated_session_state
569
+
570
+ # Media artifacts are generated by the function call
571
+ if function_call_response.images is not None:
572
+ if model_response.images is None:
573
+ model_response.images = []
574
+ model_response.images.extend(function_call_response.images)
575
+
576
+ if function_call_response.audios is not None:
577
+ if model_response.audios is None:
578
+ model_response.audios = []
579
+ model_response.audios.extend(function_call_response.audios)
580
+
581
+ if function_call_response.videos is not None:
582
+ if model_response.videos is None:
583
+ model_response.videos = []
584
+ model_response.videos.extend(function_call_response.videos)
585
+
586
+ if function_call_response.files is not None:
587
+ if model_response.files is None:
588
+ model_response.files = []
589
+ model_response.files.extend(function_call_response.files)
590
+
591
+ if (
592
+ function_call_response.event
593
+ in [
594
+ ModelResponseEvent.tool_call_completed.value,
595
+ ModelResponseEvent.tool_call_paused.value,
596
+ ]
597
+ and function_call_response.tool_executions is not None
598
+ ):
599
+ if model_response.tool_executions is None:
600
+ model_response.tool_executions = []
601
+ model_response.tool_executions.extend(function_call_response.tool_executions)
602
+ elif function_call_response.event not in [
603
+ ModelResponseEvent.tool_call_started.value,
408
604
  ModelResponseEvent.tool_call_completed.value,
409
- ModelResponseEvent.tool_call_paused.value,
410
- ]
411
- and function_call_response.tool_executions is not None
412
- ):
413
- if model_response.tool_executions is None:
414
- model_response.tool_executions = []
415
- model_response.tool_executions.extend(function_call_response.tool_executions)
416
- elif function_call_response.event not in [
417
- ModelResponseEvent.tool_call_started.value,
418
- ModelResponseEvent.tool_call_completed.value,
419
- ]:
420
- if function_call_response.content:
421
- model_response.content += function_call_response.content # type: ignore
422
-
423
- # Add a function call for each successful execution
424
- function_call_count += len(function_call_results)
425
-
426
- # Format and add results to messages
427
- self.format_function_call_results(
428
- messages=messages, function_call_results=function_call_results, **model_response.extra or {}
429
- )
605
+ ]:
606
+ if function_call_response.content:
607
+ model_response.content += function_call_response.content # type: ignore
430
608
 
431
- if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
432
- # Handle function call media
433
- self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
609
+ # Add a function call for each successful execution
610
+ function_call_count += len(function_call_results)
434
611
 
435
- for function_call_result in function_call_results:
436
- function_call_result.log(metrics=True)
612
+ # Format and add results to messages
613
+ self.format_function_call_results(
614
+ messages=messages, function_call_results=function_call_results, **model_response.extra or {}
615
+ )
437
616
 
438
- # Check if we should stop after tool calls
439
- if any(m.stop_after_tool_call for m in function_call_results):
440
- break
617
+ if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
618
+ # Handle function call media
619
+ self._handle_function_call_media(
620
+ messages=messages,
621
+ function_call_results=function_call_results,
622
+ send_media_to_model=send_media_to_model,
623
+ )
441
624
 
442
- # If we have any tool calls that require confirmation, break the loop
443
- if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
444
- break
625
+ for function_call_result in function_call_results:
626
+ function_call_result.log(metrics=True)
445
627
 
446
- # If we have any tool calls that require external execution, break the loop
447
- if any(tc.external_execution_required for tc in model_response.tool_executions or []):
448
- break
628
+ # Check if we should stop after tool calls
629
+ if any(m.stop_after_tool_call for m in function_call_results):
630
+ break
449
631
 
450
- # If we have any tool calls that require user input, break the loop
451
- if any(tc.requires_user_input for tc in model_response.tool_executions or []):
452
- break
632
+ # If we have any tool calls that require confirmation, break the loop
633
+ if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
634
+ break
453
635
 
454
- # Continue loop to get next response
455
- continue
636
+ # If we have any tool calls that require external execution, break the loop
637
+ if any(tc.external_execution_required for tc in model_response.tool_executions or []):
638
+ break
639
+
640
+ # If we have any tool calls that require user input, break the loop
641
+ if any(tc.requires_user_input for tc in model_response.tool_executions or []):
642
+ break
643
+
644
+ # Continue loop to get next response
645
+ continue
646
+
647
+ # No tool calls or finished processing them
648
+ break
649
+
650
+ log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
456
651
 
457
- # No tool calls or finished processing them
458
- break
652
+ # Save to cache if enabled
653
+ if self.cache_response:
654
+ self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
655
+ finally:
656
+ # Close the Gemini client
657
+ if self.__class__.__name__ == "Gemini" and self.client is not None:
658
+ try:
659
+ await self.client.aio.aclose() # type: ignore
660
+ self.client = None
661
+ except AttributeError:
662
+ log_warning(
663
+ "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
664
+ " please upgrade Gemini to the latest version: pip install -U google-genai"
665
+ )
459
666
 
460
- log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
461
667
  return model_response
462
668
 
463
669
  def _process_model_response(
@@ -468,7 +674,7 @@ class Model(ABC):
468
674
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
469
675
  tools: Optional[List[Dict[str, Any]]] = None,
470
676
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
471
- run_response: Optional[RunOutput] = None,
677
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
472
678
  ) -> None:
473
679
  """
474
680
  Process a single model response and return the assistant message and whether to continue.
@@ -502,9 +708,7 @@ class Model(ABC):
502
708
  if assistant_message.citations is not None:
503
709
  model_response.citations = assistant_message.citations
504
710
  if assistant_message.audio_output is not None:
505
- if isinstance(assistant_message.audio_output, AudioArtifact):
506
- model_response.audios = [assistant_message.audio_output]
507
- elif isinstance(assistant_message.audio_output, AudioResponse):
711
+ if isinstance(assistant_message.audio_output, Audio):
508
712
  model_response.audio = assistant_message.audio_output
509
713
  if assistant_message.image_output is not None:
510
714
  model_response.images = [assistant_message.image_output]
@@ -514,6 +718,8 @@ class Model(ABC):
514
718
  if model_response.extra is None:
515
719
  model_response.extra = {}
516
720
  model_response.extra.update(provider_response.extra)
721
+ if provider_response.provider_data is not None:
722
+ model_response.provider_data = provider_response.provider_data
517
723
 
518
724
  async def _aprocess_model_response(
519
725
  self,
@@ -523,7 +729,7 @@ class Model(ABC):
523
729
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
524
730
  tools: Optional[List[Dict[str, Any]]] = None,
525
731
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
526
- run_response: Optional[RunOutput] = None,
732
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
527
733
  ) -> None:
528
734
  """
529
735
  Process a single async model response and return the assistant message and whether to continue.
@@ -557,9 +763,7 @@ class Model(ABC):
557
763
  if assistant_message.citations is not None:
558
764
  model_response.citations = assistant_message.citations
559
765
  if assistant_message.audio_output is not None:
560
- if isinstance(assistant_message.audio_output, AudioArtifact):
561
- model_response.audios = [assistant_message.audio_output]
562
- elif isinstance(assistant_message.audio_output, AudioResponse):
766
+ if isinstance(assistant_message.audio_output, Audio):
563
767
  model_response.audio = assistant_message.audio_output
564
768
  if assistant_message.image_output is not None:
565
769
  model_response.images = [assistant_message.image_output]
@@ -569,6 +773,8 @@ class Model(ABC):
569
773
  if model_response.extra is None:
570
774
  model_response.extra = {}
571
775
  model_response.extra.update(provider_response.extra)
776
+ if provider_response.provider_data is not None:
777
+ model_response.provider_data = provider_response.provider_data
572
778
 
573
779
  def _populate_assistant_message(
574
780
  self,
@@ -585,7 +791,6 @@ class Model(ABC):
585
791
  Returns:
586
792
  Message: The populated assistant message
587
793
  """
588
- # Add role to assistant message
589
794
  if provider_response.role is not None:
590
795
  assistant_message.role = provider_response.role
591
796
 
@@ -611,6 +816,10 @@ class Model(ABC):
611
816
  if provider_response.videos:
612
817
  assistant_message.video_output = provider_response.videos[-1] # Taking last (most recent) video
613
818
 
819
+ if provider_response.files is not None:
820
+ if provider_response.files:
821
+ assistant_message.file_output = provider_response.files[-1] # Taking last (most recent) file
822
+
614
823
  if provider_response.audios is not None:
615
824
  if provider_response.audios:
616
825
  assistant_message.audio_output = provider_response.audios[-1] # Taking last (most recent) audio
@@ -645,7 +854,7 @@ class Model(ABC):
645
854
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
646
855
  tools: Optional[List[Dict[str, Any]]] = None,
647
856
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
648
- run_response: Optional[RunOutput] = None,
857
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
649
858
  ) -> Iterator[ModelResponse]:
650
859
  """
651
860
  Process a streaming response from the model.
@@ -659,142 +868,181 @@ class Model(ABC):
659
868
  tool_choice=tool_choice or self._tool_choice,
660
869
  run_response=run_response,
661
870
  ):
662
- yield from self._populate_stream_data_and_assistant_message(
871
+ for model_response_delta in self._populate_stream_data(
663
872
  stream_data=stream_data,
664
- assistant_message=assistant_message,
665
873
  model_response_delta=response_delta,
666
- )
874
+ ):
875
+ yield model_response_delta
667
876
 
668
- # Add final metrics to assistant message
669
- self._populate_assistant_message(assistant_message=assistant_message, provider_response=response_delta)
877
+ # Populate assistant message from stream data after the stream ends
878
+ self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
670
879
 
671
880
  def response_stream(
672
881
  self,
673
882
  messages: List[Message],
674
883
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
675
- tools: Optional[List[Dict[str, Any]]] = None,
676
- functions: Optional[Dict[str, Function]] = None,
884
+ tools: Optional[List[Union[Function, dict]]] = None,
677
885
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
678
886
  tool_call_limit: Optional[int] = None,
679
887
  stream_model_response: bool = True,
680
- run_response: Optional[RunOutput] = None,
888
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
889
+ send_media_to_model: bool = True,
681
890
  ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
682
891
  """
683
892
  Generate a streaming response from the model.
684
893
  """
894
+ try:
895
+ # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
896
+ cache_key = None
897
+ if self.cache_response:
898
+ cache_key = self._get_model_cache_key(
899
+ messages, stream=True, response_format=response_format, tools=tools
900
+ )
901
+ cached_data = self._get_cached_model_response(cache_key)
685
902
 
686
- log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
687
- log_debug(f"Model: {self.id}", center=True, symbol="-")
688
- _log_messages(messages)
903
+ if cached_data:
904
+ log_info("Cache hit for streaming model response")
905
+ # Yield cached responses
906
+ for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
907
+ yield response
908
+ return
689
909
 
690
- function_call_count = 0
910
+ log_info("Cache miss for streaming model response")
691
911
 
692
- while True:
693
- assistant_message = Message(role=self.assistant_message_role)
694
- # Create assistant message and stream data
695
- stream_data = MessageData()
696
- if stream_model_response:
697
- # Generate response
698
- yield from self.process_response_stream(
699
- messages=messages,
700
- assistant_message=assistant_message,
701
- stream_data=stream_data,
702
- response_format=response_format,
703
- tools=tools,
704
- tool_choice=tool_choice or self._tool_choice,
705
- run_response=run_response,
706
- )
912
+ # Track streaming responses for caching
913
+ streaming_responses: List[ModelResponse] = []
707
914
 
708
- # Populate assistant message from stream data
709
- if stream_data.response_content:
710
- assistant_message.content = stream_data.response_content
711
- if stream_data.response_reasoning_content:
712
- assistant_message.reasoning_content = stream_data.response_reasoning_content
713
- if stream_data.response_redacted_reasoning_content:
714
- assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
715
- if stream_data.response_provider_data:
716
- assistant_message.provider_data = stream_data.response_provider_data
717
- if stream_data.response_citations:
718
- assistant_message.citations = stream_data.response_citations
719
- if stream_data.response_audio:
720
- assistant_message.audio_output = stream_data.response_audio
721
- if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
722
- assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
915
+ log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
916
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
917
+ _log_messages(messages)
723
918
 
724
- else:
725
- model_response = ModelResponse()
726
- self._process_model_response(
727
- messages=messages,
728
- assistant_message=assistant_message,
729
- model_response=model_response,
730
- response_format=response_format,
731
- tools=tools,
732
- tool_choice=tool_choice or self._tool_choice,
733
- )
734
- yield model_response
919
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
920
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
735
921
 
736
- # Add assistant message to messages
737
- messages.append(assistant_message)
738
- assistant_message.log(metrics=True)
922
+ function_call_count = 0
923
+
924
+ while True:
925
+ assistant_message = Message(role=self.assistant_message_role)
926
+ # Create assistant message and stream data
927
+ stream_data = MessageData()
928
+ model_response = ModelResponse()
929
+ if stream_model_response:
930
+ # Generate response
931
+ for response in self.process_response_stream(
932
+ messages=messages,
933
+ assistant_message=assistant_message,
934
+ stream_data=stream_data,
935
+ response_format=response_format,
936
+ tools=_tool_dicts,
937
+ tool_choice=tool_choice or self._tool_choice,
938
+ run_response=run_response,
939
+ ):
940
+ if self.cache_response and isinstance(response, ModelResponse):
941
+ streaming_responses.append(response)
942
+ yield response
739
943
 
740
- # Handle tool calls if present
741
- if assistant_message.tool_calls is not None:
742
- # Prepare function calls
743
- function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
744
- assistant_message, messages, functions
745
- )
746
- function_call_results: List[Message] = []
747
-
748
- # Execute function calls
749
- for function_call_response in self.run_function_calls(
750
- function_calls=function_calls_to_run,
751
- function_call_results=function_call_results,
752
- current_function_call_count=function_call_count,
753
- function_call_limit=tool_call_limit,
754
- ):
755
- yield function_call_response
756
-
757
- # Add a function call for each successful execution
758
- function_call_count += len(function_call_results)
759
-
760
- # Format and add results to messages
761
- if stream_data and stream_data.extra is not None:
762
- self.format_function_call_results(
763
- messages=messages, function_call_results=function_call_results, **stream_data.extra
764
- )
765
944
  else:
766
- self.format_function_call_results(messages=messages, function_call_results=function_call_results)
945
+ self._process_model_response(
946
+ messages=messages,
947
+ assistant_message=assistant_message,
948
+ model_response=model_response,
949
+ response_format=response_format,
950
+ tools=_tool_dicts,
951
+ tool_choice=tool_choice or self._tool_choice,
952
+ )
953
+ if self.cache_response:
954
+ streaming_responses.append(model_response)
955
+ yield model_response
956
+
957
+ # Add assistant message to messages
958
+ messages.append(assistant_message)
959
+ assistant_message.log(metrics=True)
960
+
961
+ # Handle tool calls if present
962
+ if assistant_message.tool_calls is not None:
963
+ # Prepare function calls
964
+ function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
965
+ assistant_message=assistant_message, messages=messages, functions=_functions
966
+ )
967
+ function_call_results: List[Message] = []
968
+
969
+ # Execute function calls
970
+ for function_call_response in self.run_function_calls(
971
+ function_calls=function_calls_to_run,
972
+ function_call_results=function_call_results,
973
+ current_function_call_count=function_call_count,
974
+ function_call_limit=tool_call_limit,
975
+ ):
976
+ if self.cache_response and isinstance(function_call_response, ModelResponse):
977
+ streaming_responses.append(function_call_response)
978
+ yield function_call_response
979
+
980
+ # Add a function call for each successful execution
981
+ function_call_count += len(function_call_results)
982
+
983
+ # Format and add results to messages
984
+ if stream_data and stream_data.extra is not None:
985
+ self.format_function_call_results(
986
+ messages=messages, function_call_results=function_call_results, **stream_data.extra
987
+ )
988
+ elif model_response and model_response.extra is not None:
989
+ self.format_function_call_results(
990
+ messages=messages, function_call_results=function_call_results, **model_response.extra
991
+ )
992
+ else:
993
+ self.format_function_call_results(
994
+ messages=messages, function_call_results=function_call_results
995
+ )
767
996
 
768
- # Handle function call media
769
- if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
770
- self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
997
+ # Handle function call media
998
+ if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
999
+ self._handle_function_call_media(
1000
+ messages=messages,
1001
+ function_call_results=function_call_results,
1002
+ send_media_to_model=send_media_to_model,
1003
+ )
771
1004
 
772
- for function_call_result in function_call_results:
773
- function_call_result.log(metrics=True)
1005
+ for function_call_result in function_call_results:
1006
+ function_call_result.log(metrics=True)
774
1007
 
775
- # Check if we should stop after tool calls
776
- if any(m.stop_after_tool_call for m in function_call_results):
777
- break
1008
+ # Check if we should stop after tool calls
1009
+ if any(m.stop_after_tool_call for m in function_call_results):
1010
+ break
778
1011
 
779
- # If we have any tool calls that require confirmation, break the loop
780
- if any(fc.function.requires_confirmation for fc in function_calls_to_run):
781
- break
1012
+ # If we have any tool calls that require confirmation, break the loop
1013
+ if any(fc.function.requires_confirmation for fc in function_calls_to_run):
1014
+ break
782
1015
 
783
- # If we have any tool calls that require external execution, break the loop
784
- if any(fc.function.external_execution for fc in function_calls_to_run):
785
- break
1016
+ # If we have any tool calls that require external execution, break the loop
1017
+ if any(fc.function.external_execution for fc in function_calls_to_run):
1018
+ break
786
1019
 
787
- # If we have any tool calls that require user input, break the loop
788
- if any(fc.function.requires_user_input for fc in function_calls_to_run):
789
- break
1020
+ # If we have any tool calls that require user input, break the loop
1021
+ if any(fc.function.requires_user_input for fc in function_calls_to_run):
1022
+ break
790
1023
 
791
- # Continue loop to get next response
792
- continue
1024
+ # Continue loop to get next response
1025
+ continue
793
1026
 
794
- # No tool calls or finished processing them
795
- break
1027
+ # No tool calls or finished processing them
1028
+ break
796
1029
 
797
- log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
1030
+ log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
1031
+
1032
+ # Save streaming responses to cache if enabled
1033
+ if self.cache_response and cache_key and streaming_responses:
1034
+ self._save_streaming_responses_to_cache(cache_key, streaming_responses)
1035
+ finally:
1036
+ # Close the Gemini client
1037
+ if self.__class__.__name__ == "Gemini" and self.client is not None:
1038
+ try:
1039
+ self.client.close() # type: ignore
1040
+ self.client = None
1041
+ except AttributeError:
1042
+ log_warning(
1043
+ "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
1044
+ " please upgrade Gemini to the latest version: pip install -U google-genai"
1045
+ )
798
1046
 
799
1047
  async def aprocess_response_stream(
800
1048
  self,
@@ -804,7 +1052,7 @@ class Model(ABC):
804
1052
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
805
1053
  tools: Optional[List[Dict[str, Any]]] = None,
806
1054
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
807
- run_response: Optional[RunOutput] = None,
1055
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
808
1056
  ) -> AsyncIterator[ModelResponse]:
809
1057
  """
810
1058
  Process a streaming response from the model.
@@ -817,153 +1065,229 @@ class Model(ABC):
817
1065
  tool_choice=tool_choice or self._tool_choice,
818
1066
  run_response=run_response,
819
1067
  ): # type: ignore
820
- for model_response in self._populate_stream_data_and_assistant_message(
1068
+ for model_response_delta in self._populate_stream_data(
821
1069
  stream_data=stream_data,
822
- assistant_message=assistant_message,
823
1070
  model_response_delta=response_delta,
824
1071
  ):
825
- yield model_response
1072
+ yield model_response_delta
826
1073
 
827
- # Populate the assistant message
828
- self._populate_assistant_message(assistant_message=assistant_message, provider_response=model_response)
1074
+ # Populate assistant message from stream data after the stream ends
1075
+ self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)
829
1076
 
830
1077
  async def aresponse_stream(
831
1078
  self,
832
1079
  messages: List[Message],
833
1080
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
834
- tools: Optional[List[Dict[str, Any]]] = None,
835
- functions: Optional[Dict[str, Function]] = None,
1081
+ tools: Optional[List[Union[Function, dict]]] = None,
836
1082
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
837
1083
  tool_call_limit: Optional[int] = None,
838
1084
  stream_model_response: bool = True,
839
- run_response: Optional[RunOutput] = None,
1085
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
1086
+ send_media_to_model: bool = True,
840
1087
  ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
841
1088
  """
842
1089
  Generate an asynchronous streaming response from the model.
843
1090
  """
1091
+ try:
1092
+ # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
1093
+ cache_key = None
1094
+ if self.cache_response:
1095
+ cache_key = self._get_model_cache_key(
1096
+ messages, stream=True, response_format=response_format, tools=tools
1097
+ )
1098
+ cached_data = self._get_cached_model_response(cache_key)
844
1099
 
845
- log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
846
- log_debug(f"Model: {self.id}", center=True, symbol="-")
847
- _log_messages(messages)
1100
+ if cached_data:
1101
+ log_info("Cache hit for async streaming model response")
1102
+ # Yield cached responses
1103
+ for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
1104
+ yield response
1105
+ return
848
1106
 
849
- function_call_count = 0
1107
+ log_info("Cache miss for async streaming model response")
850
1108
 
851
- while True:
852
- # Create assistant message and stream data
853
- assistant_message = Message(role=self.assistant_message_role)
854
- stream_data = MessageData()
855
- if stream_model_response:
856
- # Generate response
857
- async for response in self.aprocess_response_stream(
858
- messages=messages,
859
- assistant_message=assistant_message,
860
- stream_data=stream_data,
861
- response_format=response_format,
862
- tools=tools,
863
- tool_choice=tool_choice or self._tool_choice,
864
- run_response=run_response,
865
- ):
866
- yield response
867
-
868
- # Populate assistant message from stream data
869
- if stream_data.response_content:
870
- assistant_message.content = stream_data.response_content
871
- if stream_data.response_reasoning_content:
872
- assistant_message.reasoning_content = stream_data.response_reasoning_content
873
- if stream_data.response_redacted_reasoning_content:
874
- assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
875
- if stream_data.response_provider_data:
876
- assistant_message.provider_data = stream_data.response_provider_data
877
- if stream_data.response_audio:
878
- assistant_message.audio_output = stream_data.response_audio
879
- if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
880
- assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
1109
+ # Track streaming responses for caching
1110
+ streaming_responses: List[ModelResponse] = []
881
1111
 
882
- else:
883
- model_response = ModelResponse()
884
- await self._aprocess_model_response(
885
- messages=messages,
886
- assistant_message=assistant_message,
887
- model_response=model_response,
888
- response_format=response_format,
889
- tools=tools,
890
- tool_choice=tool_choice or self._tool_choice,
891
- run_response=run_response,
892
- )
893
- yield model_response
1112
+ log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
1113
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
1114
+ _log_messages(messages)
894
1115
 
895
- # Add assistant message to messages
896
- messages.append(assistant_message)
897
- assistant_message.log(metrics=True)
1116
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
1117
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
1118
+
1119
+ function_call_count = 0
1120
+
1121
+ while True:
1122
+ # Create assistant message and stream data
1123
+ assistant_message = Message(role=self.assistant_message_role)
1124
+ stream_data = MessageData()
1125
+ model_response = ModelResponse()
1126
+ if stream_model_response:
1127
+ # Generate response
1128
+ async for model_response in self.aprocess_response_stream(
1129
+ messages=messages,
1130
+ assistant_message=assistant_message,
1131
+ stream_data=stream_data,
1132
+ response_format=response_format,
1133
+ tools=_tool_dicts,
1134
+ tool_choice=tool_choice or self._tool_choice,
1135
+ run_response=run_response,
1136
+ ):
1137
+ if self.cache_response and isinstance(model_response, ModelResponse):
1138
+ streaming_responses.append(model_response)
1139
+ yield model_response
898
1140
 
899
- # Handle tool calls if present
900
- if assistant_message.tool_calls is not None:
901
- # Prepare function calls
902
- function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
903
- assistant_message, messages, functions
904
- )
905
- function_call_results: List[Message] = []
906
-
907
- # Execute function calls
908
- async for function_call_response in self.arun_function_calls(
909
- function_calls=function_calls_to_run,
910
- function_call_results=function_call_results,
911
- current_function_call_count=function_call_count,
912
- function_call_limit=tool_call_limit,
913
- ):
914
- yield function_call_response
915
-
916
- # Add a function call for each successful execution
917
- function_call_count += len(function_call_results)
918
-
919
- # Format and add results to messages
920
- if stream_data and stream_data.extra is not None:
921
- self.format_function_call_results(
922
- messages=messages, function_call_results=function_call_results, **stream_data.extra
923
- )
924
1141
  else:
925
- self.format_function_call_results(messages=messages, function_call_results=function_call_results)
1142
+ await self._aprocess_model_response(
1143
+ messages=messages,
1144
+ assistant_message=assistant_message,
1145
+ model_response=model_response,
1146
+ response_format=response_format,
1147
+ tools=_tool_dicts,
1148
+ tool_choice=tool_choice or self._tool_choice,
1149
+ run_response=run_response,
1150
+ )
1151
+ if self.cache_response:
1152
+ streaming_responses.append(model_response)
1153
+ yield model_response
1154
+
1155
+ # Add assistant message to messages
1156
+ messages.append(assistant_message)
1157
+ assistant_message.log(metrics=True)
1158
+
1159
+ # Handle tool calls if present
1160
+ if assistant_message.tool_calls is not None:
1161
+ # Prepare function calls
1162
+ function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
1163
+ assistant_message=assistant_message, messages=messages, functions=_functions
1164
+ )
1165
+ function_call_results: List[Message] = []
1166
+
1167
+ # Execute function calls
1168
+ async for function_call_response in self.arun_function_calls(
1169
+ function_calls=function_calls_to_run,
1170
+ function_call_results=function_call_results,
1171
+ current_function_call_count=function_call_count,
1172
+ function_call_limit=tool_call_limit,
1173
+ ):
1174
+ if self.cache_response and isinstance(function_call_response, ModelResponse):
1175
+ streaming_responses.append(function_call_response)
1176
+ yield function_call_response
926
1177
 
927
- # Handle function call media
928
- if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
929
- self._handle_function_call_media(messages=messages, function_call_results=function_call_results)
1178
+ # Add a function call for each successful execution
1179
+ function_call_count += len(function_call_results)
930
1180
 
931
- for function_call_result in function_call_results:
932
- function_call_result.log(metrics=True)
1181
+ # Format and add results to messages
1182
+ if stream_data and stream_data.extra is not None:
1183
+ self.format_function_call_results(
1184
+ messages=messages, function_call_results=function_call_results, **stream_data.extra
1185
+ )
1186
+ elif model_response and model_response.extra is not None:
1187
+ self.format_function_call_results(
1188
+ messages=messages, function_call_results=function_call_results, **model_response.extra or {}
1189
+ )
1190
+ else:
1191
+ self.format_function_call_results(
1192
+ messages=messages, function_call_results=function_call_results
1193
+ )
933
1194
 
934
- # Check if we should stop after tool calls
935
- if any(m.stop_after_tool_call for m in function_call_results):
936
- break
1195
+ # Handle function call media
1196
+ if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
1197
+ self._handle_function_call_media(
1198
+ messages=messages,
1199
+ function_call_results=function_call_results,
1200
+ send_media_to_model=send_media_to_model,
1201
+ )
937
1202
 
938
- # If we have any tool calls that require confirmation, break the loop
939
- if any(fc.function.requires_confirmation for fc in function_calls_to_run):
940
- break
1203
+ for function_call_result in function_call_results:
1204
+ function_call_result.log(metrics=True)
941
1205
 
942
- # If we have any tool calls that require external execution, break the loop
943
- if any(fc.function.external_execution for fc in function_calls_to_run):
944
- break
1206
+ # Check if we should stop after tool calls
1207
+ if any(m.stop_after_tool_call for m in function_call_results):
1208
+ break
945
1209
 
946
- # If we have any tool calls that require user input, break the loop
947
- if any(fc.function.requires_user_input for fc in function_calls_to_run):
948
- break
1210
+ # If we have any tool calls that require confirmation, break the loop
1211
+ if any(fc.function.requires_confirmation for fc in function_calls_to_run):
1212
+ break
949
1213
 
950
- # Continue loop to get next response
951
- continue
1214
+ # If we have any tool calls that require external execution, break the loop
1215
+ if any(fc.function.external_execution for fc in function_calls_to_run):
1216
+ break
1217
+
1218
+ # If we have any tool calls that require user input, break the loop
1219
+ if any(fc.function.requires_user_input for fc in function_calls_to_run):
1220
+ break
1221
+
1222
+ # Continue loop to get next response
1223
+ continue
952
1224
 
953
- # No tool calls or finished processing them
954
- break
1225
+ # No tool calls or finished processing them
1226
+ break
955
1227
 
956
- log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
1228
+ log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
1229
+
1230
+ # Save streaming responses to cache if enabled
1231
+ if self.cache_response and cache_key and streaming_responses:
1232
+ self._save_streaming_responses_to_cache(cache_key, streaming_responses)
1233
+
1234
+ finally:
1235
+ # Close the Gemini client
1236
+ if self.__class__.__name__ == "Gemini" and self.client is not None:
1237
+ try:
1238
+ await self.client.aio.aclose() # type: ignore
1239
+ self.client = None
1240
+ except AttributeError:
1241
+ log_warning(
1242
+ "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
1243
+ " please upgrade Gemini to the latest version: pip install -U google-genai"
1244
+ )
957
1245
 
958
- def _populate_stream_data_and_assistant_message(
959
- self, stream_data: MessageData, assistant_message: Message, model_response_delta: ModelResponse
1246
+ def _populate_assistant_message_from_stream_data(
1247
+ self, assistant_message: Message, stream_data: MessageData
1248
+ ) -> None:
1249
+ """
1250
+ Populate an assistant message with the stream data.
1251
+ """
1252
+ if stream_data.response_role is not None:
1253
+ assistant_message.role = stream_data.response_role
1254
+ if stream_data.response_metrics is not None:
1255
+ assistant_message.metrics = stream_data.response_metrics
1256
+ if stream_data.response_content:
1257
+ assistant_message.content = stream_data.response_content
1258
+ if stream_data.response_reasoning_content:
1259
+ assistant_message.reasoning_content = stream_data.response_reasoning_content
1260
+ if stream_data.response_redacted_reasoning_content:
1261
+ assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
1262
+ if stream_data.response_provider_data:
1263
+ assistant_message.provider_data = stream_data.response_provider_data
1264
+ if stream_data.response_citations:
1265
+ assistant_message.citations = stream_data.response_citations
1266
+ if stream_data.response_audio:
1267
+ assistant_message.audio_output = stream_data.response_audio
1268
+ if stream_data.response_image:
1269
+ assistant_message.image_output = stream_data.response_image
1270
+ if stream_data.response_video:
1271
+ assistant_message.video_output = stream_data.response_video
1272
+ if stream_data.response_file:
1273
+ assistant_message.file_output = stream_data.response_file
1274
+ if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
1275
+ assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
1276
+
1277
+ def _populate_stream_data(
1278
+ self, stream_data: MessageData, model_response_delta: ModelResponse
960
1279
  ) -> Iterator[ModelResponse]:
961
1280
  """Update the stream data and assistant message with the model response."""
962
- # Add role to assistant message
963
- if model_response_delta.role is not None:
964
- assistant_message.role = model_response_delta.role
965
1281
 
966
1282
  should_yield = False
1283
+ if model_response_delta.role is not None:
1284
+ stream_data.response_role = model_response_delta.role # type: ignore
1285
+
1286
+ if model_response_delta.response_usage is not None:
1287
+ if stream_data.response_metrics is None:
1288
+ stream_data.response_metrics = Metrics()
1289
+ stream_data.response_metrics += model_response_delta.response_usage
1290
+
967
1291
  # Update stream_data content
968
1292
  if model_response_delta.content is not None:
969
1293
  stream_data.response_content += model_response_delta.content
@@ -993,13 +1317,13 @@ class Model(ABC):
993
1317
  stream_data.response_tool_calls.extend(model_response_delta.tool_calls)
994
1318
  should_yield = True
995
1319
 
996
- if model_response_delta.audio is not None and isinstance(model_response_delta.audio, AudioResponse):
1320
+ if model_response_delta.audio is not None and isinstance(model_response_delta.audio, Audio):
997
1321
  if stream_data.response_audio is None:
998
- stream_data.response_audio = AudioResponse(id=str(uuid4()), content="", transcript="")
1322
+ stream_data.response_audio = Audio(id=str(uuid4()), content="", transcript="")
999
1323
 
1000
1324
  from typing import cast
1001
1325
 
1002
- audio_response = cast(AudioResponse, model_response_delta.audio)
1326
+ audio_response = cast(Audio, model_response_delta.audio)
1003
1327
 
1004
1328
  # Update the stream data with audio information
1005
1329
  if audio_response.id is not None:
@@ -1030,7 +1354,13 @@ class Model(ABC):
1030
1354
  if model_response_delta.extra is not None:
1031
1355
  if stream_data.extra is None:
1032
1356
  stream_data.extra = {}
1033
- stream_data.extra.update(model_response_delta.extra)
1357
+ for key in model_response_delta.extra:
1358
+ if isinstance(model_response_delta.extra[key], list):
1359
+ if not stream_data.extra.get(key):
1360
+ stream_data.extra[key] = []
1361
+ stream_data.extra[key].extend(model_response_delta.extra[key])
1362
+ else:
1363
+ stream_data.extra[key] = model_response_delta.extra[key]
1034
1364
 
1035
1365
  if should_yield:
1036
1366
  yield model_response_delta
@@ -1102,43 +1432,14 @@ class Model(ABC):
1102
1432
  images = None
1103
1433
  videos = None
1104
1434
  audios = None
1435
+ files = None
1105
1436
 
1106
1437
  if success and function_execution_result:
1107
- # Convert ImageArtifacts to Images for message compatibility
1108
- if function_execution_result.images:
1109
- from agno.media import Image
1110
-
1111
- images = []
1112
- for img_artifact in function_execution_result.images:
1113
- if img_artifact.url:
1114
- images.append(Image(url=img_artifact.url))
1115
- elif img_artifact.content:
1116
- images.append(Image(content=img_artifact.content))
1117
-
1118
- # Convert VideoArtifacts to Videos for message compatibility
1119
- if function_execution_result.videos:
1120
- from agno.media import Video
1121
-
1122
- videos = []
1123
- for vid_artifact in function_execution_result.videos:
1124
- if vid_artifact.url:
1125
- videos.append(Video(url=vid_artifact.url))
1126
- elif vid_artifact.content:
1127
- videos.append(Video(content=vid_artifact.content))
1128
-
1129
- # Convert AudioArtifacts to Audio for message compatibility
1130
- if function_execution_result.audios:
1131
- from agno.media import Audio
1132
-
1133
- audios = []
1134
- for aud_artifact in function_execution_result.audios:
1135
- if aud_artifact.url:
1136
- audios.append(Audio(url=aud_artifact.url))
1137
- elif aud_artifact.base64_audio:
1138
- import base64
1139
-
1140
- audio_bytes = base64.b64decode(aud_artifact.base64_audio)
1141
- audios.append(Audio(content=audio_bytes))
1438
+ # With unified classes, no conversion needed - use directly
1439
+ images = function_execution_result.images
1440
+ videos = function_execution_result.videos
1441
+ audios = function_execution_result.audios
1442
+ files = function_execution_result.files
1142
1443
 
1143
1444
  return Message(
1144
1445
  role=self.tool_message_role,
@@ -1151,6 +1452,7 @@ class Model(ABC):
1151
1452
  images=images,
1152
1453
  videos=videos,
1153
1454
  audio=audios,
1455
+ files=files,
1154
1456
  **kwargs, # type: ignore
1155
1457
  )
1156
1458
 
@@ -1207,32 +1509,49 @@ class Model(ABC):
1207
1509
  function_call_output: str = ""
1208
1510
 
1209
1511
  if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
1210
- for item in function_execution_result.result:
1211
- # This function yields agent/team run events
1212
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
1213
- item, tuple(get_args(TeamRunOutputEvent))
1214
- ):
1215
- # We only capture content events
1216
- if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
1217
- if item.content is not None and isinstance(item.content, BaseModel):
1218
- function_call_output += item.content.model_dump_json()
1219
- else:
1220
- # Capture output
1221
- function_call_output += item.content or ""
1512
+ try:
1513
+ for item in function_execution_result.result:
1514
+ # This function yields agent/team/workflow run events
1515
+ if (
1516
+ isinstance(item, tuple(get_args(RunOutputEvent)))
1517
+ or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
1518
+ or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
1519
+ ):
1520
+ # We only capture content events for output accumulation
1521
+ if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
1522
+ if item.content is not None and isinstance(item.content, BaseModel):
1523
+ function_call_output += item.content.model_dump_json()
1524
+ else:
1525
+ # Capture output
1526
+ function_call_output += item.content or ""
1222
1527
 
1223
- if function_call.function.show_result:
1224
- yield ModelResponse(content=item.content)
1528
+ if function_call.function.show_result and item.content is not None:
1529
+ yield ModelResponse(content=item.content)
1225
1530
 
1226
1531
  if isinstance(item, CustomEvent):
1227
1532
  function_call_output += str(item)
1228
1533
 
1229
- # Yield the event itself to bubble it up
1230
- yield item
1534
+ # For WorkflowCompletedEvent, extract content for final output
1535
+ from agno.run.workflow import WorkflowCompletedEvent
1231
1536
 
1232
- else:
1233
- function_call_output += str(item)
1234
- if function_call.function.show_result:
1235
- yield ModelResponse(content=str(item))
1537
+ if isinstance(item, WorkflowCompletedEvent):
1538
+ if item.content is not None:
1539
+ if isinstance(item.content, BaseModel):
1540
+ function_call_output += item.content.model_dump_json()
1541
+ else:
1542
+ function_call_output += str(item.content)
1543
+
1544
+ # Yield the event itself to bubble it up
1545
+ yield item
1546
+
1547
+ else:
1548
+ function_call_output += str(item)
1549
+ if function_call.function.show_result and item is not None:
1550
+ yield ModelResponse(content=str(item))
1551
+ except Exception as e:
1552
+ log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
1553
+ function_call.error = str(e)
1554
+ function_call_success = False
1236
1555
  else:
1237
1556
  from agno.tools.function import ToolResult
1238
1557
 
@@ -1248,10 +1567,12 @@ class Model(ABC):
1248
1567
  function_execution_result.videos = tool_result.videos
1249
1568
  if tool_result.audios:
1250
1569
  function_execution_result.audios = tool_result.audios
1570
+ if tool_result.files:
1571
+ function_execution_result.files = tool_result.files
1251
1572
  else:
1252
1573
  function_call_output = str(function_execution_result.result) if function_execution_result.result else ""
1253
1574
 
1254
- if function_call.function.show_result:
1575
+ if function_call.function.show_result and function_call_output is not None:
1255
1576
  yield ModelResponse(content=function_call_output)
1256
1577
 
1257
1578
  # Create and yield function call result
@@ -1263,7 +1584,7 @@ class Model(ABC):
1263
1584
  function_execution_result=function_execution_result,
1264
1585
  )
1265
1586
  yield ModelResponse(
1266
- content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
1587
+ content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
1267
1588
  tool_executions=[
1268
1589
  ToolExecution(
1269
1590
  tool_call_id=function_call_result.tool_call_id,
@@ -1281,6 +1602,7 @@ class Model(ABC):
1281
1602
  images=function_execution_result.images,
1282
1603
  videos=function_execution_result.videos,
1283
1604
  audios=function_execution_result.audios,
1605
+ files=function_execution_result.files,
1284
1606
  )
1285
1607
 
1286
1608
  # Add function call to function call results
@@ -1399,6 +1721,7 @@ class Model(ABC):
1399
1721
  function_call_timer = Timer()
1400
1722
  function_call_timer.start()
1401
1723
  success: Union[bool, AgentRunException] = False
1724
+ result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
1402
1725
 
1403
1726
  try:
1404
1727
  if (
@@ -1564,57 +1887,41 @@ class Model(ABC):
1564
1887
  *(self.arun_function_call(fc) for fc in function_calls_to_run), return_exceptions=True
1565
1888
  )
1566
1889
 
1567
- # Process results
1890
+ # Separate async generators from other results for concurrent processing
1891
+ async_generator_results: List[Any] = []
1892
+ non_async_generator_results: List[Any] = []
1893
+
1568
1894
  for result in results:
1569
- # If result is an exception, skip processing it
1570
1895
  if isinstance(result, BaseException):
1571
- log_error(f"Error during function call: {result}")
1572
- raise result
1896
+ non_async_generator_results.append(result)
1897
+ continue
1573
1898
 
1574
- # Unpack result
1575
1899
  function_call_success, function_call_timer, function_call, function_execution_result = result
1576
1900
 
1577
- updated_session_state = function_execution_result.updated_session_state
1578
-
1579
- # Handle AgentRunException
1580
- if isinstance(function_call_success, AgentRunException):
1581
- a_exc = function_call_success
1582
- # Update additional messages from function call
1583
- _handle_agent_exception(a_exc, additional_input)
1584
- # Set function call success to False if an exception occurred
1585
- function_call_success = False
1901
+ # Check if this result contains an async generator
1902
+ if isinstance(function_call.result, (AsyncGeneratorType, AsyncIterator)):
1903
+ async_generator_results.append(result)
1904
+ else:
1905
+ non_async_generator_results.append(result)
1586
1906
 
1587
- # Process function call output
1588
- function_call_output: str = ""
1589
- if isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
1590
- for item in function_call.result:
1591
- # This function yields agent/team run events
1592
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
1593
- item, tuple(get_args(TeamRunOutputEvent))
1594
- ):
1595
- # We only capture content events
1596
- if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
1597
- if item.content is not None and isinstance(item.content, BaseModel):
1598
- function_call_output += item.content.model_dump_json()
1599
- else:
1600
- # Capture output
1601
- function_call_output += item.content or ""
1907
+ # Process async generators with real-time event streaming using asyncio.Queue
1908
+ async_generator_outputs: Dict[int, Tuple[Any, str, Optional[BaseException]]] = {}
1909
+ event_queue: asyncio.Queue = asyncio.Queue()
1910
+ active_generators_count: int = len(async_generator_results)
1602
1911
 
1603
- if function_call.function.show_result:
1604
- yield ModelResponse(content=item.content)
1605
- continue
1912
+ # Create background tasks for each async generator
1913
+ async def process_async_generator(result, generator_id):
1914
+ function_call_success, function_call_timer, function_call, function_execution_result = result
1915
+ function_call_output = ""
1606
1916
 
1607
- # Yield the event itself to bubble it up
1608
- yield item
1609
- else:
1610
- function_call_output += str(item)
1611
- if function_call.function.show_result:
1612
- yield ModelResponse(content=str(item))
1613
- elif isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
1917
+ try:
1614
1918
  async for item in function_call.result:
1615
- # This function yields agent/team run events
1616
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
1617
- item, tuple(get_args(TeamRunOutputEvent))
1919
+ # This function yields agent/team/workflow run events
1920
+ if isinstance(
1921
+ item,
1922
+ tuple(get_args(RunOutputEvent))
1923
+ + tuple(get_args(TeamRunOutputEvent))
1924
+ + tuple(get_args(WorkflowRunOutputEvent)),
1618
1925
  ):
1619
1926
  # We only capture content events
1620
1927
  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -1624,21 +1931,143 @@ class Model(ABC):
1624
1931
  # Capture output
1625
1932
  function_call_output += item.content or ""
1626
1933
 
1627
- if function_call.function.show_result:
1628
- yield ModelResponse(content=item.content)
1934
+ if function_call.function.show_result and item.content is not None:
1935
+ await event_queue.put(ModelResponse(content=item.content))
1629
1936
  continue
1630
1937
 
1631
- if isinstance(item, CustomEvent):
1632
- function_call_output += str(item)
1938
+ if isinstance(item, CustomEvent):
1939
+ function_call_output += str(item)
1633
1940
 
1634
- # Yield the event itself to bubble it up
1635
- yield item
1941
+ # For WorkflowCompletedEvent, extract content for final output
1942
+ from agno.run.workflow import WorkflowCompletedEvent
1943
+
1944
+ if isinstance(item, WorkflowCompletedEvent):
1945
+ if item.content is not None:
1946
+ if isinstance(item.content, BaseModel):
1947
+ function_call_output += item.content.model_dump_json()
1948
+ else:
1949
+ function_call_output += str(item.content)
1950
+
1951
+ # Put the event into the queue to be yielded
1952
+ await event_queue.put(item)
1636
1953
 
1637
1954
  # Yield custom events emitted by the tool
1638
1955
  else:
1639
1956
  function_call_output += str(item)
1640
- if function_call.function.show_result:
1641
- yield ModelResponse(content=str(item))
1957
+ if function_call.function.show_result and item is not None:
1958
+ await event_queue.put(ModelResponse(content=str(item)))
1959
+
1960
+ # Store the final output for this generator
1961
+ async_generator_outputs[generator_id] = (result, function_call_output, None)
1962
+
1963
+ except Exception as e:
1964
+ # Store the exception
1965
+ async_generator_outputs[generator_id] = (result, "", e)
1966
+
1967
+ # Signal that this generator is done
1968
+ await event_queue.put(("GENERATOR_DONE", generator_id))
1969
+
1970
+ # Start all async generator tasks
1971
+ generator_tasks = []
1972
+ for i, result in enumerate(async_generator_results):
1973
+ task = asyncio.create_task(process_async_generator(result, i))
1974
+ generator_tasks.append(task)
1975
+
1976
+ # Stream events from the queue as they arrive
1977
+ completed_generators_count = 0
1978
+ while completed_generators_count < active_generators_count:
1979
+ try:
1980
+ event = await event_queue.get()
1981
+
1982
+ # Check if this is a completion signal
1983
+ if isinstance(event, tuple) and event[0] == "GENERATOR_DONE":
1984
+ completed_generators_count += 1
1985
+ continue
1986
+
1987
+ # Yield the actual event
1988
+ yield event
1989
+
1990
+ except Exception as e:
1991
+ log_error(f"Error processing async generator event: {e}")
1992
+ break
1993
+
1994
+ # Now process all results (non-async generators and completed async generators)
1995
+ for i, original_result in enumerate(results):
1996
+ # If result is an exception, skip processing it
1997
+ if isinstance(original_result, BaseException):
1998
+ log_error(f"Error during function call: {original_result}")
1999
+ raise original_result
2000
+
2001
+ # Unpack result
2002
+ function_call_success, function_call_timer, function_call, function_execution_result = original_result
2003
+
2004
+ # Check if this was an async generator that was already processed
2005
+ async_function_call_output = None
2006
+ if isinstance(function_call.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
2007
+ # Find the corresponding processed result
2008
+ async_gen_index = 0
2009
+ for j, result in enumerate(results[: i + 1]):
2010
+ if not isinstance(result, BaseException):
2011
+ _, _, fc, _ = result
2012
+ if isinstance(fc.result, (AsyncGeneratorType, collections.abc.AsyncIterator)):
2013
+ if j == i: # This is our async generator
2014
+ if async_gen_index in async_generator_outputs:
2015
+ _, async_function_call_output, error = async_generator_outputs[async_gen_index]
2016
+ if error:
2017
+ log_error(f"Error in async generator: {error}")
2018
+ raise error
2019
+ break
2020
+ async_gen_index += 1
2021
+
2022
+ updated_session_state = function_execution_result.updated_session_state
2023
+
2024
+ # Handle AgentRunException
2025
+ if isinstance(function_call_success, AgentRunException):
2026
+ a_exc = function_call_success
2027
+ # Update additional messages from function call
2028
+ _handle_agent_exception(a_exc, additional_input)
2029
+ # Set function call success to False if an exception occurred
2030
+ function_call_success = False
2031
+
2032
+ # Process function call output
2033
+ function_call_output: str = ""
2034
+
2035
+ # Check if this was an async generator that was already processed
2036
+ if async_function_call_output is not None:
2037
+ function_call_output = async_function_call_output
2038
+ # Events from async generators were already yielded in real-time above
2039
+ elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
2040
+ try:
2041
+ for item in function_call.result:
2042
+ # This function yields agent/team/workflow run events
2043
+ if isinstance(
2044
+ item,
2045
+ tuple(get_args(RunOutputEvent))
2046
+ + tuple(get_args(TeamRunOutputEvent))
2047
+ + tuple(get_args(WorkflowRunOutputEvent)),
2048
+ ):
2049
+ # We only capture content events
2050
+ if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
2051
+ if item.content is not None and isinstance(item.content, BaseModel):
2052
+ function_call_output += item.content.model_dump_json()
2053
+ else:
2054
+ # Capture output
2055
+ function_call_output += item.content or ""
2056
+
2057
+ if function_call.function.show_result and item.content is not None:
2058
+ yield ModelResponse(content=item.content)
2059
+ continue
2060
+
2061
+ # Yield the event itself to bubble it up
2062
+ yield item
2063
+ else:
2064
+ function_call_output += str(item)
2065
+ if function_call.function.show_result and item is not None:
2066
+ yield ModelResponse(content=str(item))
2067
+ except Exception as e:
2068
+ log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
2069
+ function_call.error = str(e)
2070
+ function_call_success = False
1642
2071
  else:
1643
2072
  from agno.tools.function import ToolResult
1644
2073
 
@@ -1652,10 +2081,12 @@ class Model(ABC):
1652
2081
  function_execution_result.videos = tool_result.videos
1653
2082
  if tool_result.audios:
1654
2083
  function_execution_result.audios = tool_result.audios
2084
+ if tool_result.files:
2085
+ function_execution_result.files = tool_result.files
1655
2086
  else:
1656
2087
  function_call_output = str(function_call.result)
1657
2088
 
1658
- if function_call.function.show_result:
2089
+ if function_call.function.show_result and function_call_output is not None:
1659
2090
  yield ModelResponse(content=function_call_output)
1660
2091
 
1661
2092
  # Create and yield function call result
@@ -1667,7 +2098,7 @@ class Model(ABC):
1667
2098
  function_execution_result=function_execution_result,
1668
2099
  )
1669
2100
  yield ModelResponse(
1670
- content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
2101
+ content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
1671
2102
  tool_executions=[
1672
2103
  ToolExecution(
1673
2104
  tool_call_id=function_call_result.tool_call_id,
@@ -1684,6 +2115,7 @@ class Model(ABC):
1684
2115
  images=function_execution_result.images,
1685
2116
  videos=function_execution_result.videos,
1686
2117
  audios=function_execution_result.audios,
2118
+ files=function_execution_result.files,
1687
2119
  )
1688
2120
 
1689
2121
  # Add function call result to function call results
@@ -1709,7 +2141,7 @@ class Model(ABC):
1709
2141
  model_response.tool_calls = []
1710
2142
 
1711
2143
  function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
1712
- assistant_message, messages, functions
2144
+ assistant_message=assistant_message, messages=messages, functions=functions
1713
2145
  )
1714
2146
  return function_calls_to_run
1715
2147
 
@@ -1722,7 +2154,9 @@ class Model(ABC):
1722
2154
  if len(function_call_results) > 0:
1723
2155
  messages.extend(function_call_results)
1724
2156
 
1725
- def _handle_function_call_media(self, messages: List[Message], function_call_results: List[Message]) -> None:
2157
+ def _handle_function_call_media(
2158
+ self, messages: List[Message], function_call_results: List[Message], send_media_to_model: bool = True
2159
+ ) -> None:
1726
2160
  """
1727
2161
  Handle media artifacts from function calls by adding follow-up user messages for generated media if needed.
1728
2162
  """
@@ -1733,6 +2167,7 @@ class Model(ABC):
1733
2167
  all_images: List[Image] = []
1734
2168
  all_videos: List[Video] = []
1735
2169
  all_audio: List[Audio] = []
2170
+ all_files: List[File] = []
1736
2171
 
1737
2172
  for result_message in function_call_results:
1738
2173
  if result_message.images:
@@ -1748,15 +2183,21 @@ class Model(ABC):
1748
2183
  all_audio.extend(result_message.audio)
1749
2184
  result_message.audio = None
1750
2185
 
1751
- # If we have media artifacts, add a follow-up "user" message instead of a "tool"
1752
- # message with the media artifacts which throws error for some models
1753
- if all_images or all_videos or all_audio:
2186
+ if result_message.files:
2187
+ all_files.extend(result_message.files)
2188
+ result_message.files = None
2189
+
2190
+ # Only add media message if we should send media to model
2191
+ if send_media_to_model and (all_images or all_videos or all_audio or all_files):
2192
+ # If we have media artifacts, add a follow-up "user" message instead of a "tool"
2193
+ # message with the media artifacts which throws error for some models
1754
2194
  media_message = Message(
1755
2195
  role="user",
1756
2196
  content="Take note of the following content",
1757
2197
  images=all_images if all_images else None,
1758
2198
  videos=all_videos if all_videos else None,
1759
2199
  audio=all_audio if all_audio else None,
2200
+ files=all_files if all_files else None,
1760
2201
  )
1761
2202
  messages.append(media_message)
1762
2203
 
@@ -1782,10 +2223,14 @@ class Model(ABC):
1782
2223
  new_model = cls.__new__(cls)
1783
2224
  memo[id(self)] = new_model
1784
2225
 
1785
- # Deep copy all attributes
2226
+ # Deep copy all attributes except client objects
1786
2227
  for k, v in self.__dict__.items():
1787
2228
  if k in {"response_format", "_tools", "_functions"}:
1788
2229
  continue
2230
+ # Skip client objects
2231
+ if k in {"client", "async_client", "http_client", "mistral_client", "model_client"}:
2232
+ setattr(new_model, k, None)
2233
+ continue
1789
2234
  try:
1790
2235
  setattr(new_model, k, deepcopy(v, memo))
1791
2236
  except Exception: