agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/models/base.py CHANGED
@@ -1,9 +1,14 @@
 import asyncio
 import collections.abc
+import json
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
+from hashlib import md5
+from pathlib import Path
+from time import sleep, time
 from types import AsyncGeneratorType, GeneratorType
 from typing import (
+    TYPE_CHECKING,
     Any,
     AsyncIterator,
     Dict,
@@ -11,25 +16,31 @@ from typing import (
     List,
     Literal,
     Optional,
+    Sequence,
     Tuple,
     Type,
     Union,
     get_args,
 )
+
+if TYPE_CHECKING:
+    from agno.compression.manager import CompressionManager
 from uuid import uuid4
 
 from pydantic import BaseModel
 
-from agno.exceptions import AgentRunException
+from agno.exceptions import AgentRunException, ModelProviderError, RetryableModelProviderError
 from agno.media import Audio, File, Image, Video
 from agno.models.message import Citations, Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
 from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
+from agno.run.requirement import RunRequirement
 from agno.run.team import RunContentEvent as TeamRunContentEvent
-from agno.run.team import TeamRunOutputEvent
+from agno.run.team import TeamRunOutput, TeamRunOutputEvent
+from agno.run.workflow import WorkflowRunOutputEvent
 from agno.tools.function import Function, FunctionCall, FunctionExecutionResult, UserInputField
-from agno.utils.log import log_debug, log_error, log_warning
+from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.utils.timer import Timer
 from agno.utils.tools import get_function_call_for_tool_call, get_function_call_for_tool_execution
 
@@ -48,6 +59,8 @@ class MessageData:
     response_video: Optional[Video] = None
     response_file: Optional[File] = None
 
+    response_metrics: Optional[Metrics] = None
+
     # Data from the provider that we might need on subsequent messages
     response_provider_data: Optional[Dict[str, Any]] = None
 
@@ -133,18 +146,323 @@ class Model(ABC):
     # The role of the assistant message.
     assistant_message_role: str = "assistant"
 
+    # Cache model responses to avoid redundant API calls during development
+    cache_response: bool = False
+    cache_ttl: Optional[int] = None
+    cache_dir: Optional[str] = None
+
+    # Retry configuration for model provider errors
+    # Number of retries to attempt when a ModelProviderError occurs
+    retries: int = 0
+    # Delay between retries (in seconds)
+    delay_between_retries: int = 1
+    # Exponential backoff: if True, the delay between retries is doubled each time
+    exponential_backoff: bool = False
+    # Enable retrying a model invocation once with a guidance message.
+    # This is useful for known errors avoidable with extra instructions.
+    retry_with_guidance: bool = True
+    # Set the number of times to retry the model invocation with guidance.
+    retry_with_guidance_limit: int = 1
+
     def __post_init__(self):
         if self.provider is None and self.name is not None:
             self.provider = f"{self.name} ({self.id})"
 
+    def _get_retry_delay(self, attempt: int) -> float:
+        """Calculate the delay before the next retry attempt."""
+        if self.exponential_backoff:
+            return self.delay_between_retries * (2**attempt)
+        return self.delay_between_retries
+
+    def _invoke_with_retry(self, **kwargs) -> ModelResponse:
+        """
+        Invoke the model with retry logic for ModelProviderError.
+
+        This method wraps the invoke() call and retries on ModelProviderError
+        with optional exponential backoff.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                return self.invoke(**kwargs)
+            except ModelProviderError as e:
+                last_exception = e
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
+                    )
+                    sleep(delay)
+                else:
+                    log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                return self._invoke_with_retry(**kwargs, retry_with_guidance=True)
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    async def _ainvoke_with_retry(self, **kwargs) -> ModelResponse:
+        """
+        Asynchronously invoke the model with retry logic for ModelProviderError.
+
+        This method wraps the ainvoke() call and retries on ModelProviderError
+        with optional exponential backoff.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                return await self.ainvoke(**kwargs)
+            except ModelProviderError as e:
+                last_exception = e
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                return await self._ainvoke_with_retry(**kwargs, retry_with_guidance=True)
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    def _invoke_stream_with_retry(self, **kwargs) -> Iterator[ModelResponse]:
+        """
+        Invoke the model stream with retry logic for ModelProviderError.
+
+        This method wraps the invoke_stream() call and retries on ModelProviderError
+        with optional exponential backoff. Note that retries restart the entire stream.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                yield from self.invoke_stream(**kwargs)
+                return  # Success, exit the retry loop
+            except ModelProviderError as e:
+                last_exception = e
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    sleep(delay)
+                else:
+                    log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                yield from self._invoke_stream_with_retry(**kwargs, retry_with_guidance=True)
+                return  # Success, exit after regeneration
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    async def _ainvoke_stream_with_retry(self, **kwargs) -> AsyncIterator[ModelResponse]:
+        """
+        Asynchronously invoke the model stream with retry logic for ModelProviderError.
+
+        This method wraps the ainvoke_stream() call and retries on ModelProviderError
+        with optional exponential backoff. Note that retries restart the entire stream.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                async for response in self.ainvoke_stream(**kwargs):
+                    yield response
+                return  # Success, exit the retry loop
+            except ModelProviderError as e:
+                last_exception = e
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                async for response in self._ainvoke_stream_with_retry(**kwargs, retry_with_guidance=True):
+                    yield response
+                return  # Success, exit after regeneration
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
     def to_dict(self) -> Dict[str, Any]:
         fields = {"name", "id", "provider"}
         _dict = {field: getattr(self, field) for field in fields if getattr(self, field) is not None}
         return _dict
 
+    def _remove_temporary_messages(self, messages: List[Message]) -> None:
+        """Remove temporary messages from the given list.
+
+        Args:
+            messages: The list of messages to filter (modified in place).
+        """
+        messages[:] = [m for m in messages if not m.temporary]
+
     def get_provider(self) -> str:
         return self.provider or self.name or self.__class__.__name__
 
+    def _get_model_cache_key(self, messages: List[Message], stream: bool, **kwargs: Any) -> str:
+        """Generate a cache key based on model messages and core parameters."""
+        message_data = []
+        for msg in messages:
+            msg_dict = {
+                "role": msg.role,
+                "content": msg.content,
+            }
+            message_data.append(msg_dict)
+
+        # Include tools parameter in cache key
+        has_tools = bool(kwargs.get("tools"))
+
+        cache_data = {
+            "model_id": self.id,
+            "messages": message_data,
+            "has_tools": has_tools,
+            "response_format": kwargs.get("response_format"),
+            "stream": stream,
+        }
+
+        cache_str = json.dumps(cache_data, sort_keys=True)
+        return md5(cache_str.encode()).hexdigest()
+
+    def _get_model_cache_file_path(self, cache_key: str) -> Path:
+        """Get the file path for a cache key."""
+        if self.cache_dir:
+            cache_dir = Path(self.cache_dir)
+        else:
+            cache_dir = Path.home() / ".agno" / "cache" / "model_responses"
+
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        return cache_dir / f"{cache_key}.json"
+
+    def _get_cached_model_response(self, cache_key: str) -> Optional[Dict[str, Any]]:
+        """Retrieve a cached response if it exists and is not expired."""
+        cache_file = self._get_model_cache_file_path(cache_key)
+
+        if not cache_file.exists():
+            return None
+
+        try:
+            with open(cache_file, "r") as f:
+                cached_data = json.load(f)
+
+            # Check TTL if set (None means no expiration)
+            if self.cache_ttl is not None:
+                if time() - cached_data["timestamp"] > self.cache_ttl:
+                    return None
+
+            return cached_data
+        except Exception:
+            return None
+
+    def _save_model_response_to_cache(self, cache_key: str, result: ModelResponse, is_streaming: bool = False) -> None:
+        """Save a model response to cache."""
+        try:
+            cache_file = self._get_model_cache_file_path(cache_key)
+
+            cache_data = {
+                "timestamp": int(time()),
+                "is_streaming": is_streaming,
+                "result": result.to_dict(),
+            }
+            with open(cache_file, "w") as f:
+                json.dump(cache_data, f)
+        except Exception:
+            pass
+
+    def _save_streaming_responses_to_cache(self, cache_key: str, responses: List[ModelResponse]) -> None:
+        """Save streaming responses to cache."""
+        cache_file = self._get_model_cache_file_path(cache_key)
+
+        cache_data = {
+            "timestamp": int(time()),
+            "is_streaming": True,
+            "streaming_responses": [r.to_dict() for r in responses],
+        }
+
+        try:
+            with open(cache_file, "w") as f:
+                json.dump(cache_data, f)
+        except Exception:
+            pass
+
+    def _model_response_from_cache(self, cached_data: Dict[str, Any]) -> ModelResponse:
+        """Reconstruct a ModelResponse from cached data."""
+        return ModelResponse.from_dict(cached_data["result"])
+
+    def _streaming_responses_from_cache(self, cached_data: list) -> Iterator[ModelResponse]:
+        """Reconstruct streaming responses from cached data."""
+        for cached_response in cached_data:
+            yield ModelResponse.from_dict(cached_response)
+
     @abstractmethod
     def invoke(self, *args, **kwargs) -> ModelResponse:
         pass
@@ -187,298 +505,455 @@ class Model(ABC):
         """
         pass
 
+    def _format_tools(self, tools: Optional[List[Union[Function, dict]]]) -> List[Dict[str, Any]]:
+        _tool_dicts = []
+        for tool in tools or []:
+            if isinstance(tool, Function):
+                _tool_dicts.append({"type": "function", "function": tool.to_dict()})
+            else:
+                # If a dict is passed, it is a builtin tool
+                _tool_dicts.append(tool)
+        return _tool_dicts
+
+    def count_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
+        output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        from agno.utils.tokens import count_tokens
+
+        return count_tokens(
+            messages,
+            tools=list(tools) if tools else None,
+            model_id=self.id,
+            output_schema=output_schema,
+        )
+
+    async def acount_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
+        output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        return self.count_tokens(messages, tools, output_schema=output_schema)
+
     def response(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> ModelResponse:
         """
         Generate a response from the model.
+
+        Args:
+            messages: List of messages to send to the model
+            response_format: Response format to use
+            tools: List of tools to use. This includes the original Function objects and dicts for built-in tools.
+            tool_choice: Tool choice to use
+            tool_call_limit: Tool call limit
+            run_response: Run response to use
+            send_media_to_model: Whether to send media to the model
         """
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-        log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-
-        _log_messages(messages)
-        model_response = ModelResponse()
-
-        function_call_count = 0
-
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            self._process_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            )
+            if cached_data:
+                log_info("Cache hit for model response")
+                return self._model_response_from_cache(cached_data)
 
-            # Add assistant message to messages
-            messages.append(assistant_message)
+            log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
 
-            # Log response and metrics
-            assistant_message.log(metrics=True)
+            _log_messages(messages)
+            model_response = ModelResponse()
 
-            # Handle tool calls if present
-            if assistant_message.tool_calls:
-                # Prepare function calls
-                function_calls_to_run = self._prepare_function_calls(
-                    assistant_message=assistant_message,
+            function_call_count = 0
+
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
+
+            while True:
+                # Compress tool results if compression is enabled and threshold is met
+                if _compression_manager is not None and _compression_manager.should_compress(
+                    messages, tools, model=self, response_format=response_format
+                ):
+                    _compression_manager.compress(messages)
+
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                self._process_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-                    functions=functions,
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                for function_call_response in self.run_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if isinstance(function_call_response, ModelResponse):
-                        # The session state is updated by the function call
-                        if function_call_response.updated_session_state is not None:
-                            model_response.updated_session_state = function_call_response.updated_session_state
-
-                        # Media artifacts are generated by the function call
-                        if function_call_response.images is not None:
-                            if model_response.images is None:
-                                model_response.images = []
-                            model_response.images.extend(function_call_response.images)
-
-                        if function_call_response.audios is not None:
-                            if model_response.audios is None:
-                                model_response.audios = []
-                            model_response.audios.extend(function_call_response.audios)
-
-                        if function_call_response.videos is not None:
-                            if model_response.videos is None:
-                                model_response.videos = []
-                            model_response.videos.extend(function_call_response.videos)
-
-                        if function_call_response.files is not None:
-                            if model_response.files is None:
-                                model_response.files = []
-                            model_response.files.extend(function_call_response.files)
-
-                        if (
-                            function_call_response.event
-                            in [
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+
+                # Log response and metrics
+                assistant_message.log(metrics=True, use_compressed_content=_compress_tool_results)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                # Record the tool execution in the model response
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+
+                                # If the tool is currently paused (HITL flow), add the requirement to the run response
+                                if (
+                                    function_call_response.event == ModelResponseEvent.tool_call_paused.value
+                                    and run_response is not None
+                                ):
+                                    current_tool_execution = function_call_response.tool_executions[-1]
+                                    if run_response.requirements is None:
+                                        run_response.requirements = []
+                                    run_response.requirements.append(
+                                        RunRequirement(tool_execution=current_tool_execution)
+                                    )
+
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-                                ModelResponseEvent.tool_call_paused.value,
-                            ]
-                            and function_call_response.tool_executions is not None
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-
-                        elif function_call_response.event not in [
-                            ModelResponseEvent.tool_call_started.value,
-                            ModelResponseEvent.tool_call_completed.value,
-                        ]:
-                            if function_call_response.content:
-                                model_response.content += function_call_response.content  # type: ignore
-
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore
 
-                if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    self.format_function_call_results(
                         messages=messages,
                         function_call_results=function_call_results,
-                        send_media_to_model=send_media_to_model,
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra or {},
                     )
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
 
-            # No tool calls or finished processing them
-            break
+                    # Continue loop to get next response
+                    continue
+
+                # No tool calls or finished processing them
+                break
+
+            log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
+
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:  # type: ignore
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
-        log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
         return model_response
 
     async def aresponse(
         self,
         messages: List[Message],
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        functions: Optional[Dict[str, Function]] = None,
+        tools: Optional[List[Union[Function, dict]]] = None,
        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         tool_call_limit: Optional[int] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> ModelResponse:
         """
         Generate an asynchronous response from the model.
         """
 
-        log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
-        model_response = ModelResponse()
-
-        function_call_count = 0
-
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            await self._aprocess_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=tools,
-                tool_choice=tool_choice or self._tool_choice,
-            )
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
+
+            if cached_data:
+                log_info("Cache hit for model response")
+                return self._model_response_from_cache(cached_data)
 
-            # Add assistant message to messages
-            messages.append(assistant_message)
+            log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
+            model_response = ModelResponse()
 
-            # Log response and metrics
-            assistant_message.log(metrics=True)
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
-            # Handle tool calls if present
-            if assistant_message.tool_calls:
-                # Prepare function calls
-                function_calls_to_run = self._prepare_function_calls(
-                    assistant_message=assistant_message,
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
+
+            function_call_count = 0
+
+            while True:
+                # Compress existing tool results BEFORE making API call to avoid context overflow
+                if _compression_manager is not None and await _compression_manager.ashould_compress(
+                    messages, tools, model=self, response_format=response_format
+                ):
+                    await _compression_manager.acompress(messages)
+
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                await self._aprocess_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-                    functions=functions,
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                async for function_call_response in self.arun_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if isinstance(function_call_response, ModelResponse):
-                        # The session state is updated by the function call
-                        if function_call_response.updated_session_state is not None:
-                            model_response.updated_session_state = function_call_response.updated_session_state
-
-                        # Media artifacts are generated by the function call
-                        if function_call_response.images is not None:
-                            if model_response.images is None:
-                                model_response.images = []
-                            model_response.images.extend(function_call_response.images)
-
-                        if function_call_response.audios is not None:
-                            if model_response.audios is None:
-                                model_response.audios = []
-                            model_response.audios.extend(function_call_response.audios)
-
-                        if function_call_response.videos is not None:
-                            if model_response.videos is None:
-                                model_response.videos = []
-                            model_response.videos.extend(function_call_response.videos)
-
-                        if function_call_response.files is not None:
-                            if model_response.files is None:
-                                model_response.files = []
-                            model_response.files.extend(function_call_response.files)
-
-                        if (
-                            function_call_response.event
-                            in [
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+
+                # Log response and metrics
+                assistant_message.log(metrics=True)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+
+                                # If the tool is currently paused (HITL flow), add the requirement to the run response
+                                if (
+                                    function_call_response.event == ModelResponseEvent.tool_call_paused.value
+                                    and run_response is not None
+                                ):
+                                    current_tool_execution = function_call_response.tool_executions[-1]
+                                    if run_response.requirements is None:
+                                        run_response.requirements = []
+                                    run_response.requirements.append(
+                                        RunRequirement(tool_execution=current_tool_execution)
+                                    )
+
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-                                ModelResponseEvent.tool_call_paused.value,
-                            ]
-                            and function_call_response.tool_executions is not None
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-                        elif function_call_response.event not in [
-                            ModelResponseEvent.tool_call_started.value,
-                            ModelResponseEvent.tool_call_completed.value,
-                        ]:
-                            if function_call_response.content:
-                                model_response.content += function_call_response.content  # type: ignore
-
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore
 
-                if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    self.format_function_call_results(
                         messages=messages,
                         function_call_results=function_call_results,
-                        send_media_to_model=send_media_to_model,
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra or {},
                     )
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
 
-            # No tool calls or finished processing them
-            break
+                    # Continue loop to get next response
+                    continue
+
+                # No tool calls or finished processing them
+                break
+
+            log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
+
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
-        log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
         return model_response
 
     def _process_model_response(
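
Note on the hunk above: response()/aresponse() now accept a single tools list of Function objects (or raw dicts for provider built-in tools), formatted internally by _format_tools(); the separate pre-formatted tools dicts and functions mapping from 2.1.x are gone. A hypothetical sketch follows; it is not part of the diff, and it assumes Function.from_callable and the OpenAIChat subclass, with get_weather as a made-up example tool.

# Sketch only; get_weather is a made-up tool and Function.from_callable is assumed.
from agno.models.message import Message
from agno.models.openai import OpenAIChat
from agno.tools.function import Function

def get_weather(city: str) -> str:
    """Return a canned weather report for the given city."""
    return f"Sunny in {city}"

model = OpenAIChat(id="gpt-4o")
response = model.response(
    messages=[Message(role="user", content="What is the weather in Paris?")],
    tools=[Function.from_callable(get_weather)],  # Function objects, no separate `functions` dict
)
print(response.content)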
@@ -489,7 +964,8 @@
         response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
-        run_response: Optional[RunOutput] = None,
+        run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -497,14 +973,15 @@ class Model(ABC):
497
973
  Returns:
498
974
  Tuple[Message, bool]: (assistant_message, should_continue)
499
975
  """
500
- # Generate response
501
- provider_response = self.invoke(
976
+ # Generate response with retry logic for ModelProviderError
977
+ provider_response = self._invoke_with_retry(
502
978
  assistant_message=assistant_message,
503
979
  messages=messages,
504
980
  response_format=response_format,
505
981
  tools=tools,
506
982
  tool_choice=tool_choice or self._tool_choice,
507
983
  run_response=run_response,
984
+ compress_tool_results=compress_tool_results,
508
985
  )
509
986
 
510
987
  # Populate the assistant message
@@ -533,6 +1010,8 @@ class Model(ABC):
533
1010
  if model_response.extra is None:
534
1011
  model_response.extra = {}
535
1012
  model_response.extra.update(provider_response.extra)
1013
+ if provider_response.provider_data is not None:
1014
+ model_response.provider_data = provider_response.provider_data
536
1015
 
537
1016
  async def _aprocess_model_response(
538
1017
  self,
@@ -542,7 +1021,8 @@ class Model(ABC):
542
1021
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
543
1022
  tools: Optional[List[Dict[str, Any]]] = None,
544
1023
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
545
- run_response: Optional[RunOutput] = None,
1024
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
1025
+ compress_tool_results: bool = False,
546
1026
  ) -> None:
547
1027
  """
548
1028
  Process a single async model response and return the assistant message and whether to continue.
@@ -550,14 +1030,15 @@ class Model(ABC):
550
1030
  Returns:
551
1031
  Tuple[Message, bool]: (assistant_message, should_continue)
552
1032
  """
553
- # Generate response
554
- provider_response = await self.ainvoke(
1033
+ # Generate response with retry logic for ModelProviderError
1034
+ provider_response = await self._ainvoke_with_retry(
555
1035
  messages=messages,
556
1036
  response_format=response_format,
557
1037
  tools=tools,
558
1038
  tool_choice=tool_choice or self._tool_choice,
559
1039
  assistant_message=assistant_message,
560
1040
  run_response=run_response,
1041
+ compress_tool_results=compress_tool_results,
561
1042
  )
562
1043
 
563
1044
  # Populate the assistant message
@@ -586,6 +1067,8 @@ class Model(ABC):
586
1067
  if model_response.extra is None:
587
1068
  model_response.extra = {}
588
1069
  model_response.extra.update(provider_response.extra)
1070
+ if provider_response.provider_data is not None:
1071
+ model_response.provider_data = provider_response.provider_data
589
1072
 
590
1073
  def _populate_assistant_message(
591
1074
  self,
@@ -602,7 +1085,6 @@ class Model(ABC):
602
1085
  Returns:
603
1086
  Message: The populated assistant message
604
1087
  """
605
- # Add role to assistant message
606
1088
  if provider_response.role is not None:
607
1089
  assistant_message.role = provider_response.role
608
1090
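Note: both `_process_model_response` and `_aprocess_model_response` now route through `_invoke_with_retry` / `_ainvoke_with_retry` instead of calling `invoke` / `ainvoke` directly. Only the call sites appear in this diff, so the wrapper below is a sketch of the likely shape; the retry count, backoff policy, and the `ModelProviderError` stand-in are assumptions:

```python
import time

class ModelProviderError(Exception):
    """Stand-in; the real class lives in agno.exceptions."""

def invoke_with_retry(invoke, *args, max_retries: int = 3, base_delay: float = 1.0, **kwargs):
    # Retry transient provider failures with exponential backoff.
    for attempt in range(max_retries):
        try:
            return invoke(*args, **kwargs)
        except ModelProviderError:
            if attempt == max_retries - 1:
                raise  # out of retries: surface the provider error
            time.sleep(base_delay * (2 ** attempt))
```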
 
@@ -666,165 +1148,218 @@ class Model(ABC):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+ compress_tool_results: bool = False,
  ) -> Iterator[ModelResponse]:
  """
- Process a streaming response from the model.
+ Process a streaming response from the model with retry logic for ModelProviderError.
  """

- for response_delta in self.invoke_stream(
+ for response_delta in self._invoke_stream_with_retry(
  messages=messages,
  assistant_message=assistant_message,
  response_format=response_format,
  tools=tools,
  tool_choice=tool_choice or self._tool_choice,
  run_response=run_response,
+ compress_tool_results=compress_tool_results,
  ):
- yield from self._populate_stream_data_and_assistant_message(
+ for model_response_delta in self._populate_stream_data(
  stream_data=stream_data,
- assistant_message=assistant_message,
  model_response_delta=response_delta,
- )
+ ):
+ yield model_response_delta

- # Add final metrics to assistant message
- self._populate_assistant_message(assistant_message=assistant_message, provider_response=response_delta)
+ # Populate assistant message from stream data after the stream ends
+ self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)

  def response_stream(
  self,
  messages: List[Message],
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
- tools: Optional[List[Dict[str, Any]]] = None,
- functions: Optional[Dict[str, Function]] = None,
+ tools: Optional[List[Union[Function, dict]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  tool_call_limit: Optional[int] = None,
  stream_model_response: bool = True,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  send_media_to_model: bool = True,
+ compression_manager: Optional["CompressionManager"] = None,
  ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
  """
  Generate a streaming response from the model.
  """
+ try:
+ # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+ cache_key = None
+ if self.cache_response:
+ cache_key = self._get_model_cache_key(
+ messages, stream=True, response_format=response_format, tools=tools
+ )
+ cached_data = self._get_cached_model_response(cache_key)

- log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
- log_debug(f"Model: {self.id}", center=True, symbol="-")
- _log_messages(messages)
+ if cached_data:
+ log_info("Cache hit for streaming model response")
+ # Yield cached responses
+ for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+ yield response
+ return

- function_call_count = 0
+ log_info("Cache miss for streaming model response")

- while True:
- assistant_message = Message(role=self.assistant_message_role)
- # Create assistant message and stream data
- stream_data = MessageData()
- model_response = ModelResponse()
- if stream_model_response:
- # Generate response
- yield from self.process_response_stream(
- messages=messages,
- assistant_message=assistant_message,
- stream_data=stream_data,
- response_format=response_format,
- tools=tools,
- tool_choice=tool_choice or self._tool_choice,
- run_response=run_response,
- )
+ # Track streaming responses for caching
+ streaming_responses: List[ModelResponse] = []

- # Populate assistant message from stream data
- if stream_data.response_content:
- assistant_message.content = stream_data.response_content
- if stream_data.response_reasoning_content:
- assistant_message.reasoning_content = stream_data.response_reasoning_content
- if stream_data.response_redacted_reasoning_content:
- assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
- if stream_data.response_provider_data:
- assistant_message.provider_data = stream_data.response_provider_data
- if stream_data.response_citations:
- assistant_message.citations = stream_data.response_citations
- if stream_data.response_audio:
- assistant_message.audio_output = stream_data.response_audio
- if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
- assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+ log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
+ _log_messages(messages)

- else:
- self._process_model_response(
- messages=messages,
- assistant_message=assistant_message,
- model_response=model_response,
- response_format=response_format,
- tools=tools,
- tool_choice=tool_choice or self._tool_choice,
- )
- yield model_response
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}

- # Add assistant message to messages
- messages.append(assistant_message)
- assistant_message.log(metrics=True)
+ _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+ _compression_manager = compression_manager if _compress_tool_results else None

- # Handle tool calls if present
- if assistant_message.tool_calls is not None:
- # Prepare function calls
- function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
- assistant_message, messages, functions
- )
- function_call_results: List[Message] = []
-
- # Execute function calls
- for function_call_response in self.run_function_calls(
- function_calls=function_calls_to_run,
- function_call_results=function_call_results,
- current_function_call_count=function_call_count,
- function_call_limit=tool_call_limit,
- ):
- yield function_call_response
+ function_call_count = 0

- # Add a function call for each successful execution
- function_call_count += len(function_call_results)
+ while True:
+ # Compress existing tool results BEFORE invoke
+ if _compression_manager is not None and _compression_manager.should_compress(
+ messages, tools, model=self, response_format=response_format
+ ):
+ _compression_manager.compress(messages)
+
+ assistant_message = Message(role=self.assistant_message_role)
+ # Create assistant message and stream data
+ stream_data = MessageData()
+ model_response = ModelResponse()
+ if stream_model_response:
+ # Generate response
+ for response in self.process_response_stream(
+ messages=messages,
+ assistant_message=assistant_message,
+ stream_data=stream_data,
+ response_format=response_format,
+ tools=_tool_dicts,
+ tool_choice=tool_choice or self._tool_choice,
+ run_response=run_response,
+ compress_tool_results=_compress_tool_results,
+ ):
+ if self.cache_response and isinstance(response, ModelResponse):
+ streaming_responses.append(response)
+ yield response
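Note: `response_stream` now accepts an optional CompressionManager and, at the top of every iteration of the tool-call loop, asks it `should_compress(messages, tools, model=..., response_format=...)` and then mutates the message list in place with `compress(messages)` before the next provider call (the async path later in this diff uses `ashould_compress` / `acompress`). A duck-typed sketch honoring that contract; the size heuristic and truncation strategy are assumptions, not agno's actual manager from agno/compression/manager.py:

```python
class TinyCompressionManager:
    """Minimal stand-in for the interface used above."""
    compress_tool_results = True

    def __init__(self, max_chars: int = 8_000, keep_chars: int = 500):
        self.max_chars = max_chars
        self.keep_chars = keep_chars

    def should_compress(self, messages, tools=None, model=None, response_format=None) -> bool:
        # Crude character budget; the real manager presumably counts tokens.
        return sum(len(str(getattr(m, "content", "") or "")) for m in messages) > self.max_chars

    def compress(self, messages) -> None:
        # Truncate older tool results in place; the newest message is preserved.
        for m in messages[:-1]:
            content = str(getattr(m, "content", "") or "")
            if getattr(m, "role", None) == "tool" and len(content) > self.keep_chars:
                m.content = content[: self.keep_chars] + " ...[compressed]"
```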
 
- # Format and add results to messages
- if stream_data and stream_data.extra is not None:
- self.format_function_call_results(
- messages=messages, function_call_results=function_call_results, **stream_data.extra
+ else:
+ self._process_model_response(
+ messages=messages,
+ assistant_message=assistant_message,
+ model_response=model_response,
+ response_format=response_format,
+ tools=_tool_dicts,
+ tool_choice=tool_choice or self._tool_choice,
+ run_response=run_response,
+ compress_tool_results=_compress_tool_results,
  )
- elif model_response and model_response.extra is not None:
- self.format_function_call_results(
- messages=messages, function_call_results=function_call_results, **model_response.extra
+ if self.cache_response:
+ streaming_responses.append(model_response)
+ yield model_response
+
+ # Add assistant message to messages
+ messages.append(assistant_message)
+ assistant_message.log(metrics=True)
+
+ # Handle tool calls if present
+ if assistant_message.tool_calls is not None:
+ # Prepare function calls
+ function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+ assistant_message=assistant_message, messages=messages, functions=_functions
  )
- else:
- self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+ function_call_results: List[Message] = []

- # Handle function call media
- if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
- self._handle_function_call_media(
- messages=messages,
+ # Execute function calls
+ for function_call_response in self.run_function_calls(
+ function_calls=function_calls_to_run,
  function_call_results=function_call_results,
- send_media_to_model=send_media_to_model,
- )
+ current_function_call_count=function_call_count,
+ function_call_limit=tool_call_limit,
+ ):
+ if self.cache_response and isinstance(function_call_response, ModelResponse):
+ streaming_responses.append(function_call_response)
+ yield function_call_response
+
+ # Add a function call for each successful execution
+ function_call_count += len(function_call_results)
+
+ # Format and add results to messages
+ if stream_data and stream_data.extra is not None:
+ self.format_function_call_results(
+ messages=messages,
+ function_call_results=function_call_results,
+ compress_tool_results=_compress_tool_results,
+ **stream_data.extra,
+ )
+ elif model_response and model_response.extra is not None:
+ self.format_function_call_results(
+ messages=messages,
+ function_call_results=function_call_results,
+ compress_tool_results=_compress_tool_results,
+ **model_response.extra,
+ )
+ else:
+ self.format_function_call_results(
+ messages=messages,
+ function_call_results=function_call_results,
+ compress_tool_results=_compress_tool_results,
+ )

- for function_call_result in function_call_results:
- function_call_result.log(metrics=True)
+ # Handle function call media
+ if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+ self._handle_function_call_media(
+ messages=messages,
+ function_call_results=function_call_results,
+ send_media_to_model=send_media_to_model,
+ )

- # Check if we should stop after tool calls
- if any(m.stop_after_tool_call for m in function_call_results):
- break
+ for function_call_result in function_call_results:
+ function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)

- # If we have any tool calls that require confirmation, break the loop
- if any(fc.function.requires_confirmation for fc in function_calls_to_run):
- break
+ # Check if we should stop after tool calls
+ if any(m.stop_after_tool_call for m in function_call_results):
+ break

- # If we have any tool calls that require external execution, break the loop
- if any(fc.function.external_execution for fc in function_calls_to_run):
- break
+ # If we have any tool calls that require confirmation, break the loop
+ if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+ break

- # If we have any tool calls that require user input, break the loop
- if any(fc.function.requires_user_input for fc in function_calls_to_run):
- break
+ # If we have any tool calls that require external execution, break the loop
+ if any(fc.function.external_execution for fc in function_calls_to_run):
+ break

- # Continue loop to get next response
- continue
+ # If we have any tool calls that require user input, break the loop
+ if any(fc.function.requires_user_input for fc in function_calls_to_run):
+ break

- # No tool calls or finished processing them
- break
+ # Continue loop to get next response
+ continue

- log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+ # No tool calls or finished processing them
+ break
+
+ log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+
+ # Save streaming responses to cache if enabled
+ if self.cache_response and cache_key and streaming_responses:
+ self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+ finally:
+ # Close the Gemini client
+ if self.__class__.__name__ == "Gemini" and self.client is not None:
+ try:
+ self.client.close() # type: ignore
+ self.client = None
+ except AttributeError:
+ log_warning(
+ "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+ " please upgrade Gemini to the latest version: pip install -U google-genai"
+ )

  async def aprocess_response_stream(
  self,
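Note: a stream cannot be cached as a single object, so every ModelResponse chunk yielded during the run is appended to `streaming_responses` and persisted once the loop ends; on a later hit, `_streaming_responses_from_cache` replays the stored chunks in order. The record/replay shape in isolation (storage format assumed):

```python
from typing import Callable, Dict, Iterator, List

_stream_cache: Dict[str, List[str]] = {}  # hypothetical chunk store

def stream_with_cache(key: str, run_stream: Callable[[], Iterator[str]]) -> Iterator[str]:
    if key in _stream_cache:
        yield from _stream_cache[key]  # replay recorded chunks, no provider call
        return
    recorded: List[str] = []
    for chunk in run_stream():
        recorded.append(chunk)  # record while streaming to the caller
        yield chunk
    _stream_cache[key] = recorded  # persist only after the stream completed
```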
@@ -834,175 +1369,264 @@ class Model(ABC):
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+ compress_tool_results: bool = False,
  ) -> AsyncIterator[ModelResponse]:
  """
- Process a streaming response from the model.
+ Process a streaming response from the model with retry logic for ModelProviderError.
  """
- async for response_delta in self.ainvoke_stream(
+ async for response_delta in self._ainvoke_stream_with_retry(
  messages=messages,
  assistant_message=assistant_message,
  response_format=response_format,
  tools=tools,
  tool_choice=tool_choice or self._tool_choice,
  run_response=run_response,
- ): # type: ignore
- for model_response in self._populate_stream_data_and_assistant_message(
+ compress_tool_results=compress_tool_results,
+ ):
+ for model_response_delta in self._populate_stream_data(
  stream_data=stream_data,
- assistant_message=assistant_message,
  model_response_delta=response_delta,
  ):
- yield model_response
+ yield model_response_delta

- # Populate the assistant message
- self._populate_assistant_message(assistant_message=assistant_message, provider_response=model_response)
+ # Populate assistant message from stream data after the stream ends
+ self._populate_assistant_message_from_stream_data(assistant_message=assistant_message, stream_data=stream_data)

  async def aresponse_stream(
  self,
  messages: List[Message],
  response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
- tools: Optional[List[Dict[str, Any]]] = None,
- functions: Optional[Dict[str, Function]] = None,
+ tools: Optional[List[Union[Function, dict]]] = None,
  tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
  tool_call_limit: Optional[int] = None,
  stream_model_response: bool = True,
- run_response: Optional[RunOutput] = None,
+ run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
  send_media_to_model: bool = True,
+ compression_manager: Optional["CompressionManager"] = None,
  ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
  """
  Generate an asynchronous streaming response from the model.
  """
+ try:
+ # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+ cache_key = None
+ if self.cache_response:
+ cache_key = self._get_model_cache_key(
+ messages, stream=True, response_format=response_format, tools=tools
+ )
+ cached_data = self._get_cached_model_response(cache_key)

- log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
- log_debug(f"Model: {self.id}", center=True, symbol="-")
- _log_messages(messages)
+ if cached_data:
+ log_info("Cache hit for async streaming model response")
+ # Yield cached responses
+ for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+ yield response
+ return

- function_call_count = 0
+ log_info("Cache miss for async streaming model response")

- while True:
- # Create assistant message and stream data
- assistant_message = Message(role=self.assistant_message_role)
- stream_data = MessageData()
- model_response = ModelResponse()
- if stream_model_response:
- # Generate response
- async for model_response in self.aprocess_response_stream(
- messages=messages,
- assistant_message=assistant_message,
- stream_data=stream_data,
- response_format=response_format,
- tools=tools,
- tool_choice=tool_choice or self._tool_choice,
- run_response=run_response,
- ):
- yield model_response
+ # Track streaming responses for caching
+ streaming_responses: List[ModelResponse] = []

- # Populate assistant message from stream data
- if stream_data.response_content:
- assistant_message.content = stream_data.response_content
- if stream_data.response_reasoning_content:
- assistant_message.reasoning_content = stream_data.response_reasoning_content
- if stream_data.response_redacted_reasoning_content:
- assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
- if stream_data.response_provider_data:
- assistant_message.provider_data = stream_data.response_provider_data
- if stream_data.response_audio:
- assistant_message.audio_output = stream_data.response_audio
- if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
- assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+ log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
+ _log_messages(messages)

- else:
- await self._aprocess_model_response(
- messages=messages,
- assistant_message=assistant_message,
- model_response=model_response,
- response_format=response_format,
- tools=tools,
- tool_choice=tool_choice or self._tool_choice,
- run_response=run_response,
- )
- yield model_response
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}

- # Add assistant message to messages
- messages.append(assistant_message)
- assistant_message.log(metrics=True)
+ _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+ _compression_manager = compression_manager if _compress_tool_results else None

- # Handle tool calls if present
- if assistant_message.tool_calls is not None:
- # Prepare function calls
- function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
- assistant_message, messages, functions
- )
- function_call_results: List[Message] = []
-
- # Execute function calls
- async for function_call_response in self.arun_function_calls(
- function_calls=function_calls_to_run,
- function_call_results=function_call_results,
- current_function_call_count=function_call_count,
- function_call_limit=tool_call_limit,
- ):
- yield function_call_response
+ function_call_count = 0

- # Add a function call for each successful execution
- function_call_count += len(function_call_results)
+ while True:
+ # Compress existing tool results BEFORE making API call to avoid context overflow
+ if _compression_manager is not None and await _compression_manager.ashould_compress(
+ messages, tools, model=self, response_format=response_format
+ ):
+ await _compression_manager.acompress(messages)
+
+ # Create assistant message and stream data
+ assistant_message = Message(role=self.assistant_message_role)
+ stream_data = MessageData()
+ model_response = ModelResponse()
+ if stream_model_response:
+ # Generate response
+ async for model_response in self.aprocess_response_stream(
+ messages=messages,
+ assistant_message=assistant_message,
+ stream_data=stream_data,
+ response_format=response_format,
+ tools=_tool_dicts,
+ tool_choice=tool_choice or self._tool_choice,
+ run_response=run_response,
+ compress_tool_results=_compress_tool_results,
+ ):
+ if self.cache_response and isinstance(model_response, ModelResponse):
+ streaming_responses.append(model_response)
+ yield model_response

- # Format and add results to messages
- if stream_data and stream_data.extra is not None:
- self.format_function_call_results(
- messages=messages, function_call_results=function_call_results, **stream_data.extra
+ else:
+ await self._aprocess_model_response(
+ messages=messages,
+ assistant_message=assistant_message,
+ model_response=model_response,
+ response_format=response_format,
+ tools=_tool_dicts,
+ tool_choice=tool_choice or self._tool_choice,
+ run_response=run_response,
+ compress_tool_results=_compress_tool_results,
  )
- elif model_response and model_response.extra is not None:
- self.format_function_call_results(
- messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+ if self.cache_response:
+ streaming_responses.append(model_response)
+ yield model_response
+
+ # Add assistant message to messages
+ messages.append(assistant_message)
+ assistant_message.log(metrics=True)
+
+ # Handle tool calls if present
+ if assistant_message.tool_calls is not None:
+ # Prepare function calls
+ function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+ assistant_message=assistant_message, messages=messages, functions=_functions
  )
- else:
- self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+ function_call_results: List[Message] = []

- # Handle function call media
- if any(msg.images or msg.videos or msg.audio for msg in function_call_results):
- self._handle_function_call_media(
- messages=messages,
+ # Execute function calls
+ async for function_call_response in self.arun_function_calls(
+ function_calls=function_calls_to_run,
  function_call_results=function_call_results,
- send_media_to_model=send_media_to_model,
- )
+ current_function_call_count=function_call_count,
+ function_call_limit=tool_call_limit,
+ ):
+ if self.cache_response and isinstance(function_call_response, ModelResponse):
+ streaming_responses.append(function_call_response)
+ yield function_call_response
+
+ # Add a function call for each successful execution
+ function_call_count += len(function_call_results)
+
+ # Format and add results to messages
+ if stream_data and stream_data.extra is not None:
+ self.format_function_call_results(
+ messages=messages,
+ function_call_results=function_call_results,
+ compress_tool_results=_compress_tool_results,
+ **stream_data.extra,
+ )
+ elif model_response and model_response.extra is not None:
+ self.format_function_call_results(
+ messages=messages,
+ function_call_results=function_call_results,
+ compress_tool_results=_compress_tool_results,
+ **model_response.extra or {},
+ )
+ else:
+ self.format_function_call_results(
+ messages=messages,
+ function_call_results=function_call_results,
+ compress_tool_results=_compress_tool_results,
+ )

- for function_call_result in function_call_results:
- function_call_result.log(metrics=True)
+ # Handle function call media
+ if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+ self._handle_function_call_media(
+ messages=messages,
+ function_call_results=function_call_results,
+ send_media_to_model=send_media_to_model,
+ )

- # Check if we should stop after tool calls
- if any(m.stop_after_tool_call for m in function_call_results):
- break
+ for function_call_result in function_call_results:
+ function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)

- # If we have any tool calls that require confirmation, break the loop
- if any(fc.function.requires_confirmation for fc in function_calls_to_run):
- break
+ # Check if we should stop after tool calls
+ if any(m.stop_after_tool_call for m in function_call_results):
+ break

- # If we have any tool calls that require external execution, break the loop
- if any(fc.function.external_execution for fc in function_calls_to_run):
- break
+ # If we have any tool calls that require confirmation, break the loop
+ if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+ break

- # If we have any tool calls that require user input, break the loop
- if any(fc.function.requires_user_input for fc in function_calls_to_run):
- break
+ # If we have any tool calls that require external execution, break the loop
+ if any(fc.function.external_execution for fc in function_calls_to_run):
+ break

- # Continue loop to get next response
- continue
+ # If we have any tool calls that require user input, break the loop
+ if any(fc.function.requires_user_input for fc in function_calls_to_run):
+ break
+
+ # Continue loop to get next response
+ continue
+
+ # No tool calls or finished processing them
+ break
+
+ log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")

- # No tool calls or finished processing them
- break
+ # Save streaming responses to cache if enabled
+ if self.cache_response and cache_key and streaming_responses:
+ self._save_streaming_responses_to_cache(cache_key, streaming_responses)

- log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
+ finally:
+ # Close the Gemini client
+ if self.__class__.__name__ == "Gemini" and self.client is not None:
+ try:
+ await self.client.aio.aclose() # type: ignore
+ self.client = None
+ except AttributeError:
+ log_warning(
+ "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+ " please upgrade Gemini to the latest version: pip install -U google-genai"
+ )

- def _populate_stream_data_and_assistant_message(
- self, stream_data: MessageData, assistant_message: Message, model_response_delta: ModelResponse
+ def _populate_assistant_message_from_stream_data(
+ self, assistant_message: Message, stream_data: MessageData
+ ) -> None:
+ """
+ Populate an assistant message with the stream data.
+ """
+ if stream_data.response_role is not None:
+ assistant_message.role = stream_data.response_role
+ if stream_data.response_metrics is not None:
+ assistant_message.metrics = stream_data.response_metrics
+ if stream_data.response_content:
+ assistant_message.content = stream_data.response_content
+ if stream_data.response_reasoning_content:
+ assistant_message.reasoning_content = stream_data.response_reasoning_content
+ if stream_data.response_redacted_reasoning_content:
+ assistant_message.redacted_reasoning_content = stream_data.response_redacted_reasoning_content
+ if stream_data.response_provider_data:
+ assistant_message.provider_data = stream_data.response_provider_data
+ if stream_data.response_citations:
+ assistant_message.citations = stream_data.response_citations
+ if stream_data.response_audio:
+ assistant_message.audio_output = stream_data.response_audio
+ if stream_data.response_image:
+ assistant_message.image_output = stream_data.response_image
+ if stream_data.response_video:
+ assistant_message.video_output = stream_data.response_video
+ if stream_data.response_file:
+ assistant_message.file_output = stream_data.response_file
+ if stream_data.response_tool_calls and len(stream_data.response_tool_calls) > 0:
+ assistant_message.tool_calls = self.parse_tool_calls(stream_data.response_tool_calls)
+
+ def _populate_stream_data(
+ self, stream_data: MessageData, model_response_delta: ModelResponse
  ) -> Iterator[ModelResponse]:
  """Update the stream data and assistant message with the model response."""
- # Add role to assistant message
- if model_response_delta.role is not None:
- assistant_message.role = model_response_delta.role

  should_yield = False
+ if model_response_delta.role is not None:
+ stream_data.response_role = model_response_delta.role # type: ignore
+
+ if model_response_delta.response_usage is not None:
+ if stream_data.response_metrics is None:
+ stream_data.response_metrics = Metrics()
+ stream_data.response_metrics += model_response_delta.response_usage
+
  # Update stream_data content
  if model_response_delta.content is not None:
  stream_data.response_content += model_response_delta.content
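Note: `_populate_stream_data` now folds role and usage deltas into MessageData as well, so the assistant message is assembled exactly once by `_populate_assistant_message_from_stream_data` after the stream ends, rather than being mutated on every delta. The accumulate-then-populate pattern in a toy form (field names mirror the diff; the delta type is simplified):

```python
from dataclasses import dataclass

@dataclass
class Delta:
    content: str = ""
    output_tokens: int = 0

@dataclass
class StreamAccumulator:
    content: str = ""
    output_tokens: int = 0

    def add(self, d: Delta) -> None:
        self.content += d.content              # concatenate text deltas
        self.output_tokens += d.output_tokens  # sum usage deltas

acc = StreamAccumulator()
for d in (Delta("Hel", 1), Delta("lo", 1)):
    acc.add(d)
assert acc.content == "Hello" and acc.output_tokens == 2
```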
@@ -1147,12 +1771,14 @@ class Model(ABC):
  images = None
  videos = None
  audios = None
+ files = None

  if success and function_execution_result:
  # With unified classes, no conversion needed - use directly
  images = function_execution_result.images
  videos = function_execution_result.videos
  audios = function_execution_result.audios
+ files = function_execution_result.files

  return Message(
  role=self.tool_message_role,
@@ -1165,6 +1791,7 @@ class Model(ABC):
  images=images,
  videos=videos,
  audio=audios,
+ files=files,
  **kwargs, # type: ignore
  )

@@ -1202,11 +1829,15 @@ class Model(ABC):

  # Run function calls sequentially
  function_execution_result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
+ stop_after_tool_call_from_exception = False
  try:
  function_execution_result = function_call.execute()
  except AgentRunException as a_exc:
  # Update additional messages from function call
  _handle_agent_exception(a_exc, additional_input)
+ # If stop_execution is True, mark that we should stop after this tool call
+ if a_exc.stop_execution:
+ stop_after_tool_call_from_exception = True
  # Set function call success to False if an exception occurred
  except Exception as e:
  log_error(f"Error executing function {function_call.function.name}: {e}")
@@ -1221,32 +1852,60 @@ class Model(ABC):
  function_call_output: str = ""

  if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
- for item in function_execution_result.result:
- # This function yields agent/team run events
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
- item, tuple(get_args(TeamRunOutputEvent))
- ):
- # We only capture content events
- if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
- if item.content is not None and isinstance(item.content, BaseModel):
- function_call_output += item.content.model_dump_json()
- else:
- # Capture output
- function_call_output += item.content or ""
+ try:
+ for item in function_execution_result.result:
+ # This function yields agent/team/workflow run events
+ if (
+ isinstance(item, tuple(get_args(RunOutputEvent)))
+ or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
+ or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
+ ):
+ # We only capture content events for output accumulation
+ if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+ if item.content is not None and isinstance(item.content, BaseModel):
+ function_call_output += item.content.model_dump_json()
+ else:
+ # Capture output
+ function_call_output += item.content or ""

- if function_call.function.show_result:
- yield ModelResponse(content=item.content)
+ if function_call.function.show_result and item.content is not None:
+ yield ModelResponse(content=item.content)

  if isinstance(item, CustomEvent):
  function_call_output += str(item)

- # Yield the event itself to bubble it up
- yield item
+ # For WorkflowCompletedEvent, extract content for final output
+ from agno.run.workflow import WorkflowCompletedEvent

- else:
- function_call_output += str(item)
- if function_call.function.show_result:
- yield ModelResponse(content=str(item))
+ if isinstance(item, WorkflowCompletedEvent):
+ if item.content is not None:
+ if isinstance(item.content, BaseModel):
+ function_call_output += item.content.model_dump_json()
+ else:
+ function_call_output += str(item.content)
+
+ # Yield the event itself to bubble it up
+ yield item
+
+ else:
+ function_call_output += str(item)
+ if function_call.function.show_result and item is not None:
+ yield ModelResponse(content=str(item))
+ except Exception as e:
+ log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+ function_call.error = str(e)
+ function_call_success = False
+
+ # For generators, re-capture updated_session_state after consumption
+ # since session_state modifications were made during iteration
+ if function_execution_result.updated_session_state is None:
+ if (
+ function_call.function._run_context is not None
+ and function_call.function._run_context.session_state is not None
+ ):
+ function_execution_result.updated_session_state = function_call.function._run_context.session_state
+ elif function_call.function._session_state is not None:
+ function_execution_result.updated_session_state = function_call.function._session_state
  else:
  from agno.tools.function import ToolResult
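Note: a generator tool's body only runs while it is iterated, so any session_state mutations it makes are invisible at the moment `execute()` returns; the hunk above therefore re-reads `_run_context.session_state` (or `_session_state`) only after the generator has been fully consumed. The timing issue in miniature:

```python
def make_tool(state: dict):
    def tool():
        state["calls"] = state.get("calls", 0) + 1  # side effect runs lazily
        yield "result"
    return tool

state: dict = {}
gen = make_tool(state)()    # creating the generator executes nothing
assert "calls" not in state
output = list(gen)          # side effects happen here, during consumption
assert state["calls"] == 1  # so state must be re-read after iteration
```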
 
@@ -1267,7 +1926,7 @@ class Model(ABC):
  else:
  function_call_output = str(function_execution_result.result) if function_execution_result.result else ""

- if function_call.function.show_result:
+ if function_call.function.show_result and function_call_output is not None:
  yield ModelResponse(content=function_call_output)

  # Create and yield function call result
@@ -1278,6 +1937,9 @@ class Model(ABC):
  timer=function_call_timer,
  function_execution_result=function_execution_result,
  )
+ # Override stop_after_tool_call if set by exception
+ if stop_after_tool_call_from_exception:
+ function_call_result.stop_after_tool_call = True
  yield ModelResponse(
  content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
  tool_executions=[
@@ -1325,7 +1987,7 @@ class Model(ABC):

  paused_tool_executions = []

- # The function cannot be executed without user confirmation
+ # The function requires user confirmation (HITL)
  if fc.function.requires_confirmation:
  paused_tool_executions.append(
  ToolExecution(
@@ -1335,7 +1997,8 @@ class Model(ABC):
  requires_confirmation=True,
  )
  )
- # If the function requires user input, we yield a message to the user
+
+ # The function requires user input (HITL)
  if fc.function.requires_user_input:
  user_input_schema = fc.function.user_input_schema
  if fc.arguments and user_input_schema:
@@ -1353,7 +2016,8 @@ class Model(ABC):
  user_input_schema=user_input_schema,
  )
  )
- # If the function is from the user control flow tools, we handle it here
+
+ # If the function is from the user control flow (HITL) tools, we handle it here
  if fc.function.name == "get_user_input" and fc.arguments and fc.arguments.get("user_input_fields"):
  user_input_schema = []
  for input_field in fc.arguments.get("user_input_fields", []):
@@ -1379,7 +2043,8 @@ class Model(ABC):
  user_input_schema=user_input_schema,
  )
  )
- # If the function requires external execution, we yield a message to the user
+
+ # The function requires external execution (HITL)
  if fc.function.external_execution:
  paused_tool_executions.append(
  ToolExecution(
@@ -1416,6 +2081,7 @@ class Model(ABC):
  function_call_timer = Timer()
  function_call_timer.start()
  success: Union[bool, AgentRunException] = False
+ result: FunctionExecutionResult = FunctionExecutionResult(status="failure")

  try:
  if (
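Note: the reworded comments group three human-in-the-loop (HITL) pause reasons, confirmation, user input, and external execution, each of which appends a ToolExecution entry instead of running the function, after which the loops above break so the caller can resolve the pause. A hypothetical caller-side driver; the resume API below (`is_paused`, `tool_executions`, `confirmed`, `continue_run`) is illustrative and not part of this diff:

```python
def drive(agent, prompt: str):
    run_output = agent.run(prompt)
    while getattr(run_output, "is_paused", False):
        for te in getattr(run_output, "tool_executions", None) or []:
            if te.requires_confirmation:
                # A real app would show te.tool_name / te.tool_args to the user.
                te.confirmed = input(f"Run {te.tool_name}? [y/N] ").lower() == "y"
        run_output = agent.continue_run(run_output)  # assumed continuation entry point
    return run_output
```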
@@ -1610,9 +2276,12 @@ class Model(ABC):

  try:
  async for item in function_call.result:
- # This function yields agent/team run events
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
- item, tuple(get_args(TeamRunOutputEvent))
+ # This function yields agent/team/workflow run events
+ if isinstance(
+ item,
+ tuple(get_args(RunOutputEvent))
+ + tuple(get_args(TeamRunOutputEvent))
+ + tuple(get_args(WorkflowRunOutputEvent)),
  ):
  # We only capture content events
  if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
@@ -1622,12 +2291,22 @@ class Model(ABC):
  # Capture output
  function_call_output += item.content or ""

- if function_call.function.show_result:
+ if function_call.function.show_result and item.content is not None:
  await event_queue.put(ModelResponse(content=item.content))
  continue

- if isinstance(item, CustomEvent):
- function_call_output += str(item)
+ if isinstance(item, CustomEvent):
+ function_call_output += str(item)
+
+ # For WorkflowCompletedEvent, extract content for final output
+ from agno.run.workflow import WorkflowCompletedEvent
+
+ if isinstance(item, WorkflowCompletedEvent):
+ if item.content is not None:
+ if isinstance(item.content, BaseModel):
+ function_call_output += item.content.model_dump_json()
+ else:
+ function_call_output += str(item.content)

  # Put the event into the queue to be yielded
  await event_queue.put(item)
@@ -1635,7 +2314,7 @@ class Model(ABC):
  # Yield custom events emitted by the tool
  else:
  function_call_output += str(item)
- if function_call.function.show_result:
+ if function_call.function.show_result and item is not None:
  await event_queue.put(ModelResponse(content=str(item)))

  # Store the final output for this generator
@@ -1703,10 +2382,14 @@ class Model(ABC):
  updated_session_state = function_execution_result.updated_session_state

  # Handle AgentRunException
+ stop_after_tool_call_from_exception = False
  if isinstance(function_call_success, AgentRunException):
  a_exc = function_call_success
  # Update additional messages from function call
  _handle_agent_exception(a_exc, additional_input)
+ # If stop_execution is True, mark that we should stop after this tool call
+ if a_exc.stop_execution:
+ stop_after_tool_call_from_exception = True
  # Set function call success to False if an exception occurred
  function_call_success = False

@@ -1718,30 +2401,60 @@ class Model(ABC):
  function_call_output = async_function_call_output
  # Events from async generators were already yielded in real-time above
  elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
- for item in function_call.result:
- # This function yields agent/team run events
- if isinstance(item, tuple(get_args(RunOutputEvent))) or isinstance(
- item, tuple(get_args(TeamRunOutputEvent))
+ try:
+ for item in function_call.result:
+ # This function yields agent/team/workflow run events
+ if isinstance(
+ item,
+ tuple(get_args(RunOutputEvent))
+ + tuple(get_args(TeamRunOutputEvent))
+ + tuple(get_args(WorkflowRunOutputEvent)),
+ ):
+ # We only capture content events
+ if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+ if item.content is not None and isinstance(item.content, BaseModel):
+ function_call_output += item.content.model_dump_json()
+ else:
+ # Capture output
+ function_call_output += item.content or ""
+
+ if function_call.function.show_result and item.content is not None:
+ yield ModelResponse(content=item.content)
+ continue
+
+ # Yield the event itself to bubble it up
+ yield item
+ else:
+ function_call_output += str(item)
+ if function_call.function.show_result and item is not None:
+ yield ModelResponse(content=str(item))
+ except Exception as e:
+ log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+ function_call.error = str(e)
+ function_call_success = False
+
+ # For generators (sync or async), re-capture updated_session_state after consumption
+ # since session_state modifications were made during iteration
+ if async_function_call_output is not None or isinstance(
+ function_call.result,
+ (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
+ ):
+ if updated_session_state is None:
+ if (
+ function_call.function._run_context is not None
+ and function_call.function._run_context.session_state is not None
  ):
- # We only capture content events
- if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
- if item.content is not None and isinstance(item.content, BaseModel):
- function_call_output += item.content.model_dump_json()
- else:
- # Capture output
- function_call_output += item.content or ""
-
- if function_call.function.show_result:
- yield ModelResponse(content=item.content)
- continue
-
- # Yield the event itself to bubble it up
- yield item
- else:
- function_call_output += str(item)
- if function_call.function.show_result:
- yield ModelResponse(content=str(item))
- else:
+ updated_session_state = function_call.function._run_context.session_state
+ elif function_call.function._session_state is not None:
+ updated_session_state = function_call.function._session_state
+
+ if not (
+ async_function_call_output is not None
+ or isinstance(
+ function_call.result,
+ (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
+ )
+ ):
  from agno.tools.function import ToolResult

  if isinstance(function_execution_result.result, ToolResult):
@@ -1759,7 +2472,7 @@ class Model(ABC):
  else:
  function_call_output = str(function_call.result)

- if function_call.function.show_result:
+ if function_call.function.show_result and function_call_output is not None:
  yield ModelResponse(content=function_call_output)

  # Create and yield function call result
@@ -1770,6 +2483,9 @@ class Model(ABC):
  timer=function_call_timer,
  function_execution_result=function_execution_result,
  )
+ # Override stop_after_tool_call if set by exception
+ if stop_after_tool_call_from_exception:
+ function_call_result.stop_after_tool_call = True
  yield ModelResponse(
  content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
  tool_executions=[
@@ -1814,12 +2530,16 @@ class Model(ABC):
  model_response.tool_calls = []

  function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
- assistant_message, messages, functions
+ assistant_message=assistant_message, messages=messages, functions=functions
  )
  return function_calls_to_run

  def format_function_call_results(
- self, messages: List[Message], function_call_results: List[Message], **kwargs
+ self,
+ messages: List[Message],
+ function_call_results: List[Message],
+ compress_tool_results: bool = False,
+ **kwargs,
  ) -> None:
  """
  Format function call results.
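Note: `format_function_call_results` gains an explicit `compress_tool_results` flag ahead of `**kwargs`, so provider subclasses overriding it must accept (and can act on) the flag. A sketch of a conforming override; the subclass and the `compressed_content` field are illustrative, the latter inferred from the `use_compressed_content` logging flag elsewhere in this diff:

```python
from typing import List

class MyProviderModel:  # illustrative; real providers live under agno/models/
    def format_function_call_results(
        self,
        messages: List["Message"],
        function_call_results: List["Message"],
        compress_tool_results: bool = False,
        **kwargs,
    ) -> None:
        for result in function_call_results:
            if compress_tool_results and getattr(result, "compressed_content", None):
                result.content = result.compressed_content  # assumed field
            messages.append(result)
```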
@@ -1896,10 +2616,14 @@ class Model(ABC):
  new_model = cls.__new__(cls)
  memo[id(self)] = new_model

- # Deep copy all attributes
+ # Deep copy all attributes except client objects
  for k, v in self.__dict__.items():
  if k in {"response_format", "_tools", "_functions"}:
  continue
+ # Skip client objects
+ if k in {"client", "async_client", "http_client", "mistral_client", "model_client"}:
+ setattr(new_model, k, None)
+ continue
  try:
  setattr(new_model, k, deepcopy(v, memo))
  except Exception:
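Note: `__deepcopy__` now nulls out provider client attributes instead of copying them; SDK clients wrap sockets, locks, and auth sessions that `copy.deepcopy` cannot safely duplicate, and the copy presumably re-creates its client lazily on first use (assumed, not shown here). The skip-and-null pattern in miniature:

```python
from copy import deepcopy

class ModelLike:
    def __init__(self, api_key: str):
        self.api_key = api_key
        self.client = object()  # stands in for an SDK client holding sockets/locks

    def __deepcopy__(self, memo):
        new = self.__class__.__new__(self.__class__)
        memo[id(self)] = new
        for k, v in self.__dict__.items():
            if k == "client":
                setattr(new, k, None)  # never copy live clients
                continue
            setattr(new, k, deepcopy(v, memo))
        return new

copy_of_model = deepcopy(ModelLike("sk-test"))
assert copy_of_model.client is None and copy_of_model.api_key == "sk-test"
```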
  except Exception: